This source file includes the following definitions.
- get_coord
- get_value_as
- read_from_stdin
- read_stdin
- bad_type_error
- dump
- observe_load
- observe_store
- observe_load_or_store
- report
- composite
- draw_text
- usage
- expect
- fill_realization
- run
- main
#include <stdio.h>
#include <stdint.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <vector>
#include <array>
#include <string>
#include <queue>
#include <iostream>
#include <algorithm>
#ifdef _MSC_VER
#include <io.h>
typedef int64_t ssize_t;
#include <unistd.h>
#include <string.h>
#include "inconsolata.h"
#include "HalideRuntime.h"
namespace {
using std::map;
using std::vector;
using std::string;
using std::queue;
using std::array;
using std::pair;
struct Packet : public halide_trace_packet_t {
uint8_t payload[4096];
int get_coord(int idx) const {
return coordinates()[idx];
template<typename T>
T get_value_as(int idx) const {
switch (type.code) {
case halide_type_int:
switch (type.bits) {
case 8:
return (T)(((const int8_t *)value())[idx]);
case 16:
return (T)(((const int16_t *)value())[idx]);
case 32:
return (T)(((const int32_t *)value())[idx]);
case 64:
return (T)(((const int64_t *)value())[idx]);
case halide_type_uint:
switch (type.bits) {
case 8:
return (T)(((const uint8_t *)value())[idx]);
case 16:
return (T)(((const uint16_t *)value())[idx]);
case 32:
return (T)(((const uint32_t *)value())[idx]);
case 64:
return (T)(((const uint64_t *)value())[idx]);
case halide_type_float:
switch (type.bits) {
case 32:
return (T)(((const float *)value())[idx]);
case 64:
return (T)(((const double *)value())[idx]);
return (T)0;
bool read_from_stdin() {
uint32_t header_size = (uint32_t)sizeof(halide_trace_packet_t);
if (!read_stdin(this, header_size)) {
return false;
uint32_t payload_size = size - header_size;
if (payload_size > (uint32_t)sizeof(payload)) {
fprintf(stderr, "Payload larger than %d bytes in trace stream (%d)\n", (int)sizeof(payload), (int)payload_size);
return false;
if (!read_stdin(payload, payload_size)) {
fprintf(stderr, "Unexpected EOF mid-packet");
return false;
return true;
bool read_stdin(void *d, ssize_t size) {
uint8_t *dst = (uint8_t *)d;
if (!size) return true;
for (;;) {
ssize_t s = read(0, dst, size);
if (s == 0) {
return false;
} else if (s < 0) {
perror("Failed during read");
return 0;
} else if (s == size) {
return true;
size -= s;
dst += s;
void bad_type_error() const {
fprintf(stderr, "Can't visualize packet with type: %d bits: %d\n", type.code, type.bits);
// A text label attached to a Func. run() builds one from the --label
// arguments and the current drawing position, and later passes text/x/y to
// draw_text(); n is the number of frames over which the label fades in.
// NOTE(review): the closing brace of this struct is not visible in this view.
struct Label {
// Text to draw (run() stores the argv pointer directly).
const char *text;
// x, y: position handed to draw_text(); n: fade-in length in frames.
int x, y, n;
// Everything tracked for one Func: how to draw it (config) and what was
// observed about it while replaying the trace (stats).
struct FuncInfo {
// Set by run() when the Func is named with --func; run() skips the drawing
// of packets whose FuncInfo is not configured.
bool configured = false;
struct Config {
int zoom = 0;
int load_cost = 0;
int store_cost = 0;
int dims = 0;
int x, y = 0;
int x_stride[16];
int y_stride[16];
int color_dim = 0;
float min = 0.0f, max = 0.0f;
vector<Label> labels;
bool blank_on_end_realization = false;
uint32_t uninitialized_memory_color = 0xff000000;
void dump(const char *name) {
"Func %s:\n"
" min: %f max: %f\n"
" color_dim: %d\n"
" blank: %d\n"
" dims: %d\n"
" zoom: %d\n"
" load cost: %d\n"
" store cost: %d\n"
" x: %d y: %d\n"
" x_stride: %d %d %d %d\n"
" y_stride: %d %d %d %d\n",
min, max,
zoom, load_cost, store_cost, x, y,
x_stride[0], x_stride[1], x_stride[2], x_stride[3],
y_stride[0], y_stride[1], y_stride[2], y_stride[3]);
Config() {
memset(x_stride, 0, sizeof(x_stride));
memset(y_stride, 0, sizeof(y_stride));
} config;
// Statistics gathered about one Func while the trace is replayed.
// NOTE(review): closing braces and parts of the report() calls are missing
// in this view; the code below is left exactly as found.
struct Observed {
// Name printed by report(); run() assigns it from the packet's qualified name.
string qualified_name;
// Clock value / packet counter recorded by run() the first time the Func is
// seen; run() treats 0 as "not recorded yet".
int first_draw_time = 0, first_packet_idx = 0;
// Smallest and largest values seen by observe_load_or_store().
double min_value = 0.0, max_value = 0.0;
// Per-dimension bounds of the coordinates seen so far (at most 16
// dimensions). max_coord is exclusive: it stores coord + 1.
int min_coord[16];
int max_coord[16];
int num_realizations = 0, num_productions = 0;
// Running totals; each observed packet adds its lane count.
uint64_t stores = 0, loads = 0;
// Zero-fills the coordinate bounds.
Observed() {
memset(min_coord, 0, sizeof(min_coord));
memset(max_coord, 0, sizeof(max_coord));
// Counts the lanes of a load packet.
void observe_load(const Packet &p) {
loads += p.type.lanes;
// Counts the lanes of a store packet.
void observe_store(const Packet &p) {
stores += p.type.lanes;
// Folds a packet's coordinates and values into the running bounds.
// NOTE(review): no caller of this function is visible in this view.
void observe_load_or_store(const Packet &p) {
// Coordinates are read as p.get_coord(i * lanes + lane), i.e. `lanes`
// consecutive entries per dimension i.
// NOTE(review): divides by p.type.lanes — confirm it can never be 0.
for (int i = 0; i < std::min(16, p.dimensions / p.type.lanes); i++) {
for (int lane = 0; lane < p.type.lanes; lane++) {
int coord = p.get_coord(i*p.type.lanes + lane);
// The first lane of the very first packet seeds the bounds; every later
// coordinate widens them.
if (loads + stores == 0 && lane == 0) {
min_coord[i] = coord;
max_coord[i] = coord + 1;
} else {
min_coord[i] = std::min(min_coord[i], coord);
max_coord[i] = std::max(max_coord[i], coord + 1);
for (int i = 0; i < p.type.lanes; i++) {
double value = p.get_value_as<double>(i);
// While no loads or stores have been counted yet, each lane overwrites
// both bounds; afterwards the range is widened.
if (stores + loads == 0) {
min_value = value;
max_value = value;
} else {
min_value = std::min(min_value, value);
max_value = std::max(max_value, value);
// Prints the collected statistics to stderr.
// NOTE(review): the heads of the two multi-line print calls and the
// arguments for the two %g conversions are not visible in this view.
void report() {
"Func %s:\n"
" bounds of domain: ", qualified_name.c_str());
for (int i = 0; i < 16; i++) {
// An all-zero [min, max) pair is taken as the end of the used dimensions.
if (min_coord[i] == 0 && max_coord[i] == 0) break;
if (i > 0) {
fprintf(stderr, " x ");
fprintf(stderr, "[%d, %d)", min_coord[i], max_coord[i]);
" range of values: [%f, %f]\n"
" number of realizations: %d\n"
" number of productions: %d\n"
" number of loads: %g\n"
" number of stores: %g\n",
min_value, max_value,
num_realizations, num_productions,
} stats;
// Composites pixel `b` over pixel `a` and stores the result in `dst`.
// Each argument points at one 4-byte pixel whose last byte is alpha.
// `dst` may alias `a` or `b`; the pointers need not be 4-byte aligned.
void composite(uint8_t *a, uint8_t *b, uint8_t *dst) {
    const uint8_t alpha = b[3];
    if (alpha == 0 || alpha == 255) {
        // A fully transparent overlay leaves `a` unchanged; a fully opaque
        // one replaces it with `b`. The copy goes through a temporary so it
        // is valid for unaligned pointers and when dst aliases the source.
        uint32_t px;
        memcpy(&px, alpha == 0 ? a : b, sizeof(px));
        memcpy(dst, &px, sizeof(px));
        return;
    }
    const int inv_alpha = 255 - alpha;
    // Colour channels: linear blend weighted by the overlay's alpha.
    for (int ch = 0; ch < 3; ch++) {
        dst[ch] = (uint8_t)((alpha * b[ch] + inv_alpha * a[ch]) / 255);
    }
    // Alpha: the remaining transparency is the product of both layers'.
    dst[3] = (uint8_t)(255 - (((255 - a[3]) * inv_alpha) / 255));
}
// Glyph cell size, in pixels, of the embedded bitmap font.
#define FONT_W 12
#define FONT_H 32

// Draws the NUL-terminated string `text` into the dst_width x dst_height
// pixel buffer `dst`. (x, y) is the bottom-left corner of the first glyph;
// each following glyph advances FONT_W pixels to the right. Every covered
// pixel is overwritten with the low 24 bits of `color` and the glyph's
// coverage byte as its top byte. Pixels outside the buffer are skipped.
// Characters outside 32..127 are drawn as a space.
void draw_text(const char *text, int x, int y, uint32_t color, uint32_t *dst, int dst_width, int dst_height) {
    // The font table holds 96 glyphs: character codes 32 through 127.
    assert(inconsolata_raw_len == 96 * FONT_W * FONT_H);
    color &= 0xffffff;

    for (int c = 0; text[c] != 0; c++) {
        int chr = text[c];
        if (chr < 32 || chr > 127) chr = 32;
        chr -= 32;

        const uint8_t *font_ptr = inconsolata_raw + chr * (FONT_W * FONT_H);
        const int glyph_x = x + FONT_W * c;
        for (int fy = 0; fy < FONT_H; fy++) {
            const int py = y - FONT_H + fy + 1;
            if (py < 0 || py >= dst_height) continue;
            for (int fx = 0; fx < FONT_W; fx++) {
                const int px = glyph_x + fx;
                if (px < 0 || px >= dst_width) continue;
                // Widen before shifting so a coverage value of 128 or more
                // is never shifted into the sign bit of a promoted int.
                const uint32_t coverage = font_ptr[fy * FONT_W + fx];
                dst[py * dst_width + px] = (coverage << 24) | color;
            }
        }
    }
}
// Command-line help for the tool; the lines that follow are the help text.
// NOTE(review): in this view the statement that emits the text (and the
// end of the function) is not visible — confirm against the full file.
void usage() {
HalideTraceViz accepts Halide-generated binary tracing packets from
stdin, and outputs them as raw 8-bit rgba32 pixel values to
stdout. You should pipe the output of HalideTraceViz into a video
encoder or player.
E.g. to encode a video:
HL_TARGET=host-trace_stores-trace_loads-trace_realizations <command to make pipeline> && \
HL_TRACE_FILE=/dev/stdout <command to run pipeline> | \
HalideTraceViz -s 1920 1080 -t 10000 <the -f args> | \
avconv -f rawvideo -pix_fmt bgr32 -s 1920x1080 -i /dev/stdin -c:v h264 output.avi
To just watch the trace instead of encoding a video replace the last
line with something like:
mplayer -demuxer rawvideo -rawvideo w=1920:h=1080:format=rgba:fps=30 -idle -fixed-vo -
The arguments to HalideTraceViz specify how to lay out and render the
Funcs of interest. It acts like a stateful drawing API. The following
parameters should be set zero or one times:
--size width height: The size of the output frames. Defaults to
1920x1080.
--timestep timestep: How many Halide computations should be covered
by each frame. Defaults to 10000.
--decay A B: How quickly should the yellow and blue highlights decay
over time. This is a two-stage exponential decay with a knee in
it. A controls the rate at which they decay while a value is in
the process of being computed, and B controls the rate at which
they decay over time after the corresponding value has finished
being computed. 1 means never decay, 2 means halve in opacity
every frame, and 256 or larger means instant decay. The default
values for A and B are 1 and 2 respectively, which means that the
highlight holds while the value is being computed, and then
decays slowly.
--hold frames: How many frames to output after the end of the
trace. Defaults to 250.
The following parameters can be set once per Func. With the exception
of label, they continue to take effect for all subsequently defined
Funcs.
--min: The minimum value taken on by a Func. Maps to black.
--max: The maximum value taken on by a Func. Maps to white.
--rgb dim: Render Funcs as rgb, with the dimension dim indexing the
color channels.
--gray: Render Funcs as grayscale.
--blank: Specify that the output occupied by a Func should be set to
black on its end-realization event.
--no-blank: The opposite of --blank. Leaves the Func's values on the
screen. This is the default
--zoom factor: Each value of a Func will draw as a factor x factor
box in the output.
--load time: Each load from a Func costs the given number of ticks.
--store time: Each store to a Func costs the given number of ticks.
--move x y: Sets the position on the screen corresponding to the
Func's 0, 0 coordinate.
--left dx: Moves the currently set position leftward by the given
amount.
--right dx: Moves the currently set position rightward by the given
amount.
--up dy: Moves the currently set position upward by the given amount.
--down dy: Moves the currently set position downward by the given
amount.
--push: Copies the currently set position onto a stack of positions.
--pop: Sets the current position to the value most-recently pushed,
and removes it from the stack.
--strides ... : Specifies the matrix that maps the coordinates of the
Func to screen pixels. Specified column major. For example,
--strides 1 0 0 1 0 0 specifies that the Func has three
dimensions where the first one maps to screen-space x
coordinates, the second one maps to screen-space y coordinates,
and the third one does not affect screen-space coordinates.
--uninit r g b : Specifies the on-screen color corresponding to
uninitialized memory. Defaults to black.
--func name: Mark a Func to be visualized. Uses the currently set
values of the parameters above to specify how.
--label func label n: When the named Func is first touched, the label
appears with its bottom left corner at the current coordinates
and fades in over n frames.
// Command-line parsing check. Returns normally when `cond` holds.
// Otherwise reports the failing argument index `i` on stderr (when i is
// non-zero) and terminates the process: callers index argv immediately
// after the check, so returning from a failed check would read past the
// end of the argument list.
void expect(bool cond, int i) {
    if (cond) {
        return;
    }
    if (i) {
        fprintf(stderr, "Argument parsing failed at argument %d\n", i);
    }
    exit(-1);
}
// Paints every pixel covered by a realization with `color`.
//
// The packet's coordinates are read as (min, extent) pairs, one pair per
// dimension. The function recurses over the dimensions, accumulating the
// Func-space offset (x_off, y_off) from fi.config's strides, and at the
// innermost level fills one zoom x zoom box at the resulting position.
//
//   image, image_width  destination buffer and its row length in pixels
//   color               value written to each covered pixel
//   fi                  supplies zoom, origin (x, y) and the stride tables
//   p                   realization packet providing the (min, extent) pairs
//   current_dimension, x_off, y_off
//                       recursion state; callers use the defaults
//
// Pixels whose column lies outside [0, image_width) or whose row is
// negative are skipped. The image height is not known here, so rows below
// the bottom of the image cannot be rejected by this function.
void fill_realization(uint32_t *image, int image_width, uint32_t color, const FuncInfo &fi,
                      Packet &p, int current_dimension = 0, int x_off = 0, int y_off = 0) {
    assert(p.dimensions >= 2 * fi.config.dims);
    // Stop once no complete (min, extent) pair is left. For an even
    // coordinate count this is the same point as
    // 2 * current_dimension == p.dimensions; for an odd count it also
    // guarantees the recursion terminates.
    if (2 * current_dimension + 1 >= p.dimensions) {
        const int x_min = x_off * fi.config.zoom + fi.config.x;
        const int y_min = y_off * fi.config.zoom + fi.config.y;
        for (int y = 0; y < fi.config.zoom; y++) {
            const int row = y_min + y;
            if (row < 0) continue;
            for (int x = 0; x < fi.config.zoom; x++) {
                const int col = x_min + x;
                // Without this check an off-screen column would wrap into a
                // neighbouring row or index before the start of the buffer.
                if (col < 0 || col >= image_width) continue;
                image[row * image_width + col] = color;
            }
        }
    } else {
        const int min = p.get_coord(current_dimension * 2 + 0);
        const int extent = p.get_coord(current_dimension * 2 + 1);
        const int x_step = fi.config.x_stride[current_dimension];
        const int y_step = fi.config.y_stride[current_dimension];
        x_off += x_step * min;
        y_off += y_step * min;
        for (int i = min; i < min + extent; i++) {
            fill_realization(image, image_width, color, fi, p, current_dimension + 1, x_off, y_off);
            x_off += x_step;
            y_off += y_step;
        }
    }
}
// Parses the command line, then replays the trace read from stdin into a
// sequence of raw frames written to stdout, and finally prints a summary to
// stderr. Returns 0 on the visible normal paths and -1 when a frame cannot
// be written.
// NOTE(review): this function is damaged in this view — closing braces,
// some statements and some expression tokens are missing. The code below is
// left exactly as found; comments describe only what is visible.
int run(int argc, char **argv) {
// No arguments at all: nothing to do.
if (argc == 1) {
return 0;
// Global settings, overridable by --size, --decay, --timestep and --hold.
int frame_width = 1920, frame_height = 1080;
float decay_factor[2] = {1, 2};
// Per-Func settings and statistics, keyed by Func name.
map<string, FuncInfo> func_info;
int timestep = 10000;
int hold_frames = 250;
// Running drawing state. The flags below mutate it; --func snapshots it
// into the named Func's FuncInfo.
FuncInfo::Config config;
config.x = config.y = 0;
config.zoom = 1;
config.color_dim = -1;
config.min = 0;
config.max = 1;
config.store_cost = 1;
config.load_cost = 0;
config.blank_on_end_realization = false;
config.dims = 2;
// Default strides: dimension 0 maps to screen x, dimension 1 to screen y.
config.x_stride[0] = 1;
config.y_stride[0] = 0;
config.x_stride[1] = 0;
config.y_stride[1] = 1;
config.uninitialized_memory_color = 255 << 24;
// Positions saved by --push.
vector<pair<int, int>> pos_stack;
int i = 1;
// Each branch checks that enough arguments remain, then consumes them
// with argv[++i].
while (i < argc) {
string next = argv[i];
if (next == "--size") {
expect(i + 2 < argc, i);
frame_width = atoi(argv[++i]);
frame_height = atoi(argv[++i]);
} else if (next == "--func") {
expect(i + 1 < argc, i);
const char *func = argv[++i];
FuncInfo &fi = func_info[func];
fi.config = config;
fi.configured = true;
} else if (next == "--min") {
expect(i + 1 < argc, i);
config.min = atof(argv[++i]);
} else if (next == "--max") {
expect(i + 1 < argc, i);
config.max = atof(argv[++i]);
} else if (next == "--move") {
expect(i + 2 < argc, i);
config.x = atoi(argv[++i]);
config.y = atoi(argv[++i]);
} else if (next == "--left") {
expect(i + 1 < argc, i);
config.x -= atoi(argv[++i]);
} else if (next == "--right") {
expect(i + 1 < argc, i);
config.x += atoi(argv[++i]);
} else if (next == "--up") {
expect(i + 1 < argc, i);
config.y -= atoi(argv[++i]);
} else if (next == "--down") {
expect(i + 1 < argc, i);
config.y += atoi(argv[++i]);
} else if (next == "--push") {
pos_stack.push_back({config.x, config.y});
} else if (next == "--pop") {
// NOTE(review): no removal of the restored entry is visible here.
expect(!pos_stack.empty(), i);
config.x = pos_stack.back().first;
config.y = pos_stack.back().second;
} else if (next == "--rgb") {
expect(i + 1 < argc, i);
config.color_dim = atoi(argv[++i]);
} else if (next == "--gray") {
config.color_dim = -1;
} else if (next == "--blank") {
config.blank_on_end_realization = true;
} else if (next == "--no-blank") {
config.blank_on_end_realization = false;
} else if (next == "--zoom") {
expect(i + 1 < argc, i);
config.zoom = atoi(argv[++i]);
} else if (next == "--load") {
expect(i + 1 < argc, i);
config.load_cost = atoi(argv[++i]);
} else if (next == "--store") {
expect(i + 1 < argc, i);
config.store_cost = atoi(argv[++i]);
} else if (next == "--strides") {
// Consumes (x_stride, y_stride) pairs until the next argument starting
// with "--".
// NOTE(review): neither an increment of config.dims nor a bound against
// the 16-entry stride tables is visible here.
config.dims = 0;
while (i + 1 < argc) {
const char *next_arg = argv[i + 1];
if (next_arg[0] == '-' &&
next_arg[1] == '-') {
expect(i + 2 < argc, i);
config.x_stride[config.dims] = atoi(argv[++i]);
config.y_stride[config.dims] = atoi(argv[++i]);
} else if (next == "--label") {
expect(i + 3 < argc, i);
char *func = argv[++i];
char *text = argv[++i];
int n = atoi(argv[++i]);
// NOTE(review): the label is built but no statement storing it is
// visible here.
Label l = {text, config.x, config.y, n};
} else if (next == "--timestep") {
expect(i + 1 < argc, i);
timestep = atoi(argv[++i]);
} else if (next == "--decay") {
expect(i + 2 < argc, i);
decay_factor[0] = atof(argv[++i]);
decay_factor[1] = atof(argv[++i]);
} else if (next == "--hold") {
expect(i + 1 < argc, i);
hold_frames = atoi(argv[++i]);
} else if (next == "--uninit") {
expect(i + 3 < argc, i);
int r = atoi(argv[++i]);
int g = atoi(argv[++i]);
int b = atoi(argv[++i]);
// Packed with r in the lowest byte and full alpha in the highest.
config.uninitialized_memory_color = (255 << 24) | ((b & 255) << 16) | ((g & 255) << 8) | (r & 255);
} else {
// Unrecognised argument.
expect(false, i);
// halide_clock advances with the cost of replayed events; video_clock
// advances by one timestep per emitted frame.
size_t halide_clock = 0, video_clock = 0;
// Five frame-sized, zero-initialised layers.
// NOTE(review): allocated with new[] and never released in the visible
// code; the sizes are computed in int arithmetic from user-supplied
// dimensions.
uint32_t *image = new uint32_t[frame_width * frame_height];
memset(image, 0, 4 * frame_width * frame_height);
uint32_t *anim = new uint32_t[frame_width * frame_height];
memset(anim, 0, 4 * frame_width * frame_height);
uint32_t *anim_decay = new uint32_t[frame_width * frame_height];
memset(anim_decay, 0, 4 * frame_width * frame_height);
uint32_t *text = new uint32_t[frame_width * frame_height];
memset(text, 0, 4 * frame_width * frame_height);
uint32_t *blend = new uint32_t[frame_width * frame_height];
memset(blend, 0, 4 * frame_width * frame_height);
struct PipelineInfo {
string name;
int32_t id;
map<uint32_t, PipelineInfo> pipeline_info;
size_t end_counter = 0;
size_t packet_clock = 0;
// Main loop: emit frames whenever the Halide clock has caught up with the
// video clock, then read and replay the next packet.
for (;;) {
// NOTE(review): end_counter is compared against hold_frames, but where it
// is incremented and what happens on equality is not visible here.
if (end_counter) {
halide_clock += timestep;
if (end_counter == (size_t)hold_frames) {
if (halide_clock >= video_clock) {
const ssize_t frame_bytes = 4 * frame_width * frame_height;
while (halide_clock >= video_clock) {
// Per pixel: fold anim into anim_decay, lay anim_decay over image into
// blend, then lay text over blend.
for (int i = 0; i < frame_width * frame_height; i++) {
uint8_t *anim_decay_px = (uint8_t *)(anim_decay + i);
uint8_t *anim_px = (uint8_t *)(anim + i);
uint8_t *image_px = (uint8_t *)(image + i);
uint8_t *text_px = (uint8_t *)(text + i);
uint8_t *blend_px = (uint8_t *)(blend + i);
composite(anim_decay_px, anim_px, anim_decay_px);
composite(image_px, anim_decay_px, blend_px);
composite(blend_px, text_px, blend_px);
// The blended frame goes to file descriptor 1; a short write is treated
// as a failure.
ssize_t bytes_written = write(1, blend, frame_bytes);
if (bytes_written < frame_bytes) {
fprintf(stderr, "Could not write frame to stdout.\n");
return -1;
video_clock += timestep;
// Decay: alpha (0..255) times (1 << 24) / decay leaves the scaled alpha
// in the top byte, which the mask then isolates.
// NOTE(review): a decay factor of 0 would divide by zero here.
if (decay_factor[1] != 1) {
const uint32_t inv_d1 = (1 << 24) / decay_factor[1];
for (int i = 0; i < frame_width * frame_height; i++) {
uint32_t color = anim_decay[i];
uint32_t rgb = color & 0x00ffffff;
uint32_t alpha = (color >> 24);
alpha *= inv_d1;
alpha &= 0xff000000;
anim_decay[i] = alpha | rgb;
const uint32_t inv_d0 = (1 << 24) / decay_factor[0];
for (int i = 0; i < frame_width * frame_height; i++) {
uint32_t color = anim[i];
uint32_t rgb = color & 0x00ffffff;
uint32_t alpha = (color >> 24);
alpha *= inv_d0;
alpha &= 0xff000000;
anim[i] = alpha | rgb;
memset(anim, 0, frame_bytes);
// Read the next packet.
// NOTE(review): the handling of a failed read is not visible here.
Packet p;
if (!p.read_from_stdin()) {
// Pipeline bookkeeping.
// NOTE(review): several statements in this section are syntactically
// incomplete in this view (empty subscripts, missing operands).
if (p.event == halide_trace_begin_pipeline) {
pipeline_info[] = {p.func(),};
} else if (p.event == halide_trace_end_pipeline) {
PipelineInfo pipeline = pipeline_info[p.parent_id];
if (p.event == halide_trace_begin_realization ||
p.event == halide_trace_produce ||
p.event == halide_trace_consume) {
pipeline_info[] = pipeline;
} else if (p.event == halide_trace_end_realization ||
p.event == halide_trace_end_produce ||
p.event == halide_trace_end_consume) {
string qualified_name = + ":" + p.func();
// A Func unknown under its qualified name inherits the entry registered
// under its plain name, when there is one.
if (func_info.find(qualified_name) == func_info.end()) {
if (func_info.find(p.func()) != func_info.end()) {
func_info[qualified_name] = func_info[p.func()];
} else {
fprintf(stderr, "Warning: ignoring func %s event %d \n", qualified_name.c_str(), p.event);
fprintf(stderr, "Parent event %d %s\n", p.parent_id,;
FuncInfo &fi = func_info[qualified_name];
// Funcs that were never named by --func are not drawn.
if (!fi.configured) continue;
// Record when this Func was first seen; 0 means "not recorded yet".
if (fi.stats.first_draw_time == 0) {
fi.stats.first_draw_time = halide_clock;
if (fi.stats.first_packet_idx == 0) {
fi.stats.first_packet_idx = packet_clock;
fi.stats.qualified_name = qualified_name;
// Labels fade in over label.n frames, counted from the first draw.
int frames_since_first_draw = (halide_clock - fi.stats.first_draw_time) / timestep;
for (size_t i = 0; i < fi.config.labels.size(); i++) {
const Label &label = fi.config.labels[i];
if (frames_since_first_draw <= label.n) {
// NOTE(review): divides by label.n — a label created with n == 0 would
// divide by zero here.
uint32_t color = ((1 + frames_since_first_draw) * 255) / label.n;
if (color > 255) color = 255;
// Replicate the brightness into all three colour bytes.
color *= 0x10101;
draw_text(label.text, label.x, label.y, color, text, frame_width, frame_height);
switch (p.event) {
case halide_trace_load:
case halide_trace_store:
// Charge the configured cost per lane to the Halide clock.
if (p.event == halide_trace_store) {
halide_clock += fi.config.store_cost * p.type.lanes;
} else {
halide_clock += fi.config.load_cost * p.type.lanes;
assert(p.dimensions >= p.type.lanes * fi.config.dims);
if (p.dimensions >= p.type.lanes * fi.config.dims) {
for (int lane = 0; lane < p.type.lanes; lane++) {
// Map this lane's Func coordinates to a screen position.
int x = fi.config.x;
int y = fi.config.y;
for (int d = 0; d < fi.config.dims; d++) {
int a = p.get_coord(d * p.type.lanes + lane);
x += fi.config.zoom * fi.config.x_stride[d] * a;
y += fi.config.zoom * fi.config.y_stride[d] * a;
// Highlight colour written to the anim layer: one for loads, another for
// stores.
uint32_t color = p.event == halide_trace_load ? 0xffffdd44 : 0xff44ddff;
uint32_t image_color;
bool update_image = false;
// The image layer is updated on stores, and on loads from a Func that has
// no recorded realizations.
if (p.event == halide_trace_store ||
fi.stats.num_realizations == 0 ) {
update_image = true;
// NOTE(review): this read is not bounds-checked, unlike the writes in
// the zoom loop below.
image_color = image[frame_width * y + x];
double value = p.get_value_as<double>(lane);
// Normalise to 0..255 using the configured min/max, then clamp.
// NOTE(review): max == min makes the denominator zero.
value = 255 * (value - fi.config.min) / (fi.config.max - fi.config.min);
if (value < 0) value = 0;
if (value > 255) value = 255;
uint8_t int_value = (uint8_t)value;
if (fi.config.color_dim < 0) {
// Grayscale: same value in all three colour bytes, opaque alpha.
image_color = (int_value * 0x00010101) | 0xff000000;
} else {
// Colour: replace only the byte selected by the colour coordinate.
// NOTE(review): the channel value is not range-checked before shifting.
uint32_t channel = p.get_coord(fi.config.color_dim * p.type.lanes + lane);
uint32_t mask = ~(255 << (channel * 8));
image_color &= mask;
image_color |= int_value << (channel * 8);
// Paint the zoom x zoom box, clipped to the frame.
for (int dy = 0; dy < fi.config.zoom; dy++) {
for (int dx = 0; dx < fi.config.zoom; dx++) {
if (y + dy >= 0 && y + dy < frame_height &&
x + dx >= 0 && x + dx < frame_width) {
int px = frame_width * (y + dy) + x + dx;
anim[px] = color;
if (update_image) {
image[px] = image_color;
case halide_trace_begin_realization:
// Paint the realized region with the uninitialized-memory colour.
fill_realization(image, frame_width, fi.config.uninitialized_memory_color, fi, p);
case halide_trace_end_realization:
// Optionally clear the region again when the realization ends.
if (fi.config.blank_on_end_realization) {
fill_realization(image, frame_width, 0, fi, p);
case halide_trace_produce:
case halide_trace_end_produce:
case halide_trace_consume:
case halide_trace_end_consume:
case halide_trace_begin_pipeline:
case halide_trace_end_pipeline:
fprintf(stderr, "Unknown tracing event code: %d\n", p.event);
// Summary: Funcs ordered by the packet index at which each first appeared.
// NOTE(review): the statements that fill `funcs` and report each entry are
// not visible here.
fprintf(stderr, "Total number of Funcs: %d\n", (int)func_info.size());
vector<std::pair<std::string, FuncInfo> > funcs;
for (std::pair<std::string, FuncInfo> p : func_info) {
struct by_first_packet_idx {
bool operator()(const std::pair<std::string, FuncInfo> &a,
const std::pair<std::string, FuncInfo> &b) const {
return a.second.stats.first_packet_idx < b.second.stats.first_packet_idx;
std::sort(funcs.begin(), funcs.end(), by_first_packet_idx());
for (std::pair<std::string, FuncInfo> p : funcs) {;
return 0;
// Program entry point. Forwards the command line to run() and uses its
// result as the exit status, so a failure reported by run() (for example a
// frame that could not be written) is visible to the calling shell or
// pipeline instead of being reported as success.
int main(int argc, char **argv) {
    return run(argc, argv);
}