This source file includes the following definitions.
- main
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
int main(int argc, char **argv) {
if (!get_jit_target_from_environment().has_gpu_feature()) {
printf("Not running test because no gpu target enabled\n");
return 0;
}
ImageParam in(UInt(8), 2);
Var x, y;
Func in_func;
in_func(x, y) = in(x, y);
Func out;
out(x, y) = in_func(y, x);
Var xi, yi, xo, yo, xii, xio, yii, yio, tile_idx, subtile_idx;
out.tile(x, y, xo, yo, xi, yi, 64, 64)
.fuse(xo, yo, tile_idx)
.tile(xi, yi, xio, yio, xii, yii, 16, 16)
.fuse(xio, yio, subtile_idx)
.gpu_blocks(subtile_idx, tile_idx)
.gpu_threads(xii, yii);
in_func.compute_at(out, subtile_idx).gpu_threads(x, y);
Buffer<uint8_t> input(256, 256);
lambda(x, y, cast<uint8_t>(x * 17 + y)).realize(input);
in.set(input);
Buffer<uint8_t> output = out.realize(256, 256);
for (int y = 0; y < 256; y++) {
for (int x = 0; x < 256; x++) {
uint8_t correct = y * 17 + x;
if (output(x, y) != correct) {
printf("output(%d, %d) = %d instead of %d\n",
x, y, output(x, y), correct);
return -1;
}
}
}
printf("Success!\n");
return 0;
}