// This source file includes the following definitions:
// - visit
// - mutate
// - main
#include "Halide.h"
#include <stdio.h>
using namespace Halide;
using namespace Halide::Internal;
// IR visitor that tallies calls named "halide_gpu_thread_barrier" in
// whatever IR it is applied to. The running total is exposed as `count`.
class CountBarriers : public IRVisitor {
public:
    // Number of barrier calls encountered so far; starts at zero.
    int count = 0;

    CountBarriers() {}

protected:
    using IRVisitor::visit;

    // Record the call if it is a thread barrier, then hand it to the
    // base-class visit (presumably so traversal continues into the call's
    // arguments -- the base implementation is not shown here).
    void visit(const Call *op) {
        const bool is_barrier = (op->name == "halide_gpu_thread_barrier");
        if (is_barrier) {
            ++count;
        }
        IRVisitor::visit(op);
    }
};
// Mutator that does not modify the statement it is given. It counts the
// thread-barrier calls in the statement with CountBarriers and, if the
// count differs from the expected value supplied at construction, prints
// a diagnostic and exits the process with status -1.
class CheckBarrierCount : public IRMutator {
    // Expected number of barrier calls.
    int correct;

public:
    CheckBarrierCount(int correct) : correct(correct) {}

    using IRMutator::mutate;

    // Returns `s` unchanged when the barrier count matches; otherwise
    // reports the mismatch and terminates the program.
    Stmt mutate(Stmt s) {
        CountBarriers counter;
        s.accept(&counter);

        const int found = counter.count;
        if (found == correct) {
            return s;
        }

        printf("There were %d barriers. There were supposed to be %d\n", found, correct);
        exit(-1);
    }
};
// Test driver with two scenarios, both scheduled with GPU threads:
//   1. A Func whose many update stages each read what the previous stage
//      wrote; the realized output is checked value-by-value.
//   2. A Func containing undef stages; a custom lowering pass checks the
//      number of thread barriers in the lowered statement.
// Returns 0 on success (or when no GPU target is enabled), -1 on a wrong
// output value.
int main(int argc, char **argv) {
    // Nothing to run unless the JIT target from the environment has a GPU
    // feature; report that and finish successfully.
    if (!get_jit_target_from_environment().has_gpu_feature()) {
        printf("Not running test because no gpu target enabled\n");
        return 0;
    }

    {
        Func f;
        Var x, y;

        f(x, y) = x + 100 * y;

        const int passes = 10;
        for (int pass = 0; pass < passes; ++pass) {
            // Reverse columns 0..9 of each row, staging the values in
            // columns 10..19 first.
            RDom rx(0, 10);
            f(rx + 10, y) = f(9 - rx, y);
            f(rx, y) = f(rx + 10, y);

            // Reverse rows 0..7 of each column the same way, staging the
            // values in rows 8..15.
            RDom ry(0, 8);
            f(x, ry + 8) = f(x, 7 - ry);
            f(x, ry) = f(x, ry + 8);
        }

        Func g;
        g(x, y) = f(0, 0) + f(9, 7);

        Var xi, yi;
        g.gpu_tile(x, y, xi, yi, 16, 8);
        f.compute_at(g, x);

        // Each pass contributed four update stages: the two row-reversal
        // stages are spread over y, the two column-reversal stages over x.
        for (int pass = 0; pass < passes; ++pass) {
            const int first = pass * 4;
            f.update(first).gpu_threads(y);
            f.update(first + 1).gpu_threads(y);
            f.update(first + 2).gpu_threads(x);
            f.update(first + 3).gpu_threads(x);
        }

        Buffer<int> out = g.realize(100, 100);

        // An even number of reversals restores f(x, y) == x + 100 * y in the
        // reversed region, so every output is f(0, 0) + f(9, 7) = 0 + 709.
        const int expected = 7 * 100 + 9;
        for (int row = 0; row < out.height(); ++row) {
            for (int col = 0; col < out.width(); ++col) {
                const int actual = out(col, row);
                if (actual == expected) {
                    continue;
                }
                printf("out(%d, %d) = %d instead of %d\n",
                       col, row, actual, expected);
                return -1;
            }
        }
    }

    {
        Func f;
        Var x, y;

        // Pure definition and the second update are undef; only the first
        // and third updates carry real values.
        f(x, y) = undef<int>();
        f(x, y) += x + 100 * y;
        f(x, y) += undef<int>();
        f(x, y) += y * 100 + x;

        Func g;
        g(x, y) = f(0, 0) + f(7, 7);

        Var xi, yi;
        g.gpu_tile(x, y, xi, yi, 8, 8);
        f.compute_at(g, x);

        f.gpu_threads(x, y);
        f.update(0).gpu_threads(x, y);
        f.update(1).gpu_threads(x, y);
        f.update(2).gpu_threads(x, y);

        // Expect exactly two barriers in the lowered statement (presumably
        // one between the two non-undef updates and one between f and g --
        // confirm against the lowering). The check exits on a mismatch.
        g.add_custom_lowering_pass(new CheckBarrierCount(2));

        Buffer<int> out = g.realize(100, 100);
    }

    printf("Success!\n");
    return 0;
}