This source file includes the following definitions.
- test
- main
#include "Halide.h"
#include <cstdio>
#include <algorithm>
#include "halide_benchmark.h"
using namespace Halide;
using namespace Halide::Tools;
// Source image read by every pipeline under test; allocated and filled in main().
Buffer<uint16_t> input;
// Destination that test() realizes each pipeline into; allocated in main().
Buffer<uint16_t> output;
// Inclusive bounds that the x coordinate is clamped to before loading from
// `input`, both in the Halide pipelines built in main() and in the reference
// check inside test().
#define MIN 1
#define MAX 1020
// Compiles `f`, runs it once into the global `output` buffer, optionally
// verifies the result, and then times repeated realizations.
//
// f                 Pipeline to compile and run. Its assembly is also written
//                   to "<f.name()>.s".
// test_correctness  When true, every output pixel is compared against
//                   input(clamp(x)) * 3 + input(clamp(x + 1)) computed on the
//                   host; the first mismatch is reported and the process exits
//                   with status -1.
//
// Returns whatever benchmark(1, 10, ...) reports for realizing `f` into `output`.
double test(Func f, bool test_correctness = true) {
    // Dump the generated assembly so it can be inspected, then JIT and run once.
    f.compile_to_assembly(f.name() + ".s", {input}, f.name());
    f.compile_jit();
    f.realize(output);

    if (test_correctness) {
        const int rows = output.height();
        const int cols = output.width();
        for (int row = 0; row < rows; row++) {
            for (int col = 0; col < cols; col++) {
                // Host-side mirror of the clamped coordinates used by the pipeline.
                const int left = std::max(std::min(col, MAX), MIN);
                const int right = std::max(std::min(col + 1, MAX), MIN);
                const uint16_t expected = input(left, row) * 3 + input(right, row);
                if (output(col, row) == expected) {
                    continue;
                }
                printf("output(%d, %d) = %d instead of %d\n",
                       col, row, output(col, row), expected);
                exit(-1);
            }
        }
    }

    // Time the already-compiled pipeline.
    auto run_once = [&]() { f.realize(output); };
    return benchmark(1, 10, run_once);
}
// Times four schedules of the same two-tap filter over `input` and checks that
// the inlined clamped load is not slower than the two alternative schedules
// of the clamped load.
//
// Returns 0 and prints "Success!" when the clamped timing is no greater than
// both alternatives; otherwise prints all four timings and returns -1.
int main(int argc, char **argv) {
    // The input is 8 columns wider than the output.
    input = Buffer<uint16_t>(1024+8, 320);
    const int in_rows = input.height();
    const int in_cols = input.width();
    for (int row = 0; row < in_rows; row++) {
        for (int col = 0; col < in_cols; col++) {
            input(col, row) = rand() & 0xfff;
        }
    }
    output = Buffer<uint16_t>(1024, 320);

    Var x, y;

    // Reference: unclamped loads straight from the input. Correctness checking
    // is skipped because test() compares against the clamped formula.
    const double t_ref = [&]() {
        Func f;
        f(x, y) = input(x, y) * 3 + input(x+1, y);
        f.vectorize(x, 8);
        return test(f, false);
    }();

    // Clamped load left inline in the vectorized consumer; the lowered
    // statement is also written out for debugging.
    const double t_clamped = [&]() {
        Func g;
        g(x, y) = input(clamp(x, MIN, MAX), y);
        Func f;
        f(x, y) = g(x, y) * 3 + g(x+1, y);
        f.vectorize(x, 8);
        f.compile_to_lowered_stmt("debug_clamped_vector_load.stmt", f.infer_arguments());
        return test(f);
    }();

    // Clamped load computed at f's x loop (reported as "Scalarize the load").
    const double t_scalar = [&]() {
        Func g;
        g(x, y) = input(clamp(x, MIN, MAX), y);
        Func f;
        f(x, y) = g(x, y) * 3 + g(x+1, y);
        f.vectorize(x, 8);
        g.compute_at(f, x);
        return test(f);
    }();

    // Clamped load computed at f's y loop (reported as "Pad the input").
    const double t_pad = [&]() {
        Func g;
        g(x, y) = input(clamp(x, MIN, MAX), y);
        Func f;
        f(x, y) = g(x, y) * 3 + g(x+1, y);
        f.vectorize(x, 8);
        g.compute_at(f, y);
        return test(f);
    }();

    const bool suspicious = t_clamped > t_scalar || t_clamped > t_pad;
    if (!suspicious) {
        printf("Success!\n");
        // Replace the global buffers with empty ones before returning
        // (presumably to release their storage ahead of static destruction — confirm).
        input = Buffer<uint16_t>();
        output = Buffer<uint16_t>();
        return 0;
    }

    printf("Clamped load timings suspicious:\n"
           "Unclamped: %f\n"
           "Clamped: %f\n"
           "Scalarize the load: %f\n"
           "Pad the input: %f\n",
           t_ref, t_clamped, t_scalar, t_pad);
    return -1;
}