This source file includes following definitions.
- blur5
- generate
#include "Halide.h"
using namespace Halide;
Expr blur5(Expr x0, Expr x1, Expr x2, Expr x3, Expr x4) {
x0 = cast<uint16_t>(x0);
x1 = cast<uint16_t>(x1);
x2 = cast<uint16_t>(x2);
x3 = cast<uint16_t>(x3);
x4 = cast<uint16_t>(x4);
return cast<uint8_t>((x0 + 4*x1 + 6*x2 + 4*x3 + x4 + 8)/16);
}
class Blur : public Generator<Blur> {
public:
Input<Buffer<uint8_t>> input{"input", 3};
Output<Buffer<uint8_t>> blur{"blur", 3};
std::function<void()> schedule;
void generate() {
Var x{"x"}, y{"y"}, c{"c"};
Func input_bounded{"input_bounded"};
input_bounded(x, y, c) = BoundaryConditions::repeat_edge(input)(x, y, c);
Func blur_y{"blur_y"};
blur_y(x, y, c) = blur5(input_bounded(x, y - 2, c),
input_bounded(x, y - 1, c),
input_bounded(x, y, c),
input_bounded(x, y + 1, c),
input_bounded(x, y + 2, c));
blur(x, y, c) = blur5(blur_y(x - 2, y, c),
blur_y(x - 1, y, c),
blur_y(x, y, c),
blur_y(x + 1, y, c),
blur_y(x + 2, y, c));
schedule = [=]() mutable {
blur.bound(c, 0, 3);
input.dim(2).set_bounds(0, 3);
if (get_target().features_any_of({Target::HVX_64, Target::HVX_128})) {
const int vector_size = get_target().has_feature(Target::HVX_128) ? 128 : 64;
Var yo("yo");
blur.compute_root()
.hexagon()
.prefetch(input, y, 2)
.split(y, yo, y, 128).parallel(yo)
.vectorize(x, vector_size * 2, TailStrategy::RoundUp);
blur_y
.compute_at(blur, y)
.vectorize(x, vector_size, TailStrategy::RoundUp);
input_bounded
.compute_at(blur, y)
.store_at(blur, yo)
.align_storage(x, 64)
.fold_storage(y, 8)
.vectorize(x, vector_size, TailStrategy::RoundUp);
input.dim(0).set_bounds(0, (input.dim(0).extent()/vector_size)*vector_size);
blur.dim(0).set_bounds(0, (blur.dim(0).extent()/vector_size)*vector_size);
for (int i = 1; i < 3; i++) {
input.dim(i).set_stride((input.dim(i).stride()/vector_size)*vector_size);
blur.dim(i).set_stride((blur.dim(i).stride()/vector_size)*vector_size);
}
} else {
const int vector_size = natural_vector_size<uint8_t>();
blur.compute_root()
.parallel(y, 16)
.vectorize(x, vector_size);
blur_y.compute_at(blur, y)
.vectorize(x, vector_size);
}
};
}
};
HALIDE_REGISTER_GENERATOR(Blur, "blur");