This source file includes the following definitions.
- blurGPUScheduleEnumMap
- build
#include "Halide.h"
namespace {
/// GPU scheduling strategies that HalideBlur::build() can apply to the blur
/// pipeline. The active strategy is chosen through the "schedule" generator
/// parameter.
enum class BlurGPUSchedule {
    /// Only blur_y is tiled for the GPU; blur_x receives no directive.
    Inline,

    /// blur_y is tiled for the GPU and blur_x is computed at blur_y's x loop.
    Cache,

    /// blur_y's y loop is split by tile_y and the inner rows are unrolled
    /// before GPU tiling.
    Slide,

    /// Same as Slide, with blur_y additionally vectorized along x first.
    SlideVectorize,
};
std::map<std::string, BlurGPUSchedule> blurGPUScheduleEnumMap() {
return {
{"inline", BlurGPUSchedule::Inline},
{"cache", BlurGPUSchedule::Cache},
{"slide", BlurGPUSchedule::Slide},
{"slide_vector", BlurGPUSchedule::SlideVectorize},
};
};
/// Halide generator for a 3x3 box blur over a two-dimensional UInt(16) image.
///
/// The blur is written as two passes: blur_x averages three horizontally
/// adjacent input pixels, and blur_y averages three vertically adjacent
/// blur_x values. The window is anchored at (x, y) and reaches to
/// (x + 2, y + 2); it is not centred on the output pixel.
class HalideBlur : public Halide::Generator<HalideBlur> {
public:
    /// GPU schedule to use when the target has a GPU feature.
    /// Defaults to SlideVectorize.
    GeneratorParam<BlurGPUSchedule> schedule{
        "schedule", BlurGPUSchedule::SlideVectorize, blurGPUScheduleEnumMap()};

    /// Tile extent in x handed to gpu_tile (default 32).
    GeneratorParam<int> tile_x{"tile_x", 32};

    /// Tile extent in y for Inline/Cache; split factor of the y loop for the
    /// Slide variants (default 8).
    GeneratorParam<int> tile_y{"tile_y", 8};

    /// Source image: two dimensions, UInt(16) elements.
    ImageParam input{UInt(16), 2, "input"};

    /// Defines the blur algorithm and applies a schedule to it.
    ///
    /// @return The scheduled blur_y Func, i.e. the pipeline output.
    Func build() {
        Func blur_x("blur_x"), blur_y("blur_y");
        Var x("x"), y("y"), xi("xi"), yi("yi");

        // Algorithm: horizontal three-tap average, then vertical three-tap
        // average of the horizontal result.
        const auto row_sum = input(x, y) + input(x + 1, y) + input(x + 2, y);
        blur_x(x, y) = row_sum / 3;

        const auto column_sum = blur_x(x, y) + blur_x(x, y + 1) + blur_x(x, y + 2);
        blur_y(x, y) = column_sum / 3;

        if (!get_target().has_gpu_feature()) {
            // CPU schedule: strips of 8 rows run in parallel, 8-wide vectors
            // along x.
            blur_y.split(y, y, yi, 8);
            blur_y.parallel(y);
            blur_y.vectorize(x, 8);

            // blur_x storage is allocated per strip (outer y) but its values
            // are produced per row (yi) within that strip.
            blur_x.store_at(blur_y, y);
            blur_x.compute_at(blur_y, yi);
            blur_x.vectorize(x, 8);

            return blur_y;
        }

        // GPU schedules.
        switch (schedule) {
        case BlurGPUSchedule::Inline:
            // Only blur_y is scheduled; blur_x is left without directives.
            blur_y.gpu_tile(x, y, xi, yi, tile_x, tile_y);
            break;

        case BlurGPUSchedule::Cache:
            // Tile blur_y, then compute blur_x at blur_y's x loop and map
            // blur_x's own x/y loops onto GPU threads.
            blur_y.gpu_tile(x, y, xi, yi, tile_x, tile_y);
            blur_x.compute_at(blur_y, x);
            blur_x.gpu_threads(x, y);
            break;

        case BlurGPUSchedule::Slide: {
            // Split y into strips of tile_y rows, move the strip rows
            // innermost and unroll them, then tile with a y extent of 1.
            // NOTE(review): presumably this lets the unrolled rows share
            // inlined blur_x work -- confirm against generated code.
            Var strip_row("y_inner");
            blur_y.split(y, y, strip_row, tile_y);
            blur_y.reorder(strip_row, x);
            blur_y.unroll(strip_row);
            blur_y.gpu_tile(x, y, xi, yi, tile_x, 1);
            break;
        }

        case BlurGPUSchedule::SlideVectorize: {
            // Vector width = how many shorts fit in an int (2 where int is
            // 32 bits and short is 16 bits).
            int lanes = sizeof(int) / sizeof(short);
            Var strip_row("y_inner");

            // Vectorize along x first, then apply the same strip/unroll/tile
            // sequence as the Slide schedule.
            blur_y.vectorize(x, lanes);
            blur_y.split(y, y, strip_row, tile_y);
            blur_y.reorder(strip_row, x);
            blur_y.unroll(strip_row);
            blur_y.gpu_tile(x, y, xi, yi, tile_x, 1);
            break;
        }

        default:
            // Unrecognised value: no scheduling directives are applied.
            break;
        }

        return blur_y;
    }
};
// Namespace-scope registration object: constructing it (during static
// initialization) registers HalideBlur via Halide::RegisterGenerator under the
// generator name "halide_blur".
Halide::RegisterGenerator<HalideBlur> register_me{"halide_blur"};
}