This source file includes the following definitions.
- test_deinterleave
 
- test_interleave
 
- main
 
#include "Halide.h"
#include <cstdio>
#include <cstdlib>
#include <memory>
#include "halide_benchmark.h"
using namespace Halide;
using namespace Halide::Tools;
// Benchmarks a Halide copy from an interleaved 8-bit, 3-channel image into a
// planar destination and then into a semi-planar destination. For each layout
// it prints the measured bandwidth and checks every output pixel; a wrong
// pixel is reported on stderr and the process aborts.
void test_deinterleave() {
    // Width and height of the test images.
    const int image_size = 1 << 12;

    ImageParam src(UInt(8), 3);
    Func dst;
    Var x, y, c;
    dst(x, y, c) = src(x, y, c);

    // Input constraints: x has stride 3; c has stride 1 and spans [0, 3).
    src.dim(0).set_stride(3)
        .dim(2).set_stride(1).set_bounds(0, 3);

    // Output constraints: x has stride 1 and there are exactly 3 channels.
    dst.output_buffer()
        .dim(0).set_stride(1)
        .dim(2).set_extent(3);

    // Loop order c, x, y with c unrolled, and x vectorized by 16.
    dst.reorder(c, x, y).unroll(c);
    dst.vectorize(x, 16);

    Buffer<uint8_t> src_image = Buffer<uint8_t>::make_interleaved(image_size, image_size, 3);
    Buffer<uint8_t> dst_image(image_size, image_size, 3);

    // The callback takes only (x, y), so the three channels are written
    // explicitly at each site.
    src_image.for_each_element([&](int px, int py) {
        src_image(px, py, 0) = 0;
        src_image(px, py, 1) = 128;
        src_image(px, py, 2) = 255;
    });
    dst_image.fill(0);
    src.set(src_image);

    // Verifies the current contents of dst_image. Explicit comparisons are
    // used instead of assert() so the check still runs when NDEBUG is defined.
    // dst_image is captured by reference, so the check follows reassignment.
    auto check_output = [&](const char *layout) {
        dst_image.for_each_element([&](int px, int py) {
            const int c0 = dst_image(px, py, 0);
            const int c1 = dst_image(px, py, 1);
            const int c2 = dst_image(px, py, 2);
            if (c0 != 0 || c1 != 128 || c2 != 255) {
                fprintf(stderr,
                        "%s output is wrong at (%d, %d): got (%d, %d, %d), expected (0, 128, 255)\n",
                        layout, px, py, c0, c1, c2);
                std::abort();
            }
        });
    };

    dst.compile_jit();

    // Run once before timing.
    dst.realize(dst_image);
    double t1 = benchmark(1, 20, [&]() {
        dst.realize(dst_image);
    });
    printf("Interleaved to planar bandwidth %.3e byte/s.\n",
           dst_image.number_of_elements() / t1);
    check_output("Interleaved to planar");

    // Allocate with the dimensions ordered (x, c, y), then swap dimensions 1
    // and 2 so the buffer is indexed as (x, y, c) like the planar one.
    dst_image = Buffer<uint8_t>(image_size, 3, image_size);
    dst_image.transpose(1, 2);
    dst_image.fill(0);
    double t2 = benchmark(1, 20, [&]() {
        dst.realize(dst_image);
    });
    check_output("Interleaved to semi-planar");
    printf("Interleaved to semi-planar bandwidth %.3e byte/s.\n",
           dst_image.number_of_elements() / t2);
}
// Benchmarks a Halide copy from a planar 8-bit, 3-channel image into an
// interleaved destination, prints the measured bandwidth and checks every
// output pixel; a wrong pixel is reported on stderr and the process aborts.
//
// fast: when true, the channel dimension is additionally bounded to [0, 3)
//       and unrolled; when false, only the reorder and the vectorization of
//       x are applied. The lowered statement is written to
//       "rgb_interleave_fast.stmt" or "rgb_interleave_slow.stmt" accordingly.
void test_interleave(bool fast) {
    // Width and height of the test images.
    const int image_size = 1 << 12;

    ImageParam src(UInt(8), 3);
    Func dst;
    Var x, y, c;
    dst(x, y, c) = src(x, y, c);

    // Input constraints: x has stride 1 and there are exactly 3 channels.
    src.dim(0).set_stride(1).dim(2).set_extent(3);
    // Output constraints: x has stride 3; c has stride 1 and spans [0, 3).
    dst.output_buffer()
        .dim(0).set_stride(3)
        .dim(2).set_stride(1).set_bounds(0, 3);

    if (fast) {
        dst.reorder(c, x, y).bound(c, 0, 3).unroll(c);
        dst.vectorize(x, 16);
    } else {
        dst.reorder(c, x, y).vectorize(x, 16);
    }

    Buffer<uint8_t> src_image(image_size, image_size, 3);
    Buffer<uint8_t> dst_image = Buffer<uint8_t>::make_interleaved(image_size, image_size, 3);

    // The callback takes only (x, y), so the three channels are written
    // explicitly at each site.
    src_image.for_each_element([&](int px, int py) {
        src_image(px, py, 0) = 0;
        src_image(px, py, 1) = 128;
        src_image(px, py, 2) = 255;
    });
    dst_image.fill(0);
    src.set(src_image);

    // Dump the lowered statement for the selected schedule.
    const char *stmt_file = fast ? "rgb_interleave_fast.stmt" : "rgb_interleave_slow.stmt";
    dst.compile_to_lowered_stmt(stmt_file, dst.infer_arguments());

    // Run once before timing.
    dst.realize(dst_image);
    double t = benchmark(1, 20, [&]() {
        dst.realize(dst_image);
    });
    printf("Planar to interleaved bandwidth %.3e byte/s.\n",
           dst_image.number_of_elements() / t);

    // Explicit comparisons are used instead of assert() so the check still
    // runs when NDEBUG is defined.
    dst_image.for_each_element([&](int px, int py) {
        const int c0 = dst_image(px, py, 0);
        const int c1 = dst_image(px, py, 1);
        const int c2 = dst_image(px, py, 2);
        if (c0 != 0 || c1 != 128 || c2 != 255) {
            fprintf(stderr,
                    "Planar to interleaved (%s) output is wrong at (%d, %d): "
                    "got (%d, %d, %d), expected (0, 128, 255)\n",
                    fast ? "fast" : "slow", px, py, c0, c1, c2);
            std::abort();
        }
    });
}
// Entry point: runs the deinterleave benchmark, then the interleave benchmark
// with fast == false followed by fast == true, and prints "Success!".
int main(int argc, char **argv) {
    // The command-line arguments are not used.
    (void)argc;
    (void)argv;

    test_deinterleave();

    const bool schedule_variants[] = {false, true};
    for (const bool fast : schedule_variants) {
        test_interleave(fast);
    }

    printf("Success!\n");
    return 0;
}