root/test/performance/memcpy.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. main

#include "Halide.h"
#include "halide_benchmark.h"
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstring>

using namespace Halide;
using namespace Halide::Tools;

// Benchmarks a Halide-generated 1D byte copy against the system memcpy.
//
// Builds the pipeline dst(x) = src(x), vectorized by 32, writes its assembly
// to "halide_memcpy.s", JIT-compiles it, and then times both copies over the
// same pair of buffers with benchmark().
//
// Returns 0 on success, or -1 if the Halide copy produced wrong bytes or took
// more than 3x as long as memcpy. argc and argv are unused.
int main(int argc, char **argv) {
    ImageParam src(UInt(8), 1);
    Func dst;
    Var x;
    dst(x) = src(x);

    // buffer_size below is not a multiple of 32, so the vectorized loop has a
    // partial tail; the tail strategy chosen here governs how it is handled.
    dst.vectorize(x, 32, TailStrategy::GuardWithIf);

    // Emit the assembly for manual inspection before JIT-compiling.
    dst.compile_to_assembly("halide_memcpy.s", {src}, "halide_memcpy");
    dst.compile_jit();

    const int32_t buffer_size = 12345678;

    Buffer<uint8_t> input(buffer_size);
    Buffer<uint8_t> output(buffer_size);

    // Give the input a known pattern and the output its bitwise complement,
    // so that neither copy reads indeterminate bytes and every byte the
    // Halide pipeline fails to write is guaranteed to show up as a mismatch.
    uint8_t *in_bytes = input.data();
    uint8_t *out_bytes = output.data();
    for (int32_t i = 0; i < buffer_size; i++) {
        in_bytes[i] = static_cast<uint8_t>(i);
        out_bytes[i] = static_cast<uint8_t>(~in_bytes[i]);
    }

    src.set(input);

    double t1 = benchmark(10, 10, [&]() {
        dst.realize(output);
    });

    // Validate the Halide result now, before the memcpy benchmark below
    // overwrites the output buffer with a correct copy of its own.
    if (std::memcmp(output.data(), input.data(), buffer_size) != 0) {
        printf("Halide memcpy produced incorrect output.\n");
        return -1;
    }

    double t2 = benchmark(10, 10, [&]() {
        std::memcpy(output.data(), input.data(), input.width());
    });

    // t1 and t2 are treated as seconds per copy, giving bytes per second.
    printf("system memcpy: %.3e byte/s\n", buffer_size / t2);
    printf("halide memcpy: %.3e byte/s\n", buffer_size / t1);

    // memcpy will win by a little bit for large inputs because it uses
    // streaming stores, so only fail when Halide is more than 3x slower.
    if (t1 > t2 * 3) {
        printf("Halide memcpy is slower than it should be.\n");
        return -1;
    }

    printf("Success!\n");
    return 0;
}

/* [<][>][^][v][top][bottom][index][help] */