root/apps/hexagon_benchmarks/process.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. clamp

#ifndef PROCESS_H
#define PROCESS_H

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>

#include "HalideRuntimeHexagonHost.h"
#include "HalideBuffer.h"

#ifdef CONV3X3A16
#include "conv3x3a16_hvx128.h"
#include "conv3x3a16_hvx64.h"
#include "conv3x3a16_cpu.h"
#endif

#ifdef DILATE3X3
#include "dilate3x3_hvx128.h"
#include "dilate3x3_hvx64.h"
#include "dilate3x3_cpu.h"
#endif

#ifdef MEDIAN3X3
#include "median3x3_hvx128.h"
#include "median3x3_hvx64.h"
#include "median3x3_cpu.h"
#endif

#ifdef GAUSSIAN5X5
#include "gaussian5x5_hvx128.h"
#include "gaussian5x5_hvx64.h"
#include "gaussian5x5_cpu.h"
#endif

#ifdef SOBEL
#include "sobel_hvx128.h"
#include "sobel_hvx64.h"
#include "sobel_cpu.h"
#endif

#ifdef CONV3X3A32
#include "conv3x3a32_hvx128.h"
#include "conv3x3a32_hvx64.h"
#include "conv3x3a32_cpu.h"
#endif

// Selects which generated variant of a pipeline a descriptor's run() calls:
// the *_hvx64, *_hvx128 or *_cpu entry point.
enum bmark_run_mode_t {
    hvx64 = 1,  // numbering starts at 1
    hvx128,     // == 2
    cpu         // == 3
};

// Limits val to the range [min, max]: yields min when val is below min,
// otherwise max when val is above max, otherwise val unchanged.
template <typename T>
T clamp(T val, T min, T max) {
    return (val < min) ? min : ((val > max) ? max : val);
}

// Abstract interface shared by all benchmark pipeline descriptors.
struct PipelineDescriptorBase {
    // Virtual so that destroying a descriptor through a base pointer or
    // reference runs the derived destructor (and releases its members).
    virtual ~PipelineDescriptorBase() = default;

    // Prepares the descriptor's buffers and test data.
    virtual void init() = 0;
    // Returns the pipeline's name as a C string.
    virtual const char * name() = 0;
    // Executes the pipeline variant selected by mode and returns its status
    // code.
    virtual int run(bmark_run_mode_t mode) = 0;
    // Checks the pipeline output for a W x H image.
    virtual bool verify(int W, int H) = 0;
    // Reports whether the pipeline is available in this build.
    virtual bool defined() = 0;
    // Releases the resources acquired by init().
    virtual void finalize() = 0;
};

class Conv3x3a16Descriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;
    Halide::Runtime::Buffer<int8_t> i8_mask;

public:
    Conv3x3a16Descriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                         u8_out(nullptr, W, H, 2),
                                         i8_mask(nullptr, 3, 3, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());
        i8_mask.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);

        i8_mask(0, 0) = 1;
        i8_mask(1, 0) = -4;
        i8_mask(2, 0) = 7;

        i8_mask(0, 1) = 2;
        i8_mask(1, 1) = -5;
        i8_mask(2, 1) = 8;

        i8_mask(0, 2) = 3;
        i8_mask(1, 2) = -6;
        i8_mask(2, 2) = 9;
    }


    const char *name() { return "conv3x3a16"; }

    bool defined() {
#ifdef CONV3X3A16
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            int16_t sum = 0;
            for (int ry = -1; ry <= 1; ry++) {
                for (int rx = -1; rx <= 1; rx++) {
                    sum += static_cast<int16_t>(u8_in(clamp(x+rx, 0, W-1), clamp(y+ry, 0, H-1)))
                                                * static_cast<int16_t>(i8_mask(rx+1, ry+1));
                }
            }
            sum = sum >> 4;
            sum = clamp<int16_t>(sum, 0, 255);
            uint8_t out_xy = u8_out(x, y);
            if (sum != out_xy) {
                printf("Conv3x3a16: Mismatch at %d %d : %d != %d\n", x, y, out_xy, sum);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef CONV3X3A16
        if (mode == bmark_run_mode_t::hvx64) {
            return conv3x3a16_hvx64(u8_in, i8_mask, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return conv3x3a16_hvx128(u8_in, i8_mask, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return conv3x3a16_cpu(u8_in, i8_mask, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        i8_mask.device_free();
        u8_out.device_free();
    }
};

class Dilate3x3Descriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;
 private:
    static uint8_t max3(uint8_t a, uint8_t b, uint8_t c) {
        return std::max(std::max(a, b), c);
    }
 public:
     Dilate3x3Descriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                         u8_out(nullptr, W, H, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);
    }

    const char *name() { return "dilate3x3"; }

    bool defined() {
#ifdef DILATE3X3
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            auto u8_in_bounded = [&](int x_, int y_) { return u8_in(clamp(x_, 0, W-1), clamp(y_, 0, H-1)); };

            uint8_t max_y[3];
            max_y[0] = max3(u8_in_bounded(x-1, y-1), u8_in_bounded(x-1, y), u8_in_bounded(x-1, y+1));

            max_y[1] = max3(u8_in_bounded(x, y-1), u8_in_bounded(x, y), u8_in_bounded(x, y+1));

            max_y[2] = max3(u8_in_bounded(x+1, y-1), u8_in_bounded(x+1, y), u8_in_bounded(x+1, y+1));

            uint8_t max_val = max3(max_y[0], max_y[1], max_y[2]);

            uint8_t out_xy = u8_out(x, y);
            if (max_val != out_xy) {
                printf("Dilate3x3: Mismatch at %d %d : %d != %d\n", x, y, out_xy, max_val);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef DILATE3X3
        if (mode == bmark_run_mode_t::hvx64) {
            return dilate3x3_hvx64(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return dilate3x3_hvx128(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return dilate3x3_cpu(u8_in, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        u8_out.device_free();
    }
};

class Median3x3Descriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;

 public:
      Median3x3Descriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                          u8_out(nullptr, W, H, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);
    }

    const char *name() { return "median3x3"; };

    bool defined() {
#ifdef MEDIAN3X3
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            auto u8_in_bounded = [&](int x_, int y_) { return u8_in(clamp(x_, 0, W-1), clamp(y_, 0, H-1)); };

            uint8_t inp9[9] = { u8_in_bounded(x-1, y-1), u8_in_bounded(x, y-1), u8_in_bounded(x+1, y-1),
                                u8_in_bounded(x-1, y), u8_in_bounded(x, y), u8_in_bounded(x+1, y),
                                u8_in_bounded(x-1, y+1), u8_in_bounded(x, y+1), u8_in_bounded(x+1, y+1) };

            std::nth_element(&inp9[0], &inp9[4], &inp9[9]);

            uint8_t median_val = inp9[4];
            uint8_t out_xy = u8_out(x, y);
            if (median_val != out_xy) {
                printf("Median3x3: Mismatch at %d %d : %d != %d\n", x, y, out_xy, median_val);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef MEDIAN3X3
        if (mode == bmark_run_mode_t::hvx64) {
            return median3x3_hvx64(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return median3x3_hvx128(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return median3x3_cpu(u8_in, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        u8_out.device_free();
    }
};

class Gaussian5x5Descriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;

 public:
     Gaussian5x5Descriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                           u8_out(nullptr, W, H, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);
    }

    const char *name() { return "gaussian5x5"; };

    bool defined() {
#ifdef GAUSSIAN5X5
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        const int16_t coeffs[5] = { 1, 4, 6, 4, 1 };
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            int16_t blur = 0;
            for (int rx = -2; rx < 3; ++rx) {
                int16_t blur_y = 0;
                for (int ry = -2; ry < 3; ++ry) {
                    int16_t val = static_cast<int16_t>(u8_in(clamp(x+rx, 0, W-1), clamp(y+ry, 0, H-1)));
                    blur_y += val * coeffs[ry + 2];
                }
                blur += blur_y * coeffs[rx + 2];
            }
            uint8_t blur_val = blur >> 8;
            uint8_t out_xy = u8_out(x, y);
            if (blur_val != out_xy) {
                printf("Gaussian5x5: Mismatch at %d %d : %d != %d\n", x, y, out_xy, blur_val);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef GAUSSIAN5X5
        if (mode == bmark_run_mode_t::hvx64) {
            return gaussian5x5_hvx64(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return gaussian5x5_hvx128(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return gaussian5x5_cpu(u8_in, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        u8_out.device_free();
    }
};

class SobelDescriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;

 public:
     SobelDescriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                     u8_out(nullptr, W, H, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);
    }

    const char *name() { return "sobel"; };

    uint16_t sobel3(uint16_t a, uint16_t b, uint16_t c) {
        return (a + 2*b + c);
    }

    bool defined() {
#ifdef SOBEL
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            auto u16_in_bounded = [&](int x_, int y_) { return static_cast<uint16_t>(u8_in(clamp(x_, 0, W-1), clamp(y_, 0, H-1))); };

            uint16_t sobel_x_avg0 = sobel3(u16_in_bounded(x-1, y-1), u16_in_bounded(x, y-1), u16_in_bounded(x+1, y-1));
            uint16_t sobel_x_avg1 = sobel3(u16_in_bounded(x-1, y+1), u16_in_bounded(x, y+1), u16_in_bounded(x+1, y+1));
            uint16_t sobel_x = abs(sobel_x_avg0 - sobel_x_avg1);


            uint16_t sobel_y_avg0 = sobel3(u16_in_bounded(x-1, y-1), u16_in_bounded(x-1, y), u16_in_bounded(x-1, y+1));
            uint16_t sobel_y_avg1 = sobel3(u16_in_bounded(x+1, y-1), u16_in_bounded(x+1, y), u16_in_bounded(x+1, y+1));
            uint16_t sobel_y = abs(sobel_y_avg0 - sobel_y_avg1);

            uint8_t sobel_val = static_cast<uint8_t>(clamp(sobel_x + sobel_y, 0, 255));

            uint8_t out_xy = u8_out(x, y);
            if (sobel_val != out_xy) {
                printf("Sobel: Mismatch at %d %d : %d != %d\n", x, y, out_xy, sobel_val);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef SOBEL
        if (mode == bmark_run_mode_t::hvx64) {
            return sobel_hvx64(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return sobel_hvx128(u8_in, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return sobel_cpu(u8_in, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        u8_out.device_free();
    }
};

class Conv3x3a32Descriptor : public PipelineDescriptorBase {
    Halide::Runtime::Buffer<uint8_t> u8_in, u8_out;
    Halide::Runtime::Buffer<int8_t> i8_mask;

public:
    Conv3x3a32Descriptor(int W, int H) : u8_in(nullptr, W, H, 2),
                                         u8_out(nullptr, W, H, 2),
                                         i8_mask(nullptr, 3, 3, 2) {}

    void init() {
        u8_in.device_malloc(halide_hexagon_device_interface());
        u8_out.device_malloc(halide_hexagon_device_interface());
        i8_mask.device_malloc(halide_hexagon_device_interface());

        u8_in.for_each_value([&](uint8_t &x) {
            x = static_cast<uint8_t>(rand());
        });
        u8_out.fill(0);

        i8_mask(0, 0) = 1;
        i8_mask(1, 0) = -4;
        i8_mask(2, 0) = 7;

        i8_mask(0, 1) = 2;
        i8_mask(1, 1) = -5;
        i8_mask(2, 1) = 8;

        i8_mask(0, 2) = 3;
        i8_mask(1, 2) = -6;
        i8_mask(2, 2) = 9;
    }

    const char *name() { return "conv3x3a32"; }

    bool defined() {
#ifdef CONV3X3A32
        return true;
#else
        return false;
#endif
    }

    bool verify(const int W, const int H) {
        u8_out.copy_to_host();
        u8_out.for_each_element([&](int x, int y) {
            int32_t sum = 0;
            for (int ry = -1; ry <= 1; ry++) {
                for (int rx = -1; rx <= 1; rx++) {
                    sum += static_cast<int16_t>(u8_in(clamp(x+rx, 0, W-1), clamp(y+ry, 0, H-1)))
                                                * static_cast<int16_t>(i8_mask(rx+1, ry+1));
                }
            }
            sum = sum >> 4;
            sum = clamp(sum, 0, 255);
            uint8_t out_xy = u8_out(x, y);
            if (sum != out_xy) {
                printf("Conv3x3a32: Mismatch at %d %d : %d != %d\n", x, y, out_xy, sum);
                abort();
            }
        });
        return true;
    }

    int run(bmark_run_mode_t mode) {
#ifdef CONV3X3A32
        if (mode == bmark_run_mode_t::hvx64) {
            return conv3x3a32_hvx64(u8_in, i8_mask, u8_out);
        } else if (mode == bmark_run_mode_t::hvx128) {
            return conv3x3a32_hvx128(u8_in, i8_mask, u8_out);
        } else if (mode == bmark_run_mode_t::cpu) {
            return conv3x3a32_cpu(u8_in, i8_mask, u8_out);
        }
#endif
        return 1;
    }
    void finalize() {
        u8_in.device_free();
        i8_mask.device_free();
        u8_out.device_free();
    }
};

#endif

/* [<][>][^][v][top][bottom][index][help] */