root/src/runtime/hexagon_host.cpp

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. poll_log
  2. get_remote_profiler_state
  3. get_symbol
  4. init_hexagon_runtime
  5. halide_is_hexagon_available
  6. halide_hexagon_initialize_kernels
  7. map_arguments
  8. halide_hexagon_run
  9. halide_hexagon_device_release
  10. halide_hexagon_device_malloc
  11. halide_hexagon_device_free
  12. halide_hexagon_copy_to_device
  13. halide_hexagon_copy_to_host
  14. halide_hexagon_device_sync
  15. halide_hexagon_wrap_device_handle
  16. halide_hexagon_detach_device_handle
  17. halide_hexagon_get_device_handle
  18. halide_hexagon_get_device_size
  19. halide_hexagon_device_and_host_malloc
  20. halide_hexagon_device_and_host_free
  21. halide_hexagon_power_hvx_on
  22. halide_hexagon_power_hvx_off
  23. halide_hexagon_power_hvx_off_as_destructor
  24. halide_hexagon_set_performance_mode
  25. halide_hexagon_set_performance
  26. halide_hexagon_device_interface
  27. halide_hexagon_cleanup

#include "runtime_internal.h"
#include "device_buffer_utils.h"
#include "device_interface.h"
#include "HalideRuntimeHexagonHost.h"
#include "printer.h"
#include "scoped_mutex_lock.h"

namespace Halide { namespace Runtime { namespace Internal { namespace Hexagon {

struct ion_device_handle {
    void *buffer;
    size_t size;
};

WEAK halide_mutex thread_lock = { { 0 } };

extern WEAK halide_device_interface_t hexagon_device_interface;

// Define dynamic version of hexagon_remote/halide_hexagon_remote.h
typedef struct _remote_buffer__seq_octet _remote_buffer__seq_octet;
typedef _remote_buffer__seq_octet remote_buffer;
struct _remote_buffer__seq_octet {
   unsigned char* data;
   int dataLen;
};

typedef int (*remote_initialize_kernels_fn)(const unsigned char* codeptr,
                                            int codesize,
                                            halide_hexagon_handle_t*);
typedef int (*remote_get_symbol_fn)(halide_hexagon_handle_t, const char*, int, halide_hexagon_handle_t*);
typedef int (*remote_run_fn)(halide_hexagon_handle_t, int,
                             const remote_buffer*, int, const remote_buffer*, int,
                             remote_buffer*, int);
typedef int (*remote_release_kernels_fn)(halide_hexagon_handle_t);
typedef int (*remote_poll_log_fn)(char *, int, int *);
typedef void (*remote_poll_profiler_state_fn)(int *, int *);
typedef int (*remote_power_fn)();
typedef int (*remote_power_mode_fn)(int);
typedef int (*remote_power_perf_fn)(int, unsigned int, unsigned int, int, unsigned int, unsigned int, int, int);

typedef void (*host_malloc_init_fn)();
typedef void *(*host_malloc_fn)(size_t);
typedef void (*host_free_fn)(void *);

WEAK remote_initialize_kernels_fn remote_initialize_kernels = NULL;
WEAK remote_get_symbol_fn remote_get_symbol = NULL;
WEAK remote_run_fn remote_run = NULL;
WEAK remote_release_kernels_fn remote_release_kernels = NULL;
WEAK remote_poll_log_fn remote_poll_log = NULL;
WEAK remote_poll_profiler_state_fn remote_poll_profiler_state = NULL;
WEAK remote_power_fn remote_power_hvx_on = NULL;
WEAK remote_power_fn remote_power_hvx_off = NULL;
WEAK remote_power_perf_fn remote_set_performance = NULL;
WEAK remote_power_mode_fn remote_set_performance_mode = NULL;

WEAK host_malloc_init_fn host_malloc_init = NULL;
WEAK host_malloc_init_fn host_malloc_deinit = NULL;
WEAK host_malloc_fn host_malloc = NULL;
WEAK host_free_fn host_free = NULL;

// This checks if there are any log messages available on the remote
// side. It should be called after every remote call.
WEAK void poll_log(void *user_context) {
    if (!remote_poll_log) return;

    while (true) {
        char message[1024];
        int read = 0;
        int result = remote_poll_log(&message[0], sizeof(message), &read);
        if (result != 0) {
            // Don't make this an error, otherwise we might obscure
            // more information about errors that would come later.
            print(user_context) << "Hexagon: remote_poll_log failed " << result << "\n";
            return;
        }

        if (read > 0) {
            halide_print(user_context, message);
        } else {
            break;
        }
    }
}

WEAK void get_remote_profiler_state(int *func, int *threads) {
    if (!remote_poll_profiler_state) {
        // This should only have been called if there's a remote profiler func installed.
        error(NULL) << "Hexagon: remote_poll_profiler_func not found\n";
    }

    remote_poll_profiler_state(func, threads);
}

template <typename T>
__attribute__((always_inline)) void get_symbol(void *user_context, void *host_lib, const char* name, T &sym, bool required = true) {
    debug(user_context) << "    halide_get_library_symbol('" << name << "') -> \n";
    sym = (T) halide_get_library_symbol(host_lib, name);
    debug(user_context) << "        " << (void *)sym << "\n";
    if (!sym && required) {
        error(user_context) << "Required Hexagon runtime symbol '" << name << "' not found.\n";
    }
}

// Load the hexagon remote runtime.
WEAK int init_hexagon_runtime(void *user_context) {
    if (remote_initialize_kernels && remote_run && remote_release_kernels) {
        // Already loaded.
        return 0;
    }

    // The "support library" for Hexagon is essentially a way to delegate Hexagon
    // code execution based on the runtime; devices with Hexagon hardware will
    // simply provide conduits for execution on that hardware, while test/desktop/etc
    // environments can instead connect a simulator via the API.
    // Load the .so for Linux or Android, and if that fails try the .dll
    // as we may be running the windows hosted simulator.
    void *host_lib = halide_load_library("libhalide_hexagon_host.so");
    if (!host_lib) {
        host_lib = halide_load_library("libhalide_hexagon_host.dll");
    }


    debug(user_context) << "Hexagon: init_hexagon_runtime (user_context: " << user_context << ")\n";

    // Get the symbols we need from the library.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_initialize_kernels_v3", remote_initialize_kernels);
    if (!remote_initialize_kernels) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_remote_get_symbol_v4", remote_get_symbol);
    if (!remote_get_symbol) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_remote_run", remote_run);
    if (!remote_run) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_remote_release_kernels_v2", remote_release_kernels);
    if (!remote_release_kernels) return -1;

    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_init", host_malloc_init);
    if (!host_malloc_init) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc_deinit", host_malloc_deinit);
    if (!host_malloc_deinit) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_host_malloc", host_malloc);
    if (!host_malloc) return -1;
    get_symbol(user_context, host_lib, "halide_hexagon_host_free", host_free);
    if (!host_free) return -1;

    // These symbols are optional.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_log", remote_poll_log, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_poll_profiler_state", remote_poll_profiler_state, /* required */ false);

    // If these are unavailable, then the runtime always powers HVX on and so these are not necessary.
    get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_on", remote_power_hvx_on, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_power_hvx_off", remote_power_hvx_off, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance", remote_set_performance, /* required */ false);
    get_symbol(user_context, host_lib, "halide_hexagon_remote_set_performance_mode", remote_set_performance_mode, /* required */ false);

    host_malloc_init();

    return 0;
}

// Structure to hold the state of a module attached to the context.
// Also used as a linked-list to keep track of all the different
// modules that are attached to a context in order to release them all
// when then context is released.
struct module_state {
    halide_hexagon_handle_t module;
    size_t size;
    module_state *next;
};
WEAK module_state *state_list = NULL;

}}}}  // namespace Halide::Runtime::Internal::Hexagon

using namespace Halide::Runtime::Internal;
using namespace Halide::Runtime::Internal::Hexagon;

extern "C" {

WEAK bool halide_is_hexagon_available(void *user_context) {
    int result = init_hexagon_runtime(user_context);
    return result == 0;
}

WEAK int halide_hexagon_initialize_kernels(void *user_context, void **state_ptr,
                                           const uint8_t *code, uint64_t code_size) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;
    debug(user_context) << "Hexagon: halide_hexagon_initialize_kernels (user_context: " << user_context
                        << ", state_ptr: " << state_ptr
                        << ", *state_ptr: " << *state_ptr
                        << ", code: " << code
                        << ", code_size: " << (int)code_size << ")\n";
    halide_assert(user_context, state_ptr != NULL);

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    // Create the state object if necessary. This only happens once,
    // regardless of how many times halide_hexagon_initialize_kernels
    // or halide_hexagon_device_release is called.
    // halide_hexagon_device_release traverses this list and releases
    // the module objects, but it does not modify the list nodes
    // created/inserted here.
    ScopedMutexLock lock(&thread_lock);

    module_state **state = (module_state**)state_ptr;
    if (!(*state)) {
        debug(user_context) << "    allocating module state -> \n";
        *state = (module_state*)malloc(sizeof(module_state));
        debug(user_context) << "        " << *state << "\n";
        (*state)->module = 0;
        (*state)->size = 0;
        (*state)->next = state_list;
        state_list = *state;
    }

    // Create the module itself if necessary.
    if (!(*state)->module) {
        debug(user_context) << "    halide_remote_initialize_kernels -> ";
        halide_hexagon_handle_t module = 0;
        result = remote_initialize_kernels(code, code_size, &module);
        poll_log(user_context);
        if (result == 0) {
            debug(user_context) << "        " << module << "\n";
            (*state)->module = module;
            (*state)->size = code_size;
        } else {
            debug(user_context) << "        " << result << "\n";
            error(user_context) << "Initialization of Hexagon kernels failed\n";
        }
    } else {
        debug(user_context) << "    re-using existing module " << (*state)->module << "\n";
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return result != 0 ? -1 : 0;
}
namespace {

// Prepare an array of remote_buffer arguments, mapping buffers if
// necessary. Only arguments with flags&flag_mask == flag_value are
// added to the mapped_args array. Returns the number of arguments
// mapped, or a negative number on error.
WEAK int map_arguments(void *user_context, int arg_count,
                       uint64_t arg_sizes[], void *args[], int arg_flags[], int flag_mask, int flag_value,
                       remote_buffer *mapped_args) {
    int mapped_count = 0;
    for (int i = 0; i < arg_count; i++) {
        if ((arg_flags[i] & flag_mask) != flag_value) continue;
        remote_buffer &mapped_arg = mapped_args[mapped_count++];
        if (arg_flags[i] != 0) {
            // This is a buffer, map it and put the mapped buffer into
            // the result.
            halide_assert(user_context, arg_sizes[i] == sizeof(uint64_t));
            uint64_t device_handle = *((uint64_t *)args[i]);
            ion_device_handle *ion_handle = reinterpret<ion_device_handle *>(device_handle);
            debug(user_context) << i << ", " << device_handle << "\n";
            mapped_arg.data = reinterpret_cast<uint8_t*>(ion_handle->buffer);
            mapped_arg.dataLen = ion_handle->size;
        } else {
            // This is a scalar, just put the pointer/size in the result.
            mapped_arg.data = (uint8_t*)args[i];
            mapped_arg.dataLen = arg_sizes[i];
        }
    }
    return mapped_count;
}

}  // namespace

WEAK int halide_hexagon_run(void *user_context,
                            void *state_ptr,
                            const char *name,
                            halide_hexagon_handle_t* function,
                            uint64_t arg_sizes[],
                            void *args[],
                            int arg_flags[]) {
    halide_assert(user_context, state_ptr != NULL);
    halide_assert(user_context, function != NULL);
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    halide_hexagon_handle_t module = state_ptr ? ((module_state *)state_ptr)->module : 0;
    debug(user_context) << "Hexagon: halide_hexagon_run ("
                        << "user_context: " << user_context << ", "
                        << "state_ptr: " << state_ptr << " (" << module << "), "
                        << "name: " << name << ", "
                        << "function: " << function << " (" << *function << "))\n";

    // If we haven't gotten the symbol for this function, do so now.
    if (*function == 0) {
        debug(user_context) << "    halide_hexagon_remote_get_symbol " << name << " -> ";
        halide_hexagon_handle_t sym = 0;
        int result = remote_get_symbol(module, name, strlen(name) + 1, &sym);
        *function = result == 0 ? sym : 0;
        poll_log(user_context);
        debug(user_context) << "        " << *function << "\n";
        if (*function == 0) {
            error(user_context) << "Failed to find function " << name << " in module.\n";
            return -1;
        }
    }

    // Allocate some remote_buffer objects on the stack.
    int arg_count = 0;
    while(arg_sizes[arg_count] > 0) arg_count++;
    remote_buffer *mapped_buffers =
        (remote_buffer *)__builtin_alloca(arg_count * sizeof(remote_buffer));

    // Map the arguments.
    // First grab the input buffers (bit 0 of flags is set).
    remote_buffer *input_buffers = mapped_buffers;
    int input_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x1,
                                           input_buffers);
    if (input_buffer_count < 0) return input_buffer_count;

    // Then the output buffers (bit 1 of flags is set).
    remote_buffer *output_buffers = input_buffers + input_buffer_count;
    int output_buffer_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x2, 0x2,
                                            output_buffers);
    if (output_buffer_count < 0) return output_buffer_count;

    // And the input scalars (neither bits 0 or 1 of flags is set).
    remote_buffer *input_scalars = output_buffers + output_buffer_count;
    int input_scalar_count = map_arguments(user_context, arg_count, arg_sizes, args, arg_flags, 0x3, 0x0,
                                           input_scalars);
    if (input_scalar_count < 0) return input_scalar_count;

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    // If remote profiling is supported, tell the profiler to call
    // get_remote_profiler_func to retrieve the current
    // func. Otherwise leave it alone - the cost of remote running
    // will be billed to the calling Func.
    if (remote_poll_profiler_state) {
        halide_profiler_get_state()->get_remote_profiler_state = get_remote_profiler_state;
    }

    // Call the pipeline on the device side.
    debug(user_context) << "    halide_hexagon_remote_run -> ";
    result = remote_run(module, *function,
                        input_buffers, input_buffer_count,
                        output_buffers, output_buffer_count,
                        input_scalars, input_scalar_count);
    poll_log(user_context);
    debug(user_context) << "        " << result << "\n";
    if (result != 0) {
        error(user_context) << "Hexagon pipeline failed.\n";
        return result;
    }

    halide_profiler_get_state()->get_remote_profiler_state = NULL;

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return result != 0 ? -1 : 0;
}

WEAK int halide_hexagon_device_release(void *user_context) {
    debug(user_context)
        << "Hexagon: halide_hexagon_device_release (user_context: " <<  user_context << ")\n";

    ScopedMutexLock lock(&thread_lock);

    // Release all of the remote side modules.
    module_state *state = state_list;
    while (state) {
        if (state->module) {
            debug(user_context) << "    halide_remote_release_kernels " << state
                                << " (" << state->module << ") -> ";
            int result = remote_release_kernels(state->module);
            poll_log(user_context);
            debug(user_context) << "        " << result << "\n";
            state->module = 0;
            state->size = 0;
        }
        state = state->next;
    }
    state_list = NULL;

    return 0;
}

// When allocations for Hexagon are at least as large as this
// threshold, use an ION allocation (to get zero copy). If the
// allocation is smaller, use a standard allocation instead.  This is
// done because allocating an entire page for a small allocation is
// wasteful, and the copy is not significant.  Additionally, the
// FastRPC interface can probably do a better job with many small
// arguments than simply mapping the pages.
static const int min_ion_allocation_size = 4096;

WEAK int halide_hexagon_device_malloc(void *user_context, halide_buffer_t *buf) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    debug(user_context)
        << "Hexagon: halide_hexagon_device_malloc (user_context: " << user_context
        << ", buf: " << buf << ")\n";

    if (buf->device) {
        // This buffer already has a device allocation
        return 0;
    }

    size_t size = buf->size_in_bytes();
    halide_assert(user_context, size != 0);

    // Hexagon code generation generates clamped ramp loads in a way
    // that requires up to an extra vector beyond the end of the
    // buffer to be legal to access.
    size += 128;

    for (int i = 0; i < buf->dimensions; i++) {
        halide_assert(user_context, buf->dim[i].stride >= 0);
    }

    debug(user_context) << "    allocating buffer of " << (uint64_t)size << " bytes\n";

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    void *ion;
    if (size >= min_ion_allocation_size) {
        debug(user_context) << "    host_malloc len=" << (uint64_t)size << " -> ";
        ion = host_malloc(size);
        debug(user_context) << "        " << ion << "\n";
        if (!ion) {
            error(user_context) << "host_malloc failed\n";
            return -1;
        }
    } else {
        debug(user_context) << "    halide_malloc size=" << (uint64_t)size << " -> ";
        ion = halide_malloc(user_context, size);
        debug(user_context) << "        " << ion << "\n";
        if (!ion) {
            error(user_context) << "halide_malloc failed\n";
            return -1;
        }
    }

    int err = halide_hexagon_wrap_device_handle(user_context, buf, ion, size);
    if (err != 0) {
        if (size >= min_ion_allocation_size) {
            host_free(ion);
        } else {
            halide_free(user_context, ion);
        }
        return err;
    }

    if (!buf->host) {
        // If the host pointer has also not been allocated yet, set it to
        // the ion buffer. This buffer will be zero copy.
        buf->host = (uint8_t *)ion;
        debug(user_context) << "    host <- " << buf->host << "\n";
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return 0;
}

WEAK int halide_hexagon_device_free(void *user_context, halide_buffer_t* buf) {
    debug(user_context)
        << "Hexagon: halide_hexagon_device_free (user_context: " << user_context
        << ", buf: " << buf << ")\n";

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    uint64_t size = halide_hexagon_get_device_size(user_context, buf);
    void *ion = halide_hexagon_detach_device_handle(user_context, buf);
    if (size >= min_ion_allocation_size) {
        debug(user_context) << "    host_free ion=" << ion << "\n";
        host_free(ion);
    } else {
        debug(user_context) << "    halide_free ion=" << ion << "\n";
        halide_free(user_context, ion);
    }

    if (buf->host == ion) {
        // If we also set the host pointer, reset it.
        buf->host = NULL;
        debug(user_context) << "    host <- 0x0\n";
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    // This is to match what the default implementation of halide_device_free does.
    buf->set_device_dirty(false);
    return 0;
}

WEAK int halide_hexagon_copy_to_device(void *user_context, halide_buffer_t* buf) {
    int err = halide_hexagon_device_malloc(user_context, buf);
    if (err) {
        return err;
    }

    debug(user_context)
        <<  "Hexagon: halide_hexagon_copy_to_device (user_context: " << user_context
        << ", buf: " << buf << ")\n";

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, buf->host && buf->device);
    device_copy c = make_host_to_device_copy(buf);

    // Get the descriptor associated with the ion buffer.
    c.dst = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
    copy_memory(c, user_context);

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return 0;
}

WEAK int halide_hexagon_copy_to_host(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context)
        << "Hexagon: halide_hexagon_copy_to_host (user_context: " << user_context
        << ", buf: " << buf << ")\n";

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    halide_assert(user_context, buf->host && buf->device);
    device_copy c = make_device_to_host_copy(buf);

    // Get the descriptor associated with the ion buffer.
    c.src = reinterpret<uintptr_t>(halide_hexagon_get_device_handle(user_context, buf));
    copy_memory(c, user_context);

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return 0;
}

WEAK int halide_hexagon_device_sync(void *user_context, struct halide_buffer_t *) {
    debug(user_context)
        << "Hexagon: halide_hexagon_device_sync (user_context: " << user_context << ")\n";
    // Nothing to do.
    return 0;
}

WEAK int halide_hexagon_wrap_device_handle(void *user_context, struct halide_buffer_t *buf,
                                           void *ion_buf, uint64_t size) {
    halide_assert(user_context, buf->device == 0);
    if (buf->device != 0) {
        return -2;
    }

    ion_device_handle *handle = (ion_device_handle *)malloc(sizeof(ion_device_handle));
    if (!handle) {
        return -1;
    }
    handle->buffer = ion_buf;
    handle->size = size;
    buf->device_interface = &hexagon_device_interface;
    buf->device_interface->use_module();
    buf->device = reinterpret<uint64_t>(handle);
    return 0;
}

WEAK void *halide_hexagon_detach_device_handle(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device == 0) {
        return NULL;
    }
    halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
    ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
    void *ion_buf = handle->buffer;
    free(handle);

    buf->device_interface->release_module();
    buf->device = 0;
    buf->device_interface = NULL;
    return ion_buf;
}

WEAK void *halide_hexagon_get_device_handle(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device == 0) {
        return NULL;
    }
    halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
    ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
    return handle->buffer;
}

WEAK uint64_t halide_hexagon_get_device_size(void *user_context, struct halide_buffer_t *buf) {
    if (buf->device == 0) {
        return 0;
    }
    halide_assert(user_context, buf->device_interface == &hexagon_device_interface);
    ion_device_handle *handle = reinterpret<ion_device_handle *>(buf->device);
    return handle->size;
}

WEAK int halide_hexagon_device_and_host_malloc(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context) << "halide_hexagon_device_and_host_malloc called.\n";
    int result = halide_hexagon_device_malloc(user_context, buf);
    if (result == 0) {
        buf->host = (uint8_t *)halide_hexagon_get_device_handle(user_context, buf);
    }
    return result;
}

WEAK int halide_hexagon_device_and_host_free(void *user_context, struct halide_buffer_t *buf) {
    debug(user_context) << "halide_hexagon_device_and_host_free called.\n";
    halide_hexagon_device_free(user_context, buf);
    buf->host = NULL;
    return 0;
}

WEAK int halide_hexagon_power_hvx_on(void *user_context) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    debug(user_context) << "halide_hexagon_power_hvx_on\n";
    if (!remote_power_hvx_on) {
        // The function is not available in this version of the
        // runtime, this runtime always powers HVX on.
        return 0;
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    debug(user_context) << "    remote_power_hvx_on -> ";
    result = remote_power_hvx_on();
    debug(user_context) << "        " << result << "\n";
    if (result != 0) {
        error(user_context) << "remote_power_hvx_on failed.\n";
        return result;
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return 0;
}

WEAK int halide_hexagon_power_hvx_off(void *user_context) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    debug(user_context) << "halide_hexagon_power_hvx_off\n";
    if (!remote_power_hvx_off) {
        // The function is not available in this version of the
        // runtime, this runtime always powers HVX on.
        return 0;
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_before = halide_current_time_ns(user_context);
    #endif

    debug(user_context) << "    remote_power_hvx_off -> ";
    result = remote_power_hvx_off();
    debug(user_context) << "        " << result << "\n";
    if (result != 0) {
        error(user_context) << "remote_power_hvx_off failed.\n";
        return result;
    }

    #ifdef DEBUG_RUNTIME
    uint64_t t_after = halide_current_time_ns(user_context);
    debug(user_context) << "    Time: " << (t_after - t_before) / 1.0e6 << " ms\n";
    #endif

    return 0;
}

WEAK void halide_hexagon_power_hvx_off_as_destructor(void *user_context, void * /* obj */) {
    halide_hexagon_power_hvx_off(user_context);
}

WEAK int halide_hexagon_set_performance_mode(void *user_context, halide_hexagon_power_mode_t mode) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    debug(user_context) << "halide_hexagon_set_performance_mode\n";
    if (!remote_set_performance_mode) {
        // This runtime doesn't support changing the performance target.
        return 0;
    }

    debug(user_context) << "    remote_set_performance_mode -> ";
    result = remote_set_performance_mode(mode);
    debug(user_context) << "        " << result << "\n";
    if (result != 0) {
        error(user_context) << "remote_set_performance_mode failed.\n";
        return result;
    }

    return 0;
}

WEAK int halide_hexagon_set_performance(void *user_context, halide_hexagon_power_t *perf) {
    int result = init_hexagon_runtime(user_context);
    if (result != 0) return result;

    debug(user_context) << "halide_hexagon_set_performance\n";
    if (!remote_set_performance) {
        // This runtime doesn't support changing the performance target.
        return 0;
    }

    debug(user_context) << "    remote_set_performance -> ";
    result = remote_set_performance(perf->set_mips,
                                    perf->mipsPerThread,
                                    perf->mipsTotal,
                                    perf->set_bus_bw,
                                    perf->bwMegabytesPerSec,
                                    perf->busbwUsagePercentage,
                                    perf->set_latency,
                                    perf->latency);

    debug(user_context) << "        " << result << "\n";
    if (result != 0) {
        error(user_context) << "remote_set_performance failed.\n";
        return result;
    }

    return 0;
}

WEAK const halide_device_interface_t *halide_hexagon_device_interface() {
    return &hexagon_device_interface;
}

namespace {
__attribute__((destructor))
WEAK void halide_hexagon_cleanup() {
    halide_hexagon_device_release(NULL);
}
}

} // extern "C" linkage

namespace Halide { namespace Runtime { namespace Internal { namespace Hexagon {

WEAK halide_device_interface_t hexagon_device_interface = {
    halide_use_jit_module,
    halide_release_jit_module,
    halide_hexagon_device_malloc,
    halide_hexagon_device_free,
    halide_hexagon_device_sync,
    halide_hexagon_device_release,
    halide_hexagon_copy_to_host,
    halide_hexagon_copy_to_device,
    halide_hexagon_device_and_host_malloc,
    halide_hexagon_device_and_host_free,
};

}}}} // namespace Halide::Runtime::Internal::Hexagon

/* [<][>][^][v][top][bottom][index][help] */