This source file includes the following definitions.
- parse_bitcode_file
- DECLARE_CPP_INITMOD
- get_triple_for_target
- link_modules
- undo_win32_name_mangling
- add_underscore_to_posix_call
- add_underscores_to_posix_calls_on_windows
- get_initial_module_for_target
- get_initial_module_for_ptx_device
- add_bitcode_to_module
#include "LLVM_Runtime_Linker.h"
#include "LLVM_Headers.h"
namespace Halide {
using std::string;
using std::vector;
namespace {
// Parses the bitcode held in `buf` into a new llvm::Module created in
// `*context`. `id` names the buffer while parsing and becomes the module
// identifier of the result. A parse failure is reported through
// internal_error together with the error code LLVM returned.
std::unique_ptr<llvm::Module> parse_bitcode_file(llvm::StringRef buf, llvm::LLVMContext *context, const char *id) {
    llvm::MemoryBufferRef named_buffer(buf, id);

#if LLVM_VERSION >= 40
    // From LLVM 4.0 on, the parser result is passed through
    // expectedToErrorOr so the check below is the same for every version.
    auto parsed = llvm::expectedToErrorOr(
        llvm::parseBitcodeFile(named_buffer, *context));
#else
    auto parsed = llvm::parseBitcodeFile(named_buffer, *context);
#endif

    if (!parsed) {
        internal_error << "Could not parse built-in bitcode file " << id
                       << " llvm error is " << parsed.getError() << "\n";
    }

    std::unique_ptr<llvm::Module> loaded(std::move(*parsed));
    loaded->setModuleIdentifier(id);
    return loaded;
}
}
// DECLARE_INITMOD(mod)
// Declares the externally defined byte array halide_internal_initmod_<mod>
// and its length halide_internal_initmod_<mod>_length, and defines
// get_initmod_<mod>(context), which hands that array to parse_bitcode_file
// using "<mod>" as the module identifier.
#define DECLARE_INITMOD(mod) \
extern "C" unsigned char halide_internal_initmod_##mod[]; \
extern "C" int halide_internal_initmod_##mod##_length; \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context) { \
llvm::StringRef sb = llvm::StringRef((const char *)halide_internal_initmod_##mod, \
halide_internal_initmod_##mod##_length); \
return parse_bitcode_file(sb, context, #mod); \
}
// DECLARE_NO_INITMOD(mod)
// Stand-in getters for a module that is not part of this build. Defines both
// get_initmod_<mod>(context, bool, bool) and get_initmod_<mod>_ll(context);
// each raises a user_error and then returns a null module pointer.
#define DECLARE_NO_INITMOD(mod) \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *, bool, bool) { \
user_error << "Halide was compiled without support for this target\n"; \
return std::unique_ptr<llvm::Module>(); \
} \
std::unique_ptr<llvm::Module> get_initmod_##mod##_ll(llvm::LLVMContext *) { \
user_error << "Halide was compiled without support for this target\n"; \
return std::unique_ptr<llvm::Module>(); \
}
// DECLARE_CPP_INITMOD(mod)
// Declares four embedded variants of a module through DECLARE_INITMOD
// (<mod>_32_debug, <mod>_64_debug, <mod>_32, <mod>_64) and defines
// get_initmod_<mod>(context, bits_64, debug), which returns the variant
// selected by the two flags.
#define DECLARE_CPP_INITMOD(mod) \
DECLARE_INITMOD(mod ## _32_debug) \
DECLARE_INITMOD(mod ## _64_debug) \
DECLARE_INITMOD(mod ## _32) \
DECLARE_INITMOD(mod ## _64) \
std::unique_ptr<llvm::Module> get_initmod_##mod(llvm::LLVMContext *context, bool bits_64, bool debug) { \
if (bits_64) { \
if (debug) return get_initmod_##mod##_64_debug(context); \
else return get_initmod_##mod##_64(context); \
} else { \
if (debug) return get_initmod_##mod##_32_debug(context); \
else return get_initmod_##mod##_32(context); \
} \
}
// DECLARE_LL_INITMOD(mod)
// Declares a single embedded variant named <mod>_ll through DECLARE_INITMOD,
// i.e. defines get_initmod_<mod>_ll(context).
#define DECLARE_LL_INITMOD(mod) \
DECLARE_INITMOD(mod ## _ll)
// Modules declared through DECLARE_CPP_INITMOD. Each one provides
// get_initmod_<name>(context, bits_64, debug), backed by four embedded
// variants (32/64-bit, debug/non-debug).
DECLARE_CPP_INITMOD(android_clock)
DECLARE_CPP_INITMOD(android_host_cpu_count)
DECLARE_CPP_INITMOD(android_io)
DECLARE_CPP_INITMOD(android_opengl_context)
DECLARE_CPP_INITMOD(android_tempfile)
DECLARE_CPP_INITMOD(buffer_t)
DECLARE_CPP_INITMOD(cache)
DECLARE_CPP_INITMOD(can_use_target)
DECLARE_CPP_INITMOD(cuda)
DECLARE_CPP_INITMOD(destructors)
DECLARE_CPP_INITMOD(device_interface)
DECLARE_CPP_INITMOD(errors)
DECLARE_CPP_INITMOD(fake_thread_pool)
DECLARE_CPP_INITMOD(float16_t)
DECLARE_CPP_INITMOD(gcd_thread_pool)
DECLARE_CPP_INITMOD(gpu_device_selection)
DECLARE_CPP_INITMOD(hexagon_host)
DECLARE_CPP_INITMOD(ios_io)
DECLARE_CPP_INITMOD(linux_clock)
DECLARE_CPP_INITMOD(linux_host_cpu_count)
DECLARE_CPP_INITMOD(linux_opengl_context)
DECLARE_CPP_INITMOD(matlab)
DECLARE_CPP_INITMOD(metadata)
DECLARE_CPP_INITMOD(mingw_math)
DECLARE_CPP_INITMOD(module_aot_ref_count)
DECLARE_CPP_INITMOD(module_jit_ref_count)
DECLARE_CPP_INITMOD(msan)
DECLARE_CPP_INITMOD(msan_stubs)
DECLARE_CPP_INITMOD(noos)
DECLARE_CPP_INITMOD(old_buffer_t)
DECLARE_CPP_INITMOD(opencl)
DECLARE_CPP_INITMOD(opengl)
DECLARE_CPP_INITMOD(openglcompute)
DECLARE_CPP_INITMOD(osx_clock)
DECLARE_CPP_INITMOD(osx_get_symbol)
DECLARE_CPP_INITMOD(osx_host_cpu_count)
DECLARE_CPP_INITMOD(osx_opengl_context)
DECLARE_CPP_INITMOD(posix_allocator)
DECLARE_CPP_INITMOD(posix_clock)
DECLARE_CPP_INITMOD(posix_error_handler)
DECLARE_CPP_INITMOD(posix_get_symbol)
DECLARE_CPP_INITMOD(posix_io)
DECLARE_CPP_INITMOD(posix_tempfile)
DECLARE_CPP_INITMOD(posix_print)
DECLARE_CPP_INITMOD(posix_threads)
DECLARE_CPP_INITMOD(prefetch)
DECLARE_CPP_INITMOD(profiler)
DECLARE_CPP_INITMOD(profiler_inlined)
DECLARE_CPP_INITMOD(qurt_allocator)
DECLARE_CPP_INITMOD(qurt_hvx)
DECLARE_CPP_INITMOD(runtime_api)
DECLARE_CPP_INITMOD(ssp)
DECLARE_CPP_INITMOD(thread_pool)
DECLARE_CPP_INITMOD(to_string)
DECLARE_CPP_INITMOD(tracing)
DECLARE_CPP_INITMOD(windows_clock)
DECLARE_CPP_INITMOD(windows_cuda)
DECLARE_CPP_INITMOD(windows_get_symbol)
DECLARE_CPP_INITMOD(windows_io)
DECLARE_CPP_INITMOD(windows_opencl)
DECLARE_CPP_INITMOD(windows_tempfile)
DECLARE_CPP_INITMOD(windows_threads)
DECLARE_CPP_INITMOD(write_debug_image)
// Modules declared through DECLARE_LL_INITMOD. Each one has a single embedded
// variant, fetched with get_initmod_<name>_ll(context).
DECLARE_LL_INITMOD(posix_math)
DECLARE_LL_INITMOD(win32_math)
DECLARE_LL_INITMOD(ptx_dev)
// Below, modules that depend on a build-time WITH_* switch get real getters
// when the switch is defined and DECLARE_NO_INITMOD stubs (which raise a
// user error when called) when it is not.

// Metal: the metal_objc_* modules additionally depend on WITH_ARM / WITH_X86.
#ifdef WITH_METAL
DECLARE_CPP_INITMOD(metal)
#ifdef WITH_ARM
DECLARE_CPP_INITMOD(metal_objc_arm)
#else
DECLARE_NO_INITMOD(metal_objc_arm)
#endif
#ifdef WITH_X86
DECLARE_CPP_INITMOD(metal_objc_x86)
#else
DECLARE_NO_INITMOD(metal_objc_x86)
#endif
#else
DECLARE_NO_INITMOD(metal)
DECLARE_NO_INITMOD(metal_objc_arm)
DECLARE_NO_INITMOD(metal_objc_x86)
#endif
// 32-bit ARM.
#ifdef WITH_ARM
DECLARE_LL_INITMOD(arm)
DECLARE_LL_INITMOD(arm_no_neon)
DECLARE_CPP_INITMOD(arm_cpu_features)
#else
DECLARE_NO_INITMOD(arm)
DECLARE_NO_INITMOD(arm_no_neon)
DECLARE_NO_INITMOD(arm_cpu_features)
#endif
// AArch64.
#ifdef WITH_AARCH64
DECLARE_LL_INITMOD(aarch64)
DECLARE_CPP_INITMOD(aarch64_cpu_features)
#else
DECLARE_NO_INITMOD(aarch64)
DECLARE_NO_INITMOD(aarch64_cpu_features)
#endif
// PTX: no stubs are declared when WITH_PTX is undefined. The only user
// visible in this file, get_initial_module_for_ptx_device, is compiled under
// the same guard.
#ifdef WITH_PTX
DECLARE_LL_INITMOD(ptx_compute_20)
DECLARE_LL_INITMOD(ptx_compute_30)
DECLARE_LL_INITMOD(ptx_compute_35)
#endif
// x86.
#ifdef WITH_X86
DECLARE_LL_INITMOD(x86_avx)
DECLARE_LL_INITMOD(x86)
DECLARE_LL_INITMOD(x86_sse41)
DECLARE_CPP_INITMOD(x86_cpu_features)
#else
DECLARE_NO_INITMOD(x86_avx)
DECLARE_NO_INITMOD(x86)
DECLARE_NO_INITMOD(x86_sse41)
DECLARE_NO_INITMOD(x86_cpu_features)
#endif
// MIPS.
#ifdef WITH_MIPS
DECLARE_LL_INITMOD(mips)
DECLARE_CPP_INITMOD(mips_cpu_features)
#else
DECLARE_NO_INITMOD(mips)
DECLARE_NO_INITMOD(mips_cpu_features)
#endif
// PowerPC.
#ifdef WITH_POWERPC
DECLARE_LL_INITMOD(powerpc)
DECLARE_CPP_INITMOD(powerpc_cpu_features)
#else
DECLARE_NO_INITMOD(powerpc)
DECLARE_NO_INITMOD(powerpc_cpu_features)
#endif
// Hexagon HVX.
#ifdef WITH_HEXAGON
DECLARE_LL_INITMOD(hvx_64)
DECLARE_LL_INITMOD(hvx_128)
#else
DECLARE_NO_INITMOD(hvx_64)
DECLARE_NO_INITMOD(hvx_128)
#endif
namespace {
// Returns the LLVM data layout for `target`. The layout string is picked
// from the target's architecture and pointer width (target.bits, where
// anything other than 32 takes the 64-bit entry) and, for x86 and ARM, its
// OS. An unrecognised architecture raises an internal_error.
llvm::DataLayout get_data_layout_for_target(Target target) {
    const bool is_32_bit = (target.bits == 32);
    const char *layout = nullptr;

    if (target.arch == Target::X86) {
        const bool is_windows = (target.os == Target::Windows);
        if (target.os == Target::OSX) {
            layout = is_32_bit ? "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
                               : "e-m:o-i64:64-f80:128-n8:16:32:64-S128";
        } else if (is_windows && !target.has_feature(Target::JIT)) {
            // Windows without the JIT feature.
            layout = is_32_bit ? "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
                               : "e-m:w-i64:64-f80:128-n8:16:32:64-S128";
        } else if (is_windows) {
            // Windows with the JIT feature.
            layout = is_32_bit ? "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
                               : "e-m:e-i64:64-f80:128-n8:16:32:64-S128";
        } else {
            // Every other OS.
            layout = is_32_bit ? "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
                               : "e-m:e-i64:64-f80:128-n8:16:32:64-S128";
        }
    } else if (target.arch == Target::ARM) {
        if (target.os == Target::IOS) {
            layout = is_32_bit ? "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
                               : "e-m:o-i64:64-i128:128-n32:64-S128";
        } else if (is_32_bit) {
            layout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64";
        } else {
            // The 64-bit non-iOS layout depends on the LLVM version in use.
#if LLVM_VERSION < 39
            layout = "e-m:e-i64:64-i128:128-n32:64-S128";
#else
            layout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
#endif
        }
    } else if (target.arch == Target::MIPS) {
        layout = is_32_bit ? "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
                           : "e-m:m-i8:8:32-i16:16:32-i64:64-n32:64-S128";
    } else if (target.arch == Target::POWERPC) {
        layout = is_32_bit ? "e-m:e-i32:32-n32"
                           : "e-m:e-i64:64-n32:64";
    } else if (target.arch == Target::Hexagon) {
        // A single layout regardless of target.bits.
        layout = "e-m:e-p:32:32:32-a:0-n16:32-i64:64:64-i32:32:32-i16:16:16-i1:8:8"
                 "-f32:32:32-f64:64:64-v32:32:32-v64:64:64-v512:512:512-v1024:1024:1024-v2048:2048:2048";
    } else {
        internal_error << "Bad target arch: " << target.arch << "\n";
        layout = "unreachable";
    }

    return llvm::DataLayout(layout);
}
}
namespace Internal {
// Builds the LLVM target triple for a Halide Target.
//
// Starting from a default-constructed triple, the architecture is set from
// target.arch and target.bits, and the vendor, OS, environment and object
// format are filled in from target.os and the target's features. Unsupported
// combinations are reported through user_assert / user_error, and an
// unrecognised architecture through internal_error.
llvm::Triple get_triple_for_target(const Target &target) {
llvm::Triple triple;
if (target.arch == Target::X86) {
// 32-bit selects x86; anything else must be 64-bit and selects x86_64.
if (target.bits == 32) {
triple.setArch(llvm::Triple::x86);
} else {
user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
triple.setArch(llvm::Triple::x86_64);
}
// OS-specific fields. An OS not listed here leaves them at their defaults.
if (target.os == Target::Linux) {
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::GNU);
} else if (target.os == Target::OSX) {
triple.setVendor(llvm::Triple::Apple);
triple.setOS(llvm::Triple::MacOSX);
} else if (target.os == Target::Windows) {
triple.setVendor(llvm::Triple::PC);
triple.setOS(llvm::Triple::Win32);
// The MinGW feature selects the GNU environment; otherwise MSVC is used.
if (target.has_feature(Target::MinGW)) {
triple.setEnvironment(llvm::Triple::GNU);
} else {
triple.setEnvironment(llvm::Triple::MSVC);
}
// JIT targets on Windows use the ELF object format.
if (target.has_feature(Target::JIT)) {
triple.setObjectFormat(llvm::Triple::ELF);
}
} else if (target.os == Target::Android) {
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::Android);
} else if (target.os == Target::IOS) {
triple.setVendor(llvm::Triple::Apple);
triple.setOS(llvm::Triple::IOS);
}
} else if (target.arch == Target::ARM) {
if (target.bits == 32) {
// The ARMv7s feature is expressed through the arch name "armv7s";
// plain 32-bit ARM uses the generic arm arch.
if (target.has_feature(Target::ARMv7s)) {
triple.setArchName("armv7s");
} else {
triple.setArch(llvm::Triple::arm);
}
} else {
user_assert(target.bits == 64) << "Target bits must be 32 or 64\n";
// 64-bit ARM is only available when this build defines WITH_AARCH64.
#if (WITH_AARCH64)
triple.setArch(llvm::Triple::aarch64);
#else
user_error << "AArch64 llvm target not enabled in this build of Halide\n";
#endif
}
// Only Android, iOS and Linux are accepted for ARM.
if (target.os == Target::Android) {
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::EABI);
} else if (target.os == Target::IOS) {
triple.setOS(llvm::Triple::IOS);
triple.setVendor(llvm::Triple::Apple);
} else if (target.os == Target::Linux) {
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::GNUEABIHF);
} else {
user_error << "No arm support for this OS\n";
}
} else if (target.arch == Target::MIPS) {
// 32-bit selects mipsel; anything else must be 64-bit and selects mips64el.
if (target.bits == 32) {
triple.setArch(llvm::Triple::mipsel);
} else {
user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
triple.setArch(llvm::Triple::mips64el);
}
// Android is the only OS accepted for MIPS.
if (target.os == Target::Android) {
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::Android);
} else {
user_error << "No mips support for this OS\n";
}
} else if (target.arch == Target::POWERPC) {
// PowerPC is only available when this build defines WITH_POWERPC.
#if (WITH_POWERPC)
user_assert(target.os == Target::Linux) << "PowerPC target is Linux-only.\n";
triple.setVendor(llvm::Triple::UnknownVendor);
triple.setOS(llvm::Triple::Linux);
triple.setEnvironment(llvm::Triple::GNU);
// 32-bit selects ppc; anything else must be 64-bit and selects ppc64le.
if (target.bits == 32) {
triple.setArch(llvm::Triple::ppc);
} else {
user_assert(target.bits == 64) << "Target must be 32- or 64-bit.\n";
triple.setArch(llvm::Triple::ppc64le);
}
#else
user_error << "PowerPC llvm target not enabled in this build of Halide\n";
#endif
} else if (target.arch == Target::Hexagon) {
// Hexagon: unknown vendor, ELF object format; target.os and target.bits
// are not consulted here.
triple.setVendor(llvm::Triple::UnknownVendor);
triple.setArch(llvm::Triple::hexagon);
triple.setObjectFormat(llvm::Triple::ELF);
} else {
internal_error << "Bad target arch: " << target.arch << "\n";
}
return triple;
}
}
namespace {
// Links all of `modules` into modules[0] for target `t`.
//
// Every module first receives the target's data layout and triple, then
// modules[1..] are linked into modules[0]; a link failure raises an
// internal_error. Afterwards weak linkage is turned into the matching
// linkonce linkage for every global variable and for every function that
// does not have to be retained. A function is retained when its name starts
// with "halide_" or appears in the `retain` list below; on NoOS targets no
// function is retained. Finally the "llvm.used" and
// "halide_runtime_api_functions" globals are erased if they exist.
void link_modules(std::vector<std::unique_ptr<llvm::Module>> &modules, Target t) {
    llvm::DataLayout data_layout = get_data_layout_for_target(t);
    llvm::Triple triple = Internal::get_triple_for_target(t);

    // All modules must agree on layout and triple before they are linked.
    for (auto &m : modules) {
        m->setDataLayout(data_layout);
        m->setTargetTriple(triple.str());
    }

    // Fold everything into the first module.
    for (size_t i = 1; i < modules.size(); i++) {
#if LLVM_VERSION >= 38
        bool failed = llvm::Linker::linkModules(*modules[0],
                                                std::move(modules[i]));
#else
        bool failed = llvm::Linker::LinkModules(modules[0].get(),
                                                modules[i].release());
#endif
        if (failed) {
            internal_error << "Failure linking initial modules\n";
        }
    }

    // Functions that keep their linkage although they lack the "halide_"
    // prefix.
    vector<string> retain = {"__stack_chk_guard",
                             "__stack_chk_fail"};
    if (t.has_feature(Target::MinGW)) {
        const char *const mingw_math_fns[] = {"sincos", "sincosf",
                                              "asinh", "asinhf",
                                              "acosh", "acoshf",
                                              "atanh", "atanhf"};
        for (const char *fn_name : mingw_math_fns) {
            retain.push_back(fn_name);
        }
    }

    // Rewrites weak linkage as the corresponding linkonce linkage and leaves
    // any other linkage untouched.
    auto weak_to_linkonce = [](llvm::GlobalValue &value) {
        switch (value.getLinkage()) {
        case llvm::GlobalValue::WeakAnyLinkage:
            value.setLinkage(llvm::GlobalValue::LinkOnceAnyLinkage);
            break;
        case llvm::GlobalValue::WeakODRLinkage:
            value.setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
            break;
        default:
            break;
        }
    };

    llvm::Module &linked = *modules[0];

    // Global variables are all converted, whatever their name.
    for (auto &gv : linked.globals()) {
        weak_to_linkonce(gv);
    }

    for (auto &f : linked) {
        bool listed_in_retain = false;
        for (const string &r : retain) {
            if (f.getName() == r) {
                listed_in_retain = true;
            }
        }

        // Outside NoOS, every defined "halide_" function is expected to be
        // weak for the linker.
        bool is_halide_extern_c_sym = Internal::starts_with(f.getName(), "halide_");
        internal_assert(t.os == Target::NoOS || !is_halide_extern_c_sym || f.isWeakForLinker() || f.isDeclaration())
            << " for function " << (std::string)f.getName() << "\n";

        const bool can_strip = !listed_in_retain && !is_halide_extern_c_sym;
        if (can_strip || t.os == Target::NoOS) {
            weak_to_linkonce(f);
        }
    }

    // Drop these two globals from the linked module when present.
    if (llvm::GlobalValue *llvm_used = linked.getNamedGlobal("llvm.used")) {
        llvm_used->eraseFromParent();
    }
    if (llvm::GlobalValue *runtime_api = linked.getNamedGlobal("halide_runtime_api_functions")) {
        runtime_api->eraseFromParent();
    }
}
}
namespace Internal {
// For every body-less stdcall function in `m` whose name begins with the
// byte 0x01 followed by '_', creates a function with the unmangled name
// (prefix removed, and everything from the last '@' on dropped) and gives the
// mangled function a body that simply forwards its arguments to it.
void undo_win32_name_mangling(llvm::Module *m) {
    llvm::IRBuilder<> builder(m->getContext());

    for (llvm::Function &f : *m) {
        string mangled = f.getName();
        const bool is_mangled_stdcall_decl =
            f.getCallingConv() == llvm::CallingConv::X86_StdCall &&
            f.empty() &&
            mangled.size() > 2 && mangled[0] == 1 && mangled[1] == '_';
        if (!is_mangled_stdcall_decl) {
            continue;
        }

        // Strip the two-character prefix, then the trailing "@..." if any
        // (rfind yields npos when there is no '@', which keeps the whole
        // string).
        string without_prefix = mangled.substr(2);
        string unmangled_name = without_prefix.substr(0, without_prefix.rfind('@'));

        // The unmangled function shares type, linkage and calling convention
        // with the mangled one.
        llvm::Function *unmangled =
            llvm::Function::Create(f.getFunctionType(), f.getLinkage(), unmangled_name, m);
        unmangled->setCallingConv(f.getCallingConv());

        // Give the mangled function a one-block body that forwards the call.
        llvm::BasicBlock *entry = llvm::BasicBlock::Create(m->getContext(), "entry", &f);
        builder.SetInsertPoint(entry);

        vector<llvm::Value *> forwarded_args;
        for (auto &arg : f.args()) {
            forwarded_args.push_back(&arg);
        }

        llvm::CallInst *forward = builder.CreateCall(unmangled, forwarded_args);
        forward->setCallingConv(f.getCallingConv());

        if (f.getReturnType()->isVoidTy()) {
            builder.CreateRetVoid();
        } else {
            builder.CreateRet(forward);
        }
    }
}
// Redirects `call` to the function named "_" + fn's name in module `m`.
// That function is created with external linkage and fn's type if the module
// does not have it yet.
void add_underscore_to_posix_call(llvm::CallInst *call, llvm::Function *fn, llvm::Module *m) {
    string underscored_name = "_" + fn->getName().str();

    llvm::Function *replacement = m->getFunction(underscored_name);
    if (replacement == nullptr) {
        replacement = llvm::Function::Create(fn->getFunctionType(),
                                             llvm::GlobalValue::ExternalLinkage,
                                             underscored_name, m);
    }

    // Guards against the created function having ended up under another name.
    internal_assert(replacement->getName() == underscored_name);
    call->setCalledFunction(replacement);
}
// Walks every instruction of every function in `m` and redirects direct
// calls to vsnprintf, open, close, write and fileno to their
// underscore-prefixed counterparts (see add_underscore_to_posix_call).
void add_underscores_to_posix_calls_on_windows(llvm::Module *m) {
    const string posix_fns[] = {"vsnprintf", "open", "close", "write", "fileno"};

    for (auto &fn : *m) {
        for (auto &basic_block : fn) {
            for (auto &instruction : basic_block) {
                llvm::CallInst *call = llvm::dyn_cast<llvm::CallInst>(&instruction);
                if (!call) {
                    continue;
                }

                // getCalledFunction() is null when there is no directly
                // called function to inspect.
                llvm::Function *callee = call->getCalledFunction();
                if (!callee) {
                    continue;
                }

                bool needs_underscore = false;
                for (const string &posix_name : posix_fns) {
                    if (posix_name == callee->getName()) {
                        needs_underscore = true;
                        break;
                    }
                }

                if (needs_underscore) {
                    add_underscore_to_posix_call(call, callee, m);
                }
            }
        }
    }
}
// Assembles the initial runtime module for target `t` in context `c`.
//
// The runtime components to include are chosen from the target's OS,
// architecture and features, and from the kind of module being built:
//   - JIT feature and just_gpu               -> ModuleGPU
//   - JIT feature and for_shared_jit_runtime -> ModuleJITShared
//   - JIT feature otherwise                  -> ModuleJITInlined
//   - NoRuntime feature (without JIT)        -> ModuleAOTNoRuntime
//   - anything else                          -> ModuleAOT
// The chosen components are linked into a single module by link_modules.
// For Windows targets the result is then post-processed (win32 name
// unmangling for 32-bit JIT, underscore-prefixed posix calls) before it is
// returned. t.bits must be 32 or 64.
std::unique_ptr<llvm::Module> get_initial_module_for_target(Target t, llvm::LLVMContext *c, bool for_shared_jit_runtime, bool just_gpu) {
    enum InitialModuleType {
        ModuleAOT,
        ModuleAOTNoRuntime,
        ModuleJITShared,
        ModuleJITInlined,
        ModuleGPU
    } module_type;

    if (t.has_feature(Target::JIT)) {
        if (just_gpu) {
            module_type = ModuleGPU;
        } else if (for_shared_jit_runtime) {
            module_type = ModuleJITShared;
        } else {
            module_type = ModuleJITInlined;
        }
    } else if (t.has_feature(Target::NoRuntime)) {
        module_type = ModuleAOTNoRuntime;
    } else {
        module_type = ModuleAOT;
    }

    internal_assert(t.bits == 32 || t.bits == 64);
    bool bits_64 = (t.bits == 64);
    bool debug = t.has_feature(Target::Debug);

    vector<std::unique_ptr<llvm::Module>> modules;

    if (module_type != ModuleGPU) {
        if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
            // OS-dependent components.
            if (t.os == Target::Linux) {
                modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                if (t.arch == Target::X86) {
                    modules.push_back(get_initmod_linux_clock(c, bits_64, debug));
                } else {
                    modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
                }
                modules.push_back(get_initmod_posix_io(c, bits_64, debug));
                modules.push_back(get_initmod_posix_tempfile(c, bits_64, debug));
                modules.push_back(get_initmod_linux_host_cpu_count(c, bits_64, debug));
                modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
                modules.push_back(get_initmod_thread_pool(c, bits_64, debug));
                modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
            } else if (t.os == Target::OSX) {
                modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                modules.push_back(get_initmod_osx_clock(c, bits_64, debug));
                modules.push_back(get_initmod_posix_io(c, bits_64, debug));
                modules.push_back(get_initmod_posix_tempfile(c, bits_64, debug));
                modules.push_back(get_initmod_gcd_thread_pool(c, bits_64, debug));
                modules.push_back(get_initmod_osx_get_symbol(c, bits_64, debug));
            } else if (t.os == Target::Android) {
                modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                if (t.arch == Target::ARM) {
                    modules.push_back(get_initmod_android_clock(c, bits_64, debug));
                } else {
                    modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
                }
                modules.push_back(get_initmod_android_io(c, bits_64, debug));
                modules.push_back(get_initmod_android_tempfile(c, bits_64, debug));
                modules.push_back(get_initmod_android_host_cpu_count(c, bits_64, debug));
                modules.push_back(get_initmod_posix_threads(c, bits_64, debug));
                modules.push_back(get_initmod_thread_pool(c, bits_64, debug));
                modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
            } else if (t.os == Target::Windows) {
                modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                modules.push_back(get_initmod_windows_clock(c, bits_64, debug));
                modules.push_back(get_initmod_windows_io(c, bits_64, debug));
                modules.push_back(get_initmod_windows_tempfile(c, bits_64, debug));
                modules.push_back(get_initmod_windows_threads(c, bits_64, debug));
                modules.push_back(get_initmod_thread_pool(c, bits_64, debug));
                modules.push_back(get_initmod_windows_get_symbol(c, bits_64, debug));
                if (t.has_feature(Target::MinGW)) {
                    modules.push_back(get_initmod_mingw_math(c, bits_64, debug));
                }
            } else if (t.os == Target::IOS) {
                modules.push_back(get_initmod_posix_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                modules.push_back(get_initmod_posix_clock(c, bits_64, debug));
                modules.push_back(get_initmod_ios_io(c, bits_64, debug));
                modules.push_back(get_initmod_posix_tempfile(c, bits_64, debug));
                modules.push_back(get_initmod_gcd_thread_pool(c, bits_64, debug));
            } else if (t.os == Target::QuRT) {
                modules.push_back(get_initmod_qurt_allocator(c, bits_64, debug));
                modules.push_back(get_initmod_posix_error_handler(c, bits_64, debug));
                modules.push_back(get_initmod_posix_print(c, bits_64, debug));
                modules.push_back(get_initmod_posix_get_symbol(c, bits_64, debug));
                modules.push_back(get_initmod_posix_io(c, bits_64, debug));
                modules.push_back(get_initmod_fake_thread_pool(c, bits_64, debug));
            } else if (t.os == Target::NoOS) {
                // A single module stands in for the OS-specific set.
                modules.push_back(get_initmod_noos(c, bits_64, debug));
            }
        }

        if (module_type != ModuleJITShared) {
            modules.push_back(get_initmod_buffer_t(c, bits_64, debug));
            modules.push_back(get_initmod_destructors(c, bits_64, debug));

            // Only 32-bit Windows uses the win32 math module; everything
            // else uses the posix one.
            if (t.os == Target::Windows) {
                if (t.bits == 32) {
                    modules.push_back(get_initmod_win32_math_ll(c));
                } else {
                    modules.push_back(get_initmod_posix_math_ll(c));
                }
            } else {
                modules.push_back(get_initmod_posix_math_ll(c));
            }
        }

        if (module_type != ModuleJITInlined && module_type != ModuleAOTNoRuntime) {
            // Components included for every OS.
            modules.push_back(get_initmod_gpu_device_selection(c, bits_64, debug));
            modules.push_back(get_initmod_tracing(c, bits_64, debug));
            modules.push_back(get_initmod_write_debug_image(c, bits_64, debug));
            modules.push_back(get_initmod_cache(c, bits_64, debug));
            modules.push_back(get_initmod_to_string(c, bits_64, debug));
            modules.push_back(get_initmod_device_interface(c, bits_64, debug));
            modules.push_back(get_initmod_metadata(c, bits_64, debug));
            modules.push_back(get_initmod_float16_t(c, bits_64, debug));
            modules.push_back(get_initmod_old_buffer_t(c, bits_64, debug));
            modules.push_back(get_initmod_errors(c, bits_64, debug));

            // The profiler is left out on MIPS and on NoOS targets.
            if (t.arch != Target::MIPS && t.os != Target::NoOS) {
                modules.push_back(get_initmod_profiler(c, bits_64, debug));
            }

            if (t.has_feature(Target::MSAN)) {
                modules.push_back(get_initmod_msan(c, bits_64, debug));
            } else {
                modules.push_back(get_initmod_msan_stubs(c, bits_64, debug));
            }
        }

        if (module_type != ModuleJITShared) {
            // Architecture- and feature-dependent components.
            if (t.arch == Target::X86) {
                modules.push_back(get_initmod_x86_ll(c));
            }
            if (t.arch == Target::ARM) {
                if (t.bits == 64) {
                    modules.push_back(get_initmod_aarch64_ll(c));
                } else if (t.has_feature(Target::ARMv7s)) {
                    modules.push_back(get_initmod_arm_ll(c));
                } else if (!t.has_feature(Target::NoNEON)) {
                    modules.push_back(get_initmod_arm_ll(c));
                } else {
                    modules.push_back(get_initmod_arm_no_neon_ll(c));
                }
            }
            if (t.arch == Target::MIPS) {
                modules.push_back(get_initmod_mips_ll(c));
            }
            if (t.arch == Target::POWERPC) {
                modules.push_back(get_initmod_powerpc_ll(c));
            }
            if (t.arch == Target::Hexagon) {
                modules.push_back(get_initmod_qurt_hvx(c, bits_64, debug));
                if (t.has_feature(Target::HVX_64)) {
                    modules.push_back(get_initmod_hvx_64_ll(c));
                } else if (t.has_feature(Target::HVX_128)) {
                    modules.push_back(get_initmod_hvx_128_ll(c));
                }
            } else {
                // The prefetch module is used for every non-Hexagon arch.
                modules.push_back(get_initmod_prefetch(c, bits_64, debug));
            }
            if (t.has_feature(Target::SSE41)) {
                modules.push_back(get_initmod_x86_sse41_ll(c));
            }
            if (t.has_feature(Target::AVX)) {
                modules.push_back(get_initmod_x86_avx_ll(c));
            }
            if (t.has_feature(Target::Profile)) {
                modules.push_back(get_initmod_profiler_inlined(c, bits_64, debug));
            }
        }

        if (module_type == ModuleAOT) {
            // Components only included in full AOT modules.
            modules.push_back(get_initmod_can_use_target(c, bits_64, debug));
            if (t.arch == Target::X86) {
                modules.push_back(get_initmod_x86_cpu_features(c, bits_64, debug));
            }
            if (t.arch == Target::ARM) {
                // 64-bit ARM is AArch64, matching the aarch64_ll selection
                // above, so it takes the aarch64 module and 32-bit ARM takes
                // the arm one.
                if (t.bits == 64) {
                    modules.push_back(get_initmod_aarch64_cpu_features(c, bits_64, debug));
                } else {
                    modules.push_back(get_initmod_arm_cpu_features(c, bits_64, debug));
                }
            }
            if (t.arch == Target::MIPS) {
                modules.push_back(get_initmod_mips_cpu_features(c, bits_64, debug));
            }
            if (t.arch == Target::POWERPC) {
                modules.push_back(get_initmod_powerpc_cpu_features(c, bits_64, debug));
            }
        }
    }

    // Reference counting: the JIT flavour for shared-JIT and GPU modules, the
    // AOT flavour for full AOT modules, none otherwise.
    if (module_type == ModuleJITShared || module_type == ModuleGPU) {
        modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
    } else if (module_type == ModuleAOT) {
        modules.push_back(get_initmod_module_aot_ref_count(c, bits_64, debug));
    }

    if (module_type == ModuleAOT || module_type == ModuleGPU) {
        // Components selected by the device-API features of the target.
        if (t.has_feature(Target::CUDA)) {
            if (t.os == Target::Windows) {
                modules.push_back(get_initmod_windows_cuda(c, bits_64, debug));
            } else {
                modules.push_back(get_initmod_cuda(c, bits_64, debug));
            }
        }
        if (t.has_feature(Target::OpenCL)) {
            if (t.os == Target::Windows) {
                modules.push_back(get_initmod_windows_opencl(c, bits_64, debug));
            } else {
                modules.push_back(get_initmod_opencl(c, bits_64, debug));
            }
        }
        if (t.has_feature(Target::OpenGL)) {
            modules.push_back(get_initmod_opengl(c, bits_64, debug));
            if (t.os == Target::Linux) {
                modules.push_back(get_initmod_linux_opengl_context(c, bits_64, debug));
            } else if (t.os == Target::OSX) {
                modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
            } else if (t.os == Target::Android) {
                modules.push_back(get_initmod_android_opengl_context(c, bits_64, debug));
            } else {
                // No OpenGL context module is added for any other OS.
            }
        }
        if (t.has_feature(Target::OpenGLCompute)) {
            modules.push_back(get_initmod_openglcompute(c, bits_64, debug));
            if (t.os == Target::Android) {
                modules.push_back(get_initmod_android_opengl_context(c, bits_64, debug));
            } else if (t.os == Target::Linux) {
                modules.push_back(get_initmod_linux_opengl_context(c, bits_64, debug));
            } else if (t.os == Target::OSX) {
                modules.push_back(get_initmod_osx_opengl_context(c, bits_64, debug));
            } else {
                // No OpenGL context module is added for any other OS.
            }
        }
        if (t.has_feature(Target::Metal)) {
            modules.push_back(get_initmod_metal(c, bits_64, debug));
            if (t.arch == Target::ARM) {
                modules.push_back(get_initmod_metal_objc_arm(c, bits_64, debug));
            } else if (t.arch == Target::X86) {
                modules.push_back(get_initmod_metal_objc_x86(c, bits_64, debug));
            } else {
                user_error << "Metal can only be used on ARM or X86 architectures.\n";
            }
        }
        // An HVX feature on a non-Hexagon arch adds the hexagon_host module.
        if (t.arch != Target::Hexagon && t.features_any_of({Target::HVX_64, Target::HVX_128})) {
            modules.push_back(get_initmod_module_jit_ref_count(c, bits_64, debug));
            modules.push_back(get_initmod_hexagon_host(c, bits_64, debug));
        }
    }

    if (module_type == ModuleAOT && t.has_feature(Target::Matlab)) {
        modules.push_back(get_initmod_matlab(c, bits_64, debug));
    }

    if (module_type == ModuleAOTNoRuntime ||
        module_type == ModuleJITInlined) {
        modules.push_back(get_initmod_runtime_api(c, bits_64, debug));
    }

    // Everything ends up in modules[0].
    link_modules(modules, t);

    if (t.os == Target::Windows &&
        t.bits == 32 &&
        (t.has_feature(Target::JIT))) {
        undo_win32_name_mangling(modules[0].get());
    }

    if (t.os == Target::Windows) {
        add_underscores_to_posix_calls_on_windows(modules[0].get());
    }

    return std::move(modules[0]);
}
#ifdef WITH_PTX
// Builds the initial module for PTX device code: ptx_dev linked with the
// ptx_compute_* module matching the target's CUDA capability features.
// After linking, every defined function not marked noinline gets
// available_externally linkage, halide_gpu_thread_barrier is marked
// noduplicate, and the module's triple and data layout are set to the
// nvptx64 values below.
std::unique_ptr<llvm::Module> get_initial_module_for_ptx_device(Target target, llvm::LLVMContext *c) {
    std::vector<std::unique_ptr<llvm::Module>> modules;
    modules.push_back(get_initmod_ptx_dev_ll(c));

    // Capability 3.5 takes ptx_compute_35, capability 3.0 takes
    // ptx_compute_30, and everything else (including 3.2 and 5.0, which are
    // tested before 3.0) takes ptx_compute_20.
    if (target.has_feature(Target::CUDACapability35)) {
        modules.push_back(get_initmod_ptx_compute_35_ll(c));
    } else if (target.features_any_of({Target::CUDACapability32,
                                       Target::CUDACapability50})) {
        modules.push_back(get_initmod_ptx_compute_20_ll(c));
    } else if (target.has_feature(Target::CUDACapability30)) {
        modules.push_back(get_initmod_ptx_compute_30_ll(c));
    } else {
        modules.push_back(get_initmod_ptx_compute_20_ll(c));
    }

    link_modules(modules, target);

    for (llvm::Function &f : *modules[0]) {
        const bool is_definition = !f.isDeclaration();
        if (is_definition && !f.hasFnAttribute(llvm::Attribute::NoInline)) {
            f.setLinkage(llvm::GlobalValue::AvailableExternallyLinkage);
        }
        if (f.getName() == "halide_gpu_thread_barrier") {
            f.addFnAttr(llvm::Attribute::NoDuplicate);
        }
    }

    // Replace the triple and data layout that link_modules derived from
    // `target` with the nvptx64 ones.
    modules[0]->setTargetTriple(llvm::Triple("nvptx64--").str());
    modules[0]->setDataLayout(llvm::DataLayout("e-i64:64-v16:16-v32:32-n16:32:64"));

    return std::move(modules[0]);
}
#endif
// Parses `bitcode` as an LLVM bitcode buffer in `*context` and links the
// resulting module into `module`. `name` is used as the identifier of the
// parsed module and in error messages. A parse or link failure raises an
// internal_error.
void add_bitcode_to_module(llvm::LLVMContext *context, llvm::Module &module,
                           const std::vector<uint8_t> &bitcode, const std::string &name) {
    // data() is well-defined for an empty vector, whereas &bitcode[0] is
    // not; an empty buffer then reaches the parser's normal error report.
    llvm::StringRef sb(reinterpret_cast<const char *>(bitcode.data()), bitcode.size());
    std::unique_ptr<llvm::Module> add_in = parse_bitcode_file(sb, context, name.c_str());

#if LLVM_VERSION >= 38
    bool failed = llvm::Linker::linkModules(module, std::move(add_in));
#else
    bool failed = llvm::Linker::LinkModules(&module, add_in.release());
#endif
    if (failed) {
        internal_error << "Failure linking in additional module: " << name << "\n";
    }
}
}
}