#ifndef HALIDE_CODEGEN_PTX_DEV_H
#define HALIDE_CODEGEN_PTX_DEV_H
#include "CodeGen_LLVM.h"
#include "CodeGen_GPU_Host.h"
#include "CodeGen_GPU_Dev.h"
// Forward declaration only: this header stores just a pointer to
// llvm::BasicBlock (see CodeGen_PTX_Dev::entry_block), so the complete
// type is not required here.
namespace llvm {
class BasicBlock;
}
namespace Halide {
namespace Internal {
/** Code generator for the PTX GPU device back end. It combines the
 * LLVM-based code generator (CodeGen_LLVM) with the generic GPU device
 * interface (CodeGen_GPU_Dev). Only declarations live in this header;
 * the behaviour of each member is defined in the implementation file. */
class CodeGen_PTX_Dev : public CodeGen_LLVM, public CodeGen_GPU_Dev {
public:
    /** The x86 and ARM GPU host code generators are friends, so they
     * may reach the protected members declared further down. */
    friend class CodeGen_GPU_Host<CodeGen_X86>;
    friend class CodeGen_GPU_Host<CodeGen_ARM>;

    /** Construct a PTX device code generator for the given host target. */
    CodeGen_PTX_Dev(Target host);
    ~CodeGen_PTX_Dev();

    /** Add a kernel called `name`, with body `stmt` and device
     * arguments `args`, to this code generator.
     * NOTE(review): presumably each call compiles one more kernel into
     * the same device module -- confirm against the implementation. */
    void add_kernel(Stmt stmt,
                    const std::string &name,
                    const std::vector<DeviceArgument> &args);

    /** Static self-test entry point. */
    static void test();

    /** Return the compiled device source as a buffer of bytes.
     * NOTE(review): presumably PTX text for the kernels added so far
     * -- confirm. */
    std::vector<char> compile_to_src();

    /** Name of the kernel currently being worked on.
     * NOTE(review): exact meaning of "current" is defined in the
     * implementation -- confirm. */
    std::string get_current_kernel_name();

    /** Debugging aid that dumps this generator's state.
     * NOTE(review): destination and format are not visible here. */
    void dump();

    /** Translate `name` into the form used for GPU code. */
    virtual std::string print_gpu_name(const std::string &name);

    /** Identifier of the GPU API this back end belongs to: always "cuda". */
    std::string api_unique_name() { return "cuda"; }

protected:
    // Keep the CodeGen_LLVM visit overloads visible alongside the ones
    // declared below; without this they would be hidden by name lookup.
    using CodeGen_LLVM::visit;

    /** (Re)initialise the module that kernels are generated into.
     * NOTE(review): inferred from the name -- confirm. */
    virtual void init_module();

    /** A basic block remembered by the code generator.
     * NOTE(review): presumably the entry block of the kernel being
     * generated -- confirm in the implementation. */
    llvm::BasicBlock *entry_block;

    /** IR node types that receive PTX-specific handling. */
    // @{
    void visit(const For *);
    void visit(const Allocate *);
    void visit(const Free *);
    void visit(const AssertStmt *);
    // @}

    /** Target description strings.
     * NOTE(review): presumably the LLVM architecture, CPU and
     * attribute strings for the PTX target -- confirm. */
    // @{
    std::string march() const;
    std::string mcpu() const;
    std::string mattrs() const;
    // @}

    /** Whether the soft-float ABI should be used for this target. */
    bool use_soft_float_abi() const;

    /** Width, in bits, of this target's native vectors. */
    int native_vector_bits() const;

    /** Index promotion is switched off for this back end: always false. */
    bool promote_indices() const { return false; }

    /** Look up the SIMT intrinsic associated with `name`.
     * NOTE(review): presumably maps thread/block index variable names
     * to PTX intrinsic names -- confirm. */
    std::string simt_intrinsic(const std::string &name);
};
}}
#endif