This source file includes the following definitions.
- lower
- lower_main_stmt
#include <iostream>
#include <set>
#include <sstream>
#include <algorithm>
#include "Lower.h"
#include "AddImageChecks.h"
#include "AddParameterChecks.h"
#include "AllocationBoundsInference.h"
#include "Bounds.h"
#include "BoundsInference.h"
#include "CSE.h"
#include "CanonicalizeGPUVars.h"
#include "Debug.h"
#include "DebugArguments.h"
#include "DebugToFile.h"
#include "DeepCopy.h"
#include "Deinterleave.h"
#include "EarlyFree.h"
#include "FindCalls.h"
#include "Func.h"
#include "Function.h"
#include "FuseGPUThreadLoops.h"
#include "FuzzFloatStores.h"
#include "HexagonOffload.h"
#include "InferArguments.h"
#include "InjectHostDevBufferCopies.h"
#include "InjectImageIntrinsics.h"
#include "InjectOpenGLIntrinsics.h"
#include "Inline.h"
#include "IRMutator.h"
#include "IROperator.h"
#include "IRPrinter.h"
#include "LoopCarry.h"
#include "Memoization.h"
#include "PartitionLoops.h"
#include "Prefetch.h"
#include "Profiling.h"
#include "Qualify.h"
#include "RealizationOrder.h"
#include "RemoveDeadAllocations.h"
#include "RemoveTrivialForLoops.h"
#include "RemoveUndef.h"
#include "ScheduleFunctions.h"
#include "SelectGPUAPI.h"
#include "SkipStages.h"
#include "SlidingWindow.h"
#include "Simplify.h"
#include "SimplifySpecializations.h"
#include "SplitTuples.h"
#include "StorageFlattening.h"
#include "StorageFolding.h"
#include "Substitute.h"
#include "Tracing.h"
#include "TrimNoOps.h"
#include "UnifyDuplicateLets.h"
#include "UniquifyVariableNames.h"
#include "UnpackBuffers.h"
#include "UnrollLoops.h"
#include "VaryingAttributes.h"
#include "VectorizeLoops.h"
#include "WrapCalls.h"
#include "WrapExternStages.h"
namespace Halide {
namespace Internal {
using std::set;
using std::ostringstream;
using std::string;
using std::vector;
using std::map;
// Lower a pipeline, described by its output Functions, into a Module.
//
// Parameters:
//   output_funcs   - the Functions whose realizations are the pipeline outputs.
//   pipeline_name  - possibly namespace-qualified name; the unqualified part
//                    (as returned by extract_namespaces) names the Module,
//                    while the full name is given to the generated
//                    LoweredFunc and to the memoization, tracing and
//                    profiling passes.
//   t              - the Target; its features decide which optional passes run.
//   args           - the argument list specified by the caller. Every input
//                    that infer_arguments finds in the lowered statement must
//                    appear here by name, unless it is "__user_context" or a
//                    defined buffer (which is embedded in the Module instead).
//   linkage_type   - forwarded to the LoweredFunc for the pipeline.
//   custom_passes  - extra mutators applied, in order, after all built-in
//                    passes.
//
// Returns the Module holding the lowered pipeline function, any embedded
// buffers and whatever inject_hexagon_rpc, add_legacy_wrapper and
// wrap_legacy_extern_stages add to it.
//
// Reports a user_error when the lowered code refers to a parameter that is
// neither in `args` nor an embeddable buffer.
//
// NOTE: the passes below are applied strictly in sequence to the same Stmt;
// do not reorder them without checking each pass's expectations.
Module lower(const vector<Function> &output_funcs, const string &pipeline_name, const Target &t,
             vector<Argument> &args, const Internal::LoweredFunc::LinkageType linkage_type,
             const vector<IRMutator *> &custom_passes) {
    std::vector<std::string> namespaces;
    std::string simple_pipeline_name = extract_namespaces(pipeline_name, namespaces);

    Module result_module(simple_pipeline_name, t);

    // Gather every Function transitively called by the outputs, keyed by name.
    map<string, Function> env;
    for (const Function &f : output_funcs) {
        map<string, Function> more_funcs = find_transitive_calls(f);
        env.insert(more_funcs.begin(), more_funcs.end());
    }

    // From here on work on the copies returned by deep_copy rather than on
    // the Functions that were passed in.
    vector<Function> outputs;
    std::tie(outputs, env) = deep_copy(output_funcs, env);

    // Outputs are always scheduled compute_root/store_root.
    for (const Function &f : outputs) {
        Func(f).compute_root().store_root();
    }

    for (auto &f : env) {
        f.second.substitute_schedule_param_exprs();
    }

    env = wrap_func_calls(env);

    vector<string> order = realization_order(outputs, env);

    simplify_specializations(env);

    // Set by schedule_functions; gates both memoization passes below.
    bool any_memoized = false;

    debug(1) << "Creating initial loop nests...\n";
    Stmt s = schedule_functions(outputs, order, env, t, any_memoized);
    debug(2) << "Lowering after creating initial loop nests:\n" << s << '\n';

    debug(1) << "Canonicalizing GPU var names...\n";
    s = canonicalize_gpu_vars(s);
    debug(2) << "Lowering after canonicalizing GPU var names:\n" << s << '\n';

    if (any_memoized) {
        debug(1) << "Injecting memoization...\n";
        s = inject_memoization(s, env, pipeline_name, outputs);
        debug(2) << "Lowering after injecting memoization:\n" << s << '\n';
    } else {
        debug(1) << "Skipping injecting memoization...\n";
    }

    debug(1) << "Injecting tracing...\n";
    s = inject_tracing(s, pipeline_name, env, outputs, t);
    debug(2) << "Lowering after injecting tracing:\n" << s << '\n';

    debug(1) << "Adding checks for parameters\n";
    s = add_parameter_checks(s, t);
    debug(2) << "Lowering after injecting parameter checks:\n" << s << '\n';

    // The value bounds are shared by the image checks and both bounds
    // inference passes that follow.
    debug(1) << "Computing bounds of each function's value\n";
    FuncValueBounds func_bounds = compute_function_value_bounds(order, env);

    debug(1) << "Adding checks for images\n";
    s = add_image_checks(s, outputs, t, order, env, func_bounds);
    debug(2) << "Lowering after injecting image checks:\n" << s << '\n';

    debug(1) << "Performing computation bounds inference...\n";
    s = bounds_inference(s, outputs, order, env, func_bounds, t);
    debug(2) << "Lowering after computation bounds inference:\n" << s << '\n';

    debug(1) << "Performing sliding window optimization...\n";
    s = sliding_window(s, env);
    debug(2) << "Lowering after sliding window:\n" << s << '\n';

    debug(1) << "Performing allocation bounds inference...\n";
    s = allocation_bounds_inference(s, env, func_bounds);
    debug(2) << "Lowering after allocation bounds inference:\n" << s << '\n';

    debug(1) << "Removing code that depends on undef values...\n";
    s = remove_undef(s);
    debug(2) << "Lowering after removing code that depends on undef values:\n" << s << "\n\n";

    debug(1) << "Uniquifying variable names...\n";
    s = uniquify_variable_names(s);
    debug(2) << "Lowering after uniquifying variable names:\n" << s << "\n\n";

    debug(1) << "Performing storage folding optimization...\n";
    s = storage_folding(s, env);
    debug(2) << "Lowering after storage folding:\n" << s << '\n';

    debug(1) << "Injecting debug_to_file calls...\n";
    s = debug_to_file(s, outputs, env);
    debug(2) << "Lowering after injecting debug_to_file calls:\n" << s << '\n';

    // NOTE(review): this is the only simplify call that passes an explicit
    // `false`; presumably it disables dead-let removal because later passes
    // still need those lets -- confirm against Simplify.h.
    debug(1) << "Simplifying...\n";
    s = simplify(s, false);
    debug(2) << "Lowering after first simplification:\n" << s << "\n\n";

    debug(1) << "Injecting prefetches...\n";
    s = inject_prefetch(s, env);
    debug(2) << "Lowering after injecting prefetches:\n" << s << "\n\n";

    debug(1) << "Dynamically skipping stages...\n";
    s = skip_stages(s, order);
    debug(2) << "Lowering after dynamically skipping stages:\n" << s << "\n\n";

    debug(1) << "Destructuring tuple-valued realizations...\n";
    s = split_tuples(s, env);
    debug(2) << "Lowering after destructuring tuple-valued realizations:\n" << s << "\n\n";

    if (t.has_feature(Target::OpenGL)) {
        debug(1) << "Injecting image intrinsics...\n";
        s = inject_image_intrinsics(s, env);
        debug(2) << "Lowering after image intrinsics:\n" << s << "\n\n";
    }

    debug(1) << "Performing storage flattening...\n";
    s = storage_flattening(s, outputs, env, t);
    debug(2) << "Lowering after storage flattening:\n" << s << "\n\n";

    debug(1) << "Unpacking buffer arguments...\n";
    s = unpack_buffers(s);
    // Dump the statement here too, like every other "Lowering after" message.
    debug(2) << "Lowering after unpacking buffer arguments...\n" << s << "\n\n";

    if (any_memoized) {
        debug(1) << "Rewriting memoized allocations...\n";
        s = rewrite_memoized_allocations(s, env);
        debug(2) << "Lowering after rewriting memoized allocations:\n" << s << "\n\n";
    } else {
        debug(1) << "Skipping rewriting memoized allocations...\n";
    }

    // Device API selection and host<->device copies are needed for GPU
    // targets, for either OpenGL flavour, and for a non-Hexagon target that
    // has one of the HVX features enabled.
    if (t.has_gpu_feature() ||
        t.has_feature(Target::OpenGLCompute) ||
        t.has_feature(Target::OpenGL) ||
        (t.arch != Target::Hexagon && (t.features_any_of({Target::HVX_64, Target::HVX_128})))) {
        debug(1) << "Selecting a GPU API for GPU loops...\n";
        s = select_gpu_api(s, t);
        debug(2) << "Lowering after selecting a GPU API:\n" << s << "\n\n";

        debug(1) << "Injecting host <-> dev buffer copies...\n";
        s = inject_host_dev_buffer_copies(s, t);
        debug(2) << "Lowering after injecting host <-> dev buffer copies:\n" << s << "\n\n";
    }

    if (t.has_feature(Target::OpenGL)) {
        debug(1) << "Injecting OpenGL texture intrinsics...\n";
        s = inject_opengl_intrinsics(s);
        debug(2) << "Lowering after OpenGL intrinsics:\n" << s << "\n\n";
    }

    if (t.has_gpu_feature() ||
        t.has_feature(Target::OpenGLCompute)) {
        debug(1) << "Injecting per-block gpu synchronization...\n";
        s = fuse_gpu_thread_loops(s);
        debug(2) << "Lowering after injecting per-block gpu synchronization:\n" << s << "\n\n";
    }

    debug(1) << "Simplifying...\n";
    s = simplify(s);
    s = unify_duplicate_lets(s);
    s = remove_trivial_for_loops(s);
    debug(2) << "Lowering after second simplifcation:\n" << s << "\n\n";

    debug(1) << "Reduce prefetch dimension...\n";
    s = reduce_prefetch_dimension(s, t);
    debug(2) << "Lowering after reduce prefetch dimension:\n" << s << "\n";

    // Each of the next four loop transformations is followed by a simplify.
    debug(1) << "Unrolling...\n";
    s = unroll_loops(s);
    s = simplify(s);
    debug(2) << "Lowering after unrolling:\n" << s << "\n\n";

    debug(1) << "Vectorizing...\n";
    s = vectorize_loops(s, t);
    s = simplify(s);
    debug(2) << "Lowering after vectorizing:\n" << s << "\n\n";

    debug(1) << "Detecting vector interleavings...\n";
    s = rewrite_interleavings(s);
    s = simplify(s);
    debug(2) << "Lowering after rewriting vector interleavings:\n" << s << "\n\n";

    debug(1) << "Partitioning loops to simplify boundary conditions...\n";
    s = partition_loops(s);
    s = simplify(s);
    debug(2) << "Lowering after partitioning loops:\n" << s << "\n\n";

    debug(1) << "Trimming loops to the region over which they do something...\n";
    s = trim_no_ops(s);
    debug(2) << "Lowering after loop trimming:\n" << s << "\n\n";

    debug(1) << "Injecting early frees...\n";
    s = inject_early_frees(s);
    debug(2) << "Lowering after injecting early frees:\n" << s << "\n\n";

    if (t.has_feature(Target::Profile)) {
        debug(1) << "Injecting profiling...\n";
        s = inject_profiling(s, pipeline_name);
        debug(2) << "Lowering after injecting profiling:\n" << s << "\n\n";
    }

    if (t.has_feature(Target::FuzzFloatStores)) {
        debug(1) << "Fuzzing floating point stores...\n";
        s = fuzz_float_stores(s);
        debug(2) << "Lowering after fuzzing floating point stores:\n" << s << "\n\n";
    }

    // Final clean-up: CSE, then (for OpenGL) the varying-attribute passes,
    // then dead allocation / trivial loop removal and a last simplify.
    debug(1) << "Simplifying...\n";
    s = common_subexpression_elimination(s);

    if (t.has_feature(Target::OpenGL)) {
        debug(1) << "Detecting varying attributes...\n";
        s = find_linear_expressions(s);
        debug(2) << "Lowering after detecting varying attributes:\n" << s << "\n\n";

        debug(1) << "Moving varying attribute expressions out of the shader...\n";
        s = setup_gpu_vertex_buffer(s);
        debug(2) << "Lowering after removing varying attributes:\n" << s << "\n\n";
    }

    s = remove_dead_allocations(s);
    s = remove_trivial_for_loops(s);
    s = simplify(s);
    // Printed at debug level 1, unlike the intermediate dumps above.
    debug(1) << "Lowering after final simplification:\n" << s << "\n\n";

    // This pass also receives the result module, so it may add to it.
    debug(1) << "Splitting off Hexagon offload...\n";
    s = inject_hexagon_rpc(s, t, result_module);
    debug(2) << "Lowering after splitting off Hexagon offload:\n" << s << '\n';

    // User-supplied passes run last, in the order given.
    for (size_t i = 0; i < custom_passes.size(); i++) {
        debug(1) << "Running custom lowering pass " << i << "...\n";
        s = custom_passes[i]->mutate(s);
        debug(1) << "Lowering after custom pass " << i << ":\n" << s << "\n\n";
    }

    // The public signature is the caller's argument list followed by one
    // output-buffer argument per output buffer of every output Function.
    vector<Argument> public_args = args;
    for (const auto &out : outputs) {
        for (const Parameter &buf : out.output_buffers()) {
            public_args.push_back(Argument(buf.name(),
                                           Argument::OutputBuffer,
                                           buf.type(), buf.dimensions()));
        }
    }

    // Check that everything the lowered code refers to is accounted for.
    vector<InferredArgument> inferred_args = infer_arguments(s, outputs);
    for (const InferredArgument &arg : inferred_args) {
        if (arg.param.defined() && arg.param.name() == "__user_context") {
            // The user context does not have to be listed in args.
            continue;
        }

        internal_assert(arg.arg.is_input()) << "Expected only input Arguments here";

        const bool found = std::any_of(args.begin(), args.end(),
                                       [&arg](const Argument &a) {
                                           return a.name == arg.arg.name;
                                       });

        if (arg.buffer.defined() && !found) {
            // A concrete buffer that is not in the argument list is embedded
            // in the module.
            debug(1) << "Embedding image " << arg.buffer.name() << "\n";
            result_module.append(arg.buffer);
        } else if (!found) {
            // Anything else that is missing is a user error; list both the
            // specified and the referenced names to help diagnose it.
            std::ostringstream err;
            err << "Generated code refers to ";
            if (arg.arg.is_buffer()) {
                err << "image ";
            }
            err << "parameter " << arg.arg.name
                << ", which was not found in the argument list.\n";

            err << "\nArgument list specified: ";
            for (const Argument &a : args) {
                err << a.name << " ";
            }

            err << "\n\nParameters referenced in generated code: ";
            for (const InferredArgument &ia : inferred_args) {
                if (ia.arg.name != "__user_context") {
                    err << ia.arg.name << " ";
                }
            }
            err << "\n\n";
            user_error << err.str();
        }
    }

    LoweredFunc main_func(pipeline_name, public_args, s, linkage_type);

    if (t.has_feature(Target::Debug)) {
        debug_arguments(&main_func);
    }

    result_module.append(main_func);

    // The legacy wrapper is only added for non-JIT targets.
    if (!t.has_feature(Target::JIT)) {
        add_legacy_wrapper(result_module, main_func);
    }

    wrap_legacy_extern_stages(result_module);

    return result_module;
}
// Convenience wrapper around lower() that returns only a statement.
//
// The argument list is derived automatically: infer_arguments is run on an
// empty Stmt plus the output Functions, and every inferred argument that has
// a non-empty name and is an input is kept. The pipeline is then lowered
// with External linkage and the body of the first function in the resulting
// Module is returned.
EXPORT Stmt lower_main_stmt(const std::vector<Function> &output_funcs, const std::string &pipeline_name,
                            const Target &t, const std::vector<IRMutator *> &custom_passes) {
    const vector<InferredArgument> candidates = infer_arguments(Stmt(), output_funcs);

    vector<Argument> input_args;
    for (const InferredArgument &candidate : candidates) {
        // Skip unnamed arguments and anything that is not an input.
        if (candidate.arg.name.empty() || !candidate.arg.is_input()) {
            continue;
        }
        input_args.push_back(candidate.arg);
    }

    Module lowered = lower(output_funcs, pipeline_name, t, input_args,
                           Internal::LoweredFunc::External, custom_passes);

    return lowered.functions().front().body;
}
}
}