gpu_threads 81 apps/bilateral_grid/bilateral_grid_generator.cpp histogram.reorder(c, z, x, y).compute_at(blurz, x).gpu_threads(x, y); gpu_threads 82 apps/bilateral_grid/bilateral_grid_generator.cpp histogram.update().reorder(c, r.x, r.y, x, y).gpu_threads(x, y).unroll(c); gpu_threads 53 apps/blur/halide_blur_generator.cpp blur_x.compute_at(blur_y, x).gpu_threads(x, y); gpu_threads 30 apps/cuda_mat_mul/mat_mul_generator.cpp .gpu_blocks(x, y).gpu_threads(xii); gpu_threads 193 apps/interpolate/interpolate.cpp .gpu_threads(x, y, c); gpu_threads 21 python_bindings/python/Func_gpu.h return that.gpu_threads(thread_x, device_api); gpu_threads 26 python_bindings/python/Func_gpu.h return that.gpu_threads(thread_x, thread_y, device_api); gpu_threads 31 python_bindings/python/Func_gpu.h return that.gpu_threads(thread_x, thread_y, thread_z, device_api); gpu_threads 111 python_bindings/python/Var.cpp .def("gpu_threads", &Var::gpu_threads, // no args gpu_threads 1574 src/Func.cpp return gpu_blocks(bx).gpu_threads(tx); gpu_threads 1579 src/Func.cpp return gpu_blocks(bx, by).gpu_threads(tx, ty); gpu_threads 1585 src/Func.cpp return gpu_blocks(bx, by, bz).gpu_threads(tx, ty, tz); gpu_threads 2026 src/Func.cpp Stage(func.definition(), name(), args(), func.schedule().storage_dims()).gpu_threads(tx, device_api); gpu_threads 2032 src/Func.cpp Stage(func.definition(), name(), args(), func.schedule().storage_dims()).gpu_threads(tx, ty, device_api); gpu_threads 2038 src/Func.cpp Stage(func.definition(), name(), args(), func.schedule().storage_dims()).gpu_threads(tx, ty, tz, device_api); gpu_threads 206 src/Func.h EXPORT Stage &gpu_threads(VarOrRVar thread_x, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 207 src/Func.h EXPORT Stage &gpu_threads(VarOrRVar thread_x, VarOrRVar thread_y, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 208 src/Func.h EXPORT Stage &gpu_threads(VarOrRVar thread_x, VarOrRVar thread_y, VarOrRVar thread_z, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 1439 src/Func.h EXPORT Func &gpu_threads(VarOrRVar thread_x, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 1440 src/Func.h EXPORT Func &gpu_threads(VarOrRVar thread_x, VarOrRVar thread_y, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 1441 src/Func.h EXPORT Func &gpu_threads(VarOrRVar thread_x, VarOrRVar thread_y, VarOrRVar thread_z, DeviceAPI device_api = DeviceAPI::Default_GPU); gpu_threads 1625 src/Generator.h HALIDE_OUTPUT_FORWARD(gpu_threads) gpu_threads 159 src/Var.h static Var gpu_threads() { gpu_threads 21 test/correctness/chunk.cpp f.compute_at(g, xo).gpu_threads(x, y); gpu_threads 37 test/correctness/gpu_assertion_in_kernel.cpp f.compute_at(g, x).gpu_threads(x); gpu_threads 19 test/correctness/gpu_dynamic_shared.cpp f.compute_at(g, x).gpu_threads(x); gpu_threads 28 test/correctness/gpu_mixed_dimensionality.cpp h.compute_at(out, x).gpu_threads(x, y); gpu_threads 29 test/correctness/gpu_mixed_dimensionality.cpp h.update().gpu_threads(x); gpu_threads 30 test/correctness/gpu_mixed_dimensionality.cpp g.compute_at(h, y).gpu_threads(x); gpu_threads 62 test/correctness/gpu_mixed_shared_mem_types.cpp funcs[i].compute_at(out, x).gpu_threads(x); gpu_threads 18 test/correctness/gpu_reuse_shared_memory.cpp f5.compute_at(f6, x).gpu_threads(x, y); gpu_threads 19 test/correctness/gpu_reuse_shared_memory.cpp f4.compute_at(f6, x).gpu_threads(x, y); gpu_threads 20 test/correctness/gpu_reuse_shared_memory.cpp f3.compute_at(f6, x).gpu_threads(x, y); gpu_threads 21 test/correctness/gpu_reuse_shared_memory.cpp f2.compute_at(f6, x).gpu_threads(x, y); gpu_threads 22 test/correctness/gpu_reuse_shared_memory.cpp f1.compute_at(f6, x).gpu_threads(x, y); gpu_threads 66 test/correctness/gpu_reuse_shared_memory.cpp .gpu_threads(xo, y); gpu_threads 104 test/correctness/gpu_reuse_shared_memory.cpp .gpu_threads(xi, yi); gpu_threads 108 test/correctness/gpu_reuse_shared_memory.cpp .gpu_threads(xi, yi); gpu_threads 145 test/correctness/gpu_reuse_shared_memory.cpp f3.compute_at(f4, xo).split(x, xo, xi, 16).gpu_threads(xi); gpu_threads 146 test/correctness/gpu_reuse_shared_memory.cpp f2.compute_at(f4, xo).split(x, xo, xi, 16).gpu_threads(xi); gpu_threads 147 test/correctness/gpu_reuse_shared_memory.cpp f1.compute_at(f4, xo).split(x, xo, xi, 16).gpu_threads(xi); gpu_threads 41 test/correctness/gpu_specialize.cpp g.specialize(use_gpu).gpu_threads(x, y); gpu_threads 82 test/correctness/gpu_specialize.cpp f.specialize(p).tile(x, y, xi, yi, 4, 4).gpu_threads(x, y); gpu_threads 83 test/correctness/gpu_specialize.cpp f.tile(x, y, xo, yo, xi, yi, 8, 8).gpu_threads(xo, yo); gpu_threads 23 test/correctness/gpu_sum_scan.cpp f.compute_root().gpu_blocks(y).gpu_threads(x); gpu_threads 38 test/correctness/gpu_sum_scan.cpp f.update(2).gpu_blocks(y).gpu_threads(r3); gpu_threads 78 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 0).gpu_threads(y); gpu_threads 79 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 1).gpu_threads(y); gpu_threads 80 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 2).gpu_threads(x); gpu_threads 81 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 3).gpu_threads(x); gpu_threads 121 test/correctness/gpu_thread_barrier.cpp f.gpu_threads(x, y); gpu_threads 122 test/correctness/gpu_thread_barrier.cpp f.update(0).gpu_threads(x, y); gpu_threads 123 test/correctness/gpu_thread_barrier.cpp f.update(1).gpu_threads(x, y); gpu_threads 124 test/correctness/gpu_thread_barrier.cpp f.update(2).gpu_threads(x, y); gpu_threads 33 test/correctness/gpu_transpose.cpp .gpu_threads(xii, yii); gpu_threads 36 test/correctness/gpu_transpose.cpp in_func.compute_at(out, subtile_idx).gpu_threads(x, y); gpu_threads 22 test/correctness/gpu_vectorized_shared_memory.cpp h.split(x, xo, xi, 16).vectorize(xi, 4).gpu_threads(xi).gpu_blocks(xo); gpu_threads 24 test/correctness/gpu_vectorized_shared_memory.cpp g.split(x, xo, xi, 4).gpu_threads(xo).vectorize(xi); gpu_threads 25 test/correctness/gpu_vectorized_shared_memory.cpp g.update().split(x, xo, xi, 4).gpu_threads(xo).vectorize(xi); gpu_threads 23 test/error/five_d_gpu_buffer.cpp f.compute_root().gpu_blocks(v3, v4).gpu_threads(v1, v2); gpu_threads 59 test/performance/wrap.cpp f[i].compute_at(final, xo).gpu_threads(x, y); gpu_threads 66 test/performance/wrap.cpp staged.compute_at(final, xo).unroll(x, 2).unroll(y, 2).gpu_threads(x, y); gpu_threads 103 test/performance/wrap.cpp f[i].compute_at(final, xo).gpu_threads(x, y); gpu_threads 108 test/performance/wrap.cpp host.in().compute_at(final, xo).unroll(x, 2).unroll(y, 2).gpu_threads(x, y); gpu_threads 128 tutorial/lesson_12_using_the_gpu.cpp .gpu_threads(thread); gpu_threads 163 tutorial/lesson_12_using_the_gpu.cpp padded.gpu_threads(x, y);