update 48 apps/HelloMatlab/iir_blur.cpp blur.update(1) update 51 apps/HelloMatlab/iir_blur.cpp blur.update(2) update 126 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp new_state.update(i) update 132 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp new_state.update(i) update 137 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp new_state.update(4) update 82 apps/bilateral_grid/bilateral_grid_generator.cpp histogram.update().reorder(c, r.x, r.y, x, y).gpu_threads(x, y).unroll(c); update 96 apps/bilateral_grid/bilateral_grid_generator.cpp histogram.update().reorder(c, r.x, r.y, x, y).unroll(c); update 34 apps/cuda_mat_mul/mat_mul_generator.cpp .update() update 381 apps/fft/fft.cpp V.update(i).vectorize(V.args()[2]); update 385 apps/fft/fft.cpp exchange.update().unroll(r_); update 401 apps/fft/fft.cpp x.update() update 406 apps/fft/fft.cpp x.update().parallel(group); update 410 apps/fft/fft.cpp stage.compute_at(x, group).update().vectorize(n0); update 838 apps/fft/fft.cpp dft.update(1).allow_race_conditions() update 840 apps/fft/fft.cpp dft.update(2).allow_race_conditions() update 842 apps/fft/fft.cpp dft.update(4).allow_race_conditions() update 844 apps/fft/fft.cpp dft.update(5).allow_race_conditions() update 83 apps/hexagon_matmul/pipeline.cpp AB.update(0) update 113 apps/hexagon_matmul/pipeline.cpp AB.update(0) update 75 apps/linear_algebra/src/blas_l1_generators.cpp result.update().vectorize(vecs, vec_size); update 142 apps/linear_algebra/src/blas_l1_generators.cpp dot.update(0).vectorize(i); update 209 apps/linear_algebra/src/blas_l1_generators.cpp norm.update(0).vectorize(i); update 89 apps/linear_algebra/src/blas_l2_generators.cpp .update().reorder(i, j, k).unroll(i).unroll(j); update 93 apps/linear_algebra/src/blas_l2_generators.cpp .compute_at(result, i).update().unroll(lanes); update 96 apps/linear_algebra/src/blas_l2_generators.cpp .update().reorder(i, tail);//.unroll(i); update 101 apps/linear_algebra/src/blas_l2_generators.cpp .update().vectorize(j); update 105 apps/linear_algebra/src/blas_l2_generators.cpp sum_lanes.update().specialize(size >= vec_size).vectorize(i, vec_size);//.unroll(i); update 108 apps/linear_algebra/src/blas_l2_generators.cpp sum_tail.update().specialize(size >= vec_size).vectorize(i, vec_size);//.unroll(i); update 144 apps/linear_algebra/src/blas_l2_generators.cpp block.update().specialize(size >= vec_size && sum_size >= unroll_size) update 149 apps/linear_algebra/src/blas_l2_generators.cpp block.update().specialize(size >= vec_size).vectorize(i, vec_size); update 150 apps/linear_algebra/src/blas_l2_generators.cpp block.update(1).reorder(i, tail) update 146 apps/linear_algebra/src/blas_l3_generators.cpp .update() update 647 python_bindings/python/Func.cpp func_class.def("update", &Func::update, (p::arg("self"), p::arg("idx") = 0), update 237 src/BoundsInference.cpp const Definition &def = func.update(stage - 1); update 98 src/Func.cpp return func.update(idx).args(); update 109 src/Func.cpp user_assert(func.update(idx).values().size() == 1) update 111 src/Func.cpp return func.update(idx).values()[0]; update 121 src/Func.cpp return Tuple(func.update(idx).values()); update 134 src/Func.cpp const std::vector<ReductionVariable> rvars = func.update(idx).schedule().rvars(); update 807 src/Func.cpp intm.function().update(0).schedule().dims() = dims; update 808 src/Func.cpp intm.function().update(0).schedule().splits() = splits; update 820 src/Func.cpp intm.update(0).purify(rvars_kept[i], vars_rename[i]); update 2371 src/Func.cpp return Stage(func.update(idx), update 2506 src/Func.cpp return Stage(func.update(update_stage), update 1921 src/Func.h EXPORT Stage update(int idx = 0); update 185 src/Function.h EXPORT Definition &update(int idx = 0); update 189 src/Function.h EXPORT const Definition &update(int idx = 0) const; update 2021 src/Generator.cpp g.update(0).reorder(rdom.y, rdom.x); // check Func::reorder() overloads for RDom::operator RVar() update 1649 src/Generator.h HALIDE_OUTPUT_FORWARD(update) update 190 src/InlineReductions.cpp Tuple update = tuple_select(better, update_tup, f(v.free_vars)); update 191 src/InlineReductions.cpp f(v.free_vars) = update; update 27 src/Prefetch.cpp return f.update(stage_num - 1); update 519 src/ScheduleFunctions.cpp const Definition &def = f.update(i); update 1001 src/ScheduleFunctions.cpp const Definition &r = f.update(i); update 31 test/correctness/autotune_bug_2.cpp f.update(); update 30 test/correctness/compute_at_split_rvar.cpp g.update().split(r, ro, ri, 2); update 54 test/correctness/compute_at_split_rvar.cpp g.update().split(r, ro, ri, 2); update 78 test/correctness/compute_at_split_rvar.cpp g.update().split(r, ro, ri, 2).unroll(ri); update 104 test/correctness/compute_at_split_rvar.cpp g.update().split(r, ro, ri, 2).reorder(ro, ri); update 129 test/correctness/compute_at_split_rvar.cpp g.update().split(r, ro, ri, 4).split(ri, rio, rii, 2).fuse(rio, ro, fused); update 77 test/correctness/convolution.cpp blur1.update().reorder(x, y, r.x, r.y).gpu_tile(x, y, xi, yi, 16, 16); update 87 test/correctness/convolution.cpp blur1.update().hexagon().tile(x, y, xi, yi, hvx_vector_width, 4).vectorize(xi); update 95 test/correctness/convolution.cpp blur1.update().tile(x, y, xi, yi, 4, 4).vectorize(xi).parallel(y); update 27 test/correctness/gpu_mixed_dimensionality.cpp out.update().gpu_tile(x, y, xi, yi, 4, 4); update 29 test/correctness/gpu_mixed_dimensionality.cpp h.update().gpu_threads(x); update 31 test/correctness/gpu_mixed_dimensionality.cpp g.update(); update 33 test/correctness/gpu_mixed_dimensionality.cpp f.update(); update 28 test/correctness/gpu_sum_scan.cpp f.update(0).gpu_blocks(y); update 33 test/correctness/gpu_sum_scan.cpp f.update(1).gpu_single_thread(); update 38 test/correctness/gpu_sum_scan.cpp f.update(2).gpu_blocks(y).gpu_threads(r3); update 78 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 0).gpu_threads(y); update 79 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 1).gpu_threads(y); update 80 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 2).gpu_threads(x); update 81 test/correctness/gpu_thread_barrier.cpp f.update(i*4 + 3).gpu_threads(x); update 122 test/correctness/gpu_thread_barrier.cpp f.update(0).gpu_threads(x, y); update 123 test/correctness/gpu_thread_barrier.cpp f.update(1).gpu_threads(x, y); update 124 test/correctness/gpu_thread_barrier.cpp f.update(2).gpu_threads(x, y); update 25 test/correctness/gpu_vectorized_shared_memory.cpp g.update().split(x, xo, xi, 4).gpu_threads(xo).vectorize(xi); update 261 test/correctness/image_wrap.cpp g.update(0).split(r.x, rxo, rxi, 2).unroll(rxi); update 319 test/correctness/interleave.cpp output6.update(j).vectorize(r); update 23 test/correctness/legal_race_condition.cpp f.update().allow_race_conditions().parallel(r); update 54 test/correctness/legal_race_condition.cpp f.update().allow_race_conditions().vectorize(r, 4).parallel(r); update 123 test/correctness/multi_pass_reduction.cpp f.update(0).vectorize(x, 4); update 128 test/correctness/multi_pass_reduction.cpp f.update(1).reorder(Var(r.x.name()), y).parallel(y); update 14 test/correctness/multipass_constraints.cpp out.update().vectorize(x, 4); update 70 test/correctness/parallel_reductions.cpp sum_rows.update().parallel(j); update 72 test/correctness/parallel_reductions.cpp sum_cols.update(); update 29 test/correctness/parallel_rvar.cpp f[0].update(0).tile(r.x, r.y, rxo, ryo, rxi, ryi, 4, 2).fuse(rxo, ryo, rt).parallel(rt); update 30 test/correctness/parallel_rvar.cpp f[0].update(1).parallel(r.x).parallel(r.y).unroll(r.y, 2); update 31 test/correctness/parallel_rvar.cpp f[0].update(2).vectorize(r2, 4).unroll(r2); update 32 test/correctness/parallel_rvar.cpp f[0].update(3).parallel(r2, 4); update 102 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 104 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 169 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 171 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 202 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 204 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 233 test/correctness/predicated_store_load.cpp f.update(0).allow_race_conditions(); update 237 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 239 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 268 test/correctness/predicated_store_load.cpp f.update(0).allow_race_conditions(); update 272 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.z, 32); update 274 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.z, 32); update 304 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 305 test/correctness/predicated_store_load.cpp f.update(1).hexagon().vectorize(r.y, 32); update 307 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 308 test/correctness/predicated_store_load.cpp f.update(1).vectorize(r.y, 32); update 339 test/correctness/predicated_store_load.cpp f.update(0).hexagon().vectorize(r.x, 32); update 341 test/correctness/predicated_store_load.cpp f.update(0).vectorize(r.x, 32); update 119 test/correctness/reduction_non_rectangular.cpp f.update().reorder(r.y, r.x); update 120 test/correctness/reduction_non_rectangular.cpp f.update().split(r.x, rx_outer, rx_inner, 4); update 121 test/correctness/reduction_non_rectangular.cpp f.update().fuse(rx_inner, r.y, r_fused); update 478 test/correctness/reduction_non_rectangular.cpp f.update(0).tile(r.x, r.y, rxi, ryi, 8, 8); update 479 test/correctness/reduction_non_rectangular.cpp f.update(0).reorder(rxi, ryi, r.x, r.y); update 688 test/correctness/reduction_non_rectangular.cpp f.update(0).gpu_tile(r.x, r.y, r.x, r.y, rxi, ryi, 4, 4); update 727 test/correctness/reduction_non_rectangular.cpp f.update(0).specialize(p >= 2).gpu_tile(r1.x, r1.y, r1xi, r1yi, 4, 4); update 792 test/correctness/reduction_non_rectangular.cpp f.update(0).unroll(r.x, 2) update 27 test/correctness/reorder_rvars.cpp g.update(0).reorder(r1.x, y, x, r1.y); update 28 test/correctness/reorder_rvars.cpp g.update(1).reorder(r2.x, x, r2.y, r2.z); update 58 test/correctness/reorder_rvars.cpp sat.update().split(x, xo, xi, 4).reorder(xi, r, xo).vectorize(xi).parallel(xo); update 62 test/correctness/reorder_rvars.cpp sat.update(1).parallel(y); update 26 test/correctness/rfactor.cpp Func intm = g.update(0).rfactor(r.y, u); update 29 test/correctness/rfactor.cpp intm.update(0).vectorize(r.x, 2); update 68 test/correctness/rfactor.cpp g.update(0).reorder({r.y, r.x}); update 71 test/correctness/rfactor.cpp g.update(0).split(r.x, rxo, rxi, 2); update 74 test/correctness/rfactor.cpp Func intm1 = g.update(0).rfactor({{rxo, u}, {r.y, v}}); update 75 test/correctness/rfactor.cpp Func intm2 = g.update(0).rfactor(r.y, v); update 117 test/correctness/rfactor.cpp g.update(0).reorder({r.y, r.x}); update 122 test/correctness/rfactor.cpp g.update(0).split(r.x, rxo, rxi, 2); update 123 test/correctness/rfactor.cpp Func intm1 = g.update(0).rfactor({{rxo, u}, {r.y, v}}); update 125 test/correctness/rfactor.cpp g.update(0).split(r.y, ryo, ryi, 2, TailStrategy::GuardWithIf); update 126 test/correctness/rfactor.cpp g.update(0).split(ryo, ryoo, ryoi, 4, TailStrategy::GuardWithIf); update 127 test/correctness/rfactor.cpp Func intm2 = g.update(0).rfactor({{rxo, u}, {ryoo, v}, {ryoi, w}}); update 168 test/correctness/rfactor.cpp g.update(0).reorder({r.y, r.x}); update 171 test/correctness/rfactor.cpp g.update(0).fuse(r.x, r.y, rf); update 172 test/correctness/rfactor.cpp g.update(0).reorder({r.z, rf}); update 175 test/correctness/rfactor.cpp Func intm = g.update(0).rfactor(r.z, u); update 177 test/correctness/rfactor.cpp intm.update(0).split(rf, rfi, rfo, 2); update 251 test/correctness/rfactor.cpp Func intm = f.update(0).rfactor({{r.x, u}, {r.y, v}}); update 252 test/correctness/rfactor.cpp intm.update(0).split(r.z, rzo, rzi, 2); update 298 test/correctness/rfactor.cpp Func intm = g.update(0).specialize(p >= 10).rfactor(r.y, u); update 301 test/correctness/rfactor.cpp intm.update(0).vectorize(r.x, 2); update 357 test/correctness/rfactor.cpp Func intm = g.update(0).rfactor({{r.y, u}, {r.x, v}}); update 361 test/correctness/rfactor.cpp intm.update(0).vectorize(r.z, 2); update 417 test/correctness/rfactor.cpp Func intm = hist.update(0).rfactor(r.y, u); update 419 test/correctness/rfactor.cpp intm.update(0).parallel(u); update 473 test/correctness/rfactor.cpp dot.update(0).split(r.x, rxo, rxi, 128); update 476 test/correctness/rfactor.cpp Func intm1 = dot.update(0).rfactor(rxo, u); update 478 test/correctness/rfactor.cpp intm1.update(0).split(rxi, rxio, rxii, 8); update 481 test/correctness/rfactor.cpp Func intm2 = intm1.update(0).rfactor(rxii, v); update 483 test/correctness/rfactor.cpp intm2.update(0).vectorize(v, 8); update 486 test/correctness/rfactor.cpp intm1.update(0).parallel(u); update 535 test/correctness/rfactor.cpp g.update(0).tile(x, y, xi, yi, 4, 4); update 538 test/correctness/rfactor.cpp Func intm1 = g.update(0).rfactor(r.y, u); update 541 test/correctness/rfactor.cpp intm1.update(0).split(r.x, rxo, rxi, 2); update 544 test/correctness/rfactor.cpp Func intm2 = intm1.update(0).rfactor(rxo, v); update 547 test/correctness/rfactor.cpp intm1.update(0).parallel(u, 2); update 617 test/correctness/rfactor.cpp Func intm1 = g.update(0).specialize(p >= 5).rfactor({{r.y, v}, {r.z, w}}); update 618 test/correctness/rfactor.cpp intm1.update(0).parallel(v, 4); update 622 test/correctness/rfactor.cpp intm1.update(0).split(r.x, rxo, rxi, 2); update 624 test/correctness/rfactor.cpp Func intm2 = intm1.update(0).specialize(q).rfactor(rxi, t).compute_root(); update 625 test/correctness/rfactor.cpp Func intm3 = intm1.update(0).specialize(!q).rfactor(rxo, t).compute_root(); update 626 test/correctness/rfactor.cpp Func intm4 = g.update(0).rfactor({{r.x, u}, {r.z, w}}).compute_root(); update 627 test/correctness/rfactor.cpp intm4.update(0).vectorize(u); update 771 test/correctness/rfactor.cpp g.update(0).split(r.x, rxo, rxi, 2); update 774 test/correctness/rfactor.cpp Func intm = g.update(0).rfactor(rxo, u); update 776 test/correctness/rfactor.cpp intm.update(0).vectorize(u, 2); update 825 test/correctness/rfactor.cpp g.update(0).split(r.x, rxo, rxi, 2); update 828 test/correctness/rfactor.cpp Func intm = g.update(0).rfactor(rxo, u); update 830 test/correctness/rfactor.cpp intm.update(0).vectorize(u, 2); update 887 test/correctness/rfactor.cpp g.update(0).split(r.x, rxo, rxi, 2); update 889 test/correctness/rfactor.cpp g.update(0).rfactor({{rxo, u}}).compute_at(g, rxo); update 918 test/correctness/rfactor.cpp f.update() update 921 test/correctness/rfactor.cpp .update().tile(u, v, ui, vi, 4, 4) update 100 test/correctness/sliding_reduction.cpp f.update(0); update 101 test/correctness/sliding_reduction.cpp f.update(1); update 295 test/correctness/specialize.cpp f.update().specialize(size == 1); update 298 test/correctness/specialize.cpp f.update().specialize(size == 0); update 15 test/correctness/split_by_non_factor.cpp f.update().unroll(x, 2, TailStrategy::GuardWithIf); update 36 test/correctness/split_by_non_factor.cpp f.update(0).vectorize(x, 8, TailStrategy::GuardWithIf); update 37 test/correctness/split_by_non_factor.cpp f.update(1).unroll(r, 4); update 57 test/correctness/split_by_non_factor.cpp f.update().split(x, xo, xi, 7, TailStrategy::GuardWithIf); update 20 test/correctness/split_fuse_rvar.cpp g.update(0).fuse(r.x, r.y, rxy).split(rxy, rxyo, rxyi, 2); update 47 test/correctness/split_fuse_rvar.cpp g.update(0).split(r, ro, ri, 2).fuse(ro, ri, roi); update 35 test/correctness/stream_compaction.cpp ones.update().allow_race_conditions().parallel(r, 50); update 195 test/correctness/trim_no_ops.cpp f.update(0).gpu_tile(r.x, r.y, rxi, ryi, 4, 4); update 22 test/correctness/tuple_reduction.cpp f.update().gpu_tile(x, y, xo, yo, xi, yi, 16, 16); update 25 test/correctness/tuple_reduction.cpp f.update().hexagon(y).vectorize(x, 32); update 66 test/correctness/tuple_reduction.cpp f.update(i).gpu_tile(x, y, xo, yo, xi, yi, 16, 16); update 68 test/correctness/tuple_reduction.cpp f.update(i).hexagon(y).vectorize(x, 32); update 71 test/correctness/tuple_reduction.cpp f.update(i); update 109 test/correctness/tuple_reduction.cpp f.update(i).gpu_tile(x, y, xo, yo, xi, yi, 16, 16); update 111 test/correctness/tuple_reduction.cpp f.update(i).hexagon(y).vectorize(x, 32); update 114 test/correctness/tuple_reduction.cpp f.update(i); update 152 test/correctness/tuple_reduction.cpp f.update(i); update 155 test/correctness/tuple_reduction.cpp f.update(i).gpu_tile(x, y, xo, yo, xi, yi, 16, 16); update 157 test/correctness/tuple_reduction.cpp f.update(i).hexagon(y).vectorize(x, 32); update 24 test/correctness/unrolled_reduction.cpp g.compute_at(f, y).update().split(r.x, rxo, rxi, 2).unroll(rxi); update 37 test/correctness/unsafe_dedup_lets.cpp f.update().unroll(r, 4); update 263 test/correctness/vector_math.cpp f6.update().vectorize(x, lanes); update 22 test/correctness/vectorized_initialization.cpp f.update(); update 19 test/correctness/vectorized_load_from_vectorized_allocation.cpp f.update(0).vectorize(r.z, 8); update 52 test/correctness/vectorized_reduction_bug.cpp g.update(0).vectorize(x); update 242 test/correctness/wrap.cpp g.update(0).split(r.x, rxo, rxi, 2).unroll(rxi); update 16 test/error/bad_rvar_order.cpp f.update().reorder(r1.y, r1.x); update 10 test/error/nonexistent_update_stage.cpp f.update().vectorize(x, 4); update 17 test/error/race_condition.cpp f.update().parallel(r.y); update 25 test/error/rfactor_inner_dim_non_commutative.cpp g.update(0).rfactor(r.x, u); update 22 test/performance/fast_inverse.cpp slow.update().vectorize(x, 4); update 23 test/performance/fast_inverse.cpp fast.update().vectorize(x, 4); update 39 test/performance/matrix_multiplication.cpp matrix_mul.update(0) update 45 test/performance/profiler.cpp out.update().reorder(c, x, r); update 28 test/performance/rfactor.cpp maxf.update().split(r.x, rxo, rxi, 4*8192); update 31 test/performance/rfactor.cpp Func intm = maxf.update().rfactor(rxo, u); update 33 test/performance/rfactor.cpp .update() update 39 test/performance/rfactor.cpp .update() update 97 test/performance/rfactor.cpp .update() update 102 test/performance/rfactor.cpp .update().parallel(u); update 103 test/performance/rfactor.cpp hist.update().vectorize(x, 8); update 153 test/performance/rfactor.cpp Func intm1 = amin.update(0).rfactor(r.w, u); update 155 test/performance/rfactor.cpp intm1.update(0).parallel(u); update 159 test/performance/rfactor.cpp Func intm2 = intm1.update(0).split(r.x, rxo, rxi, 16).rfactor(rxi, v); update 161 test/performance/rfactor.cpp intm2.update(0).vectorize(v); update 221 test/performance/rfactor.cpp mult.update(0).split(r.x, rxo, rxi, 2*8192); update 224 test/performance/rfactor.cpp Func intm = mult.update().rfactor(rxo, u); update 227 test/performance/rfactor.cpp .update() update 233 test/performance/rfactor.cpp .update() update 289 test/performance/rfactor.cpp dot.update().split(r.x, rxo, rxi, 4*8192); update 292 test/performance/rfactor.cpp Func intm = dot.update().rfactor(rxo, u); update 294 test/performance/rfactor.cpp .update() update 300 test/performance/rfactor.cpp .update() update 370 test/performance/rfactor.cpp sink.update().split(r.x, rxo, rxi, 8192); update 373 test/performance/rfactor.cpp Func intm = sink.update().rfactor(rxo, u); update 375 test/performance/rfactor.cpp .update() update 381 test/performance/rfactor.cpp .update() update 49 test/performance/vectorize_pred.cpp f.update(0).vectorize(r.x); update 259 tutorial/lesson_09_update_definitions.cpp f.update(0).vectorize(x, 4); update 264 tutorial/lesson_09_update_definitions.cpp f.update(1).split(y, yo, yi, 4).parallel(yo); update 89 tutorial/lesson_18_parallel_associative_reductions.cpp intermediate.compute_root().update().parallel(y); update 136 tutorial/lesson_18_parallel_associative_reductions.cpp Func intermediate = histogram.update().rfactor({{r.y, y}}); update 140 tutorial/lesson_18_parallel_associative_reductions.cpp intermediate.compute_root().update().parallel(y); update 209 tutorial/lesson_18_parallel_associative_reductions.cpp Func intermediate = histogram.update().rfactor(r.x, u); update 213 tutorial/lesson_18_parallel_associative_reductions.cpp intermediate.compute_root().update().vectorize(u, 8); update 278 tutorial/lesson_18_parallel_associative_reductions.cpp histogram.update() update 284 tutorial/lesson_18_parallel_associative_reductions.cpp Func intermediate = histogram.update().rfactor({{rx_outer, u}, {ry_outer, v}}); update 287 tutorial/lesson_18_parallel_associative_reductions.cpp intermediate.compute_root().update().parallel(u).parallel(v); update 291 tutorial/lesson_18_parallel_associative_reductions.cpp intermediate.update().reorder(rx_inner, ry_inner, u, v);