A 191 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp Expr A = 1.0f; A 200 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp /*c==3*/cast<uint8_t>(A * 255)); A 205 apps/HelloiOS/HelloiOS/reaction_diffusion_2_generator.cpp /*c==3*/cast<uint8_t>(A * 255)); A 11 apps/cuda_mat_mul/mat_mul_generator.cpp ImageParam A {Float(32), 2, "A"}; A 19 apps/cuda_mat_mul/mat_mul_generator.cpp prod(x, y) += A(x, r) * B(r, y); A 43 apps/cuda_mat_mul/mat_mul_generator.cpp OutputImageParam bufs[] = {A, B, prod.output_buffer()}; A 16 apps/cuda_mat_mul/runner.cpp Buffer<float> A(size, size), B(size, size), C(size, size); A 18 apps/cuda_mat_mul/runner.cpp mat_mul(A, B, C); A 25 apps/cuda_mat_mul/runner.cpp float *A, *B, *C; A 26 apps/cuda_mat_mul/runner.cpp cudaMalloc((void **)&A, size*size*4); A 34 apps/cuda_mat_mul/runner.cpp size, size, size, &alpha, A, size, B, size, &beta, C, size); A 37 apps/cuda_mat_mul/runner.cpp cudaFree(A); A 94 apps/fft/fft.cpp refs.push_back(x(A({Expr(-i - 1)}, args))); A 96 apps/fft/fft.cpp refs.push_back(x(A({Expr(i)}, args))); A 240 apps/fft/fft.cpp ComplexExpr dft = x(A({Expr(0)}, args)); A 242 apps/fft/fft.cpp dft += expj((sign*2*kPi*k*n)/N) * x(A({Expr(k)}, args)); A 244 apps/fft/fft.cpp X(A({n}, args)) = dft; A 248 apps/fft/fft.cpp X(A({n}, args)) = sum(expj((sign*2*kPi*k*n)/N) * x(A({k}, args))); A 330 apps/fft/fft.cpp ComplexExpr x_rs = x(A({n0, s + r * (N / R)}, args)); A 334 apps/fft/fft.cpp v(A({r, s, n0}, args)) = select(r > 0, likely(x_rs * W(r * (s % S))), x_rs * gain); A 339 apps/fft/fft.cpp v(A({r, s, n0}, args)) = x_rs; A 353 apps/fft/fft.cpp exchange(A({n0, n1}, args)) = undef_z(V.output_types()[0]); A 358 apps/fft/fft.cpp ComplexExpr V_rs = V(A({r_, s_, n0}, args)); A 365 apps/fft/fft.cpp exchange(A({n0, ((s_ / S) * R * S) + (s_ % S) + (r_ * S)}, args)) = V_rs; A 452 apps/fft/fft.cpp f_tiled(A({x, y, xo, yo}, args)) = f(A({xo * tile_size + x, yo * tile_size + y}, args)); A 456 apps/fft/fft.cpp f_tiledT(A({y, x, xo, yo}, args)) = f_tiled(A({x, y, xo, yo}, args)); A 459 apps/fft/fft.cpp fT_tiled(A({y, x, yo, xo}, args)) = f_tiledT(A({y, x, xo, yo}, args)); A 463 apps/fft/fft.cpp fT(A({y, x}, args)) = fT_tiled(A({y % tile_size, x % tile_size, y / tile_size, x / tile_size}, args)); A 709 apps/fft/fft.cpp zipped(A({n0, n1}, args)) = A 710 apps/fft/fft.cpp ComplexExpr(r(A({zip_n0, n1}, args)), A 711 apps/fft/fft.cpp r(A({zip_n0 + zip_width, n1}, args))); A 729 apps/fft/fft.cpp ComplexExpr Z = dft1(A({unzip_n0, n1}, args)); A 730 apps/fft/fft.cpp ComplexExpr conjsymZ = conj(dft1(A({unzip_n0, (N1 - n1) % N1}, args))); A 738 apps/fft/fft.cpp unzipped(A({n0, n1}, args)) = A 744 apps/fft/fft.cpp zipped_0(A({n0, n1}, args)) = A 745 apps/fft/fft.cpp select(n1 > 0, likely(unzipped(A({n0, n1}, args))), A 746 apps/fft/fft.cpp ComplexExpr(re(unzipped(A({n0, 0}, args))), A 747 apps/fft/fft.cpp re(unzipped(A({n0, N1 / 2}, args))))); A 782 apps/fft/fft.cpp dft(A({0, N1 / 2}, args)) = im(dft(A({0, 0}, args))); A 784 apps/fft/fft.cpp dft(A({n0z1, N1 / 2}, args)) = A 785 apps/fft/fft.cpp 0.5f * -j * (dft(A({n0z1, 0}, args)) - conj(dft(A({N0 - n0z1, 0}, args)))); A 789 apps/fft/fft.cpp dft(A({n0z2, N1 / 2}, args)) = conj(dft(A({N0 - n0z2, N1 / 2}, args))); A 792 apps/fft/fft.cpp dft(A({0, 0}, args)) = re(dft(A({0, 0}, args))); A 794 apps/fft/fft.cpp dft(A({n0z1, 0}, args)) = A 795 apps/fft/fft.cpp 0.5f * (dft(A({n0z1, 0}, args)) + conj(dft(A({N0 - n0z1, 0}, args)))); A 799 apps/fft/fft.cpp dft(A({n0z2, 0}, args)) = conj(dft(A({N0 - n0z2, 0}, args))); A 885 apps/fft/fft.cpp ComplexExpr X = c(A({n0, 0}, args)); A 886 apps/fft/fft.cpp ComplexExpr Y = c(A({n0, N1 / 2}, args)); A 887 apps/fft/fft.cpp c_zipped(A({n0, n1}, args)) = select(n1 > 0, likely(c(A({n0, n1}, args))), X + j * Y); A 918 apps/fft/fft.cpp dft0_unzipped(A({n0, n1}, args)) = A 919 apps/fft/fft.cpp select(n1 <= 0, re(dft0(A({n0, 0}, args))), A 920 apps/fft/fft.cpp n1 >= N1 / 2, im(dft0(A({n0, 0}, args))), A 921 apps/fft/fft.cpp likely(dft0(A({n0, min(n1, (N1 / 2) - 1)}, args)))); A 942 apps/fft/fft.cpp dft0_bounded(A({n0_X, n1}, args)), A 943 apps/fft/fft.cpp conj(dft0_bounded(A({n0_X, n1_sym}, args)))); A 947 apps/fft/fft.cpp dft0_bounded(A({n0_Y, n1}, args)), A 948 apps/fft/fft.cpp conj(dft0_bounded(A({n0_Y, n1_sym}, args)))); A 949 apps/fft/fft.cpp zipped(A({n0, n1}, args)) = X + j * Y; A 968 apps/fft/fft.cpp unzipped(A({n0, n1}, args)) = A 970 apps/fft/fft.cpp re(dft_padded(A({unzip_n0, n1}, args))), A 971 apps/fft/fft.cpp im(dft_padded(A({unzip_n0, n1}, args)))); A 9 apps/hexagon_matmul/pipeline.cpp Input<Buffer<uint8_t>> A{"A", 2}; A 23 apps/hexagon_matmul/pipeline.cpp Expr k_extent = A.dim(0).extent(); A 40 apps/hexagon_matmul/pipeline.cpp u32(u16(A(4*rk + 0, y))*u16(B_swizzled(x, rk, 0))) + A 41 apps/hexagon_matmul/pipeline.cpp u32(u16(A(4*rk + 1, y))*u16(B_swizzled(x, rk, 1))) + A 42 apps/hexagon_matmul/pipeline.cpp u32(u16(A(4*rk + 2, y))*u16(B_swizzled(x, rk, 2))) + A 43 apps/hexagon_matmul/pipeline.cpp u32(u16(A(4*rk + 3, y))*u16(B_swizzled(x, rk, 3))); A 73 apps/hexagon_matmul/pipeline.cpp .prefetch(A, yo, 1) A 127 apps/hexagon_matmul/pipeline.cpp A.dim(0) A 129 apps/hexagon_matmul/pipeline.cpp A.dim(1) A 130 apps/hexagon_matmul/pipeline.cpp .set_bounds(0, (A.dim(1).extent()/tile_rows)*tile_rows) A 131 apps/hexagon_matmul/pipeline.cpp .set_stride((A.dim(1).stride()/vector_size_u8)*vector_size_u8); A 125 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(x[0]), 1, A 129 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(x[0]), 1, A 133 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp &(y[0]), 1, &(A[0]), N)) A 136 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 140 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 144 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 148 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 166 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(x[0]), 1, A 170 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(x[0]), 1, A 174 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp &(y[0]), 1, &(A[0]), N)) A 177 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 181 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 185 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 189 apps/linear_algebra/benchmarks/cblas_benchmarks.cpp alpha, &(A[0]), N, &(B[0]), N, A 48 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp Matrix A(N, N); A 49 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp A.setRandom(); A 50 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp return A; A 91 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L2Benchmark(gemv_notrans, type_name<T>(), y = alpha * A * x + beta * y); A 92 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L2Benchmark(gemv_trans, type_name<T>(), y = alpha * A.transpose() * x + beta * y); A 93 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L2Benchmark(ger, type_name<T>(), A = alpha * x * y.transpose() + A); A 95 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L3Benchmark(gemm_notrans, type_name<T>(), C = alpha * A * B + beta * C); A 96 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L3Benchmark(gemm_transA, type_name<T>(), C = alpha * A.transpose() * B + beta * C); A 97 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L3Benchmark(gemm_transB, type_name<T>(), C = alpha * A * B.transpose() + beta * C); A 98 apps/linear_algebra/benchmarks/eigen_benchmarks.cpp L3Benchmark(gemm_transAB, type_name<T>(), C = alpha * A.transpose() * B.transpose() + beta * C); A 50 apps/linear_algebra/benchmarks/halide_benchmarks.cpp Scalar *A = (Scalar*)buff.data(); A 52 apps/linear_algebra/benchmarks/halide_benchmarks.cpp A[i] = random_scalar(); A 115 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(gemv_notrans, "s", halide_sgemv(false, alpha, A.raw_buffer(), x.raw_buffer(), A 118 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(gemv_trans, "s", halide_sgemv(true, alpha, A.raw_buffer(), x.raw_buffer(), A 121 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(ger, "s", halide_sger(alpha, x.raw_buffer(), y.raw_buffer(), A.raw_buffer())) A 123 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_notrans, "s", halide_sgemm(false, false, alpha, A.raw_buffer(), A 126 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transA, "s", halide_sgemm(true, false, alpha, A.raw_buffer(), A 129 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transB, "s", halide_sgemm(false, true, alpha, A.raw_buffer(), A 132 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transAB, "s", halide_sgemm(true, true, alpha, A.raw_buffer(), A 150 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(gemv_notrans, "d", halide_dgemv(false, alpha, A.raw_buffer(), x.raw_buffer(), A 153 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(gemv_trans, "d", halide_dgemv(true, alpha, A.raw_buffer(), x.raw_buffer(), A 156 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L2Benchmark(ger, "d", halide_dger(alpha, x.raw_buffer(), y.raw_buffer(), A.raw_buffer())) A 158 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_notrans, "d", halide_dgemm(false, false, alpha, A.raw_buffer(), A 161 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transA, "d", halide_dgemm(true, false, alpha, A.raw_buffer(), A 164 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transB, "d", halide_dgemm(false, true, alpha, A.raw_buffer(), A 167 apps/linear_algebra/benchmarks/halide_benchmarks.cpp L3Benchmark(gemm_transAB, "d", halide_dgemm(true, true, alpha, A.raw_buffer(), A 41 apps/linear_algebra/benchmarks/macros.h Matrix A(random_matrix(N)); \ A 58 apps/linear_algebra/benchmarks/macros.h Matrix A(random_matrix(N)); \ A 59 apps/linear_algebra/src/blas_l3_generators.cpp Func A("A"), B("B"), Btmp("Btmp"), As("As"), Atmp("Atmp"); A 68 apps/linear_algebra/src/blas_l3_generators.cpp A(i, j) = As(i % s, j, i / s); A 80 apps/linear_algebra/src/blas_l3_generators.cpp prod(k, i, j) = A(i, k) * B(k, j); A 27 apps/linear_algebra/src/halide_blas.cpp Buffer<T> init_matrix_buffer(const int M, const int N, T *A, const int lda) { A 29 apps/linear_algebra/src/halide_blas.cpp return Buffer<T>(A, 2, shape); A 157 apps/linear_algebra/src/halide_blas.cpp const int M, const int N, const float a, const float *A, const int lda, A 168 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(M, N, A, lda); A 176 apps/linear_algebra/src/halide_blas.cpp const int M, const int N, const double a, const double *A, const int lda, A 187 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(M, N, A, lda); A 200 apps/linear_algebra/src/halide_blas.cpp const float *y, const int incy, float *A, const int lda) A 204 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(M, N, A, lda); A 211 apps/linear_algebra/src/halide_blas.cpp const double *y, const int incy, double *A, const int lda) A 215 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(M, N, A, lda); A 226 apps/linear_algebra/src/halide_blas.cpp const int K, const float alpha, const float *A, A 246 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(tA ? K : M, tA ? M : K, A, lda); A 255 apps/linear_algebra/src/halide_blas.cpp const int K, const double alpha, const double *A, A 275 apps/linear_algebra/src/halide_blas.cpp auto buff_A = init_matrix_buffer(tA ? K : M, tA ? M : K, A, lda); A 56 apps/linear_algebra/src/halide_blas.h inline int halide_sgemv(bool trans, float a, halide_buffer_t *A, halide_buffer_t *x, float b, halide_buffer_t *y) { A 58 apps/linear_algebra/src/halide_blas.h return halide_sgemv_trans(a, A, x, b, y, y); A 60 apps/linear_algebra/src/halide_blas.h return halide_sgemv_notrans(a, A, x, b, y, y); A 64 apps/linear_algebra/src/halide_blas.h inline int halide_dgemv(bool trans, double a, halide_buffer_t *A, halide_buffer_t *x, double b, halide_buffer_t *y) { A 66 apps/linear_algebra/src/halide_blas.h return halide_dgemv_trans(a, A, x, b, y, y); A 68 apps/linear_algebra/src/halide_blas.h return halide_dgemv_notrans(a, A, x, b, y, y); A 72 apps/linear_algebra/src/halide_blas.h inline int halide_sger(float a, halide_buffer_t *x, halide_buffer_t *y, halide_buffer_t *A) { A 73 apps/linear_algebra/src/halide_blas.h return halide_sger_impl(a, x, y, A, A); A 76 apps/linear_algebra/src/halide_blas.h inline int halide_dger(float a, halide_buffer_t *x, halide_buffer_t *y, halide_buffer_t *A) { A 77 apps/linear_algebra/src/halide_blas.h return halide_dger_impl(a, x, y, A, A); A 80 apps/linear_algebra/src/halide_blas.h inline int halide_sgemm(bool transA, bool transB, float a, halide_buffer_t *A, halide_buffer_t *B, float b, halide_buffer_t *C) { A 82 apps/linear_algebra/src/halide_blas.h return halide_sgemm_transAB(a, A, B, b, C, C); A 84 apps/linear_algebra/src/halide_blas.h return halide_sgemm_transA(a, A, B, b, C, C); A 86 apps/linear_algebra/src/halide_blas.h return halide_sgemm_transB(a, A, B, b, C, C); A 88 apps/linear_algebra/src/halide_blas.h return halide_sgemm_notrans(a, A, B, b, C, C); A 93 apps/linear_algebra/src/halide_blas.h inline int halide_dgemm(bool transA, bool transB, double a, halide_buffer_t *A, halide_buffer_t *B, double b, halide_buffer_t *C) { A 95 apps/linear_algebra/src/halide_blas.h return halide_dgemm_transAB(a, A, B, b, C, C); A 97 apps/linear_algebra/src/halide_blas.h return halide_dgemm_transA(a, A, B, b, C, C); A 99 apps/linear_algebra/src/halide_blas.h return halide_dgemm_transB(a, A, B, b, C, C); A 101 apps/linear_algebra/src/halide_blas.h return halide_dgemm_notrans(a, A, B, b, C, C); A 208 apps/linear_algebra/src/halide_blas.h const float alpha, const float *A, const int lda, A 214 apps/linear_algebra/src/halide_blas.h const double alpha, const double *A, const int lda, A 220 apps/linear_algebra/src/halide_blas.h const float *Y, const int incY, float *A, const int lda); A 224 apps/linear_algebra/src/halide_blas.h const double *Y, const int incY, double *A, const int lda); A 237 apps/linear_algebra/src/halide_blas.h const int K, const float alpha, const float *A, A 243 apps/linear_algebra/src/halide_blas.h const int K, const double alpha, const double *A, A 75 apps/linear_algebra/tests/test_halide_blas.cpp Scalar *A = &(eA[0]); \ A 82 apps/linear_algebra/tests/test_halide_blas.cpp Scalar *A = &(aA[0]); \ A 99 apps/linear_algebra/tests/test_halide_blas.cpp Scalar *A = &(eA[0]); \ A 106 apps/linear_algebra/tests/test_halide_blas.cpp Scalar *A = &(aA[0]); \ A 186 apps/linear_algebra/tests/test_halide_blas.cpp bool compareMatrices(int N, const Matrix &A, const Matrix &B, A 190 apps/linear_algebra/tests/test_halide_blas.cpp if (!compareScalars(A[i], B[i], epsilon)) { A 224 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemv(CblasColMajor, CblasNoTrans, N, N, alpha, A, N, x, 1, beta, y, 1), A 225 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemv(HblasColMajor, HblasNoTrans, N, N, alpha, A, N, x, 1, beta, y, 1)); A 227 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemv(CblasColMajor, CblasTrans, N, N, alpha, A, N, x, 1, beta, y, 1), A 228 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemv(HblasColMajor, HblasTrans, N, N, alpha, A, N, x, 1, beta, y, 1)); A 230 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sger(CblasColMajor, N, N, alpha, x, 1, y, 1, A, N), A 231 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sger(HblasColMajor, N, N, alpha, x, 1, y, 1, A, N)); A 234 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 235 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemm(HblasColMajor, HblasNoTrans, HblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 237 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 238 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemm(HblasColMajor, HblasTrans, HblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 240 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 241 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemm(HblasColMajor, HblasNoTrans, HblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 243 apps/linear_algebra/tests/test_halide_blas.cpp cblas_sgemm(CblasColMajor, CblasTrans, CblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 244 apps/linear_algebra/tests/test_halide_blas.cpp hblas_sgemm(HblasColMajor, HblasTrans, HblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 271 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemv(CblasColMajor, CblasNoTrans, N, N, alpha, A, N, x, 1, beta, y, 1), A 272 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemv(HblasColMajor, HblasNoTrans, N, N, alpha, A, N, x, 1, beta, y, 1)); A 274 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemv(CblasColMajor, CblasTrans, N, N, alpha, A, N, x, 1, beta, y, 1), A 275 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemv(HblasColMajor, HblasTrans, N, N, alpha, A, N, x, 1, beta, y, 1)); A 277 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dger(CblasColMajor, N, N, alpha, x, 1, y, 1, A, N), A 278 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dger(HblasColMajor, N, N, alpha, x, 1, y, 1, A, N)); A 281 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 282 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemm(HblasColMajor, HblasNoTrans, HblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 284 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemm(CblasColMajor, CblasTrans, CblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 285 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemm(HblasColMajor, HblasTrans, HblasNoTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 287 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 288 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemm(HblasColMajor, HblasNoTrans, HblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 290 apps/linear_algebra/tests/test_halide_blas.cpp cblas_dgemm(CblasColMajor, CblasTrans, CblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N), A 291 apps/linear_algebra/tests/test_halide_blas.cpp hblas_dgemm(HblasColMajor, HblasTrans, HblasTrans, N, N, N, alpha, A, N, B, N, beta, C, N)); A 15 python_bindings/python/Func_Ref.cpp template <typename A, typename B> A 16 python_bindings/python/Func_Ref.cpp A &iadd_func(A a, B b) { A 24 python_bindings/python/Func_Ref.cpp template <typename A, typename B> A 25 python_bindings/python/Func_Ref.cpp A &isub_func(A a, B b) { A 30 python_bindings/python/Func_Ref.cpp template <typename A, typename B> A 31 python_bindings/python/Func_Ref.cpp A &imul_func(A a, B b) { A 36 python_bindings/python/Func_Ref.cpp template <typename A, typename B> A 37 python_bindings/python/Func_Ref.cpp A &idiv_func(A a, B b) { A 8 python_bindings/python/add_operators.h template <typename A, typename B, typename WrappedType> A 10 python_bindings/python/add_operators.h auto floordiv(A a, B b) -> decltype(a / b) { A 319 src/HexagonOffload.cpp intptr_t A = addend; A 352 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) >> 2, _signed, verify); A 356 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) >> 2, _signed, verify); A 359 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) >> 2, _signed, verify); A 363 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A), _unsigned, truncate); A 367 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate); A 371 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32, intptr_t(S + A), _unsigned, truncate); A 375 src/HexagonOffload.cpp do_reloc(fixup_addr, Word16, uintptr_t(S + A), _unsigned, truncate); A 379 src/HexagonOffload.cpp do_reloc(fixup_addr, Word8, uintptr_t(S + A), _unsigned, truncate); A 383 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP), _unsigned, verify); A 387 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 1, _unsigned, verify); A 391 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 2, _unsigned, verify); A 395 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_GP, uintptr_t(S + A - GP) >> 3, _unsigned, verify); A 399 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_LO, uintptr_t(S + A) >> 16, _unsigned, truncate); A 400 src/HexagonOffload.cpp do_reloc(fixup_addr+4, Word32_LO, uintptr_t(S + A), _unsigned, truncate); A 403 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) >> 2, _signed, verify); A 406 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) >> 2, _signed, verify); A 409 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_X26, intptr_t(S + A - P) >> 6, _signed, truncate); A 413 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_X26, uintptr_t(S + A) >> 6, _unsigned, verify); A 416 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B22, intptr_t(S + A - P) & 0x3f, _signed, verify); A 419 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B15, intptr_t(S + A - P) & 0x3f, _signed, verify); A 422 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B13, intptr_t(S + A - P) & 0x3f, _signed, verify); A 425 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B9, intptr_t(S + A - P) & 0x3f, _signed, verify); A 428 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_B7, intptr_t(S + A - P) & 0x3f, _signed, verify); A 432 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate); A 436 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_R6, uintptr_t(S + A), _unsigned, truncate); A 445 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A), _unsigned, truncate); A 448 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32, intptr_t(S + A - P), _signed, verify); A 451 src/HexagonOffload.cpp do_reloc(fixup_addr, Word32_U6, uintptr_t(S + A - P), _unsigned, truncate); A 694 src/IROperator.h template<typename A, typename B, typename C, typename... Rest, A 696 src/IROperator.h inline Expr max(const A &a, const B &b, const C &c, Rest&&... rest) { A 738 src/IROperator.h template<typename A, typename B, typename C, typename... Rest, A 740 src/IROperator.h inline Expr min(const A &a, const B &b, const C &c, Rest&&... rest) { A 97 src/Introspection.h A *parent; A 105 src/Introspection.h A() { A 113 src/Introspection.h const A *a = (const A *)a_ptr; A 124 src/Introspection.h A a1, a2; A 748 src/VaryingAttributes.cpp template<typename T, typename A> A 749 src/VaryingAttributes.cpp void mutate_operator(IRFilter *mutator, const T *op, const A op_a, Stmt *stmt) { A 753 src/VaryingAttributes.cpp template<typename T, typename A, typename B> A 754 src/VaryingAttributes.cpp void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, Stmt *stmt) { A 759 src/VaryingAttributes.cpp template<typename T, typename A, typename B, typename C> A 760 src/VaryingAttributes.cpp void mutate_operator(IRFilter *mutator, const T *op, const A op_a, const B op_b, const C op_c, Stmt *stmt) { A 200 src/runtime/hexagon_remote/dlib.cpp int32_t A = r.r_addend; A 225 src/runtime/hexagon_remote/dlib.cpp case R_HEX_GLOB_DAT: *fixup_addr = (uint32_t)(S + A); break; A 226 src/runtime/hexagon_remote/dlib.cpp case R_HEX_JMP_SLOT: *fixup_addr = (uint32_t)(S + A); break; A 227 src/runtime/hexagon_remote/dlib.cpp case R_HEX_RELATIVE: *fixup_addr = (uint32_t)(B + A); break; A 42 test/correctness/tuple_undef.cpp const int A = (int) 0xdeadbeef; A 57 test/correctness/tuple_undef.cpp a.fill(A); A 85 test/correctness/tuple_undef.cpp a.fill(A); A 113 test/correctness/tuple_undef.cpp a.fill(A); A 141 test/correctness/tuple_undef.cpp a.fill(A); A 7 test/correctness/vector_cast.cpp template<typename A> A 49 test/correctness/vector_cast.cpp template<typename A, typename B> A 51 test/correctness/vector_cast.cpp if (!is_type_supported<A>(vec_width, target) || !is_type_supported<B>(vec_width, target)) { A 59 test/correctness/vector_cast.cpp Buffer<A> input(W, H); A 62 test/correctness/vector_cast.cpp input(x, y) = (A)((rand()&0xffff)*0.1); A 100 test/correctness/vector_cast.cpp string_of_type<A>(), vec_width, A 115 test/correctness/vector_cast.cpp template<typename A> A 118 test/correctness/vector_cast.cpp success = success && test<A, float>(vec_width, target); A 119 test/correctness/vector_cast.cpp success = success && test<A, double>(vec_width, target); A 120 test/correctness/vector_cast.cpp success = success && test<A, uint8_t>(vec_width, target); A 121 test/correctness/vector_cast.cpp success = success && test<A, uint16_t>(vec_width, target); A 122 test/correctness/vector_cast.cpp success = success && test<A, uint32_t>(vec_width, target); A 123 test/correctness/vector_cast.cpp success = success && test<A, int8_t>(vec_width, target); A 124 test/correctness/vector_cast.cpp success = success && test<A, int16_t>(vec_width, target); A 125 test/correctness/vector_cast.cpp success = success && test<A, int32_t>(vec_width, target); A 22 test/correctness/vector_math.cpp template<typename A> A 38 test/correctness/vector_math.cpp template<typename A> A 39 test/correctness/vector_math.cpp A mod(A x, A y); A 51 test/correctness/vector_math.cpp template<typename A> A 52 test/correctness/vector_math.cpp A mod(A x, A y) { A 56 test/correctness/vector_math.cpp template<typename A> A 57 test/correctness/vector_math.cpp bool close_enough(A x, A y) { A 86 test/correctness/vector_math.cpp template <typename A> A 87 test/correctness/vector_math.cpp A absd(A x, A y) { A 123 test/correctness/vector_math.cpp template<typename A> A 130 test/correctness/vector_math.cpp printf("Testing %sx%d\n", string_of_type<A>(), lanes); A 132 test/correctness/vector_math.cpp Buffer<A> input(W+16, H+16); A 135 test/correctness/vector_math.cpp input(x, y) = (A)((rand() % 1024)*0.125 + 1.0); A 136 test/correctness/vector_math.cpp if ((A)(-1) < 0) { A 148 test/correctness/vector_math.cpp Buffer<A> im1 = f1.realize(W, H); A 152 test/correctness/vector_math.cpp A correct = input(x, y) + input(x+1, y); A 165 test/correctness/vector_math.cpp Buffer<A> im2 = f2.realize(W, H); A 169 test/correctness/vector_math.cpp A correct = input(x, y) - input(x+1, y); A 182 test/correctness/vector_math.cpp Buffer<A> im3 = f3.realize(W, H); A 186 test/correctness/vector_math.cpp A correct = input(x, y) * input(x+1, y); A 199 test/correctness/vector_math.cpp Buffer<A> im4 = f4.realize(W, H); A 203 test/correctness/vector_math.cpp A correct = input(x, y) > input(x+1, y) ? input(x+2, y) : input(x+3, y); A 219 test/correctness/vector_math.cpp Buffer<A> im5 = f5.realize(W, H); A 231 test/correctness/vector_math.cpp A correct = input(xCoord, yCoord); A 242 test/correctness/vector_math.cpp f5a(x, y) = input(x, y)*cast<A>(2); A 244 test/correctness/vector_math.cpp Buffer<A> im5a = f5a.realize(W, H); A 248 test/correctness/vector_math.cpp A correct = input(x, y) * ((A)(2)); A 283 test/correctness/vector_math.cpp f7(x, y) = clamp(input(x, y), cast<A>(10), cast<A>(20)); A 285 test/correctness/vector_math.cpp Buffer<A> im7 = f7.realize(W, H); A 289 test/correctness/vector_math.cpp if (im7(x, y) < (A)10 || im7(x, y) > (A)20) { A 317 test/correctness/vector_math.cpp f9(x, y) = input(x, y) / clamp(input(x+1, y), cast<A>(1), cast<A>(3)); A 319 test/correctness/vector_math.cpp Buffer<A> im9 = f9.realize(W, H); A 323 test/correctness/vector_math.cpp A clamped = input(x+1, y); A 324 test/correctness/vector_math.cpp if (clamped < (A)1) clamped = (A)1; A 325 test/correctness/vector_math.cpp if (clamped > (A)3) clamped = (A)3; A 326 test/correctness/vector_math.cpp A correct = divide(input(x, y), clamped); A 342 test/correctness/vector_math.cpp f10(x, y) = (input(x, y)) / cast<A>(Expr(c)); A 344 test/correctness/vector_math.cpp Buffer<A> im10 = f10.realize(W, H); A 348 test/correctness/vector_math.cpp A correct = divide(input(x, y), (A)c); A 367 test/correctness/vector_math.cpp Buffer<A> im11 = f11.realize(W, H); A 371 test/correctness/vector_math.cpp A correct = ((x%2)==0) ? input(x/2, y) : input(x/2, y+1); A 384 test/correctness/vector_math.cpp Buffer<A> im12 = f12.realize(W, H); A 388 test/correctness/vector_math.cpp A correct = input(W-1-x, H-1-y); A 401 test/correctness/vector_math.cpp Buffer<A> im13 = f13.realize(W, H); A 405 test/correctness/vector_math.cpp A correct = input(x+3, y); A 413 test/correctness/vector_math.cpp if (!type_of<A>().is_uint()) { A 416 test/correctness/vector_math.cpp f14(x, y) = cast<A>(abs(input(x, y))); A 417 test/correctness/vector_math.cpp Buffer<A> im14 = f14.realize(W, H); A 421 test/correctness/vector_math.cpp A correct = input(x, y); A 431 test/correctness/vector_math.cpp if (type_of<A>() == Int(16)) { A 455 test/correctness/vector_math.cpp if (type_of<A>() == Float(32)) { A 568 test/correctness/vector_math.cpp Type t = type_of<A>(); A 570 test/correctness/vector_math.cpp weight = clamp(weight, cast<A>(0), cast<A>(1)); A 575 test/correctness/vector_math.cpp Buffer<A> im21 = f21.realize(W, H); A 595 test/correctness/vector_math.cpp A correct = (A)(lerped); A 608 test/correctness/vector_math.cpp Buffer<typename with_unsigned<A>::type> im22 = f22.realize(W, H); A 612 test/correctness/vector_math.cpp typename with_unsigned<A>::type correct = absd((double)input(x, y), (double)input(x+1, y)); A 8 test/performance/matrix_multiplication.cpp void simple_version(float* A, float *B, float *C, int width, int stride) { A 15 test/performance/matrix_multiplication.cpp *cc = *cc + A[iy * stride + ik] * B[ik * stride + ix]; A 25 test/performance/matrix_multiplication.cpp ImageParam A(type_of<float>(), 2); A 35 test/performance/matrix_multiplication.cpp matrix_mul(x, y) += A(k, y) * B(x, k); A 66 test/performance/matrix_multiplication.cpp A.set(mat_A); A 16 test/performance/rfactor.cpp ImageParam A(Float(32), 1); A 22 test/performance/rfactor.cpp max_ref() = max(max_ref(), abs(A(r))); A 27 test/performance/rfactor.cpp maxf() = max(maxf(), abs(A(r))); A 54 test/performance/rfactor.cpp A.set(vec_A); A 273 test/performance/rfactor.cpp ImageParam A(Float(32), 1); A 283 test/performance/rfactor.cpp dot_ref() += (A(r.x))*B(r.x); A 287 test/performance/rfactor.cpp dot() += (A(r.x))*B(r.x); A 316 test/performance/rfactor.cpp A.set(vec_A); A 341 test/performance/rfactor.cpp ImageParam A(Int(32), 1); A 347 test/performance/rfactor.cpp sink_ref() = {sink_ref()[0] * A(r), // Product A 348 test/performance/rfactor.cpp sink_ref()[1] + A(r), // Sum A 349 test/performance/rfactor.cpp max(sink_ref()[2], A(r)), // Max A 350 test/performance/rfactor.cpp select(sink_ref()[2] > A(r), sink_ref()[3], r), // Argmax A 351 test/performance/rfactor.cpp min(sink_ref()[4], A(r)), // Min A 352 test/performance/rfactor.cpp select(sink_ref()[4] < A(r), sink_ref()[5], r), // Argmin A 353 test/performance/rfactor.cpp sink_ref()[6] + A(r)*A(r), // Sum of squares A 354 test/performance/rfactor.cpp sink_ref()[7] + select(A(r) % 2 == 0, 1, 0) // Number of even items A 359 test/performance/rfactor.cpp sink() = {sink()[0] * A(r), // Product A 360 test/performance/rfactor.cpp sink()[1] + A(r), // Sum A 361 test/performance/rfactor.cpp max(sink()[2], A(r)), // Max A 362 test/performance/rfactor.cpp select(sink()[2] > A(r), sink()[3], r), // Argmax A 363 test/performance/rfactor.cpp min(sink()[4], A(r)), // Min A 364 test/performance/rfactor.cpp select(sink()[4] < A(r), sink()[5], r), // Argmin A 365 test/performance/rfactor.cpp sink()[6] + A(r)*A(r), // Sum of squares A 366 test/performance/rfactor.cpp sink()[7] + select(A(r) % 2 == 0, 1, 0) // Number of even items A 394 test/performance/rfactor.cpp A.set(vec_A); A 8 test/performance/vectorize.cpp template<typename A> A 24 test/performance/vectorize.cpp template<typename A> A 30 test/performance/vectorize.cpp Buffer<A> input(W, H+20); A 33 test/performance/vectorize.cpp input(x, y) = (A)((rand() & 0xffff)*0.125 + 1.0); A 58 test/performance/vectorize.cpp Buffer<A> outputg = g.realize(W, H); A 59 test/performance/vectorize.cpp Buffer<A> outputf = f.realize(W, H); A 72 test/performance/vectorize.cpp string_of_type<A>(), vec_width, A 83 test/performance/vectorize.cpp string_of_type<A>(), vec_width, t_f * 1e3, t_g * 1e3, t_g / t_f); A 8 test/performance/vectorize_pred.cpp template<typename A> A 17 test/performance/vectorize_pred.cpp template<typename A> A 23 test/performance/vectorize_pred.cpp Buffer<A> input(W, H+20); A 26 test/performance/vectorize_pred.cpp input(x, y) = (A)((rand() & 0xffff)*0.125 + 1.0); A 45 test/performance/vectorize_pred.cpp f(x, y) = undef<A>(); A 47 test/performance/vectorize_pred.cpp g(x, y) = undef<A>(); A 51 test/performance/vectorize_pred.cpp Buffer<A> outputg = g.realize(W, H); A 52 test/performance/vectorize_pred.cpp Buffer<A> outputf = f.realize(W, H); A 65 test/performance/vectorize_pred.cpp string_of_type<A>(), vec_width, A 76 test/performance/vectorize_pred.cpp string_of_type<A>(), vec_width, t_f * 1e3, t_g * 1e3, t_g / t_f);