This source file includes the following definitions.
- ConvolveHorizontally_mips_dspr2
- ConvolveVertically_mips_dspr2
#include <algorithm>
#include "skia/ext/convolver.h"
#include "skia/ext/convolver_mips_dspr2.h"
#include "third_party/skia/include/core/SkTypes.h"
namespace skia {
// Convolves one row of 4-byte pixels horizontally using MIPS DSP ASE (rev 2)
// inline assembly.
//
// For every output index in [0, filter.num_values()) the taps returned by
// filter.FilterForValue() are multiplied with the source pixels that start at
// src_data + filter_offset * 4. The products are summed per byte lane, shifted
// right by ConvolutionFilter1D::kShiftBits, clamped to 0..255 and stored as one
// 32-bit word at out_row + out_x * 4.
//
//   src_data  - source row of 4-byte pixels.
//   filter    - provides the tap table, first-pixel offset and tap count for
//               each output index.
//   out_row   - destination row; 4 bytes are stored per output index.
//   has_alpha - true: all four byte lanes are filtered.
//               false: byte 3 is not accumulated and is stored as 0.
//
// The body is compiled only when SIMD_MIPS_DSPR2 is non-zero; otherwise the
// function does nothing.
//
// NOTE(review): the 4-tap loop loads pixels with `lw`, so the source pixels are
// presumably expected to be 4-byte aligned - confirm against callers.
// NOTE(review): the word-wise loop and the byte-wise tail only agree on lane
// order when memory byte 0 ends up in bits 7:0 of a loaded word, i.e. this
// looks little-endian only - confirm.
void ConvolveHorizontally_mips_dspr2(const unsigned char* src_data,
                                     const ConvolutionFilter1D& filter,
                                     unsigned char* out_row,
                                     bool has_alpha) {
#if SIMD_MIPS_DSPR2
  // Scratch operand: receives the address of the first source pixel for the
  // current output. It is only written and read inside the assembly.
  int row_to_filter = 0;
  int num_values = filter.num_values();
  if (has_alpha) {
    for (int out_x = 0; out_x < num_values; out_x++) {
      int filter_offset, filter_length;
      const ConvolutionFilter1D::Fixed* filter_values =
          filter.FilterForValue(out_x, &filter_offset, &filter_length);
      // Byte offset into the tap table (2 bytes per tap); managed by the asm.
      int filter_x = 0;
      __asm__ __volatile__ (
        ".set push \n"
        ".set noreorder \n"
        // Clear the accumulators before the empty-filter branch so that a
        // zero-length filter stores zeros instead of stale sums.
        "mtlo $0, $ac0 \n"
        "mtlo $0, $ac1 \n"
        "mtlo $0, $ac2 \n"
        "mtlo $0, $ac3 \n"
        // Empty filter: go straight to the extract/store stage.
        "beqz %[filter_len], 3f \n"
        " sll $t0, %[filter_offset], 2 \n"       // (delay slot) offset * 4
        "addu %[rtf], %[src_data], $t0 \n"       // rtf = first source pixel
        "srl $t7, %[filter_len], 2 \n"           // t7 = number of 4-tap groups
        "beqz $t7, 2f \n"
        " li %[fx], 0 \n"                        // (delay slot) tap offset = 0
        // ---- Main loop: 4 taps x 4 pixels per iteration. ----
        "11: \n"
        "addu $t4, %[filter_val], %[fx] \n"
        "sll $t5, %[fx], 1 \n"                   // pixel byte offset = fx * 2
        "ulw $t6, 0($t4) \n"                     // taps 0,1 (two halfwords)
        "ulw $t8, 4($t4) \n"                     // taps 2,3
        "addu $t0, %[rtf], $t5 \n"
        "lw $t1, 0($t0) \n"                      // pixel 0
        "lw $t2, 4($t0) \n"                      // pixel 1
        "lw $t3, 8($t0) \n"                      // pixel 2
        "lw $t4, 12($t0) \n"                     // pixel 3
        // De-interleave pixels 0/1 into per-lane halfword pairs.
        "precrq.qb.ph $t0, $t2, $t1 \n"          // bits 31:24 and 15:8 of each
        "precr.qb.ph $t5, $t2, $t1 \n"           // bits 23:16 and 7:0 of each
        "preceu.ph.qbla $t1, $t0 \n"             // lane 3 of pixels 1,0
        "preceu.ph.qbra $t2, $t0 \n"             // lane 1 of pixels 1,0
        "preceu.ph.qbla $t0, $t5 \n"             // lane 2 of pixels 1,0
        "preceu.ph.qbra $t5, $t5 \n"             // lane 0 of pixels 1,0
        // acN += pixel1 * tap1 + pixel0 * tap0 for each lane.
        "dpa.w.ph $ac0, $t1, $t6 \n"             // ac0 <- lane 3
        "dpa.w.ph $ac1, $t0, $t6 \n"             // ac1 <- lane 2
        "dpa.w.ph $ac2, $t2, $t6 \n"             // ac2 <- lane 1
        "dpa.w.ph $ac3, $t5, $t6 \n"             // ac3 <- lane 0
        // Same for pixels 2/3 with taps 2/3.
        "precrq.qb.ph $t0, $t4, $t3 \n"
        "precr.qb.ph $t5, $t4, $t3 \n"
        "preceu.ph.qbla $t1, $t0 \n"
        "preceu.ph.qbra $t2, $t0 \n"
        "preceu.ph.qbla $t0, $t5 \n"
        "preceu.ph.qbra $t5, $t5 \n"
        "dpa.w.ph $ac0, $t1, $t8 \n"
        "dpa.w.ph $ac1, $t0, $t8 \n"
        "dpa.w.ph $ac2, $t2, $t8 \n"
        "dpa.w.ph $ac3, $t5, $t8 \n"
        "addiu $t7, $t7, -1 \n"
        "bgtz $t7, 11b \n"
        " addiu %[fx], %[fx], 8 \n"              // (delay slot) 4 taps consumed
        // ---- Tail: remaining filter_len % 4 taps, one pixel at a time. ----
        "2: \n"
        "andi $t7, %[filter_len], 0x3 \n"
        "beqz $t7, 3f \n"
        " nop \n"
        "21: \n"
        "sll $t1, %[fx], 1 \n"
        "addu $t2, %[filter_val], %[fx] \n"
        "addu $t0, %[rtf], $t1 \n"
        "lh $t6, 0($t2) \n"                      // one 16-bit tap
        "lbu $t1, 0($t0) \n"                     // byte 0
        "lbu $t2, 1($t0) \n"                     // byte 1
        "lbu $t3, 2($t0) \n"                     // byte 2
        "lbu $t4, 3($t0) \n"                     // byte 3
        "maddu $ac3, $t6, $t1 \n"
        "maddu $ac2, $t6, $t2 \n"
        "maddu $ac1, $t6, $t3 \n"
        "maddu $ac0, $t6, $t4 \n"
        "addiu $t7, $t7, -1 \n"
        "bgtz $t7, 21b \n"
        " addiu %[fx], %[fx], 2 \n"              // (delay slot) 1 tap consumed
        // ---- Scale, clamp, pack and store one output pixel. ----
        "3: \n"
        "extrv.w $t0, $ac0, %[kShiftBits] \n"    // lane 3 sum >> kShiftBits
        "extrv.w $t1, $ac1, %[kShiftBits] \n"    // lane 2
        "extrv.w $t2, $ac2, %[kShiftBits] \n"    // lane 1
        "extrv.w $t3, $ac3, %[kShiftBits] \n"    // lane 0
        "sll $t5, %[out_x], 2 \n"
        "repl.ph $t6, 128 \n"                    // bias 128 in both halfwords
        "addu $t5, %[out_row], $t5 \n"           // t5 = out_row + out_x * 4
        "append $t2, $t3, 16 \n"                 // t2 = {lane 1, lane 0}
        "append $t0, $t1, 16 \n"                 // t0 = {lane 3, lane 2}
        // Clamp each halfword to 0..255: subtract the bias, saturate to a
        // signed 8-bit range via shift-left-saturate / shift-right, add the
        // bias back.
        "subu.ph $t1, $t0, $t6 \n"
        "shll_s.ph $t1, $t1, 8 \n"
        "shra.ph $t1, $t1, 8 \n"
        "addu.ph $t1, $t1, $t6 \n"
        "subu.ph $t3, $t2, $t6 \n"
        "shll_s.ph $t3, $t3, 8 \n"
        "shra.ph $t3, $t3, 8 \n"
        "addu.ph $t3, $t3, $t6 \n"
        "precr.qb.ph $t0, $t1, $t3 \n"           // pack the four low bytes
        "usw $t0, 0($t5) \n"
        ".set pop \n"
        : [fx] "+r" (filter_x), [out_x] "+r" (out_x), [out_row] "+r" (out_row),
          [rtf] "+r" (row_to_filter)
        : [filter_val] "r" (filter_values), [filter_len] "r" (filter_length),
          [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits),
          [filter_offset] "r" (filter_offset), [src_data] "r" (src_data)
        // "memory": the asm reads the source row and tap table and stores to
        // out_row through pointers that are not declared as memory operands.
        : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "memory"
      );
    }
  } else {
    for (int out_x = 0; out_x < num_values; out_x++) {
      int filter_offset, filter_length;
      const ConvolutionFilter1D::Fixed* filter_values =
          filter.FilterForValue(out_x, &filter_offset, &filter_length);
      // Byte offset into the tap table (2 bytes per tap); managed by the asm.
      int filter_x = 0;
      __asm__ __volatile__ (
        ".set push \n"
        ".set noreorder \n"
        // Only three lanes are accumulated here ($ac0 is unused). Clear them
        // before the empty-filter branch so that a zero-length filter stores
        // zeros instead of stale sums.
        "mtlo $0, $ac1 \n"
        "mtlo $0, $ac2 \n"
        "mtlo $0, $ac3 \n"
        // Empty filter: go straight to the extract/store stage.
        "beqz %[filter_len], 3f \n"
        " sll $t0, %[filter_offset], 2 \n"       // (delay slot) offset * 4
        "addu %[rtf], %[src_data], $t0 \n"       // rtf = first source pixel
        "srl $t7, %[filter_len], 2 \n"           // t7 = number of 4-tap groups
        "beqz $t7, 2f \n"
        " li %[fx], 0 \n"                        // (delay slot) tap offset = 0
        // ---- Main loop: 4 taps x 4 pixels per iteration. ----
        "11: \n"
        "addu $t4, %[filter_val], %[fx] \n"
        "sll $t5, %[fx], 1 \n"                   // pixel byte offset = fx * 2
        "ulw $t6, 0($t4) \n"                     // taps 0,1 (two halfwords)
        "ulw $t8, 4($t4) \n"                     // taps 2,3
        "addu $t0, %[rtf], $t5 \n"
        "lw $t1, 0($t0) \n"                      // pixel 0
        "lw $t2, 4($t0) \n"                      // pixel 1
        "lw $t3, 8($t0) \n"                      // pixel 2
        "lw $t4, 12($t0) \n"                     // pixel 3
        // De-interleave pixels 0/1; lane 3 is not expanded in this path.
        "precrq.qb.ph $t0, $t2, $t1 \n"          // bits 31:24 and 15:8 of each
        "precr.qb.ph $t5, $t2, $t1 \n"           // bits 23:16 and 7:0 of each
        "preceu.ph.qbra $t2, $t0 \n"             // lane 1 of pixels 1,0
        "preceu.ph.qbla $t0, $t5 \n"             // lane 2 of pixels 1,0
        "preceu.ph.qbra $t5, $t5 \n"             // lane 0 of pixels 1,0
        "dpa.w.ph $ac1, $t0, $t6 \n"             // ac1 <- lane 2
        "dpa.w.ph $ac2, $t2, $t6 \n"             // ac2 <- lane 1
        "dpa.w.ph $ac3, $t5, $t6 \n"             // ac3 <- lane 0
        // Same for pixels 2/3 with taps 2/3.
        "precrq.qb.ph $t0, $t4, $t3 \n"
        "precr.qb.ph $t5, $t4, $t3 \n"
        "preceu.ph.qbra $t2, $t0 \n"
        "preceu.ph.qbla $t0, $t5 \n"
        "preceu.ph.qbra $t5, $t5 \n"
        "dpa.w.ph $ac1, $t0, $t8 \n"
        "dpa.w.ph $ac2, $t2, $t8 \n"
        "dpa.w.ph $ac3, $t5, $t8 \n"
        "addiu $t7, $t7, -1 \n"
        "bgtz $t7, 11b \n"
        " addiu %[fx], %[fx], 8 \n"              // (delay slot) 4 taps consumed
        // ---- Tail: remaining filter_len % 4 taps, one pixel at a time. ----
        "2: \n"
        "andi $t7, %[filter_len], 0x3 \n"
        "beqz $t7, 3f \n"
        " nop \n"
        "21: \n"
        "sll $t1, %[fx], 1 \n"
        "addu $t2, %[filter_val], %[fx] \n"
        "addu $t0, %[rtf], $t1 \n"
        "lh $t6, 0($t2) \n"                      // one 16-bit tap
        "lbu $t1, 0($t0) \n"                     // byte 0
        "lbu $t2, 1($t0) \n"                     // byte 1
        "lbu $t3, 2($t0) \n"                     // byte 2
        "maddu $ac3, $t6, $t1 \n"
        "maddu $ac2, $t6, $t2 \n"
        "maddu $ac1, $t6, $t3 \n"
        "addiu $t7, $t7, -1 \n"
        "bgtz $t7, 21b \n"
        " addiu %[fx], %[fx], 2 \n"              // (delay slot) 1 tap consumed
        // ---- Scale, clamp, pack and store one output pixel. ----
        "3: \n"
        "extrv.w $t1, $ac1, %[kShiftBits] \n"    // lane 2 sum >> kShiftBits
        "extrv.w $t2, $ac2, %[kShiftBits] \n"    // lane 1
        "extrv.w $t3, $ac3, %[kShiftBits] \n"    // lane 0
        "repl.ph $t6, 128 \n"                    // bias 128 in both halfwords
        "sll $t8, %[out_x], 2 \n"
        "addu $t8, %[out_row], $t8 \n"           // t8 = out_row + out_x * 4
        "append $t2, $t3, 16 \n"                 // t2 = {lane 1, lane 0}
        "andi $t1, 0xFFFF \n"                    // t1 = {0, lane 2}
        // Clamp each halfword to 0..255 (bias, saturate, unbias). The zero in
        // the upper half of $t1 stays 0, so byte 3 of the result is 0.
        "subu.ph $t5, $t1, $t6 \n"
        "shll_s.ph $t5, $t5, 8 \n"
        "shra.ph $t5, $t5, 8 \n"
        "addu.ph $t5, $t5, $t6 \n"
        "subu.ph $t4, $t2, $t6 \n"
        "shll_s.ph $t4, $t4, 8 \n"
        "shra.ph $t4, $t4, 8 \n"
        "addu.ph $t4, $t4, $t6 \n"
        "precr.qb.ph $t0, $t5, $t4 \n"           // pack the four low bytes
        "usw $t0, 0($t8) \n"
        ".set pop \n"
        : [fx] "+r" (filter_x), [out_x] "+r" (out_x), [out_row] "+r" (out_row),
          [rtf] "+r" (row_to_filter)
        : [filter_val] "r" (filter_values), [filter_len] "r" (filter_length),
          [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits),
          [filter_offset] "r" (filter_offset), [src_data] "r" (src_data)
        // "memory": the asm reads the source row and tap table and stores to
        // out_row through pointers that are not declared as memory operands.
        : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "memory"
      );
    }
  }
#endif
}
// Convolves a column of rows vertically into one output row of 4-byte pixels
// using MIPS DSP ASE (rev 2) inline assembly.
//
// For every out_x in [0, pixel_width) the pixel at byte offset out_x * 4 of
// each of the filter_length source rows is multiplied with the matching tap.
// The products are summed per byte lane, shifted right by
// ConvolutionFilter1D::kShiftBits, clamped to 0..255 and stored as one 32-bit
// word at out_row + out_x * 4.
//
//   filter_val       - filter_length 16-bit taps.
//   filter_length    - number of taps and of source rows read.
//   source_data_rows - array of row pointers, one per tap.
//   pixel_width      - number of output pixels to produce.
//   out_row          - destination row; 4 bytes are stored per pixel.
//   has_alpha        - true: all four lanes are filtered and byte 3 is then
//                      raised to at least the maximum of bytes 0..2.
//                      false: byte 3 is not accumulated and is stored as 0xFF.
//
// The body is compiled only when SIMD_MIPS_DSPR2 is non-zero; otherwise the
// function does nothing.
//
// NOTE(review): row pointers are fetched with `lw` at a stride of 4 bytes, so
// this assumes 32-bit pointers - confirm for the target ABI.
// NOTE(review): the 4-row loop loads pixels with `lw`, so the row pointers are
// presumably expected to be 4-byte aligned - confirm against callers.
// NOTE(review): the word-wise loop and the byte-wise tail only agree on lane
// order when memory byte 0 ends up in bits 7:0 of a loaded word, i.e. this
// looks little-endian only - confirm.
void ConvolveVertically_mips_dspr2(const ConvolutionFilter1D::Fixed* filter_val,
                                   int filter_length,
                                   unsigned char* const* source_data_rows,
                                   int pixel_width,
                                   unsigned char* out_row,
                                   bool has_alpha) {
#if SIMD_MIPS_DSPR2
  // Scratch operands owned by the assembly: byte offset of the current pixel,
  // loop counter, and byte offset into the tap table (2 bytes per tap).
  int byte_offset;
  int cnt;
  int filter_y;
  if (has_alpha) {
    for (int out_x = 0; out_x < pixel_width; out_x++) {
      __asm__ __volatile__ (
        ".set push \n"
        ".set noreorder \n"
        // Clear the accumulators before the empty-filter branch so that a
        // zero-length filter stores zeros instead of stale sums.
        "mtlo $0, $ac0 \n"
        "mtlo $0, $ac1 \n"
        "mtlo $0, $ac2 \n"
        "mtlo $0, $ac3 \n"
        // Empty filter: go straight to the extract/store stage.
        "beqz %[filter_len], 3f \n"
        " sll %[offset], %[out_x], 2 \n"         // (delay slot) out_x * 4
        "srl %[cnt], %[filter_len], 2 \n"        // number of 4-tap groups
        "beqz %[cnt], 2f \n"
        " li %[fy], 0 \n"                        // (delay slot) tap offset = 0
        // ---- Main loop: 4 taps x 4 rows per iteration. ----
        "11: \n"
        "sll $t1, %[fy], 1 \n"                   // row-pointer offset = fy * 2
        "addu $t0, %[src_data_rows], $t1 \n"
        "lw $t1, 0($t0) \n"                      // row pointers 0..3
        "lw $t2, 4($t0) \n"
        "lw $t3, 8($t0) \n"
        "lw $t4, 12($t0) \n"
        "addu $t1, $t1, %[offset] \n"
        "addu $t2, $t2, %[offset] \n"
        "addu $t3, $t3, %[offset] \n"
        "addu $t4, $t4, %[offset] \n"
        "lw $t1, 0($t1) \n"                      // pixel from rows 0..3
        "lw $t2, 0($t2) \n"
        "lw $t3, 0($t3) \n"
        "lw $t4, 0($t4) \n"
        // De-interleave rows 0/1 into per-lane halfword pairs.
        "precrq.qb.ph $t5, $t2, $t1 \n"          // bits 31:24 and 15:8 of each
        "precr.qb.ph $t6, $t2, $t1 \n"           // bits 23:16 and 7:0 of each
        "preceu.ph.qbla $t0, $t5 \n"             // lane 3 of rows 1,0
        "preceu.ph.qbra $t1, $t5 \n"             // lane 1 of rows 1,0
        "preceu.ph.qbla $t2, $t6 \n"             // lane 2 of rows 1,0
        "preceu.ph.qbra $t5, $t6 \n"             // lane 0 of rows 1,0
        "addu $t6, %[filter_val], %[fy] \n"
        "ulw $t7, 0($t6) \n"                     // taps 0,1 (two halfwords)
        "ulw $t6, 4($t6) \n"                     // taps 2,3
        // acN += row1 * tap1 + row0 * tap0 for each lane.
        "dpa.w.ph $ac0, $t5, $t7 \n"             // ac0 <- lane 0
        "dpa.w.ph $ac1, $t1, $t7 \n"             // ac1 <- lane 1
        "dpa.w.ph $ac2, $t2, $t7 \n"             // ac2 <- lane 2
        "dpa.w.ph $ac3, $t0, $t7 \n"             // ac3 <- lane 3
        // Same for rows 2/3 with taps 2/3.
        "precrq.qb.ph $t5, $t4, $t3 \n"
        "precr.qb.ph $t7, $t4, $t3 \n"
        "preceu.ph.qbla $t0, $t5 \n"
        "preceu.ph.qbra $t1, $t5 \n"
        "preceu.ph.qbla $t2, $t7 \n"
        "preceu.ph.qbra $t5, $t7 \n"
        "dpa.w.ph $ac0, $t5, $t6 \n"
        "dpa.w.ph $ac1, $t1, $t6 \n"
        "dpa.w.ph $ac2, $t2, $t6 \n"
        "dpa.w.ph $ac3, $t0, $t6 \n"
        "addiu %[cnt], %[cnt], -1 \n"
        "bgtz %[cnt], 11b \n"
        " addiu %[fy], %[fy], 8 \n"              // (delay slot) 4 taps consumed
        // ---- Tail: remaining filter_len % 4 taps, one row at a time. ----
        "2: \n"
        "andi %[cnt], %[filter_len], 0x3 \n"
        "beqz %[cnt], 3f \n"
        " nop \n"
        "21: \n"
        "addu $t0, %[filter_val], %[fy] \n"
        "lh $t4, 0($t0) \n"                      // one 16-bit tap
        "sll $t1, %[fy], 1 \n"
        "addu $t0, %[src_data_rows], $t1 \n"
        "lw $t1, 0($t0) \n"                      // row pointer
        "addu $t0, $t1, %[offset] \n"
        "lbu $t1, 0($t0) \n"                     // byte 0
        "lbu $t2, 1($t0) \n"                     // byte 1
        "lbu $t3, 2($t0) \n"                     // byte 2
        "lbu $t0, 3($t0) \n"                     // byte 3
        "maddu $ac0, $t4, $t1 \n"
        "maddu $ac1, $t4, $t2 \n"
        "maddu $ac2, $t4, $t3 \n"
        "maddu $ac3, $t4, $t0 \n"
        "addiu %[cnt], %[cnt], -1 \n"
        "bgtz %[cnt], 21b \n"
        " addiu %[fy], %[fy], 2 \n"              // (delay slot) 1 tap consumed
        // ---- Scale, clamp, fix up byte 3, pack and store. ----
        "3: \n"
        "extrv.w $t3, $ac0, %[kShiftBits] \n"    // lane 0 sum >> kShiftBits
        "extrv.w $t2, $ac1, %[kShiftBits] \n"    // lane 1
        "extrv.w $t1, $ac2, %[kShiftBits] \n"    // lane 2
        "extrv.w $t0, $ac3, %[kShiftBits] \n"    // lane 3
        "repl.ph $t4, 128 \n"                    // bias 128 in both halfwords
        "addu $t5, %[out_row], %[offset] \n"     // t5 = out_row + out_x * 4
        "append $t2, $t3, 16 \n"                 // t2 = {lane 1, lane 0}
        "append $t0, $t1, 16 \n"                 // t0 = {lane 3, lane 2}
        // Clamp each halfword to 0..255: subtract the bias, saturate to a
        // signed 8-bit range via shift-left-saturate / shift-right, add the
        // bias back.
        "subu.ph $t1, $t0, $t4 \n"
        "shll_s.ph $t1, $t1, 8 \n"
        "shra.ph $t1, $t1, 8 \n"
        "addu.ph $t1, $t1, $t4 \n"
        "subu.ph $t2, $t2, $t4 \n"
        "shll_s.ph $t2, $t2, 8 \n"
        "shra.ph $t2, $t2, 8 \n"
        "addu.ph $t2, $t2, $t4 \n"
        // Compute max(lane 0, lane 1, lane 2, lane 3) in the low halfword of
        // $t0 with compare/pick pairs, then write it back as byte 3 so that
        // byte 3 is never smaller than any of the other three bytes.
        "andi $t3, $t1, 0xFF \n"                 // t3 = lane 2
        "cmp.lt.ph $t3, $t2 \n"
        "pick.ph $t0, $t2, $t3 \n"               // low half: max(lane 2, lane 0)
        "andi $t3, $t0, 0xFF \n"
        "srl $t4, $t0, 16 \n"                    // t4 = lane 1
        "cmp.lt.ph $t3, $t4 \n"
        "pick.ph $t0, $t4, $t3 \n"               // low half: max of lanes 0..2
        "srl $t3, $t1, 16 \n"                    // t3 = lane 3
        "cmp.lt.ph $t3, $t0 \n"
        "pick.ph $t0, $t0, $t3 \n"               // low half: max of all lanes
        "ins $t1, $t0, 16, 8 \n"                 // replace lane 3 with the max
        "precr.qb.ph $t0, $t1, $t2 \n"           // pack the four low bytes
        "usw $t0, 0($t5) \n"
        ".set pop \n"
        : [filter_val] "+r" (filter_val), [filter_len] "+r" (filter_length),
          [offset] "+r" (byte_offset), [fy] "+r" (filter_y), [cnt] "+r" (cnt),
          [out_x] "+r" (out_x), [pixel_width] "+r" (pixel_width)
        : [src_data_rows] "r" (source_data_rows), [out_row] "r" (out_row),
          [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits)
        : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
          "t0", "t1", "t2", "t3", "t4", "t5", "t6","t7", "memory"
      );
    }
  } else {
    for (int out_x = 0; out_x < pixel_width; out_x++) {
      __asm__ __volatile__ (
        ".set push \n"
        ".set noreorder \n"
        // Only three lanes are accumulated here ($ac3 is unused). Clear them
        // before the empty-filter branch so that a zero-length filter stores
        // zeros (plus the constant byte 3) instead of stale sums.
        "mtlo $0, $ac0 \n"
        "mtlo $0, $ac1 \n"
        "mtlo $0, $ac2 \n"
        // Empty filter: go straight to the extract/store stage.
        "beqz %[filter_len], 3f \n"
        " sll %[offset], %[out_x], 2 \n"         // (delay slot) out_x * 4
        "srl %[cnt], %[filter_len], 2 \n"        // number of 4-tap groups
        "beqz %[cnt], 2f \n"
        " li %[fy], 0 \n"                        // (delay slot) tap offset = 0
        // ---- Main loop: 4 taps x 4 rows per iteration. ----
        "11: \n"
        "sll $t1, %[fy], 1 \n"                   // row-pointer offset = fy * 2
        "addu $t0, %[src_data_rows], $t1 \n"
        "lw $t1, 0($t0) \n"                      // row pointers 0..3
        "lw $t2, 4($t0) \n"
        "lw $t3, 8($t0) \n"
        "lw $t4, 12($t0) \n"
        "addu $t1, $t1, %[offset] \n"
        "addu $t2, $t2, %[offset] \n"
        "addu $t3, $t3, %[offset] \n"
        "addu $t4, $t4, %[offset] \n"
        "lw $t1, 0($t1) \n"                      // pixel from rows 0..3
        "lw $t2, 0($t2) \n"
        "lw $t3, 0($t3) \n"
        "lw $t4, 0($t4) \n"
        // De-interleave rows 0/1; lane 3 is not expanded in this path.
        "precrq.qb.ph $t5, $t2, $t1 \n"          // bits 31:24 and 15:8 of each
        "precr.qb.ph $t6, $t2, $t1 \n"           // bits 23:16 and 7:0 of each
        "preceu.ph.qbra $t1, $t5 \n"             // lane 1 of rows 1,0
        "preceu.ph.qbla $t2, $t6 \n"             // lane 2 of rows 1,0
        "preceu.ph.qbra $t5, $t6 \n"             // lane 0 of rows 1,0
        "addu $t6, %[filter_val], %[fy] \n"
        "ulw $t0, 0($t6) \n"                     // taps 0,1 (two halfwords)
        "ulw $t6, 4($t6) \n"                     // taps 2,3
        "dpa.w.ph $ac0, $t5, $t0 \n"             // ac0 <- lane 0
        "dpa.w.ph $ac1, $t1, $t0 \n"             // ac1 <- lane 1
        "dpa.w.ph $ac2, $t2, $t0 \n"             // ac2 <- lane 2
        // Same for rows 2/3 with taps 2/3.
        "precrq.qb.ph $t5, $t4, $t3 \n"
        "precr.qb.ph $t0, $t4, $t3 \n"
        "preceu.ph.qbra $t1, $t5 \n"
        "preceu.ph.qbla $t2, $t0 \n"
        "preceu.ph.qbra $t5, $t0 \n"
        "dpa.w.ph $ac0, $t5, $t6 \n"
        "dpa.w.ph $ac1, $t1, $t6 \n"
        "dpa.w.ph $ac2, $t2, $t6 \n"
        "addiu %[cnt], %[cnt], -1 \n"
        "bgtz %[cnt], 11b \n"
        " addiu %[fy], %[fy], 8 \n"              // (delay slot) 4 taps consumed
        // ---- Tail: remaining filter_len % 4 taps, one row at a time. ----
        "2: \n"
        "andi %[cnt], %[filter_len], 0x3 \n"
        "beqz %[cnt], 3f \n"
        " nop \n"
        "21: \n"
        "addu $t0, %[filter_val], %[fy] \n"
        "lh $t4, 0($t0) \n"                      // one 16-bit tap
        "sll $t1, %[fy], 1 \n"
        "addu $t0, %[src_data_rows], $t1 \n"
        "lw $t1, 0($t0) \n"                      // row pointer
        "addu $t0, $t1, %[offset] \n"
        "lbu $t1, 0($t0) \n"                     // byte 0
        "lbu $t2, 1($t0) \n"                     // byte 1
        "lbu $t3, 2($t0) \n"                     // byte 2
        "maddu $ac0, $t4, $t1 \n"
        "maddu $ac1, $t4, $t2 \n"
        "maddu $ac2, $t4, $t3 \n"
        "addiu %[cnt], %[cnt], -1 \n"
        "bgtz %[cnt], 21b \n"
        " addiu %[fy], %[fy], 2 \n"              // (delay slot) 1 tap consumed
        // ---- Scale, clamp, set byte 3 to 0xFF, pack and store. ----
        "3: \n"
        "extrv.w $t3, $ac0, %[kShiftBits] \n"    // lane 0 sum >> kShiftBits
        "extrv.w $t2, $ac1, %[kShiftBits] \n"    // lane 1
        "extrv.w $t1, $ac2, %[kShiftBits] \n"    // lane 2
        "repl.ph $t6, 128 \n"                    // bias 128 in both halfwords
        "addu $t5, %[out_row], %[offset] \n"     // t5 = out_row + out_x * 4
        "append $t2, $t3, 16 \n"                 // t2 = {lane 1, lane 0}
        "andi $t1, $t1, 0xFFFF \n"               // t1 = {0, lane 2}
        // Clamp each halfword to 0..255 (bias, saturate, unbias).
        "subu.ph $t1, $t1, $t6 \n"
        "shll_s.ph $t1, $t1, 8 \n"
        "shra.ph $t1, $t1, 8 \n"
        "addu.ph $t1, $t1, $t6 \n"
        "subu.ph $t2, $t2, $t6 \n"
        "shll_s.ph $t2, $t2, 8 \n"
        "shra.ph $t2, $t2, 8 \n"
        "addu.ph $t2, $t2, $t6 \n"
        "li $t0, 0xFF \n"
        "ins $t1, $t0, 16, 8 \n"                 // force byte 3 to 0xFF
        "precr.qb.ph $t0, $t1, $t2 \n"           // pack the four low bytes
        "usw $t0, 0($t5) \n"
        ".set pop \n"
        : [filter_val] "+r" (filter_val), [filter_len] "+r" (filter_length),
          [offset] "+r" (byte_offset), [fy] "+r" (filter_y), [cnt] "+r" (cnt),
          [out_x] "+r" (out_x), [pixel_width] "+r" (pixel_width)
        : [src_data_rows] "r" (source_data_rows), [out_row] "r" (out_row),
          [kShiftBits] "r" (ConvolutionFilter1D::kShiftBits)
        : "lo", "hi", "$ac1lo", "$ac1hi", "$ac2lo", "$ac2hi", "$ac3lo", "$ac3hi",
          "t0", "t1", "t2", "t3", "t4", "t5", "t6", "memory"
      );
    }
  }
#endif
}
}