#include "libavutil/arm/asm.S"
/*
 * Fixed-point IDCT weights.  W1..W7 are numerically equal to
 * cos(i * pi / 16) * sqrt(2) * (1 << 14), rounded to nearest, for i = 1..7,
 * with the exception of W4, which is 16383 where that formula gives 16384.
 */
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
/*
 * Right-shift amounts applied to the results of the row pass and of the
 * column pass respectively.  They are also passed to idct_row/idct_row4,
 * which use 1 << (shift - 1) as the rounding bias.
 */
#define ROW_SHIFT 11
#define COL_SHIFT 20
/*
 * Pairs of weights packed into one 32-bit word for the dual 16x16 multiply
 * instructions: Wxy holds Wx in bits 0-15 and Wy in bits 16-31.
 * W42n holds the negated pair: -W4 (masked to 16 bits) in the bottom halfword
 * and -W2 in the top halfword.
 * NOTE(review): W26 is not referenced anywhere in the visible part of this
 * file - confirm whether anything else relies on it.
 */
#define W13 (W1 | (W3 << 16))
#define W26 (W2 | (W6 << 16))
#define W42 (W4 | (W2 << 16))
#define W42n (-W4&0xffff | (-W2 << 16))
#define W46 (W4 | (W6 << 16))
#define W57 (W5 | (W7 << 16))
/*
 * idct_row: compute the even (A0..A3) and odd (B0..B3) partial sums of one
 * 8-point IDCT from eight packed 16-bit inputs.
 *
 * The inputs are called x0..x7 in the comments below, written {top:bottom}
 * for the two halfwords of a register:
 *   word at [r0]       = {x2:x0}   must already be in r2 on entry
 *   word at [r0, #4]   = {x6:x4}   loaded by this macro
 *   word at [r0, #8]   = {x3:x1}   must already be in r3 on entry
 *   word at [r0, #12]  = {x7:x5}   loaded by this macro
 * (The x indices are inferred from which weight each halfword is multiplied
 * by; the macro itself only sees four packed words.)
 *
 * Parameter:
 *   shift  the even accumulators are pre-loaded with 1 << (shift - 1), the
 *          rounding bias for a later arithmetic right shift by "shift".
 *
 * In:       r0 = source address, r2 = {x2:x0}, r3 = {x3:x1}, ip = W42
 * Out:      r4 = A0, r5 = A1, r6 = A2, r7 = A3 (each including the bias)
 *           r8 = B0, r9 = -B1, r10 = B2, r11 = B3
 * Clobbers: r1, r2, r3, ip, lr.  r0 is not written.
 */
.macro idct_row shift
ldr lr, =W46 /* lr = W4 | (W6 << 16) */
mov r1, #(1<<(\shift-1)) /* r1 = rounding bias */
smlad r4, r2, ip, r1 /* A0 = bias + W4*x0 + W2*x2 */
smlsd r7, r2, ip, r1 /* A3 = bias + W4*x0 - W2*x2 */
ldr ip, =W13 /* ip = W1 | (W3 << 16) */
ldr r10,=W57 /* r10 = W5 | (W7 << 16) */
smlad r5, r2, lr, r1 /* A1 = bias + W4*x0 + W6*x2 */
smlsd r6, r2, lr, r1 /* A2 = bias + W4*x0 - W6*x2 */
smuad r8, r3, ip /* B0 = W1*x1 + W3*x3 */
smusdx r11,r3, r10 /* B3 = W7*x1 - W5*x3 */
ldr lr, [r0, #12] /* lr = {x7:x5} */
pkhtb r2, ip, r10,asr #16 /* r2 = W7 | (W3 << 16); the old r2 is no longer needed */
pkhbt r1, ip, r10,lsl #16 /* r1 = W1 | (W5 << 16); the bias is no longer needed */
smusdx r9, r2, r3 /* r9 = -B1 = W7*x3 - W3*x1 */
smlad r8, lr, r10,r8 /* B0 += W5*x5 + W7*x7 */
smusdx r10,r3, r1 /* r10 = B2 = W5*x1 - W1*x3 (W57 in r10 is consumed) */
ldr r3, =W42n /* r3 = -W4 | (-W2 << 16); {x3:x1} is no longer needed */
smlad r10,lr, r2, r10 /* B2 += W7*x5 + W3*x7 */
ldr r2, [r0, #4] /* r2 = {x6:x4} */
smlsdx r11,lr, ip, r11 /* B3 += W3*x5 - W1*x7 */
ldr ip, =W46 /* ip = W4 | (W6 << 16) */
smlad r9, lr, r1, r9 /* -B1 += W1*x5 + W5*x7 */
smlad r5, r2, r3, r5 /* A1 += -W4*x4 - W2*x6 */
smlsd r6, r2, r3, r6 /* A2 += -W4*x4 + W2*x6 */
smlad r4, r2, ip, r4 /* A0 += W4*x4 + W6*x6 */
smlsd r7, r2, ip, r7 /* A3 += W4*x4 - W6*x6 */
.endm
/*
 * idct_row4: reduced form of idct_row that uses only the two packed words
 * already held in r2 and r3 and performs no memory loads of input data.
 * It yields the same register layout as idct_row, with the contributions of
 * the other two input words (x4..x7 in idct_row's naming) left out, so it is
 * only equivalent when those inputs are zero.
 *
 * Parameter:
 *   shift  the even accumulators are pre-loaded with 1 << (shift - 1).
 *
 * In:       r2 = {x2:x0}, r3 = {x3:x1} ({top:bottom} halfwords), ip = W42
 * Out:      r4 = A0, r5 = A1, r6 = A2, r7 = A3 (each including the bias)
 *           r8 = B0, r9 = -B1, r10 = B2, r11 = B3
 * Clobbers: r1, r2, ip, lr.  r0 and r3 are not written.
 */
.macro idct_row4 shift
ldr lr, =W46 /* lr = W4 | (W6 << 16) */
ldr r10,=W57 /* r10 = W5 | (W7 << 16) */
mov r1, #(1<<(\shift-1)) /* r1 = rounding bias */
smlad r4, r2, ip, r1 /* A0 = bias + W4*x0 + W2*x2 */
smlsd r7, r2, ip, r1 /* A3 = bias + W4*x0 - W2*x2 */
ldr ip, =W13 /* ip = W1 | (W3 << 16) */
smlad r5, r2, lr, r1 /* A1 = bias + W4*x0 + W6*x2 */
smlsd r6, r2, lr, r1 /* A2 = bias + W4*x0 - W6*x2 */
smusdx r11,r3, r10 /* B3 = W7*x1 - W5*x3 */
smuad r8, r3, ip /* B0 = W1*x1 + W3*x3 */
pkhtb r2, ip, r10,asr #16 /* r2 = W7 | (W3 << 16) */
pkhbt r1, ip, r10,lsl #16 /* r1 = W1 | (W5 << 16) */
smusdx r9, r2, r3 /* r9 = -B1 = W7*x3 - W3*x1 */
smusdx r10,r3, r1 /* r10 = B2 = W5*x1 - W1*x3 */
.endm
/*
 * idct_finish: final butterflies of one 8-point transform, without any
 * down-shift.
 *
 * In:  r4..r7 = even sums, r8..r11 = odd sums.  r9 is subtracted where the
 *      other odd terms are added, matching idct_row/idct_row4, which leave
 *      the second odd term negated in r9.
 * Out: ip  = r4 + r8      lr  = r4 - r8
 *      r4  = r5 - r9      r8  = r5 + r9
 *      r5  = r6 + r10     r9  = r6 - r10
 *      r6  = r7 + r11     r10 = r7 - r11
 *      (right-hand sides refer to the values on entry)
 * r7 and r11 are left unchanged.  Flags are not affected.
 */
.macro idct_finish
        /* Each pair reads only registers that later pairs overwrite, so the
           pairs must stay in this order; within a pair the order is free. */
        sub     lr,  r4, r8
        add     ip,  r4, r8

        add     r8,  r5, r9
        sub     r4,  r5, r9

        sub     r9,  r6, r10
        add     r5,  r6, r10

        sub     r10, r7, r11
        add     r6,  r7, r11
.endm
/*
 * idct_finish_shift: final butterflies of one 8-point transform, each result
 * arithmetically shifted right by "shift".
 *
 * Parameter:
 *   shift  immediate right-shift amount.
 *
 * In/out (right-hand sides are the values on entry):
 *      r4  = (r4 + r8)  >> shift     r8  = (r4 - r8)  >> shift
 *      r5  = (r5 - r9)  >> shift     r9  = (r5 + r9)  >> shift
 *      r6  = (r6 + r10) >> shift     r10 = (r6 - r10) >> shift
 *      r7  = (r7 + r11) >> shift     r11 = (r7 - r11) >> shift
 * r9 is subtracted in the r5 result because idct_row/idct_row4 leave that
 * odd term negated.
 * Clobbers: r2, r3 (scratch).  Flags are not affected.
 */
.macro idct_finish_shift shift
        sub     r2,  r4, r8
        add     r3,  r4, r8
        mov     r8,  r2, asr #\shift
        mov     r4,  r3, asr #\shift

        add     r2,  r5, r9
        sub     r3,  r5, r9
        mov     r9,  r2, asr #\shift
        mov     r5,  r3, asr #\shift

        sub     r2,  r6, r10
        add     r3,  r6, r10
        mov     r10, r2, asr #\shift
        mov     r6,  r3, asr #\shift

        sub     r2,  r7, r11
        add     r3,  r7, r11
        mov     r11, r2, asr #\shift
        mov     r7,  r3, asr #\shift
.endm
/*
 * idct_finish_shift_sat: final butterflies of one 8-point transform; each
 * result is arithmetically shifted right by "shift" and then saturated to
 * the unsigned 8-bit range by usat.
 *
 * Parameter:
 *   shift  immediate right-shift amount.
 *
 * In/out (right-hand sides are the values on entry, sat8 = usat #8):
 *      r4  = sat8((r4 + r8)  >> shift)   r8  = sat8((r4 - r8)  >> shift)
 *      r5  = sat8((r5 - r9)  >> shift)   r9  = sat8((r5 + r9)  >> shift)
 *      r6  = sat8((r6 + r10) >> shift)   r10 = sat8((r6 - r10) >> shift)
 *      r7  = sat8((r7 + r11) >> shift)   r11 = sat8((r7 - r11) >> shift)
 * Clobbers: r3, ip (scratch).
 */
.macro idct_finish_shift_sat shift
        sub     ip,  r4, r8
        add     r3,  r4, r8
        usat    r8,  #8, ip, asr #\shift
        usat    r4,  #8, r3, asr #\shift

        add     ip,  r5, r9
        sub     r3,  r5, r9
        usat    r9,  #8, ip, asr #\shift
        usat    r5,  #8, r3, asr #\shift

        sub     ip,  r6, r10
        add     r3,  r6, r10
        usat    r10, #8, ip, asr #\shift
        usat    r6,  #8, r3, asr #\shift

        sub     ip,  r7, r11
        add     r3,  r7, r11
        usat    r11, #8, ip, asr #\shift
        usat    r7,  #8, r3, asr #\shift
.endm
/*
 * idct_row_armv6: transform the eight 16-bit values at r0 and scatter the
 * eight 16-bit results to r1 with a stride of 16 bytes.
 *
 * In:       r0 = source (four packed words are read from [r0] .. [r0, #12])
 *           r1 = destination
 * Out:      halfwords written at r1 + 16*k, k = 0..7
 * Clobbers: r2-r11, ip, lr, flags.  r0 and r1 are preserved.
 *
 * Three paths:
 *   - everything except the bottom halfword of [r0] is zero: all eight
 *     outputs are that halfword shifted left by 3;
 *   - the words at [r0, #4] and [r0, #12] are both zero: idct_row4;
 *   - otherwise: full idct_row.
 */
function idct_row_armv6
        push    {lr}

        ldr     r2, [r0]                /* first packed word  */
        ldr     ip, [r0, #4]            /* second packed word */
        ldr     r3, [r0, #8]            /* third packed word  */
        ldr     lr, [r0, #12]           /* fourth packed word */

        /* The two compares only execute while everything tested so far was
           zero, so EQ after them means only the bottom half of r2 is set. */
        orrs    lr, lr, ip
        itt     eq
        cmpeq   lr, r3
        cmpeq   lr, r2, lsr #16
        beq     1f

        push    {r1}                    /* the row macros use r1 as scratch */
        ldr     ip, =W42
        cmp     lr, #0                  /* lr = second word | fourth word */
        beq     2f

        idct_row ROW_SHIFT
        b       3f

2:      idct_row4 ROW_SHIFT

3:      pop     {r1}
        idct_finish_shift ROW_SHIFT

        /* Same eight stores as before, listed by ascending offset. */
        strh    r4,  [r1]
        strh    r11, [r1, #(16*1)]
        strh    r5,  [r1, #(16*2)]
        strh    r10, [r1, #(16*3)]
        strh    r6,  [r1, #(16*4)]
        strh    r9,  [r1, #(16*5)]
        strh    r7,  [r1, #(16*6)]
        strh    r8,  [r1, #(16*7)]

        pop     {pc}

        /* Single-value shortcut: replicate (bottom half of r2) << 3. */
1:      mov     r2, r2, lsl #3
        strh    r2, [r1]
        strh    r2, [r1, #(16*1)]
        strh    r2, [r1, #(16*2)]
        strh    r2, [r1, #(16*3)]
        strh    r2, [r1, #(16*4)]
        strh    r2, [r1, #(16*5)]
        strh    r2, [r1, #(16*6)]
        strh    r2, [r1, #(16*7)]

        pop     {pc}
endfunc
/*
 * idct_col_armv6: transform the eight 16-bit values at r0 with the
 * column-pass shift (COL_SHIFT) and store the eight 16-bit results to r1
 * with a stride of 16 bytes.
 *
 * In:       r0 = source (read through idct_row)
 *           r1 = destination
 * Out:      halfwords written at r1 + 16*k, k = 0..7, in the order
 *           r4, r5, r6, r7, r11, r10, r9, r8
 * Clobbers: r2-r11, ip, lr.  r0 and r1 are preserved.
 */
function idct_col_armv6
        push    {r1, lr}                /* idct_row uses r1 as scratch */

        ldr     ip, =W42
        ldr     r2, [r0]
        ldr     r3, [r0, #8]
        idct_row COL_SHIFT
        pop     {r1}
        idct_finish_shift COL_SHIFT

        /* Stores grouped as the sum/difference pair of each butterfly. */
        strh    r4,  [r1]
        strh    r8,  [r1, #(16*7)]
        strh    r5,  [r1, #(16*1)]
        strh    r9,  [r1, #(16*6)]
        strh    r6,  [r1, #(16*2)]
        strh    r10, [r1, #(16*5)]
        strh    r7,  [r1, #(16*3)]
        strh    r11, [r1, #(16*4)]

        pop     {pc}
endfunc
/*
 * idct_col_put_armv6: transform the eight 16-bit values at r0 with the
 * column-pass shift, saturate each result to unsigned 8 bits and store the
 * eight bytes through r1, stepping by r2 between stores.
 *
 * In:       r0 = source (read through idct_row)
 *           r1 = destination byte pointer
 *           r2 = step between consecutive output bytes
 * Clobbers: r3-r11, ip, lr.  r0 and r2 are preserved.
 *
 * NOTE(review): strb_post is defined outside this file; it is used here as
 * "store byte at [r1], then advance r1 by r2".  The closing subtraction of
 * r2 * 8 assumes exactly that, and brings r1 back to its entry value.
 */
function idct_col_put_armv6
        push    {r1, r2, lr}            /* idct_row overwrites r1 and r2 */

        ldr     ip, =W42
        ldr     r3, [r0, #8]
        ldr     r2, [r0]
        idct_row COL_SHIFT
        pop     {r1, r2}
        idct_finish_shift_sat COL_SHIFT

        /* Post-incrementing stores: the order defines the output position. */
        strb_post r4,  r1, r2
        strb_post r5,  r1, r2
        strb_post r6,  r1, r2
        strb_post r7,  r1, r2
        strb_post r11, r1, r2
        strb_post r10, r1, r2
        strb_post r9,  r1, r2
        strb_post r8,  r1, r2

        sub     r1, r1, r2, lsl #3      /* undo the eight advances */

        pop     {pc}
endfunc
/*
 * idct_col_add_armv6: transform the eight 16-bit values at r0 with the
 * column-pass shift, add each result to the byte already present at the
 * destination, saturate to unsigned 8 bits and write it back.
 *
 * In:       r0 = source (read through idct_row)
 *           r1 = destination byte pointer
 *           r2 = step between consecutive destination bytes
 * Clobbers: r3-r11, ip, lr.  r0 and r2 are preserved.
 *
 * Destination bytes are called p0..p7 below (p_k at entry r1 + k * r2).
 * After idct_finish the unshifted results for p0..p7 are in
 * ip, r4, r5, r6, r10, r9, r8, lr respectively.
 *
 * NOTE(review): strb_post is defined outside this file; it is used here as
 * "store byte at [r1], then advance r1 by r2" - the closing subtraction of
 * r2 * 8 relies on that.
 *
 * Change from the previous version: a load of p4 into r11 that used to sit
 * right after the first two loads has been removed.  Its value was never
 * read: r11 is reloaded with p5 before any use, and p4 is loaded into r7
 * further down.
 */
function idct_col_add_armv6
        push    {r1, r2, lr}            /* idct_row overwrites r1 and r2 */

        ldr     r2, [r0]
        ldr     ip, =W42
        ldr     r3, [r0, #8]
        idct_row COL_SHIFT
        pop     {r1, r2}
        idct_finish

        ldrb    r3, [r1]                        /* r3  = p0 */
        ldrb    r7, [r1, r2]                    /* r7  = p1 */
        add     ip, r3, ip, asr #COL_SHIFT
        usat    ip, #8, ip
        add     r4, r7, r4, asr #COL_SHIFT
        strb_post ip, r1, r2                    /* write p0; r1 -> p1 */
        ldrb    ip, [r1, r2]                    /* ip  = p2 */
        usat    r4, #8, r4
        ldrb    r11,[r1, r2, lsl #2]            /* r11 = p5 */
        add     r5, ip, r5, asr #COL_SHIFT
        usat    r5, #8, r5
        strb_post r4, r1, r2                    /* write p1; r1 -> p2 */
        ldrb    r3, [r1, r2]                    /* r3  = p3 */
        ldrb    ip, [r1, r2, lsl #2]            /* ip  = p6 */
        strb_post r5, r1, r2                    /* write p2; r1 -> p3 */
        ldrb    r7, [r1, r2]                    /* r7  = p4 */
        ldrb    r4, [r1, r2, lsl #2]            /* r4  = p7 */
        add     r6, r3, r6, asr #COL_SHIFT
        usat    r6, #8, r6
        add     r10,r7, r10,asr #COL_SHIFT
        usat    r10,#8, r10
        add     r9, r11,r9, asr #COL_SHIFT
        usat    r9, #8, r9
        add     r8, ip, r8, asr #COL_SHIFT
        usat    r8, #8, r8
        add     lr, r4, lr, asr #COL_SHIFT
        usat    lr, #8, lr
        strb_post r6, r1, r2                    /* write p3 */
        strb_post r10,r1, r2                    /* write p4 */
        strb_post r9, r1, r2                    /* write p5 */
        strb_post r8, r1, r2                    /* write p6 */
        strb_post lr, r1, r2                    /* write p7 */

        sub     r1, r1, r2, lsl #3              /* undo the eight advances */

        pop     {pc}
endfunc
/*
 * idct_rows: call "func" eight times to process a whole 8x8 block.
 *
 * Parameters:
 *   func   routine to call; it receives r0 = source and r1 = destination
 *   width  immediate added to r1 between consecutive calls
 *
 * r0 visits byte offsets 0, 32, 64, 96, 16, 48, 80, 112 from its entry value
 * (the even 16-byte lines first, then the odd ones) and is back at the entry
 * value when the macro ends.  r1 is advanced by "width" after each of the
 * first seven calls and is not rewound.
 * lr is overwritten by the calls; anything else "func" clobbers is clobbered
 * here as well.
 */
.macro idct_rows func width
        /* offsets 0, 32, 64 */
        .rept   3
        bl      \func
        add     r1, r1, #\width
        add     r0, r0, #(16*2)
        .endr

        /* offset 96, then step back to offset 16 */
        bl      \func
        add     r1, r1, #\width
        sub     r0, r0, #(16*5)

        /* offsets 16, 48, 80 */
        .rept   3
        bl      \func
        add     r1, r1, #\width
        add     r0, r0, #(16*2)
        .endr

        /* offset 112, then restore r0 */
        bl      \func
        sub     r0, r0, #(16*7)
.endm
/*
 * ff_simple_idct_armv6: in-place 8x8 IDCT of the block of 16-bit values
 * pointed to by r0.
 * NOTE(review): presumably void ff_simple_idct_armv6(int16_t *data); the C
 * prototype is not visible in this file.
 *
 * In: r0 = block (64 halfwords; read by the first pass, overwritten by the
 *          second)
 *
 * The first pass writes its results into a 128-byte scratch area on the
 * stack; the second pass reads that scratch area and writes back to the
 * block.  r4-r11 are saved and restored around the whole transform.
 */
function ff_simple_idct_armv6, export=1
        push    {r4-r11, lr}
        sub     sp, sp, #(8*8*2)        /* scratch: 8x8 halfwords */

        mov     r1, sp                  /* first pass: block -> scratch */
        idct_rows idct_row_armv6, 2

        mov     r1, r0                  /* idct_rows left r0 at the block start */
        mov     r0, sp                  /* second pass: scratch -> block */
        idct_rows idct_col_armv6, 2

        add     sp, sp, #(8*8*2)
        pop     {r4-r11, pc}
endfunc
/*
 * ff_simple_idct_add_armv6: 8x8 IDCT of the 16-bit block at r2, with the
 * result added to the bytes at r0 (saturating to unsigned 8 bits).
 * NOTE(review): presumably (uint8_t *dest, line size, int16_t *data); the C
 * prototype is not visible in this file.
 *
 * In: r0 = destination bytes
 *     r1 = value handed to idct_col_add_armv6 in r2 (step between the bytes
 *          one column call touches)
 *     r2 = source block (64 halfwords, read only)
 *
 * r0 and r1 are pushed together with the callee-saved registers so they can
 * be fetched again after the first pass; they sit directly above the
 * 128-byte scratch area and are discarded, not popped, on exit.
 */
function ff_simple_idct_add_armv6, export=1
        push    {r0, r1, r4-r11, lr}
        sub     sp, sp, #(8*8*2)        /* scratch: 8x8 halfwords */

        mov     r1, sp                  /* first pass: block -> scratch */
        mov     r0, r2
        idct_rows idct_row_armv6, 2

        ldr     r2, [sp, #(8*8*2 + 4)]  /* saved r1 */
        ldr     r1, [sp, #(8*8*2)]      /* saved r0 */
        mov     r0, sp                  /* second pass: scratch -> destination */
        idct_rows idct_col_add_armv6, 1

        add     sp, sp, #(8*8*2 + 2*4)  /* drop scratch and the saved r0/r1 */
        pop     {r4-r11, pc}
endfunc
/*
 * ff_simple_idct_put_armv6: 8x8 IDCT of the 16-bit block at r2, with the
 * result saturated to unsigned 8 bits and stored to the bytes at r0.
 * NOTE(review): presumably (uint8_t *dest, line size, int16_t *data); the C
 * prototype is not visible in this file.
 *
 * In: r0 = destination bytes
 *     r1 = value handed to idct_col_put_armv6 in r2 (step between the bytes
 *          one column call writes)
 *     r2 = source block (64 halfwords, read only)
 *
 * r0 and r1 are pushed together with the callee-saved registers so they can
 * be fetched again after the first pass; they sit directly above the
 * 128-byte scratch area and are discarded, not popped, on exit.
 */
function ff_simple_idct_put_armv6, export=1
        push    {r0, r1, r4-r11, lr}
        sub     sp, sp, #(8*8*2)        /* scratch: 8x8 halfwords */

        mov     r1, sp                  /* first pass: block -> scratch */
        mov     r0, r2
        idct_rows idct_row_armv6, 2

        ldr     r2, [sp, #(8*8*2 + 4)]  /* saved r1 */
        ldr     r1, [sp, #(8*8*2)]      /* saved r0 */
        mov     r0, sp                  /* second pass: scratch -> destination */
        idct_rows idct_col_put_armv6, 1

        add     sp, sp, #(8*8*2 + 2*4)  /* drop scratch and the saved r0/r1 */
        pop     {r4-r11, pc}
endfunc