#include "libavutil/arm/asm.S"
#define MAX_CHANNELS 8
#define MAX_FIR_ORDER 8
#define MAX_IIR_ORDER 4
#define MAX_RATEFACTOR 4
#define MAX_BLOCKSIZE (40 * MAX_RATEFACTOR)
PST .req a1
PCO .req a2
AC0 .req a3
AC1 .req a4
CO0 .req v1
CO1 .req v2
CO2 .req v3
CO3 .req v4
ST0 .req v5
ST1 .req v6
ST2 .req sl
ST3 .req fp
I .req ip
PSAMP .req lr
.macro branch_pic_label first, remainder:vararg
A .word \first - 4
T .hword (\first) / 2
.ifnb \remainder
branch_pic_label \remainder
.endif
.endm
.macro load group, index, base, offset
.altmacro
load_ \group, %(\index), \base, \offset
.noaltmacro
.endm
.macro load_ group, index, base, offset
ldr \group\index, [\base, #\offset]
.endm
.macro loadd group, index, base, offset
.altmacro
loadd_ \group, %(\index), %(\index+1), \base, \offset
.noaltmacro
.endm
.macro loadd_ group, index0, index1, base, offset
A .if \offset >= 256
A ldr \group\index0, [\base, #\offset]
A ldr \group\index1, [\base, #(\offset) + 4]
A .else
ldrd \group\index0, \group\index1, [\base, #\offset]
A .endif
.endm
.macro multiply index, accumulate, long
.altmacro
multiply_ %(\index), \accumulate, \long
.noaltmacro
.endm
.macro multiply_ index, accumulate, long
.if \long
.if \accumulate
smlal AC0, AC1, CO\index, ST\index
.else
smull AC0, AC1, CO\index, ST\index
.endif
.else
.if \accumulate
mla AC0, CO\index, ST\index, AC0
.else
mul AC0, CO\index, ST\index
.endif
.endif
.endm
.macro inc howmany
.set LOAD_REG, (LOAD_REG + \howmany) & 3
.set OFFSET_CO, OFFSET_CO + 4 * \howmany
.set OFFSET_ST, OFFSET_ST + 4 * \howmany
.if FIR_REMAIN > 0
.set FIR_REMAIN, FIR_REMAIN - \howmany
.if FIR_REMAIN == 0
.set OFFSET_CO, 4 * MAX_FIR_ORDER
.set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
.endif
.elseif IIR_REMAIN > 0
.set IIR_REMAIN, IIR_REMAIN - \howmany
.endif
.endm
.macro implement_filter mask_minus1, shift_0, shift_8, iir_taps, fir_taps
.set TOTAL_TAPS, \iir_taps + \fir_taps
.set DEFINED_SHIFT, 0
.set DEFINED_MASK, 0
.set SHUFFLE_SHIFT, 0
.set SHUFFLE_MASK, 0
.set SPILL_SHIFT, 0
.set SPILL_MASK, 0
.if TOTAL_TAPS == 0
.if !\mask_minus1
MASK .req ST1
.set DEFINED_MASK, 1
.endif
.else
.if \shift_0
.if !\mask_minus1
MASK .req AC1
.set DEFINED_MASK, 1
.set SHUFFLE_MASK, 1
.endif
.elseif \shift_8
.if !\mask_minus1
.if TOTAL_TAPS <= 4
MASK .req PCO
.set DEFINED_MASK, 1
.set SHUFFLE_MASK, 1
.else
.set SPILL_MASK, 1
.endif
.endif
.else
.if TOTAL_TAPS <= 3
.if \fir_taps & 1
SHIFT .req CO0
.set DEFINED_SHIFT, 1
.set SHUFFLE_SHIFT, 1
.else
SHIFT .req CO3
.set DEFINED_SHIFT, 1
.set SHUFFLE_SHIFT, 1
.endif
.if !\mask_minus1
MASK .req PCO
.set DEFINED_MASK, 1
.set SHUFFLE_MASK, 1
.endif
.elseif TOTAL_TAPS == 4
SHIFT .req PCO
.set DEFINED_SHIFT, 1
.set SHUFFLE_SHIFT, 1
.if !\mask_minus1
.set SPILL_MASK, 1
.endif
.else
.set SPILL_SHIFT, 1
.if !\mask_minus1
.set SPILL_MASK, 1
.endif
.endif
.endif
.endif
.if SPILL_SHIFT
SHIFT .req ST0
.set DEFINED_SHIFT, 1
.endif
.if SPILL_MASK
MASK .req ST1
.set DEFINED_MASK, 1
.endif
.if TOTAL_TAPS <= 4
.set OFFSET_CO, 0
.if \fir_taps & 1
.set LOAD_REG, 1
.else
.set LOAD_REG, 0
.endif
.rept \fir_taps
load CO, LOAD_REG, PCO, OFFSET_CO
.set LOAD_REG, (LOAD_REG + 1) & 3
.set OFFSET_CO, OFFSET_CO + 4
.endr
.set OFFSET_CO, 4 * MAX_FIR_ORDER
.rept \iir_taps
load CO, LOAD_REG, PCO, OFFSET_CO
.set LOAD_REG, (LOAD_REG + 1) & 3
.set OFFSET_CO, OFFSET_CO + 4
.endr
.endif
.if SHUFFLE_SHIFT
mov SHIFT, ST0
.endif
.if SHUFFLE_MASK
mov MASK, ST1
.endif
01:
.if TOTAL_TAPS == 0
ldr ST0, [PSAMP]
subs I, I, #1
.if !\mask_minus1
and ST0, ST0, MASK
.endif
str ST0, [PST, #-4]!
str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
str ST0, [PSAMP], #4 * MAX_CHANNELS
bne 01b
.else
.if \fir_taps & 1
.set LOAD_REG, 1
.else
.set LOAD_REG, 0
.endif
.set LOAD_BANK, 0
.set FIR_REMAIN, \fir_taps
.set IIR_REMAIN, \iir_taps
.if FIR_REMAIN == 0
.set OFFSET_CO, 4 * MAX_FIR_ORDER
.set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
.else
.set OFFSET_CO, 0
.set OFFSET_ST, 0
.endif
.set MUL_REG, LOAD_REG
.set COUNTER, 0
.rept TOTAL_TAPS + 2
.if FIR_REMAIN != 0 || IIR_REMAIN != 0
.if COUNTER == 0
.if TOTAL_TAPS > 4
load CO, LOAD_REG, PCO, OFFSET_CO
.endif
load ST, LOAD_REG, PST, OFFSET_ST
inc 1
.elseif COUNTER == 1 && (\fir_taps & 1) == 0
.if TOTAL_TAPS > 4
load CO, LOAD_REG, PCO, OFFSET_CO
.endif
load ST, LOAD_REG, PST, OFFSET_ST
inc 1
.elseif LOAD_BANK == 0
.if TOTAL_TAPS > 4
.if FIR_REMAIN == 0 && IIR_REMAIN == 1
load CO, LOAD_REG, PCO, OFFSET_CO
.else
loadd CO, LOAD_REG, PCO, OFFSET_CO
.endif
.endif
.set LOAD_BANK, 1
.else
.if FIR_REMAIN == 0 && IIR_REMAIN == 1
load ST, LOAD_REG, PST, OFFSET_ST
inc 1
.else
loadd ST, LOAD_REG, PST, OFFSET_ST
inc 2
.endif
.set LOAD_BANK, 0
.endif
.endif
.if COUNTER >= 2
multiply MUL_REG, COUNTER > 2, !\shift_0
.set MUL_REG, (MUL_REG + 1) & 3
.endif
.set COUNTER, COUNTER + 1
.endr
.if SPILL_SHIFT
ldr SHIFT, [sp, #9*4 + 0*4]
.endif
.if SPILL_MASK
ldr MASK, [sp, #9*4 + 1*4]
.endif
ldr ST2, [PSAMP]
subs I, I, #1
.if \shift_8
mov AC0, AC0, lsr #8
orr AC0, AC0, AC1, lsl #24
.elseif !\shift_0
rsb ST3, SHIFT, #32
mov AC0, AC0, lsr SHIFT
A orr AC0, AC0, AC1, lsl ST3
T mov AC1, AC1, lsl ST3
T orr AC0, AC0, AC1
.endif
.if \mask_minus1
add ST3, ST2, AC0
.else
add ST2, ST2, AC0
and ST3, ST2, MASK
sub ST2, ST3, AC0
.endif
str ST3, [PST, #-4]!
str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
str ST3, [PSAMP], #4 * MAX_CHANNELS
bne 01b
.endif
b 99f
.if DEFINED_SHIFT
.unreq SHIFT
.endif
.if DEFINED_MASK
.unreq MASK
.endif
.endm
.macro switch_on_fir_taps mask_minus1, shift_0, shift_8, iir_taps
A ldr CO0, [pc, a3, lsl #2]
A add pc, pc, CO0
T tbh [pc, a3, lsl #1]
0:
branch_pic_label (70f - 0b), (71f - 0b), (72f - 0b), (73f - 0b)
branch_pic_label (74f - 0b)
.if \iir_taps <= 3
branch_pic_label (75f - 0b)
.if \iir_taps <= 2
branch_pic_label (76f - 0b)
.if \iir_taps <= 1
branch_pic_label (77f - 0b)
.if \iir_taps == 0
branch_pic_label (78f - 0b)
.endif
.endif
.endif
.endif
70: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 0
71: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 1
72: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 2
73: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 3
74: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 4
.if \iir_taps <= 3
75: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 5
.if \iir_taps <= 2
76: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 6
.if \iir_taps <= 1
77: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 7
.if \iir_taps == 0
78: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 8
.endif
.endif
.endif
.endif
.endm
.macro switch_on_iir_taps mask_minus1, shift_0, shift_8
A ldr CO0, [pc, a4, lsl #2]
A add pc, pc, CO0
T tbh [pc, a4, lsl #1]
0:
branch_pic_label (60f - 0b), (61f - 0b), (62f - 0b), (63f - 0b)
branch_pic_label (64f - 0b)
60: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0
61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1
62: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 2
63: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 3
64: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 4
.endm
function ff_mlp_filter_channel_arm, export=1
push {v1-fp,lr}
add v1, sp, #9*4
ldm v1, {ST0,ST1,I,PSAMP}
cmp ST1, #-1
bne 30f
movs ST2, ST0, lsl #29
bne 20f
bcs 10f
switch_on_iir_taps 1, 1, 0
10: switch_on_iir_taps 1, 0, 1
20: switch_on_iir_taps 1, 0, 0
30: movs ST2, ST0, lsl #29
bne 50f
bcs 40f
switch_on_iir_taps 0, 1, 0
40: switch_on_iir_taps 0, 0, 1
50: switch_on_iir_taps 0, 0, 0
99: pop {v1-fp,pc}
endfunc
.unreq PST
.unreq PCO
.unreq AC0
.unreq AC1
.unreq CO0
.unreq CO1
.unreq CO2
.unreq CO3
.unreq ST0
.unreq ST1
.unreq ST2
.unreq ST3
.unreq I
.unreq PSAMP
PSA .req a1
PCO .req a2
PBL .req a3
INDEX .req a4
CO0 .req v1
CO1 .req v2
CO2 .req v3
CO3 .req v4
SA0 .req v5
SA1 .req v6
SA2 .req sl
SA3 .req fp
AC0 .req ip
AC1 .req lr
NOISE .req SA0
LSB .req SA1
DCH .req SA2
MASK .req SA3
.macro implement_rematrix shift, index_mask, mask_minus1, maxchan
.if \maxchan == 1
ldrd CO0, CO1, [PCO]
.endif
1:
.if \maxchan == 1
ldrd SA0, SA1, [PSA]
smull AC0, AC1, CO0, SA0
.elseif \maxchan == 5
ldr CO0, [PCO, #0]
ldr SA0, [PSA, #0]
ldr CO1, [PCO, #4]
ldr SA1, [PSA, #4]
ldrd CO2, CO3, [PCO, #8]
smull AC0, AC1, CO0, SA0
ldrd SA2, SA3, [PSA, #8]
smlal AC0, AC1, CO1, SA1
ldrd CO0, CO1, [PCO, #16]
smlal AC0, AC1, CO2, SA2
ldrd SA0, SA1, [PSA, #16]
smlal AC0, AC1, CO3, SA3
smlal AC0, AC1, CO0, SA0
.else
ldr CO2, [PCO, #0]
ldr SA2, [PSA, #0]
ldr CO3, [PCO, #4]
ldr SA3, [PSA, #4]
ldrd CO0, CO1, [PCO, #8]
smull AC0, AC1, CO2, SA2
ldrd SA0, SA1, [PSA, #8]
smlal AC0, AC1, CO3, SA3
ldrd CO2, CO3, [PCO, #16]
smlal AC0, AC1, CO0, SA0
ldrd SA2, SA3, [PSA, #16]
smlal AC0, AC1, CO1, SA1
ldrd CO0, CO1, [PCO, #24]
smlal AC0, AC1, CO2, SA2
ldrd SA0, SA1, [PSA, #24]
smlal AC0, AC1, CO3, SA3
smlal AC0, AC1, CO0, SA0
.endif
ldm sp, {NOISE, DCH, MASK}
smlal AC0, AC1, CO1, SA1
.if \shift != 0
.if \index_mask == 63
add NOISE, NOISE, INDEX, lsr #32-6
ldrb LSB, [PBL], #MAX_CHANNELS
ldrsb NOISE, [NOISE]
add INDEX, INDEX, INDEX, lsl #32-6
.else
add NOISE, NOISE, INDEX, lsr #32-7
ldrb LSB, [PBL], #MAX_CHANNELS
ldrsb NOISE, [NOISE]
add INDEX, INDEX, INDEX, lsl #32-7
.endif
sub INDEX, INDEX, #1<<7
adds AC0, AC0, NOISE, lsl #\shift + 7
adc AC1, AC1, NOISE, asr #31
.else
ldrb LSB, [PBL], #MAX_CHANNELS
sub INDEX, INDEX, #1<<7
.endif
add PSA, PSA, #MAX_CHANNELS*4
mov AC0, AC0, lsr #14
orr AC0, AC0, AC1, lsl #18
.if !\mask_minus1
and AC0, AC0, MASK
.endif
add AC0, AC0, LSB
tst INDEX, #1<<15
str AC0, [PSA, DCH, lsl #2]
beq 1b
b 98f
.endm
.macro switch_on_maxchan shift, index_mask, mask_minus1
cmp v4, #5
blo 51f
beq 50f
implement_rematrix \shift, \index_mask, \mask_minus1, 7
50: implement_rematrix \shift, \index_mask, \mask_minus1, 5
51: implement_rematrix \shift, \index_mask, \mask_minus1, 1
.endm
.macro switch_on_mask shift, index_mask
cmp sl, #-1
bne 40f
switch_on_maxchan \shift, \index_mask, 1
40: switch_on_maxchan \shift, \index_mask, 0
.endm
.macro switch_on_au_size shift
.if \shift == 0
switch_on_mask \shift, undefined
.else
teq v6, #64
bne 30f
orr INDEX, INDEX, v1, lsl #32-6
switch_on_mask \shift, 63
30: orr INDEX, INDEX, v1, lsl #32-7
switch_on_mask \shift, 127
.endif
.endm
function ff_mlp_rematrix_channel_arm, export=1
push {v1-fp,lr}
add v1, sp, #9*4
ldm v1, {v1-sl}
teq v4, #1
itt ne
teqne v4, #5
teqne v4, #7
bne 99f
teq v6, #64
it ne
teqne v6, #128
bne 99f
sub v2, v2, #MAX_CHANNELS
push {a4,v2,sl}
movs INDEX, v3, lsl #7
beq 98f
subs INDEX, INDEX, #1<<7
adc lr, v1, v1
orr INDEX, INDEX, lr
cmp v5, #1
beq 11f
blo 10f
A ldr v5, [pc, v5, lsl #2]
A add pc, pc, v5
T tbh [pc, v5, lsl #1]
0:
branch_pic_label 0, 0, (12f - 0b), (13f - 0b)
branch_pic_label (14f - 0b), (15f - 0b), (16f - 0b), (17f - 0b)
branch_pic_label (18f - 0b), (19f - 0b), (20f - 0b), (21f - 0b)
branch_pic_label (22f - 0b), (23f - 0b), (24f - 0b), (25f - 0b)
10: switch_on_au_size 0
11: switch_on_au_size 1
12: switch_on_au_size 2
13: switch_on_au_size 3
14: switch_on_au_size 4
15: switch_on_au_size 5
16: switch_on_au_size 6
17: switch_on_au_size 7
18: switch_on_au_size 8
19: switch_on_au_size 9
20: switch_on_au_size 10
21: switch_on_au_size 11
22: switch_on_au_size 12
23: switch_on_au_size 13
24: switch_on_au_size 14
25: switch_on_au_size 15
98: add sp, sp, #3*4
pop {v1-fp,pc}
99:
pop {v1-fp,lr}
b X(ff_mlp_rematrix_channel)
endfunc
.unreq PSA
.unreq PCO
.unreq PBL
.unreq INDEX
.unreq CO0
.unreq CO1
.unreq CO2
.unreq CO3
.unreq SA0
.unreq SA1
.unreq SA2
.unreq SA3
.unreq AC0
.unreq AC1
.unreq NOISE
.unreq LSB
.unreq DCH
.unreq MASK