@
@ ARMv4-optimized IDCT functions
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of FFmpeg.
@
@ FFmpeg is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ FFmpeg is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with FFmpeg; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
#include "config.h"
#include "libavutil/arm/asm.S"
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, ptrdiff_t stride)
@
@ Adds eight rows of eight signed 16-bit values from block to the unsigned
@ bytes at dest, in place, saturating every result to 0..255.
@
@ In:    r0 = block  (16 bytes are consumed per row, rows are contiguous)
@        r1 = dest   (8 bytes per row, read and written as two 32-bit words)
@        r2 = stride (added to dest after each row)
@ Out:   nothing in registers; dest is rewritten
@ Saves: r4-r10 are pushed on entry and popped on exit
@ Clobb: r0, r1, flags
@
@ NOTE(review): dest is accessed with word loads/stores, so it presumably has
@ to be 4-byte aligned on cores without unaligned access - confirm with callers.
@ NOTE(review): the lowest byte of each loaded word is paired with the first
@ coefficient, which matches memory order on little-endian only - confirm.
@
@ Loop register roles:
@   r4      four dest pixels, as loaded
@   r5, r7  coefficient, then its bitwise complement (source of the clamp value)
@   r6, r8  pixel + coefficient, replaced by the clamp value when out of range
@   r9      packed result word
@   r10     rows remaining
@
@ Saturation: the sum is compared unsigned against 255, so HI is taken both for
@ a negative sum (a huge unsigned value) and for a sum above 255. The pixel is
@ never negative, hence a negative sum implies a negative coefficient and a sum
@ above 255 implies a positive one. The coefficient is a sign-extended 16-bit
@ value, so the top byte of its complement is 0x00 when it is negative and 0xFF
@ otherwise: (~coef) lsr #24 is exactly the required clamp value. Comparing the
@ whole sum, rather than testing only bit 8 of it, keeps this correct for every
@ int16_t coefficient and guarantees no stray high bits reach the orr packing.
function ff_add_pixels_clamped_arm, export=1, align=5
        push            {r4-r10}
        mov             r10, #8                 @ r10 = rows remaining
1:
        ldr             r4,  [r1]               @ r4 = dest bytes 0..3

        @ bytes 0 and 1 with block[0] and block[1]
        ldrsh           r5,  [r0]
        ldrsh           r7,  [r0, #2]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5                 @ ~coef, top byte = clamp value
        mvn             r7,  r7
        cmp             r6,  #0xFF              @ unsigned: HI if sum < 0 or sum > 255
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #4]           @ block[2], loaded ahead of its use
        orr             r9,  r9,  r8,  lsl #8

        @ bytes 2 and 3 with block[2] and block[3]
        ldrsh           r7,  [r0, #6]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        ldr             r4,  [r1, #4]           @ r4 = dest bytes 4..7, loaded ahead
        orr             r9,  r9,  r8,  lsl #24
        ldrsh           r5,  [r0, #8]           @ block[4], loaded ahead of its use
        str             r9,  [r1]               @ store result bytes 0..3

        @ bytes 4 and 5 with block[4] and block[5]
        ldrsh           r7,  [r0, #10]
        and             r6,  r4,  #0xFF
        and             r8,  r4,  #0xFF00
        add             r6,  r6,  r5
        add             r8,  r7,  r8,  lsr #8
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        mov             r9,  r6
        ldrsh           r5,  [r0, #12]          @ block[6], loaded ahead of its use
        orr             r9,  r9,  r8,  lsl #8

        @ bytes 6 and 7 with block[6] and block[7]
        ldrsh           r7,  [r0, #14]
        and             r6,  r4,  #0xFF0000
        and             r8,  r4,  #0xFF000000
        add             r6,  r5,  r6,  lsr #16
        add             r8,  r7,  r8,  lsr #24
        mvn             r5,  r5
        mvn             r7,  r7
        cmp             r6,  #0xFF
        it              hi
        movhi           r6,  r5,  lsr #24
        cmp             r8,  #0xFF
        it              hi
        movhi           r8,  r7,  lsr #24
        orr             r9,  r9,  r6,  lsl #16
        add             r0,  r0,  #16           @ advance block to the next row
        orr             r9,  r9,  r8,  lsl #24
        subs            r10, r10, #1            @ sets Z for the bne below; the str
                                                @ and add in between leave flags alone
        str             r9,  [r1, #4]           @ store result bytes 4..7
        add             r1,  r1,  r2            @ advance dest by stride
        bne             1b

        pop             {r4-r10}
        bx              lr
endfunc