#include <config.h>
#include "asm.h"
#ifdef DO_MMX_ASM
.text
.align 8
FN_(imlib_mmx_RotateAA)

/*
 * imlib_mmx_RotateAA -- MMX sampling loop that fills a destination
 * rectangle from a source image along an affine path, blending
 * neighbouring source pixels with 12-bit fractional weights.
 *
 * Syntax/target: 32-bit x86, AT&T syntax, preprocessed with cpp.
 * Arguments are read from the stack through %ebp (offsets below).
 *
 *   src, dest   base addresses; both are indexed in 4-byte pixels
 *   sow, dow    row strides of source / destination, in pixels
 *   sw, sh      source width / height
 *   dw, dh      destination width / height (loop counts); the routine
 *               does nothing when either is <= 0
 *   x, y        start coordinate, fixed point with 12 fraction bits
 *               (integer part taken with >> 12, fraction with & 0x0fff)
 *   dxh, dyh    coordinate step per destination pixel
 *   dxv, dyv    coordinate step per destination row
 *
 * Several argument/local pairs are deliberately adjacent in memory so
 * that one 64-bit MMX access handles both halves at once:
 *   x:y, dxh:dyh, dlx:dly, m0fff:m0fffh.
 *
 * Register roles inside the loops:
 *   %mm6  current (x, y): x in the low dword, y in the high dword
 *   %mm7  zero, used to widen bytes to words
 *   %edi  dest row base advanced by dw pixels; %ecx counts -dw .. 0 so
 *         (%edi,%ecx,4) is the current destination pixel
 *   %edx  src, %ebx  sow, %esi  address of the top-left source pixel
 *
 * %ebx, %ecx, %edx, %edi, %esi are saved and restored; %eax and the MMX
 * registers are used as scratch.  emms is executed before returning.
 */

/* Stack arguments */
#define src 8(%ebp)
#define dest 12(%ebp)
#define sow 16(%ebp)
#define sw 20(%ebp)
#define sh 24(%ebp)
#define dow 28(%ebp)
#define dw 32(%ebp)
#define dh 36(%ebp)
#define x 40(%ebp)
#define y 44(%ebp)
#define dxh 48(%ebp)
#define dyh 52(%ebp)
#define dxv 56(%ebp)
#define dyv 60(%ebp)

/* Local variables (below %ebp) */
#define j -4(%ebp)
#define dly -8(%ebp)
#define dlx -12(%ebp)
#define sht -16(%ebp)
#define swt -20(%ebp)
#define m0fffh -24(%ebp)
#define m0fff -28(%ebp)
#define mulsow -32(%ebp)

PR_(imlib_mmx_RotateAA):
	pushl %ebp
	movl %esp, %ebp
	subl $40, %esp
	pushl %ebx
	pushl %ecx
	pushl %edx
	pushl %edi
	pushl %esi

	/* Nothing to do unless dw > 0 and dh > 0 */
	cmpl $0, dw
	jle .rotate_leave
	cmpl $0, dh
	jle .rotate_leave

	pxor %mm7, %mm7

	/* mulsow = (sow << 16) | 1: as a pmaddwd operand against the word
	 * pair (xi, yi) it yields xi * 1 + yi * sow in the low dword.  The
	 * high dword of that 64-bit operand is m0fff; its product is unused. */
	movl sow, %eax
	sall $16, %eax
	orl $1, %eax
	movl %eax, mulsow

	/* 64-bit fraction mask 0x00000fff:00000fff */
	movl $0x0fff, %eax
	movl %eax, m0fff
	movl %eax, m0fffh

	/* mm6 = (x, y) */
	movq x, %mm6

	/* edi = dest + dw pixels */
	movl dest, %edi
	movl dw, %eax
	leal (%edi, %eax, 4), %edi

	/* dlx = dxv - dw * dxh : x correction applied at the end of a row */
	movl dw, %eax
	imull dxh, %eax
	negl %eax
	addl dxv, %eax
	movl %eax, dlx

	/* dly = dyv - dw * dyh : consumed as the high half of "paddd dlx" */
	movl dw, %eax
	imull dyh, %eax
	negl %eax
	addl dyv, %eax
	movl %eax, dly

	/* j = dh (rows remaining) */
	movl dh, %eax
	movl %eax, j

	/* Decide whether the unchecked fast path may be used: all four
	 * corners of the sampled area must pass an unsigned compare against
	 * the bound, so negative coordinates fail as well.
	 * NOTE(review): the bound used here is sw / sh as passed, while the
	 * slow path uses (sw - 1) << 12 and (sh - 1) << 12 -- confirm that
	 * the unscaled bound is intended. */

	/* x < sw */
	movl sw, %edx
	movl x, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* x + dxh * dw < sw */
	movl dxh, %ebx
	imull dw, %ebx
	addl %ebx, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* x + dxh * dw + dxv * dh < sw
	 * (must add, exactly like the y check below; subtracting here left
	 * both corners on the last row untested) */
	movl dxv, %eax
	imull dh, %eax
	addl %eax, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* x + dxv * dh < sw */
	subl %ebx, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside

	/* y < sh */
	movl sh, %edx
	movl y, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* y + dyh * dw < sh */
	movl dyh, %ebx
	imull dw, %ebx
	addl %ebx, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* y + dyh * dw + dyv * dh < sh */
	movl dyv, %eax
	imull dh, %eax
	addl %eax, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside
	/* y + dyv * dh < sh */
	subl %ebx, %ecx
	cmpl %edx, %ecx
	jae .rotate_outside

/* ------------------------------------------------------------------ */
/* Fast path: every sample is known to be inside, no per-pixel checks  */
/* ------------------------------------------------------------------ */
.rotate_inside:
	movl sow, %ebx
	movl src, %edx
.inside_loop_y:
	/* ecx = -dw, counts up to 0 */
	movl dw, %ecx
	negl %ecx
.inside_loop_x:
	/* esi = src + ((x >> 12) + (y >> 12) * sow) pixels */
	movq %mm6, %mm0
	psrad $12, %mm0
	packssdw %mm0, %mm0
	pmaddwd mulsow, %mm0
	movd %mm0, %eax
	leal (%edx, %eax, 4), %esi

	/* mm0 = x fraction in all four words, mm1 = y fraction likewise */
	movq %mm6, %mm0
	pand m0fff, %mm0
	movq %mm0, %mm1
	punpcklwd %mm0, %mm0
	punpckldq %mm0, %mm0
	punpckhwd %mm1, %mm1
	punpckldq %mm1, %mm1

	/* Load two adjacent pixels from this row and from the next row,
	 * widened to words: mm2/mm3 = upper left/right,
	 * mm4/mm5 = lower left/right */
	movq (%esi), %mm2
	movq (%esi, %ebx, 4), %mm4
	movq %mm2, %mm3
	movq %mm4, %mm5
	punpcklbw %mm7, %mm2
	punpcklbw %mm7, %mm4
	punpckhbw %mm7, %mm3
	punpckhbw %mm7, %mm5

	/* Horizontal blend of each row: left + ((right - left) * xf >> 12).
	 * The << 4 followed by pmulhw (>> 16) gives the net >> 12. */
	psubw %mm2, %mm3
	psubw %mm4, %mm5
	psllw $4, %mm3
	psllw $4, %mm5
	pmulhw %mm0, %mm3
	pmulhw %mm0, %mm5
	paddw %mm2, %mm3
	paddw %mm4, %mm5

	/* Vertical blend: upper + ((lower - upper) * yf >> 12) */
	psubw %mm3, %mm5
	psllw $4, %mm5
	pmulhw %mm1, %mm5
	paddw %mm3, %mm5

	/* Pack back to bytes and store the destination pixel */
	packuswb %mm5, %mm5
	movd %mm5, (%edi, %ecx, 4)

	/* (x, y) += (dxh, dyh) */
	paddd dxh, %mm6
	incl %ecx
	jnz .inside_loop_x

	/* End of row: (x, y) += (dlx, dly); edi += dow pixels */
	paddd dlx, %mm6
	movl dow, %ecx
	leal (%edi, %ecx, 4), %edi
	decl j
	jnz .inside_loop_y
	jmp .rotate_leave

/* ------------------------------------------------------------------ */
/* Slow path: each sample is classified against the source borders     */
/* ------------------------------------------------------------------ */
.rotate_outside:
	/* swt = (sw - 1) << 12, sht = (sh - 1) << 12 */
	movl sw, %eax
	decl %eax
	sall $12, %eax
	movl %eax, swt
	movl sh, %eax
	decl %eax
	sall $12, %eax
	movl %eax, sht

	movl sow, %ebx
	movl src, %edx
.outside_loop_y:
	/* ecx = -dw, counts up to 0 */
	movl dw, %ecx
	negl %ecx
.outside_loop_x:
	/* esi = src + ((x >> 12) + (y >> 12) * sow) pixels */
	movq %mm6, %mm0
	psrad $12, %mm0
	packssdw %mm0, %mm0
	pmaddwd mulsow, %mm0
	movd %mm0, %eax
	leal (%edx, %eax, 4), %esi

	/* mm0 = mm1 = (x fraction, y fraction) */
	movq %mm6, %mm0
	pand m0fff, %mm0
	movq %mm0, %mm1

	/* mm2 = y in the low dword */
	movq %mm6, %mm2
	psrlq $32, %mm2

	/* Unsigned x >= swt (also true for negative x) -> 2 */
	movd %mm6, %eax
	cmpl swt, %eax
	jae 2f

	/* Unsigned y >= sht (also true for negative y) -> 1 */
	movd %mm2, %eax
	cmpl sht, %eax
	jae 1f

	/* 0 <= x < swt and 0 <= y < sht: same four-pixel blend as the
	 * fast path */
.interp_argb:
	punpcklwd %mm0, %mm0
	punpckldq %mm0, %mm0
	punpckhwd %mm1, %mm1
	punpckldq %mm1, %mm1
	movq (%esi), %mm2
	movq (%esi, %ebx, 4), %mm4
	movq %mm2, %mm3
	movq %mm4, %mm5
	punpcklbw %mm7, %mm2
	punpcklbw %mm7, %mm4
	punpckhbw %mm7, %mm3
	punpckhbw %mm7, %mm5
	psubw %mm2, %mm3
	psubw %mm4, %mm5
	psllw $4, %mm3
	psllw $4, %mm5
	pmulhw %mm0, %mm3
	pmulhw %mm0, %mm5
	paddw %mm2, %mm3
	paddw %mm4, %mm5
	psubw %mm3, %mm5
	psllw $4, %mm5
	pmulhw %mm1, %mm5
	paddw %mm3, %mm5
	packuswb %mm5, %mm5
	movd %mm5, (%edi, %ecx, 4)
	jmp .outside_il_end

1:
	/* x in range; ~y <= 4095 means -4096 <= y < 0 (within one pixel
	 * before row 0).  Invert the fractions in mm1 and take the two
	 * pixels from the row after esi. */
	notl %eax
	cmpl $4095, %eax
	ja 1f
	pxor m0fff, %mm1
	movd (%esi, %ebx, 4), %mm2
	movd 4(%esi, %ebx, 4), %mm4

	/* Blend two pixels (mm2, mm4) with the weight in the low word of
	 * mm0, then reduce only the top word of the result by the weight
	 * taken from the high dword of mm1.  The top word is the byte at
	 * bits 24..31 of the pixel (presumably alpha, going by the label
	 * names -- confirm against the pixel format). */
.interp_rgb_a0:
	punpcklwd %mm0, %mm0
	punpckldq %mm0, %mm0
	punpckhwd %mm1, %mm1
	punpcklbw %mm7, %mm2
	punpcklbw %mm7, %mm4
	/* mm4 = mm2 + ((mm4 - mm2) * weight >> 12) */
	psubw %mm2, %mm4
	psllw $4, %mm4
	pmulhw %mm0, %mm4
	paddw %mm2, %mm4
	movq %mm4, %mm2
	/* Move the second weight into word 3 so that only the top channel
	 * is scaled, then subtract that part from the blended pixel */
	psllq $48, %mm1
	psllw $4, %mm4
	pmulhw %mm1, %mm4
	psubw %mm4, %mm2
	packuswb %mm2, %mm2
	movd %mm2, (%edi, %ecx, 4)
	jmp .outside_il_end

1:
	/* Second notl restores y; y - sht <= 4095 means sht <= y < sht + 4096
	 * (within one pixel after the last interpolatable row).  Anything
	 * further out gets a zero pixel. */
	notl %eax
	subl sht, %eax
	cmpl $4095, %eax
	ja .outside_il_0
	movd (%esi), %mm2
	movd 4(%esi), %mm4
	jmp .interp_rgb_a0

2:
	/* x is out of range.  Swap roles: the low dword of mm0 becomes the
	 * y fraction and the high dword of mm1 becomes the x fraction, so
	 * the shared tails blend along y and fade along x. */
	psrlq $32, %mm0
	psllq $32, %mm1
	/* ~x <= 4095 means -4096 <= x < 0 */
	notl %eax
	cmpl $4095, %eax
	ja 2f
	pxor m0fff, %mm1
	movd %mm2, %eax
	cmpl sht, %eax
	jae 1f
	/* y in range: use the column after esi, this row and the next */
	movd 4(%esi), %mm2
	movd 4(%esi, %ebx, 4), %mm4
	jmp .interp_rgb_a0

1:
	/* -4096 <= x < 0 and -4096 <= y < 0: single pixel one column and
	 * one row after esi */
	notl %eax
	cmpl $4095, %eax
	ja 1f
	movd 4(%esi, %ebx, 4), %mm2

	/* One source pixel in mm2: its top word is reduced by a factor
	 * built from both weights (mm0 and mm1); the other channels pass
	 * through unchanged.
	 * NOTE(review): exact weight formula not re-derived here -- verify
	 * before modifying this sequence. */
.interp_a000:
	pxor m0fff, %mm1
	punpcklbw %mm7, %mm2
	movq %mm2, %mm3
	psllq $2, %mm0
	psrlq $30, %mm1
	pmulhw %mm0, %mm1
	pxor m0fff, %mm1
	psllq $48, %mm1
	psllw $4, %mm3
	pmulhw %mm1, %mm3
	psubw %mm3, %mm2
	packuswb %mm2, %mm2
	movd %mm2, (%edi, %ecx, 4)
	jmp .outside_il_end

1:
	/* -4096 <= x < 0 and sht <= y < sht + 4096, else zero pixel */
	notl %eax
	subl sht, %eax
	cmpl $4095, %eax
	ja .outside_il_0
	pxor m0fff, %mm0
	movd 4(%esi), %mm2
	jmp .interp_a000

2:
	/* Second notl restores x; x - swt <= 4095 means swt <= x < swt + 4096,
	 * else zero pixel */
	notl %eax
	subl swt, %eax
	cmpl $4095, %eax
	ja .outside_il_0
	movd %mm2, %eax
	cmpl sht, %eax
	jae 1f
	/* y in range: use the column at esi, this row and the next */
	movd (%esi), %mm2
	movd (%esi, %ebx, 4), %mm4
	jmp .interp_rgb_a0

1:
	/* swt <= x < swt + 4096 and -4096 <= y < 0 */
	notl %eax
	cmpl $4095, %eax
	ja 1f
	movd (%esi, %ebx, 4), %mm2
	jmp .interp_a000

1:
	/* swt <= x < swt + 4096 and sht <= y < sht + 4096, else zero pixel */
	notl %eax
	subl sht, %eax
	cmpl $4095, %eax
	ja .outside_il_0
	pxor m0fff, %mm0
	movd (%esi), %mm2
	jmp .interp_a000

.outside_il_0:
	/* Sample is more than one pixel outside the source: store 0 */
	movl $0, %eax
	movl %eax, (%edi, %ecx, 4)

.outside_il_end:
	/* (x, y) += (dxh, dyh) */
	paddd dxh, %mm6
	incl %ecx
	jnz .outside_loop_x

	/* End of row: (x, y) += (dlx, dly); edi += dow pixels */
	paddd dlx, %mm6
	movl dow, %ecx
	leal (%edi, %ecx, 4), %edi
	decl j
	jnz .outside_loop_y

.rotate_leave:
	/* Leave MMX state and restore the saved registers */
	emms
	popl %esi
	popl %edi
	popl %edx
	popl %ecx
	popl %ebx
	movl %ebp, %esp
	popl %ebp
	ret

SIZE(imlib_mmx_RotateAA)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif