#include <config.h>
#include "asm.h"
#ifdef DO_AMD64_ASM
.data
.align 16
/*
 * 16-byte SSE2 mask/constant tables.  The label names encode the byte
 * pattern per 32-bit pixel (X = 0xff, V = 0x7f, I = 0x40, 0/1 literal),
 * reading from the low byte upward.  Only some are used by the routines
 * in this file; the rest are shared with the sibling blend implementations.
 */
m0X000000: .byte 0, 0, 0, 0, 0, 0, 255, 0
.byte 0, 0, 0, 0, 0, 0, 255, 0
m10000000: .byte 0, 0, 0, 0, 0, 0, 0, 1
.byte 0, 0, 0, 0, 0, 0, 0, 1
/* keeps the six colour words, zeroes the alpha word of each pixel pair */
m00XXXXXX: .byte 255, 255, 255, 255, 255, 255, 0, 0
.byte 255, 255, 255, 255, 255, 255, 0, 0
mVX000000: .byte 0, 0, 0, 0, 0, 0, 255, 127
.byte 0, 0, 0, 0, 0, 0, 255, 127
mV0000000: .byte 0, 0, 0, 0, 0, 0, 0, 128
.byte 0, 0, 0, 0, 0, 0, 0, 128
mX000X000: .byte 0, 0, 0, 0, 0, 0, 255, 255
.byte 0, 0, 0, 0, 0, 0, 255, 255
m0XXX0XXX0XXX0XXX: .byte 255, 255, 255, 0, 255, 255, 255, 0
.byte 255, 255, 255, 0, 255, 255, 255, 0
m0XXX0XXX00000000: .byte 255, 255, 255, 0, 255, 255, 255, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0
m0XXX000000000000: .byte 255, 255, 255, 0, 0, 0, 0, 0
.byte 0, 0, 0, 0, 0, 0, 0, 0
/* 0xff in the alpha byte (byte 3) of every pixel; ORed into the source
 * to force modified alpha opaque in the *_to_rgba blends below */
mX000X000X000X000: .byte 0, 0, 0, 255, 0, 0, 0, 255
.byte 0, 0, 0, 255, 0, 0, 0, 255
mX000X00000000000: .byte 0, 0, 0, 255, 0, 0, 0, 255
.byte 0, 0, 0, 255, 0, 0, 0, 255
mX000000000000000: .byte 0, 0, 0, 255, 0, 0, 0, 255
.byte 0, 0, 0, 255, 0, 0, 0, 255
m1000100010001000: .byte 0, 0, 0, 1, 0, 0, 0, 1
.byte 0, 0, 0, 1, 0, 0, 0, 1
m000V0V0V000V0V0V: .byte 127, 0, 127, 0, 127, 0, 0, 0
.byte 127, 0, 127, 0, 127, 0, 0, 0
mI0000000I0000000: .byte 0, 0, 0, 0, 0, 0, 0, 64
.byte 0, 0, 0, 0, 0, 0, 0, 64
m0VVV0VVV0VVV0VVV: .byte 127, 127, 127, 0, 127, 127, 127, 0
.byte 127, 127, 127, 0, 127, 127, 127, 0
/* eight words of 1: rounding term added before the pmulhw blend */
c1: .word 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1
.text
.align 16
/*
 * Exported entry points (FN_/PR_/SIZE wrappers come from asm.h and handle
 * symbol visibility / platform name mangling).  All share the argument
 * layout documented on the ENTER macro below; "_cmod" means every source
 * byte is remapped through a caller-supplied colour-modifier lookup table
 * before the blend/copy operation proper.
 */
FN_(imlib_amd64_blend_rgba_to_rgb_cmod)
FN_(imlib_amd64_blend_rgba_to_rgba_cmod)
FN_(imlib_amd64_blend_rgb_to_rgba_cmod)
FN_(imlib_amd64_blend_rgb_to_rgb_cmod)
FN_(imlib_amd64_copy_rgba_to_rgb_cmod)
FN_(imlib_amd64_copy_rgba_to_rgba_cmod)
FN_(imlib_amd64_copy_rgb_to_rgba_cmod)
FN_(imlib_amd64_add_blend_rgba_to_rgb_cmod)
FN_(imlib_amd64_add_blend_rgba_to_rgba_cmod)
FN_(imlib_amd64_add_blend_rgb_to_rgba_cmod)
FN_(imlib_amd64_add_blend_rgb_to_rgb_cmod)
FN_(imlib_amd64_add_copy_rgba_to_rgb_cmod)
FN_(imlib_amd64_add_copy_rgba_to_rgba_cmod)
FN_(imlib_amd64_add_copy_rgb_to_rgba_cmod)
FN_(imlib_amd64_subtract_blend_rgba_to_rgb_cmod)
FN_(imlib_amd64_subtract_blend_rgba_to_rgba_cmod)
FN_(imlib_amd64_subtract_blend_rgb_to_rgba_cmod)
FN_(imlib_amd64_subtract_blend_rgb_to_rgb_cmod)
FN_(imlib_amd64_subtract_copy_rgba_to_rgb_cmod)
FN_(imlib_amd64_subtract_copy_rgba_to_rgba_cmod)
FN_(imlib_amd64_subtract_copy_rgb_to_rgba_cmod)
FN_(imlib_amd64_reshade_blend_rgba_to_rgb_cmod)
FN_(imlib_amd64_reshade_blend_rgba_to_rgba_cmod)
FN_(imlib_amd64_reshade_blend_rgb_to_rgba_cmod)
FN_(imlib_amd64_reshade_blend_rgb_to_rgb_cmod)
FN_(imlib_amd64_reshade_copy_rgba_to_rgb_cmod)
FN_(imlib_amd64_reshade_copy_rgba_to_rgba_cmod)
FN_(imlib_amd64_reshade_copy_rgb_to_rgba_cmod)
/*
 * External alpha-combination table.  The *_to_rgba routines index it with
 * (cmod'ed source alpha << 8) | destination alpha (see the movb (%r13,%rdx)
 * lookups below); presumably a 256x256 byte table -- confirm against its
 * C definition.
 */
.extern pow_lut
/*
 * Common prologue shared by all routines in this file (SysV AMD64 ABI).
 * Incoming C arguments and where they end up:
 *   rdi = source pixel pointer      -> rsi
 *   rsi = source per-row advance    -> r10   (pixels, applied *4 at label 3)
 *   rdx = destination pixel pointer -> rdi
 *   rcx = destination per-row advance -> r11
 *   r8  = pixels per row (width)    -- unchanged
 *   r9  = row count (height)        -- unchanged
 *   16(%rbp) = 7th argument: colour-modifier table base -> r14
 *              (sub-tables used below at +0x000/+0x100/+0x200/+0x300)
 * Saves the callee-saved registers the bodies use (rbx, r13, r14) and
 * jumps forward to label 9 (the LEAVE epilogue) when either the width
 * or the height is zero, so the loop bodies never see an empty run.
 */
#define ENTER \
pushq %rbp ; \
movq %rsp, %rbp ; \
pushq %rbx ; \
pushq %r13 ; \
pushq %r14 ; \
movq %rsi, %r10 ; \
movq %rcx, %r11 ; \
movq %rdi, %rsi ; \
movq %rdx, %rdi ; \
movq 16(%rbp), %r14 ; \
; \
; \
testq %r8, %r8 ; \
jz 9f ; \
testq %r9, %r9 ; \
jz 9f
/*
 * Common epilogue: restore the callee-saved registers pushed by ENTER
 * (reverse order), tear down the frame and return.
 */
#define LEAVE \
popq %r14 ; \
popq %r13 ; \
popq %rbx ; \
movq %rbp, %rsp ; \
popq %rbp ; \
ret
/*
 * imlib_amd64_blend_rgba_to_rgb_cmod
 *
 * Alpha-blends RGBA source pixels onto an RGB destination, after pushing
 * every source byte through the colour-modifier tables at r14 (alpha via
 * +0x300, the three colour channels via +0x000/+0x100/+0x200 -- by the
 * byte positions used these appear to be the r/g/b/a sub-tables; confirm
 * against the table layout in the C code).  The destination alpha word is
 * masked out of the blend factor (xmm6), so only colour channels change.
 *
 * Register roles inside the loops (see ENTER for the argument mapping):
 *   rsi/rdi = src/dst cursors, biased to the row end so (reg, rcx, 4)
 *             walks the row as rcx counts up from -w+1 towards zero
 *   rax/rbx/rdx = scratch for the per-byte table lookups
 *   xmm4 = 0 (unpack partner), xmm5 = 0x0001 x8 (rounding),
 *   xmm6 = m00XXXXXX word mask
 *
 * Inner loop is unrolled 8x, two pixels per step (16 pixels/iteration).
 * After each step: jz 2f = exactly one pixel left (odd width tail),
 * jns 3f = row complete.  Label 2 does the single-pixel tail, label 3
 * advances both pointers by the per-row strides and loops over rows.
 *
 * Per pixel pair the SIMD blend computes (16-bit words):
 *   dst += ((src - dst) * 2 + 1) * (alpha >> 1) >> 16
 * i.e. approximately dst + (src - dst) * alpha / 255 with rounding.
 */
PR_(imlib_amd64_blend_rgba_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
movdqa m00XXXXXX(%rip), %xmm6
/* bias cursors to the last pixel of the row; rcx will run negative */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
/* width == 1: go straight to the single-pixel tail */
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/*
 * Load two source pixels into rax; rotate each byte into al in turn and
 * replace it with its colour-modifier table entry, accumulating the two
 * modified pixels in rdx (alpha byte first, then b, g, r per pixel).
 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
/*
 * SIMD blend: xmm1 = cmod(src), xmm2 = dst, xmm3 = per-pixel alpha
 * broadcast to every word (then halved and masked so the alpha word of
 * the destination is left untouched).
 */
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 2/8 -- identical to the pixel-pair block above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 3/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 4/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 5/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 6/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 7/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 8/8 -- loops back to 1 while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* single trailing pixel (odd width): 32-bit variant of the same lookup
 * and blend sequence */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both cursors by their per-row strides; next row if any left */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_blend_rgba_to_rgb_cmod)
/*
 * imlib_amd64_blend_rgba_to_rgba_cmod
 *
 * Same outer structure as blend_rgba_to_rgb_cmod above (row loop at 0,
 * 8x-unrolled two-pixel loop at 1, single-pixel tail at 2, row advance
 * at 3), but the destination alpha channel also participates:
 *   - the per-pixel blend factor is fetched from the external pow_lut
 *     table (r13, via GOT), indexed with
 *     (cmod'ed source alpha << 8) | current destination alpha;
 *   - the modified source alpha byte is forced to 0xff before the blend
 *     (por with mX000X000X000X000), so destination alpha is pulled
 *     towards opaque by the same pmulhw interpolation.
 * NOTE(review): pow_lut is defined elsewhere -- confirm the index order
 * against its C definition.
 *
 * Register roles: as in the routine above, plus
 *   r13 = pow_lut base, rax = assembled pair of blend-factor bytes.
 */
PR_(imlib_amd64_blend_rgba_to_rgba_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
/* rax must start clear: blend factors are merged into it bytewise */
xorq %rax, %rax
movdqa mX000X000X000X000(%rip), %xmm6
movq pow_lut@GOTPCREL(%rip), %r13
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* two source pixels through the colour-modifier tables -> rdx
 * (same byte-rotate-and-lookup sequence as in the routine above) */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
/* per-pixel blend factors: index pow_lut with
 * (cmod'ed src alpha << 8) | dst alpha, for each of the two pixels */
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
/* force source alpha to 0xff so dst alpha blends towards opaque */
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 2/8 -- identical to the pixel-pair block above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 3/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 4/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 5/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 6/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 7/8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* unrolled copy 8/8 -- loops back to 1 while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* single trailing pixel (odd width): 32-bit variant, one pow_lut lookup */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both cursors by their per-row strides; next row if any left */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_blend_rgba_to_rgba_cmod)
PR_(imlib_amd64_blend_rgb_to_rgba_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
xorq %rax, %rax
movdqa mX000X000X000X000(%rip), %xmm6
movq pow_lut@GOTPCREL(%rip), %r13
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
por %xmm6, %xmm1
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_blend_rgb_to_rgba_cmod)
/*
 * imlib_amd64_blend_rgb_to_rgb_cmod
 *
 * Alpha-blend an RGB source image onto an RGB destination while passing
 * every channel through a per-channel color-modifier lookup table.
 * The source carries no alpha, so the effective source alpha is taken
 * as 0xFF and remapped through the alpha table before it is used as
 * the blend factor.
 *
 * Register contract (established by the ENTER macro / caller — matches
 * the sibling routines in this file; confirm against the ENTER macro):
 *   %rsi = source pixels (32-bit ARGB words)
 *   %rdi = destination pixels
 *   %r8  = pixels per row (width)
 *   %r9  = number of rows (height)
 *   %r10 = source row advance, in pixels  -- presumably stride-width; verify in caller
 *   %r11 = destination row advance, in pixels
 *   %r14 = color-modifier tables: four 256-byte tables at offsets
 *          0x000 (R), 0x100 (G), 0x200 (B), 0x300 (A)
 *   %xmm4 = all-zero (byte<->word unpack helper)
 *   %xmm5 = c1: eight words of 1 (rounding bias for the pmulhw trick)
 *   %xmm6 = m00XXXXXX mask (keeps the six color-channel words, clears
 *           the alpha words of the multiplier)
 *
 * Inner loop is unrolled 8x, two pixels per step.  %rcx counts pixels
 * negatively: it starts at 1-width and climbs toward 0, so after the
 * two incq's, %rcx == 0 means exactly one pixel remains (tail at 2:)
 * and %rcx > 0 means the row is complete (jump to 3:).
 *
 * Per pixel-pair: the scalar section remaps A,R,G,B of two source
 * pixels through the %r14 tables, accumulating the 8 remapped bytes
 * into %rdx via shlq/movb; the SSE2 section then computes
 *   dst += ((src - dst) * alpha)  (fixed-point, via pmulhw with the
 * 2*(diff)+1 and alpha/2 pre-scaling idiom).
 */
PR_(imlib_amd64_blend_rgb_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
movdqa m00XXXXXX(%rip), %xmm6
/* point %rsi/%rdi at the last pixel of the row; index from -width */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
/* per-row loop: reset the pixel counter and prime the caches */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: remap two pixels through the cmod tables into %rdx */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
/* SIMD blend: xmm3 = per-channel alpha/2, dst += (2*(src-dst)+1)*alpha/2 >> 16 */
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 (identical to pair 1) */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8: loop back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* odd trailing pixel: same remap + blend on a single 32-bit pixel */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm2, %xmm1
psllw $1, %xmm1
paddw %xmm5, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both pointers to the next row; loop until all rows done */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_blend_rgb_to_rgb_cmod)
/*
 * imlib_amd64_copy_rgba_to_rgb_cmod
 *
 * Copy RGBA source pixels onto an RGB destination, remapping R, G and B
 * through the color-modifier tables while preserving the destination's
 * existing alpha byte (no blending).  Pure scalar, two pixels per step.
 *
 * Register contract (same scheme as the sibling routines; confirm
 * against the ENTER macro): %rsi = src, %rdi = dst, %r8 = width,
 * %r9 = height, %r10/%r11 = per-row pointer advances in pixels,
 * %r14 = cmod tables (0x000 R, 0x100 G, 0x200 B, 0x300 A).
 * %r13 is loaded with mX000X000X000X000 = 0xFF000000FF000000, the mask
 * that keeps the destination alpha bytes of two packed pixels.
 *
 * Loop counter: %rcx runs from 1-width toward 0; after the two incq's,
 * %rcx == 0 means one pixel remains (tail at 2:), %rcx > 0 means done.
 */
PR_(imlib_amd64_copy_rgba_to_rgb_cmod):
ENTER
movq mX000X000X000X000(%rip), %r13
/* point %rsi/%rdi at the last pixel of the row; index from -width */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: remap R/G/B of both pixels into %rdx (alpha slots
 * left zero; shlq $16 skips over the alpha byte of the second pixel),
 * then merge with the destination's preserved alpha bytes */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 (identical to pair 1) */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8: loop back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $16, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq (%rdi, %rcx, 4), %rax
andq %r13, %rax
orq %rax, %rdx
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* odd trailing pixel: 32-bit variant of the same remap + alpha-preserve
 * (movl zero-extends, so the 64-bit mask in %r13 keeps only dst alpha) */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movzbq %al, %rbx
movzbq 0x000(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movl (%rdi, %rcx, 4), %eax
andq %r13, %rax
orq %rax, %rdx
movl %edx, (%rdi, %rcx, 4)
3:
/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_copy_rgba_to_rgb_cmod)
/*
 * imlib_amd64_copy_rgba_to_rgba_cmod
 *
 * Copy RGBA source pixels to an RGBA destination, remapping all four
 * channels (A, R, G, B) through the color-modifier tables.  The
 * destination is overwritten outright — no blending, no masking.
 * Pure scalar, two pixels per step, 8x unrolled.
 *
 * Register contract (same scheme as the sibling routines; confirm
 * against the ENTER macro): %rsi = src, %rdi = dst, %r8 = width,
 * %r9 = height, %r10/%r11 = per-row pointer advances in pixels,
 * %r14 = cmod tables (0x000 R, 0x100 G, 0x200 B, 0x300 A).
 *
 * Per pair: rorq $56 brings the first pixel's alpha byte into %al,
 * then eight rolq $8 / table-lookup steps rebuild both remapped
 * pixels, accumulating bytes into %rdx via shlq $8 / movb.
 * Loop counter: %rcx runs from 1-width toward 0; after the two incq's,
 * %rcx == 0 means one pixel remains (tail at 2:), %rcx > 0 means done.
 */
PR_(imlib_amd64_copy_rgba_to_rgba_cmod):
ENTER
/* point %rsi/%rdi at the last pixel of the row; index from -width */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: remap A,R,G,B of both pixels and store */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 (identical to pair 1) */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8: loop back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* odd trailing pixel: 32-bit variant (ror $24 brings alpha into %al) */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movl %edx, (%rdi, %rcx, 4)
3:
/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_copy_rgba_to_rgba_cmod)
/*
 * imlib_amd64_copy_rgb_to_rgba_cmod
 *
 * Copy RGB source pixels to an RGBA destination, remapping channels
 * through the color-modifier tables.  The source has no alpha, so the
 * written alpha is the alpha table's remapping of 0xFF (the constant
 * 0x000000FF loaded into %rbx before each 0x300-table lookup).
 * The destination is overwritten outright — no blending.
 *
 * Register contract (same scheme as the sibling routines; confirm
 * against the ENTER macro): %rsi = src, %rdi = dst, %r8 = width,
 * %r9 = height, %r10/%r11 = per-row pointer advances in pixels,
 * %r14 = cmod tables (0x000 R, 0x100 G, 0x200 B, 0x300 A).
 *
 * Loop counter: %rcx runs from 1-width toward 0; after the two incq's,
 * %rcx == 0 means one pixel remains (tail at 2:), %rcx > 0 means done.
 */
PR_(imlib_amd64_copy_rgb_to_rgba_cmod):
ENTER
/* point %rsi/%rdi at the last pixel of the row; index from -width */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: alpha := table_A[0xFF]; remap R/G/B of both
 * pixels through the tables; store both rebuilt pixels at once */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 (identical to pair 1) */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8: loop back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movq %rdx, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
/* odd trailing pixel: 32-bit variant of the same remap + store */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movl %edx, (%rdi, %rcx, 4)
3:
/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_copy_rgb_to_rgba_cmod)
PR_(imlib_amd64_add_blend_rgba_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa m00XXXXXX(%rip), %xmm6
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_blend_rgba_to_rgb_cmod)
/*
 * imlib_amd64_add_blend_rgba_to_rgba_cmod
 *
 * Additive blend of RGBA source onto RGBA destination, with every source
 * byte first remapped through a color-modifier lookup table.
 *
 * Register roles (inferred from the code below; the exact caller contract
 * lives elsewhere in this file — TODO confirm against the callers):
 *   %rsi  src pixel base          %rdi  dst pixel base
 *   %r8   row width in pixels     %r9   number of rows
 *   %r10  src row advance (px)    %r11  dst row advance (px)
 *   %r14  cmod LUT base: 256-byte tables at +0x000/+0x100/+0x200 appear to
 *         be the per-channel maps and +0x300 the alpha map (indexed by the
 *         corresponding source byte) — TODO confirm table layout
 *   %r13  pow_lut base; indexed by (src_alpha << 8) | dst_alpha below,
 *         presumably a 256x256 alpha-combining table — verify upstream
 *   %rcx  negative pixel counter (counts up toward zero)
 * The inner loop is unrolled 8x, two pixels (one qword) per iteration.
 */
PR_(imlib_amd64_add_blend_rgba_to_rgba_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
xorq %rax, %rax
movdqa mX000X000X000X000(%rip), %xmm6
movq pow_lut@GOTPCREL(%rip), %r13
/* point src/dst at the ends of their rows and run %rcx from -w+1 up */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: load two ARGB pixels, then rebuild them in %rdx one
 * byte at a time, pushing each source byte through its cmod table while
 * rotating the source qword in %rax */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
/* for each pixel build index (src_alpha<<8)|dst_alpha and fetch the
 * combined alpha factor byte from pow_lut into %rax's pixel slots */
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
/* broadcast the per-pixel factor across each pixel's four word lanes */
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
/* alpha lanes: src alpha forced to 255, dst alpha subtracted (saturating);
 * then widen to words, scale by the factor and saturate-add onto dst */
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8 — identical to pair 1, but loops back while negative */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
/* tail: row has one trailing pixel left — same computation, 32-bit wide */
2:
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
/* advance both pointers by one row stride and loop over remaining rows */
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_blend_rgba_to_rgba_cmod)
/*
 * imlib_amd64_add_blend_rgb_to_rgba_cmod
 *
 * Additive blend of RGB source (alpha ignored) onto RGBA destination, with
 * every source byte first remapped through a color-modifier lookup table.
 * Differs from the rgba variant only in that the source alpha is replaced
 * by cmod_alpha[0xFF]: the alpha LUT at %r14+0x300 is indexed with the
 * constant 0xFF instead of the source byte, and the source qword is
 * rotated with $48/$16 to skip the alpha bytes.
 *
 * Register roles (inferred from the code below — TODO confirm vs callers):
 *   %rsi src, %rdi dst, %r8 width, %r9 rows, %r10/%r11 row advances,
 *   %r14 cmod LUT base (+0x000/+0x100/+0x200 channel maps, +0x300 alpha),
 *   %r13 pow_lut base, %rcx negative pixel counter.
 * Inner loop unrolled 8x, two pixels (one qword) per iteration.
 */
PR_(imlib_amd64_add_blend_rgb_to_rgba_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
xorq %rax, %rax
movdqa mX000X000X000X000(%rip), %xmm6
movq pow_lut@GOTPCREL(%rip), %r13
/* point src/dst at the ends of their rows and run %rcx from -w+1 up */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixel pair 1 of 8: rebuild both pixels in %rdx through the cmod LUTs;
 * alpha comes from the table indexed with the forced 0xFF, not from src */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
/* fetch the combined alpha factor from pow_lut per pixel:
 * index = (cmod_alpha<<8) | dst_alpha */
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
/* broadcast factor; force src alpha=255, subtract dst alpha (saturating),
 * widen, scale, saturate-add onto dst */
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 2 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 3 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 4 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 5 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 6 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 7 of 8 — identical to pair 1 above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixel pair 8 of 8 — identical to pair 1, but loops back while negative */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
/* tail: row has one trailing pixel left — same computation, 32-bit wide */
2:
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
por %xmm6, %xmm1
pand %xmm6, %xmm0
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
/* advance both pointers by one row stride and loop over remaining rows */
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_blend_rgb_to_rgba_cmod)
PR_(imlib_amd64_add_blend_rgb_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4
movdqa m00XXXXXX(%rip), %xmm6
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_blend_rgb_to_rgb_cmod)
/*-----------------------------------------------------------------------
 * imlib_amd64_add_copy_rgba_to_rgb_cmod
 *
 * Saturating per-byte add ("add copy") of an RGBA source onto an RGB
 * destination, with a per-channel color-modifier table applied to each
 * source channel first.  Processes two 32-bit pixels per step; the
 * inner loop is unrolled 8x (16 pixels per pass).
 *
 * Register roles (assumed from the surrounding file / ENTER macro in
 * asm.h -- TODO confirm):
 *   rsi = src row, rdi = dst row (both biased to row end below)
 *   r8  = row width in pixels, r9 = number of rows
 *   r10 = src row advance, r11 = dst row advance (in pixels)
 *   r14 = color-modifier tables, 256 bytes each: presumably
 *         +0x000 R, +0x100 G, +0x200 B, +0x300 A -- verify vs caller
 *   xmm5 = byte mask keeping RGB, clearing alpha (m0XXX0XXX0XXX0XXX)
 *---------------------------------------------------------------------*/
PR_(imlib_amd64_add_copy_rgba_to_rgb_cmod):
ENTER
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* RGB-only byte mask */
/* Bias pointers to the end of the row and run rcx from -(width-1) up
 * toward zero, so the index itself drives the loop-exit flags. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: only the single-pixel tail */
1:	/* unrolled body: 8 iterations, 2 pixels each */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* --- pixel pair 1 ---
 * Load two ARGB pixels into rax, then rebuild them in rdx one byte at
 * a time, mapping each channel through its modifier table.  rorq $56
 * exposes the first table index in al; each rolq $8 exposes the next
 * channel byte. */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* A via alpha table */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl	/* second pixel: A */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
movd %rdx, %xmm1	/* xmm1 = two cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = two dst pixels */
pand %xmm5, %xmm1	/* clear src alpha: dst alpha is preserved */
paddusb %xmm1, %xmm2	/* per-byte saturating add */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f	/* exactly one pixel remains: tail */
jns 3f	/* row complete */
/* --- pixel pair 2 (identical to pair 1) --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 3 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 4 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 5 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 6 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 7 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 8: loops back while pixels remain --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs in this row */
jnz 3f	/* row done, no odd pixel */
2:	/* single trailing pixel (32-bit path) */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax	/* al = alpha index first, as above */
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm1	/* keep dst alpha */
paddusb %xmm1, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* step both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_copy_rgba_to_rgb_cmod)
/*-----------------------------------------------------------------------
 * imlib_amd64_add_copy_rgba_to_rgba_cmod
 *
 * Same as the rgba_to_rgb variant above, but for an RGBA destination:
 * the DESTINATION's alpha bytes are cleared before the saturating add,
 * so the result's alpha becomes the cmod'ed source alpha while RGB is
 * the saturated sum of cmod'ed source and destination.
 * Two pixels per step, inner loop unrolled 8x.
 *
 * Register roles (assumed -- see ENTER macro in asm.h, TODO confirm):
 *   rsi = src row, rdi = dst row; r8 = width; r9 = rows;
 *   r10/r11 = src/dst row advance in pixels;
 *   r14 = cmod tables: +0x000 R, +0x100 G, +0x200 B, +0x300 A (presumed)
 *   xmm5 = byte mask keeping RGB, clearing alpha
 *---------------------------------------------------------------------*/
PR_(imlib_amd64_add_copy_rgba_to_rgba_cmod):
ENTER
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* RGB-only byte mask */
/* Bias pointers to row end; rcx counts up from a negative index. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: tail only */
1:	/* unrolled body: 8 iterations, 2 pixels each */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* --- pixel pair 1: remap A,R,G,B of both pixels through the cmod
 * tables, assembling the result byte-by-byte in rdx --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* A */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl	/* second pixel: A */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
movd %rdx, %xmm1	/* xmm1 = two cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = two dst pixels */
pand %xmm5, %xmm2	/* clear DST alpha: result alpha = src alpha */
paddusb %xmm1, %xmm2	/* per-byte saturating add */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 2 (identical to pair 1) --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 3 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 4 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 5 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 6 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 7 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 8: loops back while pixels remain --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs in this row */
jnz 3f	/* row done, no odd pixel */
2:	/* single trailing pixel (32-bit path) */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2	/* clear dst alpha: take src alpha */
paddusb %xmm1, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance to next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_copy_rgba_to_rgba_cmod)
/*-----------------------------------------------------------------------
 * imlib_amd64_add_copy_rgb_to_rgba_cmod
 *
 * Add-copy of an RGB source (alpha ignored) onto an RGBA destination.
 * The source alpha is synthesized as cmod(0xFF): a fixed 0xFF index is
 * fed into the alpha modifier table instead of the source byte, so the
 * rorq/rolq walk skips the source's alpha bytes (rorq $48 / rolq $16).
 * Destination alpha is cleared first, making the result alpha exactly
 * cmod(0xFF).  Two pixels per step, unrolled 8x.
 *
 * Register roles (assumed -- see ENTER macro in asm.h, TODO confirm):
 *   rsi = src row, rdi = dst row; r8 = width; r9 = rows;
 *   r10/r11 = src/dst row advance in pixels;
 *   r14 = cmod tables: +0x000 R, +0x100 G, +0x200 B, +0x300 A (presumed)
 *   xmm5 = byte mask keeping RGB, clearing alpha
 *---------------------------------------------------------------------*/
PR_(imlib_amd64_add_copy_rgb_to_rgba_cmod):
ENTER
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* RGB-only byte mask */
/* Bias pointers to row end; rcx counts up from a negative index. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: tail only */
1:	/* unrolled body: 8 iterations, 2 pixels each */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* --- pixel pair 1 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax	/* al = first pixel's R; src alpha bytes skipped */
movq $0x000000FF, %rbx	/* constant index 0xFF for alpha */
movzbq 0x300(%r14, %rbx), %rdx	/* A = cmod(0xFF) */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
shlq $8, %rdx
movl $0x000000FF, %ebx	/* same 0xFF reload (movl zero-extends) */
movb 0x300(%r14, %rbx), %dl	/* second pixel: A = cmod(0xFF) */
shlq $8, %rdx
rolq $16, %rax	/* skip second pixel's stored alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
movd %rdx, %xmm1	/* xmm1 = two cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = two dst pixels */
pand %xmm5, %xmm2	/* clear dst alpha: result alpha = cmod(0xFF) */
paddusb %xmm1, %xmm2	/* per-byte saturating add */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 2 (identical to pair 1) --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 3 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 4 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 5 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 6 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 7 --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 8: loops back while pixels remain --- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2
paddusb %xmm1, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs in this row */
jnz 3f	/* row done, no odd pixel */
2:	/* single trailing pixel (32-bit path) */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax	/* al = R; stored alpha byte skipped */
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* A = cmod(0xFF) */
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
pand %xmm5, %xmm2	/* clear dst alpha */
paddusb %xmm1, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance to next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_add_copy_rgb_to_rgba_cmod)
/*-----------------------------------------------------------------------
 * imlib_amd64_subtract_blend_rgba_to_rgb_cmod
 *
 * Subtractive alpha blend of an RGBA source onto an RGB destination,
 * with a per-channel color-modifier table applied to the source first:
 * roughly dst = dst - src * alpha (per channel, signed-saturated),
 * where alpha is the cmod'ed source alpha.  The multiply is done in
 * 16-bit words as (src*2) mulhi (alpha/2), which approximates
 * src*alpha/256 without overflowing pmulhw's signed range.
 * Two pixels per step, inner loop unrolled 8x.
 *
 * Register roles (assumed -- see ENTER macro in asm.h, TODO confirm):
 *   rsi = src row, rdi = dst row; r8 = width; r9 = rows;
 *   r10/r11 = src/dst row advance in pixels;
 *   r14 = cmod tables: +0x000 R, +0x100 G, +0x200 B, +0x300 A (presumed)
 *   xmm4 = zero (for byte->word unpack); xmm6 = m00XXXXXX word mask
 *          that zeroes the alpha word so dst alpha is left untouched
 *---------------------------------------------------------------------*/
PR_(imlib_amd64_subtract_blend_rgba_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4	/* constant zero for unpacks */
movdqa m00XXXXXX(%rip), %xmm6	/* mask clearing the alpha word */
/* Bias pointers to row end; rcx counts up from a negative index. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: tail only */
1:	/* unrolled body: 8 iterations, 2 pixels each */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* --- pixel pair 1: remap A,R,G,B of both pixels through the cmod
 * tables, assembling the result byte-by-byte in rdx --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* A */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl	/* second pixel: A */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* R */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* G */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* B */
movd %rdx, %xmm1	/* xmm1 = two cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = two dst pixels */
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3	/* duplicate each byte into a word */
pshufhw $0xFF, %xmm3, %xmm3	/* broadcast alpha word, high pixel */
pshuflw $0xFF, %xmm3, %xmm3	/* broadcast alpha word, low pixel */
psrlw $1, %xmm3	/* alpha/2: keep pmulhw operand positive */
pand %xmm6, %xmm3	/* zero alpha word: dst alpha unchanged */
punpcklbw %xmm4, %xmm1	/* src bytes -> words */
punpcklbw %xmm4, %xmm2	/* dst bytes -> words */
psllw $1, %xmm1	/* src*2 to compensate the alpha/2 */
pmulhw %xmm3, %xmm1	/* (src*2 * alpha/2) >> 16 ~ src*a/256 */
psubsw %xmm1, %xmm2	/* dst -= src*alpha, signed saturate */
packuswb %xmm4, %xmm2	/* words -> unsigned-saturated bytes */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 2 (identical to pair 1) --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 3 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 4 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 5 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 6 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 7 --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* --- pixel pair 8: loops back while pixels remain --- */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs in this row */
jnz 3f	/* row done, no odd pixel */
2:	/* single trailing pixel (32-bit path) */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance to next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_subtract_blend_rgba_to_rgb_cmod)
PR_(imlib_amd64_subtract_blend_rgba_to_rgba_cmod):
ENTER
/*
 * Subtractive blend of an RGBA source onto an RGBA destination, with a
 * per-channel color modifier applied to the source first.
 *
 * Register roles as visible in this routine (the prologue ENTER macro is
 * defined elsewhere -- NOTE(review): confirm these against the shared
 * macros/caller):
 *   %rsi  source row of 32-bit ARGB pixels
 *   %rdi  destination row (read-modify-write)
 *   %r8   pixels per row (width)        %r9   row count (height)
 *   %r10  src per-row advance (pixels)  %r11  dst per-row advance (pixels)
 *   %r14  color-modifier tables: four 256-byte LUTs at offsets
 *         0x000 (R), 0x100 (G), 0x200 (B), 0x300 (A) -- presumably set
 *         up by ENTER or the caller; not loaded in this block
 *   %r13  pow_lut, indexed by (src_alpha << 8 | dst_alpha)
 *
 * Structure: label 0 = per-row loop; label 1 = main loop processing two
 * pixels per block, unrolled 8x (up to 16 pixels per pass); label 2 =
 * single odd trailing pixel; label 3 = advance to the next row.
 * Pixels are walked with a negative index in %rcx counting up to 0.
 */
movq pow_lut@GOTPCREL(%rip), %r13
pxor %xmm4, %xmm4                         /* xmm4 = 0, for byte<->word unpacking */
movdqa c1(%rip), %xmm5                    /* loaded but not referenced below */
movdqa mX000X000X000X000(%rip), %xmm6     /* byte mask selecting each pixel's alpha byte */
movdqa mX000X000(%rip), %xmm7             /* word mask selecting each pixel's alpha word */
xorq %rax, %rax
/* point rsi/rdi at the last pixel of each row and index backwards */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f                                     /* width == 1: only the tail pixel */
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixels 1-2: load two ARGB pixels; rotate each byte into %al and push
 * it through its color-modifier LUT (A=0x300, R=0x000, G=0x100,
 * B=0x200), accumulating the two modified pixels in %rdx */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax                            /* bring the high-order alpha byte into %al */
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx            /* A' = amod[a], second pixel */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl               /* R' = rmod[r] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl               /* G' = gmod[g] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl               /* B' = bmod[b] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl               /* same four lookups for the first pixel */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1                          /* xmm1 = modified src pixel pair */
movq (%rdi, %rcx, 4), %xmm2               /* xmm2 = dst pixel pair */
/* per-pixel blend factors: for each pixel look up
 * pow_lut[src_alpha << 8 | dst_alpha] and keep (src_alpha, factor)
 * as a 16-bit pair in %rax */
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx                            /* %dh = src alpha of first pixel */
movb 3(%rdi, %rcx, 4), %dl                /* %dl = dst alpha */
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx                            /* %dh = src alpha of second pixel */
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
/* broadcast: shuffle 0x40 puts the pow_lut factor in the three color
 * lanes and the raw src alpha in the alpha lane of each pixel */
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3                           /* halve so pmulhw below stays in range */
/* alpha lane of the source becomes (255 - dst_alpha); the product's
 * sign is flipped via xmm7 so psubsw *adds* to the dst alpha */
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1                       /* per word: ~ value * factor / 256 */
pxor %xmm7, %xmm1                         /* complement alpha word -> effectively negate */
psubsw %xmm1, %xmm2                       /* dst -= weighted src (signed saturate) */
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f                                     /* exactly one pixel left -> tail */
jns 3f                                    /* row complete -> next row */
/* pixels 3-4: unrolled repeat of the two-pixel block above */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 5-6: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 7-8: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 9-10: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 11-12: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 13-14: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 15-16: last unrolled copy; loops back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b                                     /* more pixel pairs in this row */
jnz 3f                                    /* row complete (even width) */
2:
/* tail: same blend for the single remaining pixel, 32 bits at a time */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx                    /* %dh = modified src alpha */
movb 3(%rdi, %rcx, 4), %dl                /* %dl = dst alpha -> pow_lut index */
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both rows by their per-row pixel strides */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_subtract_blend_rgba_to_rgba_cmod)
PR_(imlib_amd64_subtract_blend_rgb_to_rgba_cmod):
ENTER
/*
 * Subtractive blend of an RGB source (alpha ignored, treated as opaque)
 * onto an RGBA destination, with the color modifier applied to the
 * source first.  Same register roles and loop structure as the
 * rgba_to_rgba variant above; the only differences are in how the
 * source pixels are picked apart:
 *   - the source alpha bytes are skipped (rorq $48 / rolq $16 rotations)
 *   - the alpha fed to the 0x300 LUT is forced to 0xFF, so the
 *     effective source alpha is amod[255]
 * %r14 holds the 4x256-byte cmod LUTs, %r13 the pow_lut (indexed by
 * src_alpha<<8 | dst_alpha) -- both presumably set up by ENTER/caller.
 */
movq pow_lut@GOTPCREL(%rip), %r13
pxor %xmm4, %xmm4                         /* xmm4 = 0, for byte<->word unpacking */
movdqa c1(%rip), %xmm5                    /* loaded but not referenced below */
movdqa mX000X000X000X000(%rip), %xmm6     /* byte mask selecting each pixel's alpha byte */
movdqa mX000X000(%rip), %xmm7             /* word mask selecting each pixel's alpha word */
xorq %rax, %rax
/* point rsi/rdi at the last pixel of each row and index backwards */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f                                     /* width == 1: only the tail pixel */
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixels 1-2: build the cmod'ed pair in rdx; alpha comes from
 * amod[0xFF], color bytes from their respective LUTs */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx                    /* force opaque src alpha */
movzbq 0x300(%r14, %rbx), %rdx            /* A' = amod[255], second pixel */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl               /* R' = rmod[r] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl               /* G' = gmod[g] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl               /* B' = bmod[b] */
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl               /* first pixel, alpha again forced */
shlq $8, %rdx
rolq $16, %rax                            /* skip the unused src alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1                          /* xmm1 = modified src pixel pair */
movq (%rdi, %rcx, 4), %xmm2               /* xmm2 = dst pixel pair */
/* per-pixel factors: pow_lut[src_alpha<<8 | dst_alpha] paired with the
 * src alpha, exactly as in the rgba_to_rgba variant */
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
/* 0x40: factor into the color lanes, src alpha into the alpha lane */
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
/* alpha lane becomes (255 - dst_alpha); xmm7 flips the product's sign
 * so the final psubsw adds to dst alpha */
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2                       /* dst -= weighted src (signed saturate) */
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f                                     /* one pixel left -> tail */
jns 3f                                    /* row complete -> next row */
/* pixels 3-4: unrolled repeat of the two-pixel block above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 5-6: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 7-8: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 9-10: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 11-12: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 13-14: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 15-16: last unrolled copy; loops back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b                                     /* more pixel pairs in this row */
jnz 3f                                    /* row complete (even width) */
2:
/* tail: same blend for the single remaining pixel, 32 bits at a time */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx                    /* %dh = modified src alpha */
movb 3(%rdi, %rcx, 4), %dl                /* %dl = dst alpha -> pow_lut index */
movb (%r13, %rdx), %al
movb %dh, %ah
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
pxor %xmm7, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both rows by their per-row pixel strides */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_subtract_blend_rgb_to_rgba_cmod)
PR_(imlib_amd64_subtract_blend_rgb_to_rgb_cmod):
ENTER
/*
 * Subtractive blend of an RGB source onto an RGB destination, with the
 * color modifier applied to the source first.  No pow_lut here: the
 * blend factor is the color-modified source alpha itself (amod[255],
 * broadcast from the modified pixel's alpha lane via shuffle 0xFF), and
 * the destination alpha word is left untouched by masking the factor
 * with m00XXXXXX (zero in the alpha lane).
 * Same register roles and loop layout as the variants above:
 * %rsi src row, %rdi dst row, %r8 width, %r9 rows, %r10/%r11 row
 * advances, %r14 the 4x256-byte cmod LUTs (set up outside this block --
 * NOTE(review): confirm against ENTER/caller).
 */
pxor %xmm4, %xmm4                         /* xmm4 = 0, for byte<->word unpacking */
movdqa m00XXXXXX(%rip), %xmm6             /* word mask: color lanes on, alpha lane off */
/* point rsi/rdi at the last pixel of each row and index backwards */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f                                     /* width == 1: only the tail pixel */
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* pixels 1-2: build the cmod'ed pair in rdx; src alpha bytes are
 * skipped and the 0x300 lookup is fed a forced 0xFF */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx                    /* force opaque src alpha */
movzbq 0x300(%r14, %rbx), %rdx            /* A' = amod[255], second pixel */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl               /* R' = rmod[r] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl               /* G' = gmod[g] */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl               /* B' = bmod[b] */
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl               /* first pixel, alpha again forced */
shlq $8, %rdx
rolq $16, %rax                            /* skip the unused src alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1                          /* xmm1 = modified src pixel pair */
movq (%rdi, %rcx, 4), %xmm2               /* xmm2 = dst pixel pair */
/* broadcast each pixel's modified alpha to all of its word lanes,
 * halve it, and zero the alpha lane so dst alpha is preserved */
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1                       /* per word: ~ src * alpha / 256 */
psubsw %xmm1, %xmm2                       /* dst -= weighted src (signed saturate) */
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f                                     /* one pixel left -> tail */
jns 3f                                    /* row complete -> next row */
/* pixels 3-4: unrolled repeat of the two-pixel block above */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 5-6: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 7-8: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 9-10: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 11-12: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 13-14: unrolled repeat */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* pixels 15-16: last unrolled copy; loops back while pixels remain */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b                                     /* more pixel pairs in this row */
jnz 3f                                    /* row complete (even width) */
2:
/* tail: same blend for the single remaining pixel, 32 bits at a time */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm6, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psllw $1, %xmm1
pmulhw %xmm3, %xmm1
psubsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
/* advance both rows by their per-row pixel strides */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_subtract_blend_rgb_to_rgb_cmod)
/*
 * imlib_amd64_subtract_copy_rgba_to_rgb_cmod
 *
 * Per-pixel saturated subtract of a color-modified RGBA source row block
 * from an RGB destination: dst.rgb = sat(dst.rgb - cmod(src).rgb); the
 * destination alpha byte is left untouched (src alpha is masked off).
 *
 * Register convention (inferred from usage -- confirm against the C
 * prototypes / ENTER macro in asm.h):
 *   %rdi = dst pixels (ARGB32)     %rsi = src pixels (ARGB32)
 *   %r8  = width in pixels         %r9  = height in rows
 *   %r10 = src row advance (px)    %r11 = dst row advance (px)
 *   %r14 = cmod table base: +0x000 R, +0x100 G, +0x200 B, +0x300 A
 * Clobbers: rax, rbx, rcx, rdx, xmm1, xmm2, xmm5, flags.
 */

/*
 * One unrolled step: load two ARGB src pixels, run every channel byte
 * through its cmod table (building the remapped pair in %rdx byte by
 * byte via rotate/lookup/shift), then subtract from dst with unsigned
 * byte saturation.  Alpha bytes are cleared by the xmm5 mask so the
 * destination alpha survives.  Advances %rcx by 2 and leaves the flags
 * of the final incq for the caller's loop-control jumps.
 */
.macro subcpy_rgba2rgb_cmod_2px
	movq	(%rsi, %rcx, 4), %rax
	rorq	$56, %rax			/* pixel 0 alpha -> %al */
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx		/* a' = amod[a] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl		/* r' = rmod[r] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl		/* g' = gmod[g] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl		/* b' = bmod[b] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x300(%r14, %rbx), %dl		/* pixel 1: same dance */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%rdx, %xmm1			/* xmm1 = two cmod'ed src px */
	movq	(%rdi, %rcx, 4), %xmm2		/* xmm2 = two dst px */
	pand	%xmm5, %xmm1			/* drop src alpha bytes */
	psubusb	%xmm1, %xmm2			/* dst = sat(dst - src) */
	movq	%xmm2, (%rdi, %rcx, 4)
	incq	%rcx
	incq	%rcx
.endm

PR_(imlib_amd64_subtract_copy_rgba_to_rgb_cmod):
	ENTER
	/* mask with 0x00FFFFFF per pixel: clears alpha bytes */
	movdqa	m0XXX0XXX0XXX0XXX(%rip), %xmm5
	/* point at the row ends and count %rcx from -(width-1) up to 0 */
	leaq	(%rsi, %r8, 4), %rsi
	leaq	(%rdi, %r8, 4), %rdi
	subq	$4, %rsi
	subq	$4, %rdi
	negq	%r8
0:
	movq	%r8, %rcx
	incq	%rcx
	prefetchnta (%rsi, %rcx, 4)
	prefetcht0 (%rdi, %rcx, 4)
	prefetchnta 64(%rsi, %rcx, 4)
	prefetcht0 64(%rdi, %rcx, 4)
	jz	2f				/* width == 1: tail only */
1:
	prefetchnta 128(%rsi, %rcx, 4)
	prefetcht0 128(%rdi, %rcx, 4)
	/* 8-way unrolled: up to 16 pixels per pass of this loop body */
	subcpy_rgba2rgb_cmod_2px
	jz	2f				/* one pixel left -> tail */
	jns	3f				/* row done -> next row */
	.rept 6
	subcpy_rgba2rgb_cmod_2px
	jz	2f
	jns	3f
	.endr
	subcpy_rgba2rgb_cmod_2px
	js	1b				/* still pixels left */
	jnz	3f				/* row done exactly */
2:
	/* tail: final single pixel, 32-bit scalar variant of the step */
	movl	(%rsi, %rcx, 4), %eax
	ror	$24, %eax
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%edx, %xmm1
	movd	(%rdi, %rcx, 4), %xmm2
	pand	%xmm5, %xmm1
	psubusb	%xmm1, %xmm2
	movd	%xmm2, (%rdi, %rcx, 4)
3:
	/* advance both pointers by their per-row strides */
	leaq	(%rsi, %r10, 4), %rsi
	leaq	(%rdi, %r11, 4), %rdi
	decq	%r9
	jnz	0b
9:
	LEAVE
SIZE(imlib_amd64_subtract_copy_rgba_to_rgb_cmod)
.purgem subcpy_rgba2rgb_cmod_2px
/*
 * imlib_amd64_subtract_copy_rgba_to_rgba_cmod
 *
 * Per-pixel saturated subtract of a color-modified RGBA source from an
 * RGBA destination: dst.rgb = sat(dst.rgb - cmod(src).rgb), while the
 * destination alpha is REPLACED by the cmod'ed source alpha (copy-alpha
 * semantics of the *_copy_* variants).
 *
 * Register convention (inferred from usage -- confirm against the C
 * prototypes / ENTER macro in asm.h):
 *   %rdi = dst pixels (ARGB32)     %rsi = src pixels (ARGB32)
 *   %r8  = width in pixels         %r9  = height in rows
 *   %r10 = src row advance (px)    %r11 = dst row advance (px)
 *   %r14 = cmod table base: +0x000 R, +0x100 G, +0x200 B, +0x300 A
 * Clobbers: rax, rbx, rcx, rdx, xmm1, xmm2, xmm5, xmm6, flags.
 */

/*
 * One unrolled step: cmod-remap two src pixels into %rdx (byte-wise
 * table lookups), saturate-subtract from dst, then splice the src
 * alpha bytes (xmm6 mask) over the dst result's RGB (xmm5 mask).
 * Advances %rcx by 2; the final incq's flags drive the loop jumps.
 */
.macro subcpy_rgba2rgba_cmod_2px
	movq	(%rsi, %rcx, 4), %rax
	rorq	$56, %rax			/* pixel 0 alpha -> %al */
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx		/* a' = amod[a] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl		/* r' = rmod[r] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl		/* g' = gmod[g] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl		/* b' = bmod[b] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x300(%r14, %rbx), %dl		/* pixel 1: same dance */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%rdx, %xmm1			/* xmm1 = two cmod'ed src px */
	movq	(%rdi, %rcx, 4), %xmm2		/* xmm2 = two dst px */
	psubusb	%xmm1, %xmm2			/* sat(dst - src), all bytes */
	pand	%xmm6, %xmm1			/* keep src alpha bytes */
	pand	%xmm5, %xmm2			/* keep dst RGB result */
	por	%xmm1, %xmm2			/* merge: alpha = src alpha */
	movq	%xmm2, (%rdi, %rcx, 4)
	incq	%rcx
	incq	%rcx
.endm

PR_(imlib_amd64_subtract_copy_rgba_to_rgba_cmod):
	ENTER
	/* xmm5 keeps RGB bytes, xmm6 keeps alpha bytes */
	movdqa	m0XXX0XXX0XXX0XXX(%rip), %xmm5
	movdqa	mX000X000X000X000(%rip), %xmm6
	/* point at the row ends and count %rcx from -(width-1) up to 0 */
	leaq	(%rsi, %r8, 4), %rsi
	leaq	(%rdi, %r8, 4), %rdi
	subq	$4, %rsi
	subq	$4, %rdi
	negq	%r8
0:
	movq	%r8, %rcx
	incq	%rcx
	prefetchnta (%rsi, %rcx, 4)
	prefetcht0 (%rdi, %rcx, 4)
	prefetchnta 64(%rsi, %rcx, 4)
	prefetcht0 64(%rdi, %rcx, 4)
	jz	2f				/* width == 1: tail only */
1:
	prefetchnta 128(%rsi, %rcx, 4)
	prefetcht0 128(%rdi, %rcx, 4)
	/* 8-way unrolled: up to 16 pixels per pass of this loop body */
	subcpy_rgba2rgba_cmod_2px
	jz	2f				/* one pixel left -> tail */
	jns	3f				/* row done -> next row */
	.rept 6
	subcpy_rgba2rgba_cmod_2px
	jz	2f
	jns	3f
	.endr
	subcpy_rgba2rgba_cmod_2px
	js	1b				/* still pixels left */
	jnz	3f				/* row done exactly */
2:
	/* tail: final single pixel, 32-bit scalar variant of the step */
	movl	(%rsi, %rcx, 4), %eax
	ror	$24, %eax
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%edx, %xmm1
	movd	(%rdi, %rcx, 4), %xmm2
	psubusb	%xmm1, %xmm2
	pand	%xmm6, %xmm1
	pand	%xmm5, %xmm2
	por	%xmm1, %xmm2
	movd	%xmm2, (%rdi, %rcx, 4)
3:
	/* advance both pointers by their per-row strides */
	leaq	(%rsi, %r10, 4), %rsi
	leaq	(%rdi, %r11, 4), %rdi
	decq	%r9
	jnz	0b
9:
	LEAVE
SIZE(imlib_amd64_subtract_copy_rgba_to_rgba_cmod)
.purgem subcpy_rgba2rgba_cmod_2px
/*
 * imlib_amd64_subtract_copy_rgb_to_rgba_cmod
 *
 * Per-pixel saturated subtract of a color-modified RGB source from an
 * RGBA destination.  Because the source has no alpha, its alpha is
 * taken as 0xFF before the amod table lookup; the cmod'ed alpha then
 * replaces the destination alpha (copy-alpha semantics).
 *
 * Register convention (inferred from usage -- confirm against the C
 * prototypes / ENTER macro in asm.h):
 *   %rdi = dst pixels (ARGB32)     %rsi = src pixels (xRGB32)
 *   %r8  = width in pixels         %r9  = height in rows
 *   %r10 = src row advance (px)    %r11 = dst row advance (px)
 *   %r14 = cmod table base: +0x000 R, +0x100 G, +0x200 B, +0x300 A
 * Clobbers: rax, rbx, rcx, rdx, xmm1, xmm2, xmm5, xmm6, flags.
 *
 * Note: the original code mixed "movq $0xFF, %rbx" and
 * "movl $0xFF, %ebx"; both set rbx to 0xFF (32-bit writes
 * zero-extend), so the shorter movl encoding is used throughout.
 */

/*
 * One unrolled step: remap two src pixels through the cmod tables
 * (alpha forced to amod[0xFF]), saturate-subtract the RGB bytes from
 * dst, and splice the cmod'ed alpha into the destination.
 * Advances %rcx by 2; the final incq's flags drive the loop jumps.
 */
.macro subcpy_rgb2rgba_cmod_2px
	movq	(%rsi, %rcx, 4), %rax
	rorq	$48, %rax			/* pixel 0 red -> %al next */
	movl	$0x000000FF, %ebx		/* RGB src: alpha == 0xFF */
	movzbq	0x300(%r14, %rbx), %rdx		/* a' = amod[0xFF] */
	shlq	$8, %rdx
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl		/* r' = rmod[r] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl		/* g' = gmod[g] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl		/* b' = bmod[b] */
	shlq	$8, %rdx
	movl	$0x000000FF, %ebx
	movb	0x300(%r14, %rbx), %dl		/* pixel 1: a' = amod[0xFF] */
	shlq	$8, %rdx
	rolq	$16, %rax			/* skip pixel 1's alpha byte */
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%rdx, %xmm1			/* xmm1 = two cmod'ed src px */
	movq	(%rdi, %rcx, 4), %xmm2		/* xmm2 = two dst px */
	psubusb	%xmm1, %xmm2			/* sat(dst - src), all bytes */
	pand	%xmm5, %xmm2			/* keep dst RGB result */
	pand	%xmm6, %xmm1			/* keep src alpha bytes */
	por	%xmm1, %xmm2			/* merge: alpha = amod[0xFF] */
	movq	%xmm2, (%rdi, %rcx, 4)
	incq	%rcx
	incq	%rcx
.endm

PR_(imlib_amd64_subtract_copy_rgb_to_rgba_cmod):
	ENTER
	/* xmm5 keeps RGB bytes, xmm6 keeps alpha bytes */
	movdqa	m0XXX0XXX0XXX0XXX(%rip), %xmm5
	movdqa	mX000X000X000X000(%rip), %xmm6
	/* point at the row ends and count %rcx from -(width-1) up to 0 */
	leaq	(%rsi, %r8, 4), %rsi
	leaq	(%rdi, %r8, 4), %rdi
	subq	$4, %rsi
	subq	$4, %rdi
	negq	%r8
0:
	movq	%r8, %rcx
	incq	%rcx
	prefetchnta (%rsi, %rcx, 4)
	prefetcht0 (%rdi, %rcx, 4)
	prefetchnta 64(%rsi, %rcx, 4)
	prefetcht0 64(%rdi, %rcx, 4)
	jz	2f				/* width == 1: tail only */
1:
	prefetchnta 128(%rsi, %rcx, 4)
	prefetcht0 128(%rdi, %rcx, 4)
	/* 8-way unrolled: up to 16 pixels per pass of this loop body */
	subcpy_rgb2rgba_cmod_2px
	jz	2f				/* one pixel left -> tail */
	jns	3f				/* row done -> next row */
	.rept 6
	subcpy_rgb2rgba_cmod_2px
	jz	2f
	jns	3f
	.endr
	subcpy_rgb2rgba_cmod_2px
	js	1b				/* still pixels left */
	jnz	3f				/* row done exactly */
2:
	/* tail: final single pixel, 32-bit scalar variant of the step */
	movl	(%rsi, %rcx, 4), %eax
	ror	$16, %eax
	movl	$0x000000FF, %ebx
	movzbq	0x300(%r14, %rbx), %rdx
	shl	$8, %edx
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%edx, %xmm1
	movd	(%rdi, %rcx, 4), %xmm2
	psubusb	%xmm1, %xmm2
	pand	%xmm5, %xmm2
	pand	%xmm6, %xmm1
	por	%xmm1, %xmm2
	movd	%xmm2, (%rdi, %rcx, 4)
3:
	/* advance both pointers by their per-row strides */
	leaq	(%rsi, %r10, 4), %rsi
	leaq	(%rdi, %r11, 4), %rdi
	decq	%r9
	jnz	0b
9:
	LEAVE
SIZE(imlib_amd64_subtract_copy_rgb_to_rgba_cmod)
.purgem subcpy_rgb2rgba_cmod_2px
/*
 * imlib_amd64_reshade_blend_rgba_to_rgb_cmod
 *
 * Reshade-blend a color-modified RGBA source onto an RGB destination:
 * each RGB word is pushed away from the mid-point by an alpha-scaled
 * signed delta (dst += f(alpha) * (src - 127), saturated), where the
 * per-channel alpha factor comes from the cmod'ed source alpha.
 *
 * Register convention (inferred from usage -- confirm against the C
 * prototypes / ENTER macro in asm.h):
 *   %rdi = dst pixels (ARGB32)     %rsi = src pixels (ARGB32)
 *   %r8  = width in pixels         %r9  = height in rows
 *   %r10 = src row advance (px)    %r11 = dst row advance (px)
 *   %r14 = cmod table base: +0x000 R, +0x100 G, +0x200 B, +0x300 A
 * Clobbers: rax, rbx, rcx, rdx, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7,
 *           flags.  xmm4 stays zero, xmm6 = per-word 127 bias for RGB,
 *           xmm7 masks the replicated alpha words.
 */

/*
 * One unrolled step: remap two src pixels through the cmod tables
 * into %rdx, broadcast each pixel's cmod'ed alpha across its channel
 * words (xmm3), then dst += ((src-127) << 2) *hi alpha15, with signed
 * word saturation, repacked to bytes.  Advances %rcx by 2; the final
 * incq's flags drive the loop jumps.
 */
.macro reshade_rgba2rgb_cmod_2px
	movq	(%rsi, %rcx, 4), %rax
	rorq	$56, %rax			/* pixel 0 alpha -> %al */
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx		/* a' = amod[a] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl		/* r' = rmod[r] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl		/* g' = gmod[g] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl		/* b' = bmod[b] */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x300(%r14, %rbx), %dl		/* pixel 1: same dance */
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shlq	$8, %rdx
	rolq	$8, %rax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%rdx, %xmm1			/* xmm1 = two cmod'ed src px */
	movq	(%rdi, %rcx, 4), %xmm2		/* xmm2 = two dst px */
	movq	%xmm1, %xmm3
	punpcklbw %xmm3, %xmm3
	pshufhw	$0xFF, %xmm3, %xmm3
	pshuflw	$0xFF, %xmm3, %xmm3		/* alpha word per channel */
	psrlw	$1, %xmm3			/* 15-bit factor for pmulhw */
	pand	%xmm7, %xmm3
	punpcklbw %xmm4, %xmm1			/* src bytes -> words */
	punpcklbw %xmm4, %xmm2			/* dst bytes -> words */
	psubw	%xmm6, %xmm1			/* RGB words -= 127 (signed) */
	psllw	$2, %xmm1			/* scale for pmulhw's >>16 */
	pmulhw	%xmm3, %xmm1			/* delta * alpha factor */
	paddsw	%xmm1, %xmm2			/* dst += delta, saturated */
	packuswb %xmm4, %xmm2			/* words -> bytes, clamped */
	movq	%xmm2, (%rdi, %rcx, 4)
	incq	%rcx
	incq	%rcx
.endm

PR_(imlib_amd64_reshade_blend_rgba_to_rgb_cmod):
	ENTER
	pxor	%xmm4, %xmm4			/* zero, for unpack/pack */
	movdqa	m000V0V0V000V0V0V(%rip), %xmm6	/* 127 per RGB word */
	movdqa	m00XXXXXX(%rip), %xmm7
	/* point at the row ends and count %rcx from -(width-1) up to 0 */
	leaq	(%rsi, %r8, 4), %rsi
	leaq	(%rdi, %r8, 4), %rdi
	subq	$4, %rsi
	subq	$4, %rdi
	negq	%r8
0:
	movq	%r8, %rcx
	incq	%rcx
	prefetchnta (%rsi, %rcx, 4)
	prefetcht0 (%rdi, %rcx, 4)
	prefetchnta 64(%rsi, %rcx, 4)
	prefetcht0 64(%rdi, %rcx, 4)
	jz	2f				/* width == 1: tail only */
1:
	prefetchnta 128(%rsi, %rcx, 4)
	prefetcht0 128(%rdi, %rcx, 4)
	/* 8-way unrolled: up to 16 pixels per pass of this loop body */
	reshade_rgba2rgb_cmod_2px
	jz	2f				/* one pixel left -> tail */
	jns	3f				/* row done -> next row */
	.rept 6
	reshade_rgba2rgb_cmod_2px
	jz	2f
	jns	3f
	.endr
	reshade_rgba2rgb_cmod_2px
	js	1b				/* still pixels left */
	jnz	3f				/* row done exactly */
2:
	/* tail: final single pixel, 32-bit scalar variant of the step */
	movl	(%rsi, %rcx, 4), %eax
	ror	$24, %eax
	movzbq	%al, %rbx
	movzbq	0x300(%r14, %rbx), %rdx
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x000(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x100(%r14, %rbx), %dl
	shl	$8, %edx
	rol	$8, %eax
	movb	%al, %bl
	movb	0x200(%r14, %rbx), %dl
	movd	%edx, %xmm1
	movd	(%rdi, %rcx, 4), %xmm2
	movq	%xmm1, %xmm3
	punpcklbw %xmm3, %xmm3
	pshufhw	$0xFF, %xmm3, %xmm3
	pshuflw	$0xFF, %xmm3, %xmm3
	psrlw	$1, %xmm3
	pand	%xmm7, %xmm3
	punpcklbw %xmm4, %xmm1
	punpcklbw %xmm4, %xmm2
	psubw	%xmm6, %xmm1
	psllw	$2, %xmm1
	pmulhw	%xmm3, %xmm1
	paddsw	%xmm1, %xmm2
	packuswb %xmm4, %xmm2
	movd	%xmm2, (%rdi, %rcx, 4)
3:
	/* advance both pointers by their per-row strides */
	leaq	(%rsi, %r10, 4), %rsi
	leaq	(%rdi, %r11, 4), %rdi
	decq	%r9
	jnz	0b
9:
	LEAVE
SIZE(imlib_amd64_reshade_blend_rgba_to_rgb_cmod)
.purgem reshade_rgba2rgb_cmod_2px
PR_(imlib_amd64_reshade_blend_rgba_to_rgba_cmod):
ENTER
movq pow_lut@GOTPCREL(%rip), %r13
pxor %xmm4, %xmm4
movdqa c1(%rip), %xmm5
movdqa mX000X000X000X000(%rip), %xmm6
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm7
movdqa m000V0V0V000V0V0V(%rip), %xmm8
xorq %rax, %rax
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b
jnz 3f
2:
movl (%rsi, %rcx, 4), %eax
ror $24, %eax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_blend_rgba_to_rgba_cmod)
/*
 * imlib_amd64_reshade_blend_rgb_to_rgb_cmod
 *
 * Reshade-blend an RGB source onto an RGB destination with color-modifier
 * tables applied to the source.  The source alpha is ignored: the blend
 * factor is the constant amod(0xFF) (the 0x300 table entry for 255), and
 * the destination alpha byte is left untouched (the multiplier's alpha
 * lane is masked to zero via m00XXXXXX before pmulhw).
 * Color math (approximate): dst_c = clamp(dst_c + (cmod_c(src_c) - 128)*a).
 *
 * Register usage (set up by ENTER; inferred from use -- confirm in asm.h):
 *   %rsi src row, %rdi dst row, %r8 width (pixels), %r9 rows,
 *   %r10/%r11 src/dst per-row advance in pixels,
 *   %r14 cmod tables (+0x000/+0x100/+0x200 color, +0x300 alpha).
 *
 * Inner loop: 8x unrolled, one pixel PAIR per step; %rcx counts from
 * -(width-1).  After the incq pair: <0 loop, ==0 one odd pixel left (2:),
 * >0 row done (3:).
 */
PR_(imlib_amd64_reshade_blend_rgb_to_rgb_cmod):
ENTER
pxor %xmm4, %xmm4                       /* zero register for unpack */
movdqa m000V0V0V000V0V0V(%rip), %xmm6   /* 0x007F in color words, 0 in alpha word */
movdqa m00XXXXXX(%rip), %xmm7           /* keeps color words, zeroes alpha word */
/* Point pointers at last pixel of the row and negate width for
 * negative-index addressing. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:                                      /* per-row loop */
movq %r8, %rcx
incq %rcx                               /* %rcx = -(width-1); ZF when width == 1 */
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:                                      /* unrolled pair loop */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* ---- pixel pair 1/8 ----
 * Remap R/G/B of both pixels through the cmod tables; the alpha byte of
 * each rebuilt pixel is amod(0xFF) (index 255 into the 0x300 table). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax                          /* position first color byte for %al access */
movq $0x000000FF, %rbx                  /* constant 255: opaque alpha index */
movzbq 0x300(%r14, %rbx), %rdx          /* %rdx starts with amod(255) */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx                  /* 255 again for the second pixel's alpha */
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax                          /* skip the ignored src alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1                        /* %xmm1 = remapped src pair */
movq (%rdi, %rcx, 4), %xmm2             /* %xmm2 = dst pair */
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3             /* broadcast the amod(255) word to all lanes */
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3                         /* scale into pmulhw range */
pand %xmm7, %xmm3                       /* zero the alpha-lane multiplier: dst alpha unchanged */
punpcklbw %xmm4, %xmm1                  /* bytes -> words */
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1                      /* colors: signed offset from 127 */
psllw $2, %xmm1
pmulhw %xmm3, %xmm1                     /* offset * alpha */
paddsw %xmm1, %xmm2                     /* saturating add onto dst */
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f                                   /* one odd pixel remains */
jns 3f                                  /* row complete */
/* ---- pixel pair 2/8 (byte-for-byte repeat of pair 1) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 3/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 4/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 5/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 6/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 7/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 8/8 (repeat; loops back while pairs remain) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b                                   /* more pairs: restart unrolled loop */
jnz 3f                                  /* even width: row done */
2:                                      /* odd trailing pixel: 32-bit scalar variant */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx          /* alpha lane = amod(255) */
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movq %xmm1, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0xFF, %xmm3, %xmm3
pshuflw $0xFF, %xmm3, %xmm3
psrlw $1, %xmm3
pand %xmm7, %xmm3
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm6, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:                                      /* advance to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_blend_rgb_to_rgb_cmod)
/*
 * imlib_amd64_reshade_blend_rgb_to_rgba_cmod
 *
 * Reshade-blend an RGB source (alpha ignored) onto an RGBA destination
 * with color-modifier tables applied to the source.  The source's alpha
 * contribution is the constant amod(0xFF) (the 0x300 table entry for 255);
 * the effective blend factor per pixel is pow_lut[amod(0xFF)][dst_alpha],
 * and the destination alpha is raised toward opaque by
 * (255 - dst_a) * amod(0xFF) (approximate -- exact scaling is the pmulhw
 * arithmetic below).  Colors: dst_c = clamp(dst_c + (cmod_c(src_c)-128)*a).
 *
 * Register usage (set up by ENTER; inferred from use -- confirm in asm.h):
 *   %rsi src row, %rdi dst row, %r8 width (pixels), %r9 rows,
 *   %r10/%r11 src/dst per-row advance in pixels,
 *   %r14 cmod tables (+0x000/+0x100/+0x200 color, +0x300 alpha),
 *   %r13 pow_lut: 64 KB table indexed by (src_alpha << 8) | dst_alpha.
 *
 * Inner loop: 8x unrolled, one pixel PAIR per step; %rcx counts from
 * -(width-1).  After the incq pair: <0 loop, ==0 one odd pixel left (2:),
 * >0 row done (3:).
 */
PR_(imlib_amd64_reshade_blend_rgb_to_rgba_cmod):
ENTER
movq pow_lut@GOTPCREL(%rip), %r13       /* %r13 = &pow_lut (via GOT, PIC-safe) */
pxor %xmm4, %xmm4                       /* zero register for unpack */
/* NOTE(review): %xmm5 (c1) and %xmm7 are loaded here but never referenced
 * in this routine -- possibly kept for parity with sibling routines;
 * confirm before removing. */
movdqa c1(%rip), %xmm5
movdqa mX000X000X000X000(%rip), %xmm6   /* mask selecting only the alpha bytes */
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm7
movdqa m000V0V0V000V0V0V(%rip), %xmm8   /* 0x007F in color words, 0 in alpha word */
xorq %rax, %rax                         /* clear %rax: bytes merged into %al/%ah below */
/* Point pointers at last pixel of the row and negate width for
 * negative-index addressing. */
leaq (%rsi, %r8, 4), %rsi
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:                                      /* per-row loop */
movq %r8, %rcx
incq %rcx                               /* %rcx = -(width-1); ZF when width == 1 */
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f
1:                                      /* unrolled pair loop */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/* ---- pixel pair 1/8 ----
 * Remap R/G/B of both pixels through the cmod tables; the alpha byte of
 * each rebuilt pixel is amod(0xFF) (index 255 into the 0x300 table). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax                          /* position first color byte for %al access */
movq $0x000000FF, %rbx                  /* constant 255: opaque alpha index */
movzbq 0x300(%r14, %rbx), %rdx          /* %rdx starts with amod(255) */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx                  /* 255 again for the second pixel's alpha */
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax                          /* skip the ignored src alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1                        /* %xmm1 = remapped src pair */
movq (%rdi, %rcx, 4), %xmm2             /* %xmm2 = dst pair */
movq %rdx, %rax
/* Per pixel: index pow_lut with (amod(255) << 8) | dst_a; %al gets the
 * effective alpha, %ah = amod(255)/2 multiplies the dst-alpha lane. */
andl $0xff000000, %edx                  /* isolate amod(255) of pixel 0 */
roll $16, %edx                          /* -> bits 8..15 (%dh) */
movb 3(%rdi, %rcx, 4), %dl              /* dst_a -> bits 0..7 */
movb (%r13, %rdx), %al                  /* %al = pow_lut[amod(255)][dst_a] */
movb %dh, %ah
shrb $1, %ah                            /* %ah = amod(255) / 2 */
rolq $32, %rax
movl %eax, %edx                         /* now pixel 1 */
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3             /* 0x40: color lanes <- pow_lut alpha, */
pshuflw $0x40, %xmm3, %xmm3             /*       alpha lane <- amod(255)/2 */
psrlw $1, %xmm3                         /* scale into pmulhw range */
/* Replace src alpha byte with (255 - dst_a) so the alpha lane blends
 * toward opaque; color bytes pass through. */
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0                       /* dst alpha bytes only */
por %xmm6, %xmm1                        /* src alpha bytes -> 0xFF */
psubusb %xmm0, %xmm1                    /* alpha lane = 255 - dst_a */
punpcklbw %xmm4, %xmm1                  /* bytes -> words */
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1                      /* colors: signed offset from 127 */
psllw $2, %xmm1
pmulhw %xmm3, %xmm1                     /* offset * alpha */
paddsw %xmm1, %xmm2                     /* saturating add onto dst */
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f                                   /* one odd pixel remains */
jns 3f                                  /* row complete */
/* ---- pixel pair 2/8 (byte-for-byte repeat of pair 1) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 3/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 4/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 5/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 6/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 7/8 (repeat) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* ---- pixel pair 8/8 (repeat; loops back while pairs remain) ---- */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movq %rdx, %rax
andl $0xff000000, %edx
roll $16, %edx
movb 3(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movl %eax, %edx
andl $0xff000000, %edx
roll $16, %edx
movb 7(%rdi, %rcx, 4), %dl
movb (%r13, %rdx), %al
movb %dh, %ah
shrb $1, %ah
rolq $32, %rax
movd %rax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b                                   /* more pairs: restart unrolled loop */
jnz 3f                                  /* even width: row done */
2:                                      /* odd trailing pixel: 32-bit scalar variant */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx          /* alpha lane = amod(255) */
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
roll $16, %edx                          /* amod(255) -> bits 8..15 */
andl $0x0000ff00, %edx
movb 3(%rdi, %rcx, 4), %dl              /* dst_a -> bits 0..7 */
movb (%r13, %rdx), %al                  /* %al = pow_lut[amod(255)][dst_a] */
movb %dh, %ah
shrb $1, %ah                            /* %ah = amod(255) / 2 */
movd %eax, %xmm3
punpcklbw %xmm3, %xmm3
pshufhw $0x40, %xmm3, %xmm3
pshuflw $0x40, %xmm3, %xmm3
psrlw $1, %xmm3
movdqa %xmm2, %xmm0
pand %xmm6, %xmm0
por %xmm6, %xmm1
psubusb %xmm0, %xmm1
punpcklbw %xmm4, %xmm1
punpcklbw %xmm4, %xmm2
psubw %xmm8, %xmm1
psllw $2, %xmm1
pmulhw %xmm3, %xmm1
paddsw %xmm1, %xmm2
packuswb %xmm4, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:                                      /* advance to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_blend_rgb_to_rgba_cmod)
/*
 * imlib_amd64_reshade_copy_rgba_to_rgb_cmod
 *
 * Reshade-copy a rectangle of RGBA source pixels onto an RGB destination,
 * pushing every source byte through a color-modifier lookup table first.
 * Per color byte (s' = LUT[s]):
 *     d' = sat(d + 2*max(s' - 127, 0) - 2*max(128 - s', 0))
 * i.e. channels above mid-gray brighten the destination, channels below it
 * darken it.  The destination alpha byte is left untouched: both deltas are
 * masked with m0XXX0XXX0XXX0XXX (alpha lanes zero) before being applied.
 *
 * Register roles (presumably set up by the ENTER macro from asm.h and the
 * caller -- TODO confirm against asm.h, which is outside this view):
 *   %rsi = src pixel pointer     %r10 = src row advance, in pixels
 *   %rdi = dst pixel pointer     %r11 = dst row advance, in pixels
 *   %r8  = row width in pixels   %r9  = number of rows
 *   %r14 = cmod LUT base; offsets +0x000/+0x100/+0x200/+0x300 select the
 *          per-channel 256-byte tables (order inferred from the byte
 *          rotation sequence below: R, G, B, A)
 *   %rax/%rbx/%rdx = byte-lookup scratch, %rcx = pixel counter,
 *   %xmm0-%xmm3 = SIMD scratch
 * The inner loop is unrolled 8x and processes two pixels (one qword) per
 * step; a single trailing pixel (odd width) is handled at label 2.
 */
PR_(imlib_amd64_reshade_copy_rgba_to_rgb_cmod):
ENTER
movdqa m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* xmm5 = color-byte mask (alpha lanes 0); constants are 16-byte aligned */
movdqa m0VVV0VVV0VVV0VVV(%rip), %xmm6	/* xmm6 = 0x7f in every color byte, 0 in alpha bytes */
leaq (%rsi, %r8, 4), %rsi	/* point src/dst at the last pixel of the row... */
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi	/* ...so (base, rcx, 4) indexes with rcx in [-(w-1), 0] */
subq $4, %rdi
negq %r8	/* r8 = -width */
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx	/* rcx = -(width-1), counts up towards 0 */
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: only the single tail pixel */
1:	/* 8x-unrolled loop, 2 pixels per step */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/*
 * Step 1: run both pixels of the qword through the cmod LUTs.
 * rorq $56 == rotate left 8, so the first byte in %al is the high pixel's
 * alpha; each following rolq $8 presents the next byte, giving the order
 * A2,R2,G2,B2,A1,R1,G1,B1.  Each byte is looked up in its channel table
 * and shifted into %rdx, which rebuilds both pixels in the original layout.
 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* alpha LUT, pixel 2 */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* red LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* green LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* blue LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl	/* alpha LUT, pixel 1 */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
/* Reshade the destination pair with the modified source pair. */
movd %rdx, %xmm1	/* xmm1 = cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = dst pixels */
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1	/* xmm1 = max(s-127, 0) per color byte */
paddusb %xmm1, %xmm1	/* ... doubled (saturating) = brighten delta */
paddusb %xmm6, %xmm3	/* xmm3 = min(s+127, 255) */
pxor %xmm5, %xmm3	/* color bytes -> 255-that = max(128-s, 0) */
paddusb %xmm3, %xmm3	/* ... doubled = darken delta */
pand %xmm5, %xmm1	/* restrict both deltas to color bytes; */
pand %xmm5, %xmm3	/* dst alpha stays untouched */
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f	/* exactly one pixel left in the row */
jns 3f	/* row complete */
/* Step 2 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 3 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 4 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 5 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 6 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 7 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 8 (identical to step 1, but closes the unrolled loop). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs remain in this row */
jnz 3f	/* row complete (even width) */
2:	/* tail: one remaining pixel (odd width); 32-bit variant of a step */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax	/* == rol 8: %al = alpha first, then R,G,B via rol $8 */
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
pand %xmm5, %xmm1
pand %xmm5, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_copy_rgba_to_rgb_cmod)
/*
 * imlib_amd64_reshade_copy_rgba_to_rgba_cmod
 *
 * Same reshade-copy as the rgba_to_rgb variant above, but the destination
 * keeps an alpha channel: the color bytes are reshaded
 *     d' = sat(d + 2*max(s' - 127, 0) - 2*max(128 - s', 0))
 * (s' = LUT[s]), while the destination alpha byte is REPLACED by the
 * cmod'ed source alpha (xmm0 captures sat(s'-xmm6) before doubling; the
 * alpha lane of xmm6 is 0, so that lane is exactly s'_alpha, and it is
 * merged in through the mX000... alpha mask in xmm7).
 *
 * Register roles (presumably set up by the ENTER macro from asm.h --
 * TODO confirm against asm.h, which is outside this view):
 *   %rsi = src pixel pointer     %r10 = src row advance, in pixels
 *   %rdi = dst pixel pointer     %r11 = dst row advance, in pixels
 *   %r8  = row width in pixels   %r9  = number of rows
 *   %r14 = cmod LUT base (+0x000 R, +0x100 G, +0x200 B, +0x300 A)
 * Inner loop unrolled 8x, two pixels per step; odd-width tail at label 2.
 */
PR_(imlib_amd64_reshade_copy_rgba_to_rgba_cmod):
ENTER
movdqu m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* xmm5 = color-byte mask */
movdqu m0VVV0VVV0VVV0VVV(%rip), %xmm6	/* xmm6 = 0x7f in color bytes, 0 in alpha bytes */
movdqu mX000X000X000X000(%rip), %xmm7	/* xmm7 = alpha-byte mask */
leaq (%rsi, %r8, 4), %rsi	/* index rows backwards: rcx in [-(w-1), 0] */
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: tail pixel only */
1:	/* 8x-unrolled loop, 2 pixels per step */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/*
 * Step 1: LUT both pixels (byte order A2,R2,G2,B2,A1,R1,G1,B1 via
 * rorq $56 + rolq $8), rebuild them in %rdx, then reshade the color
 * bytes and copy the cmod'ed alpha into the destination.
 */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx	/* alpha LUT, pixel 2 */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* red LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* green LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* blue LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl	/* alpha LUT, pixel 1 */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1	/* xmm1 = cmod'ed src pixels */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = dst pixels */
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1	/* color: max(s-127,0); alpha lane: s (xmm6 alpha = 0) */
movdqa %xmm1, %xmm0	/* xmm0 keeps the undoubled copy -> src alpha */
paddusb %xmm1, %xmm1	/* brighten delta = 2*max(s-127,0) */
paddusb %xmm6, %xmm3	/* min(s+127,255) */
pxor %xmm5, %xmm3	/* color bytes -> max(128-s,0) */
paddusb %xmm3, %xmm3	/* darken delta */
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2	/* keep reshaded color bytes only */
pand %xmm7, %xmm0	/* keep cmod'ed src alpha only */
por %xmm0, %xmm2	/* merge color + alpha */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f	/* exactly one pixel left */
jns 3f	/* row complete */
/* Step 2 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 3 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 4 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 5 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 6 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 7 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 8 (identical to step 1, but closes the unrolled loop). */
movq (%rsi, %rcx, 4), %rax
rorq $56, %rax
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs remain in this row */
jnz 3f	/* row complete (even width) */
2:	/* tail: one remaining pixel (odd width); 32-bit variant of a step */
movl (%rsi, %rcx, 4), %eax
ror $24, %eax	/* == rol 8: %al = alpha first, then R,G,B via rol $8 */
movzbq %al, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_copy_rgba_to_rgba_cmod)
/*
 * imlib_amd64_reshade_copy_rgb_to_rgba_cmod
 *
 * Same reshade-copy as the rgba_to_rgba variant above, but the SOURCE has
 * no meaningful alpha: the alpha LUT is indexed with a constant 0xFF
 * (opaque) instead of a source byte, and the source's alpha byte is
 * skipped in the rotation sequence (rorq $48 starts at R2; rolq $16 jumps
 * over pixel 1's unused alpha byte).  Color bytes are reshaded
 *     d' = sat(d + 2*max(s' - 127, 0) - 2*max(128 - s', 0))
 * (s' = LUT[s]) and the destination alpha is set to alpha_LUT[0xFF].
 *
 * Register roles (presumably set up by the ENTER macro from asm.h --
 * TODO confirm against asm.h, which is outside this view):
 *   %rsi = src pixel pointer     %r10 = src row advance, in pixels
 *   %rdi = dst pixel pointer     %r11 = dst row advance, in pixels
 *   %r8  = row width in pixels   %r9  = number of rows
 *   %r14 = cmod LUT base (+0x000 R, +0x100 G, +0x200 B, +0x300 A)
 * Inner loop unrolled 8x, two pixels per step; odd-width tail at label 2.
 */
PR_(imlib_amd64_reshade_copy_rgb_to_rgba_cmod):
ENTER
movdqu m0XXX0XXX0XXX0XXX(%rip), %xmm5	/* xmm5 = color-byte mask */
movdqu m0VVV0VVV0VVV0VVV(%rip), %xmm6	/* xmm6 = 0x7f in color bytes, 0 in alpha bytes */
movdqu mX000X000X000X000(%rip), %xmm7	/* xmm7 = alpha-byte mask */
leaq (%rsi, %r8, 4), %rsi	/* index rows backwards: rcx in [-(w-1), 0] */
leaq (%rdi, %r8, 4), %rdi
subq $4, %rsi
subq $4, %rdi
negq %r8
0:	/* per-row loop */
movq %r8, %rcx
incq %rcx
prefetchnta (%rsi, %rcx, 4)
prefetcht0 (%rdi, %rcx, 4)
prefetchnta 64(%rsi, %rcx, 4)
prefetcht0 64(%rdi, %rcx, 4)
jz 2f	/* width == 1: tail pixel only */
1:	/* 8x-unrolled loop, 2 pixels per step */
prefetchnta 128(%rsi, %rcx, 4)
prefetcht0 128(%rdi, %rcx, 4)
/*
 * Step 1: LUT both pixels.  rorq $48 (== rotate left 16) puts R2 in %al
 * first; the alpha for each pixel comes from alpha_LUT[0xFF] instead of a
 * source byte, and rolq $16 later skips pixel 1's unused alpha byte.
 */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx	/* force opaque alpha index for pixel 2 */
movzbq 0x300(%r14, %rbx), %rdx	/* alpha LUT[0xFF] */
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl	/* red LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl	/* green LUT */
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl	/* blue LUT */
shlq $8, %rdx
movl $0x000000FF, %ebx	/* force opaque alpha index for pixel 1 */
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax	/* skip the source's unused alpha byte */
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1	/* xmm1 = cmod'ed src pixels (opaque alpha) */
movq (%rdi, %rcx, 4), %xmm2	/* xmm2 = dst pixels */
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1	/* color: max(s-127,0); alpha lane: s (xmm6 alpha = 0) */
movdqa %xmm1, %xmm0	/* xmm0 keeps the undoubled copy -> alpha_LUT[0xFF] */
paddusb %xmm1, %xmm1	/* brighten delta = 2*max(s-127,0) */
paddusb %xmm6, %xmm3	/* min(s+127,255) */
pxor %xmm5, %xmm3	/* color bytes -> max(128-s,0) */
paddusb %xmm3, %xmm3	/* darken delta */
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2	/* keep reshaded color bytes only */
pand %xmm7, %xmm0	/* keep the forced alpha only */
por %xmm0, %xmm2	/* merge color + alpha */
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f	/* exactly one pixel left */
jns 3f	/* row complete */
/* Step 2 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 3 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 4 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 5 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 6 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 7 (identical to step 1). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
jz 2f
jns 3f
/* Step 8 (identical to step 1, but closes the unrolled loop). */
movq (%rsi, %rcx, 4), %rax
rorq $48, %rax
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shlq $8, %rdx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
shlq $8, %rdx
movl $0x000000FF, %ebx
movb 0x300(%r14, %rbx), %dl
shlq $8, %rdx
rolq $16, %rax
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shlq $8, %rdx
rolq $8, %rax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %rdx, %xmm1
movq (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movq %xmm2, (%rdi, %rcx, 4)
incq %rcx
incq %rcx
js 1b	/* more pixel pairs remain in this row */
jnz 3f	/* row complete (even width) */
2:	/* tail: one remaining pixel (odd width); 32-bit variant of a step */
movl (%rsi, %rcx, 4), %eax
ror $16, %eax	/* == rol 16: %al = red first; alpha comes from LUT[0xFF] */
movq $0x000000FF, %rbx
movzbq 0x300(%r14, %rbx), %rdx
shl $8, %edx
movb %al, %bl
movb 0x000(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x100(%r14, %rbx), %dl
shl $8, %edx
rol $8, %eax
movb %al, %bl
movb 0x200(%r14, %rbx), %dl
movd %edx, %xmm1
movd (%rdi, %rcx, 4), %xmm2
movdqa %xmm1, %xmm3
psubusb %xmm6, %xmm1
movdqa %xmm1, %xmm0
paddusb %xmm1, %xmm1
paddusb %xmm6, %xmm3
pxor %xmm5, %xmm3
paddusb %xmm3, %xmm3
paddusb %xmm1, %xmm2
psubusb %xmm3, %xmm2
pand %xmm5, %xmm2
pand %xmm7, %xmm0
por %xmm0, %xmm2
movd %xmm2, (%rdi, %rcx, 4)
3:	/* advance both pointers to the next row */
leaq (%rsi, %r10, 4), %rsi
leaq (%rdi, %r11, 4), %rdi
decq %r9
jnz 0b
9:
LEAVE
SIZE(imlib_amd64_reshade_copy_rgb_to_rgba_cmod)
#endif
#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif