/* root/src/lib/asm_rgba.S */

/* [<][>][^][v][top][bottom][index][help] */
#include <config.h>
#include "asm.h"

#ifdef DO_MMX_ASM

/*\ 
|*| MMX assembly rgba rendering routines for Imlib2
|*| Written by Willem Monsuwe <willem@stack.nl>
|*|
|*| Special (hairy) constructs are only commented on first use.
\*/

/*\ All rendering functions have the same calling convention:
|*|  __imlib_mmx_rgbXXX(void *src, int sjmp, void *dst, int dw,
|*|                     int w, int h, int dx, int dy)
|*|
|*| The macros below address the stack arguments relative to %ebp, and are
|*| valid once ENTER has set up the frame (8(%ebp) is the first argument).
|*|  src    - source pixels, read 4 bytes per pixel
|*|  sjmp   - extra source pixels to skip after each row; ENTER adds w to it
|*|           and LOOP_END scales the sum by 4 when stepping %esi
|*|  dst    - destination pixels, written 2 bytes per pixel
|*|  dw     - per-row destination step, added to %edi unscaled (bytes)
|*|  w, h   - pixels per row and number of rows
|*|  dx, dy - not referenced by the code visible in this file
\*/

#define src     8(%ebp)
#define sjmp    12(%ebp)
#define dst     16(%ebp)
#define dw      20(%ebp)
#define w       24(%ebp)
#define h       28(%ebp)
#define dx      32(%ebp)
#define dy      36(%ebp)

.text
        .align 8
FN_(imlib_mmx_rgb565_fast)
FN_(imlib_mmx_bgr565_fast)
FN_(imlib_mmx_rgb555_fast)
FN_(imlib_mmx_bgr555_fast)

FN_(imlib_get_cpuid)

#include "asm_loadimmq.S"

/*\ Common code \*/
/*\ ENTER: save registers, load common parameters.
|*| Sets up the %ebp frame and pushes %ebx, %ecx, %edx, %edi, %esi; LEAVE
|*| pops them in the reverse order, so the two macros must stay in step.
|*| On exit: %esi = src, %edi = dst, %ebx = w, %edx = h.
|*| The last instruction adds w to the sjmp stack slot in place, so the
|*| per-row source step taken by LOOP_END is (w + sjmp) pixels.
\*/
#define ENTER                   \
        pushl %ebp;             \
        movl  %esp, %ebp;       \
        pushl %ebx;             \
        pushl %ecx;             \
        pushl %edx;             \
        pushl %edi;             \
        pushl %esi;             \
        movl  src,  %esi;       \
        movl  dst,  %edi;       \
        movl  w,    %ebx;       \
        movl  h,    %edx;       \
        addl %ebx, sjmp

/*\ LOOP_START: open the per-row loop.
|*| Jumps forward to label 4 (defined by LOOP_END) without doing any work
|*| when the row count in %edx or the width in %ebx is zero.
|*| Label 0 is the top of the row loop; each pass reloads %ecx with the
|*| width from %ebx.
\*/
#define LOOP_START              \
        testl %edx, %edx;       \
        jz 4f;                  \
        testl %ebx, %ebx;       \
        jz 4f;                  \
0:                              \
        movl %ebx, %ecx

/*\ LOOP_END: close the per-row loop.
|*| Label 3 is the end-of-row target used by the function bodies.
|*| Steps %esi by the sjmp stack slot times 4 bytes (ENTER has already
|*| added w to that slot), steps %edi by dw bytes, decrements the row
|*| count in %edx and branches back to label 0 while it is non-zero.
|*| Label 4 is the exit LOOP_START jumps to when w or h is zero.
|*| %ecx is overwritten with sjmp here.
\*/
#define LOOP_END                        \
3:                                      \
        movl sjmp, %ecx;                \
        leal (%esi, %ecx, 4), %esi;     \
        addl dw, %edi;                  \
        decl %edx;                      \
        jnz 0b;                         \
4:

/*\ LEAVE: unset MMX mode, reset registers, return.
|*| Executes emms, pops %esi, %edi, %edx, %ecx, %ebx (the reverse of the
|*| push order in ENTER), restores %esp and %ebp from the frame and returns.
\*/
#define LEAVE                   \
        emms;                   \
        popl %esi;              \
        popl %edi;              \
        popl %edx;              \
        popl %ecx;              \
        popl %ebx;              \
        movl %ebp, %esp;        \
        popl %ebp;              \
        ret



/*\ BGR565 entry point.
|*| Loads the mul_bgr565 constant into %mm7 and then jumps into the body
|*| of imlib_mmx_rgb565_fast at .rgb565_fast_entry, which does all the work.
|*| LOAD_IMMQ and CLEANUP_IMMQ_LOADS come from asm_loadimmq.S; presumably
|*| the cleanup undoes whatever the load left behind - confirm there.
\*/
PR_(imlib_mmx_bgr565_fast):
        LOAD_IMMQ(mul_bgr565, %mm7)     /*\ This constant is the only difference \*/
        CLEANUP_IMMQ_LOADS(1)
        jmp .rgb565_fast_entry

SIZE(imlib_mmx_bgr565_fast)

/*\ Convert rows of 32-bit source pixels to 16-bit 565 pixels.
|*|
|*| Register use inside the loop:
|*|   %esi = source row, %edi = destination row
|*|   %ebx = w, %edx = rows left, %ecx = pixel index, counting down to 0
|*|   %mm5 = m_rb mask, %mm6 = m_g6 mask, %mm7 = multiplier constant
|*|
|*| Each row is processed from its end towards its start: first one pixel
|*| if the width is odd, then one pair if bit 1 of the remaining count is
|*| set, then groups of four until %ecx reaches zero.
|*|
|*| The decl/subl on %ecx sets ZF, and the jz/jnz that consumes it sits
|*| after the MMX and mov instructions of the same step. Those do not
|*| modify EFLAGS, so do not insert flag-changing instructions in between.
|*|
|*| Overwrites %eax and %mm0-%mm3 in addition to the constants above.
\*/
PR_(imlib_mmx_rgb565_fast):
        LOAD_IMMQ(mul_rgb565, %mm7)
        CLEANUP_IMMQ_LOADS(1)
.rgb565_fast_entry:             /*\ The bgr565 entry point jumps here with its own %mm7 \*/
        ENTER

        LOAD_IMMQ(m_rb, %mm5)
        LOAD_IMMQ(m_g6, %mm6)
        CLEANUP_IMMQ_LOADS(2)

        LOOP_START

        /*\ Odd width: convert the last pixel of the row on its own \*/
        test $1, %ecx
        jz 1f
        decl %ecx               /*\ ZF from here is tested by the jz 3f below \*/
        movd (%esi, %ecx, 4), %mm0
        movq %mm0, %mm1
        pand %mm5, %mm0
        pand %mm6, %mm1
        pmaddwd %mm7, %mm0
        por %mm1, %mm0
        psrad $5, %mm0          /*\ Pixel now sits in the low 16 bits \*/

        movd %mm0, %eax
        movw %ax, (%edi, %ecx, 2)

        jz 3f                   /*\ That was the only pixel in the row \*/
1:
        /*\ Remaining count has bit 1 set: convert one pair \*/
        test $2, %ecx
        jz 2f
        subl $2, %ecx           /*\ ZF from here is tested by the jz 3f below \*/
        movq (%esi, %ecx, 4), %mm0
        movq %mm0, %mm1
        pand %mm5, %mm0
        pand %mm6, %mm1
        pmaddwd %mm7, %mm0
        por %mm1, %mm0
        pslld $11, %mm0         /*\ Shift up, then arithmetic shift down: \*/
        psrad $16, %mm0         /*\ sign-extended, so packssdw will not saturate \*/

        packssdw %mm0, %mm0

        movd %mm0, (%edi, %ecx, 2)

        jz 3f                   /*\ Row finished \*/
2:
        /*\ Main loop: four pixels per pass \*/
        subl $4, %ecx           /*\ ZF from here is tested by the jnz 2b below \*/
        movq (%esi, %ecx, 4), %mm0
        movq 8(%esi, %ecx, 4), %mm2
        movq %mm0, %mm1         /*\ a r g b (2x) \*/
        movq %mm2, %mm3
        pand %mm5, %mm0         /*\ 0 rrrrr000 0 bbbbb000 (2 x) \*/
        pand %mm5, %mm2
        pand %mm6, %mm1         /*\ 0 0 gggggg00 00000000 (2 x) \*/
        pand %mm6, %mm3
        pmaddwd %mm7, %mm0      /*\ 0 000rrrrr 000000bb bbb00000 (2 x) \*/
        pmaddwd %mm7, %mm2
        por %mm1, %mm0          /*\ 0 000rrrrr ggggggbb bbb00000 (2 x) \*/
        por %mm3, %mm2
        pslld $11, %mm0         /*\ rrrrrggg gggbbbbb 0 0 (2 x) \*/
        pslld $11, %mm2
        psrad $16, %mm0         /*\ x x rrrrrggg gggbbbbb (2 x) \*/
        psrad $16, %mm2

        packssdw %mm2, %mm0     /*\ rrrrrggg gggbbbbb (4 x) \*/

        movq %mm0, (%edi, %ecx, 2)

        jnz 2b
        LOOP_END
        LEAVE

SIZE(imlib_mmx_rgb565_fast)


/*\ BGR555 entry point.
|*| Loads the mul_bgr555 constant into %mm7 and then jumps into the body
|*| of imlib_mmx_rgb555_fast at .rgb555_fast_entry, which does all the work.
|*| LOAD_IMMQ and CLEANUP_IMMQ_LOADS come from asm_loadimmq.S; presumably
|*| the cleanup undoes whatever the load left behind - confirm there.
\*/
PR_(imlib_mmx_bgr555_fast):
        LOAD_IMMQ(mul_bgr555, %mm7)     /*\ This constant is the only difference \*/
        CLEANUP_IMMQ_LOADS(1)
        jmp .rgb555_fast_entry

SIZE(imlib_mmx_bgr555_fast)

/*\ Convert rows of 32-bit source pixels to 16-bit 555 pixels.
|*|
|*| Register use inside the loop:
|*|   %esi = source row, %edi = destination row
|*|   %ebx = w, %edx = rows left, %ecx = pixel index, counting down to 0
|*|   %mm5 = m_rb mask, %mm6 = m_g5 mask, %mm7 = multiplier constant
|*|
|*| Each row is processed from its end towards its start: first one pixel
|*| if the width is odd, then one pair if bit 1 of the remaining count is
|*| set, then groups of four until %ecx reaches zero.
|*|
|*| After the mask/multiply/or step every pixel has the layout
|*|   0 000rrrrr gggggbbb bb000000
|*| so all three paths must shift right by 6 to get 0rrrrrgg gggbbbbb.
|*|
|*| The decl/subl on %ecx sets ZF, and the jz/jnz that consumes it sits
|*| after the MMX and mov instructions of the same step. Those do not
|*| modify EFLAGS, so do not insert flag-changing instructions in between.
|*|
|*| Overwrites %eax and %mm0-%mm3 in addition to the constants above.
\*/
PR_(imlib_mmx_rgb555_fast):
        LOAD_IMMQ(mul_rgb555, %mm7)
        CLEANUP_IMMQ_LOADS(1)
.rgb555_fast_entry:             /*\ The bgr555 entry point jumps here with its own %mm7 \*/
        ENTER

        LOAD_IMMQ(m_rb, %mm5)
        LOAD_IMMQ(m_g5, %mm6)
        CLEANUP_IMMQ_LOADS(2)

        LOOP_START

        /*\ Odd width: convert the last pixel of the row on its own \*/
        test $1, %ecx
        jz 1f
        decl %ecx               /*\ ZF from here is tested by the jz 3f below \*/
        movd (%esi, %ecx, 4), %mm0
        movq %mm0, %mm1
        pand %mm5, %mm0
        pand %mm6, %mm1
        pmaddwd %mm7, %mm0
        por %mm1, %mm0
        psrad $6, %mm0          /*\ 0 0 0rrrrrgg gggbbbbb, same shift as the wider paths \*/

        movd %mm0, %eax
        movw %ax, (%edi, %ecx, 2)

        jz 3f                   /*\ That was the only pixel in the row \*/
1:
        /*\ Remaining count has bit 1 set: convert one pair \*/
        test $2, %ecx
        jz 2f
        subl $2, %ecx           /*\ ZF from here is tested by the jz 3f below \*/
        movq (%esi, %ecx, 4), %mm0
        movq %mm0, %mm1
        pand %mm5, %mm0
        pand %mm6, %mm1
        pmaddwd %mm7, %mm0
        por %mm1, %mm0
        psrld $6, %mm0          /*\ Top bit is 0, so packssdw will not saturate \*/

        packssdw %mm0, %mm0

        movd %mm0, (%edi, %ecx, 2)

        jz 3f                   /*\ Row finished \*/
2:
        /*\ Main loop: four pixels per pass \*/
        subl $4, %ecx           /*\ ZF from here is tested by the jnz 2b below \*/
        movq (%esi, %ecx, 4), %mm0
        movq 8(%esi, %ecx, 4), %mm2
        movq %mm0, %mm1         /*\ a r g b (2x) \*/
        movq %mm2, %mm3
        pand %mm5, %mm0         /*\ 0 rrrrr000 0 bbbbb000 (2 x) \*/
        pand %mm5, %mm2
        pand %mm6, %mm1         /*\ 0 0 ggggg000 00000000 (2 x) \*/
        pand %mm6, %mm3
        pmaddwd %mm7, %mm0      /*\ 0 000rrrrr 00000bbb bb000000 (2 x) \*/
        pmaddwd %mm7, %mm2
        por %mm1, %mm0          /*\ 0 000rrrrr gggggbbb bb000000 (2 x) \*/
        por %mm3, %mm2
        psrld $6, %mm0          /*\ 0 0 0rrrrrgg gggbbbbb (2 x) \*/
        psrld $6, %mm2

        packssdw %mm2, %mm0     /*\ 0rrrrrgg gggbbbbb (4 x) \*/

        movq %mm0, (%edi, %ecx, 2)

        jnz 2b
        LOOP_END
        LEAVE

SIZE(imlib_mmx_rgb555_fast)

/*\ imlib_get_cpuid: takes no stack arguments, result in %eax.
|*|
|*| Returns 0 when bit 21 of EFLAGS (the ID flag, 0x200000) cannot be
|*| toggled, i.e. the cpuid instruction is not available, or when cpuid
|*| leaf 0 reports 0 as its highest supported leaf.
|*| Otherwise returns bits 8-11 of leaf 1 %eax (the family field) merged
|*| with leaf 1 %edx (the feature flags) with its own bits 8-11 cleared.
|*|
|*| %ebx and %edx are saved and restored; %ecx is overwritten by cpuid.
|*| EFLAGS is left holding the toggled value written by popfl.
\*/
PR_(imlib_get_cpuid):
        pushl   %ebx
        pushl   %edx

        /*\ Try to flip the ID flag and see whether the change sticks \*/
        pushfl
        popl    %eax
        movl    %eax, %ebx              /*\ %ebx = original EFLAGS \*/
        xorl    $0x200000, %eax
        pushl   %eax
        popfl
        pushfl
        popl    %eax
        xorl    %ebx, %eax              /*\ Bits that actually changed \*/
        andl    $0x200000, %eax
        jz      1f                      /*\ No cpuid: return 0 \*/

        /*\ Leaf 0: highest supported leaf in %eax \*/
        xorl    %eax, %eax
        cpuid
        testl   %eax, %eax
        jz      1f                      /*\ Leaf 1 not available: return 0 \*/

        /*\ Leaf 1: family from %eax, feature flags from %edx \*/
        movl    $1, %eax
        cpuid
        andl    $0x00000f00, %eax
        andl    $0xfffff0ff, %edx
        orl     %edx, %eax
1:
        popl    %edx
        popl    %ebx
        ret

SIZE(imlib_get_cpuid)

#endif

#ifdef __ELF__
.section .note.GNU-stack,"",@progbits
#endif

/* [<][>][^][v][top][bottom][index][help] */