#include "../../../cmd/ld/textflag.h"
// LOAD(index): fetch 32-bit message word number `index` from the input
// chunk at SI, reverse its byte order with BSWAPL (the input words are
// treated as big-endian), and store it in slot `index` of the 16-word
// schedule buffer kept at (SP).
// The word is also left in R10, where MIX picks it up.
// Clobbers: R10.
#define LOAD(index) \
        MOVL    (index*4)(SI), R10; \
        BSWAPL  R10; \
        MOVL    R10, (index*4)(SP)
// SHUFFLE(index): produce the next message-schedule word in place.
// The stack holds the most recent 16 words as a circular buffer indexed
// modulo 16, so slot (index&0xf) currently contains word index-16.
// Computes
//   w[index&0xf] = rol1(w[index&0xf] ^ w[(index-3)&0xf] ^
//                       w[(index-8)&0xf] ^ w[(index-14)&0xf])
// and leaves the new word in R10 for MIX.
// Clobbers: R10.
#define SHUFFLE(index) \
        MOVL    (((index)&0xf)*4)(SP), R10; \
        XORL    (((index-3)&0xf)*4)(SP), R10; \
        XORL    (((index-8)&0xf)*4)(SP), R10; \
        XORL    (((index-14)&0xf)*4)(SP), R10; \
        ROLL    $1, R10; \
        MOVL    R10, (((index)&0xf)*4)(SP)
// FUNC1: R9 = d ^ (b & (c ^ d)).
// Bitwise this selects c where b is 1 and d where b is 0, i.e. it equals
// (b & c) | (~b & d) without needing a NOT or a second temporary.
// Parameters a and e are accepted for a uniform signature but not used.
// Clobbers: R9.
#define FUNC1(a, b, c, d, e) \
        MOVL    d, R9; \
        XORL    c, R9; \
        ANDL    b, R9; \
        XORL    d, R9
// FUNC2: R9 = b ^ c ^ d (bitwise parity of the three inputs).
// Parameters a and e are accepted for a uniform signature but not used.
// Clobbers: R9.
#define FUNC2(a, b, c, d, e) \
        MOVL    b, R9; \
        XORL    c, R9; \
        XORL    d, R9
// FUNC3: R9 = ((b | c) & d) | (b & c).
// Each result bit is 1 when at least two of the corresponding bits of
// b, c, d are 1 (bitwise majority).
// Parameters a and e are accepted for a uniform signature but not used.
// Clobbers: R8 (temporary for the (b|c)&d term) and R9.
#define FUNC3(a, b, c, d, e) \
        MOVL    b, R8; \
        ORL     c, R8; \
        ANDL    d, R8; \
        MOVL    b, R9; \
        ANDL    c, R9; \
        ORL     R8, R9
        
// FUNC4 uses the same boolean function as FUNC2 (b ^ c ^ d); only the
// round constant passed to MIX differs between ROUND2 and ROUND4.
#define FUNC4 FUNC2
// MIX: fold one round's contributions into e and rotate b.
// Expects R9 = round function result (from FUNCn) and R10 = current
// message word (from LOAD or SHUFFLE). Performs
//   b = rol(b, 30)
//   e = e + R9 + rol(a, 5) + R10 + const
// LEAL is used to add R10 and the round constant to e in a single
// instruction. Parameters c and d are not used here.
// Clobbers: R8 (holds rol(a, 5)); a itself is left unchanged.
#define MIX(a, b, c, d, e, const) \
        ROLL    $30, b; \
        ADDL    R9, e; \
        MOVL    a, R8; \
        ROLL    $5, R8; \
        LEAL    const(e)(R10*1), e; \
        ADDL    R8, e
// ROUND1: a type-1 round that reads its message word from the input
// (LOAD), applies FUNC1 and mixes with constant 0x5A827999.
// Used for rounds 0-15. Clobbers R8, R9, R10.
#define ROUND1(a, b, c, d, e, index) \
        LOAD(index); \
        FUNC1(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x5A827999)
// ROUND1x: a type-1 round whose message word is derived from the
// schedule buffer (SHUFFLE) instead of being loaded from the input;
// same function (FUNC1) and constant (0x5A827999) as ROUND1.
// Used for rounds 16-19. Clobbers R8, R9, R10.
#define ROUND1x(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC1(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x5A827999)
// ROUND2: a type-2 round: SHUFFLE the schedule, apply FUNC2 (b^c^d) and
// mix with constant 0x6ED9EBA1.
// Used for rounds 20-39. Clobbers R8, R9, R10.
#define ROUND2(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC2(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x6ED9EBA1)
// ROUND3: a type-3 round: SHUFFLE the schedule, apply FUNC3 (majority)
// and mix with constant 0x8F1BBCDC.
// Used for rounds 40-59. Clobbers R8, R9, R10.
#define ROUND3(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC3(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x8F1BBCDC)
// ROUND4: a type-4 round: SHUFFLE the schedule, apply FUNC4 (same as
// FUNC2, b^c^d) and mix with constant 0xCA62C1D6.
// Used for rounds 60-79. Clobbers R8, R9, R10.
#define ROUND4(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC4(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0xCA62C1D6)
// block(dig, p): run the 80-round compression function over every full
// 64-byte chunk of p, updating the five 32-bit state words that start at
// the address in dig. The constants and round structure are those of SHA-1.
//
// Arguments are read from the frame as 32-bit values:
//   dig    at 0(FP)  pointer to the state words
//   p_base at 4(FP)  start of the input bytes
//   p_len  at 8(FP)  input length in bytes
// The length is rounded down to a multiple of 64, so trailing bytes are
// ignored; if no full chunk remains the state is left untouched.
//
// The 64-byte local frame is the 16-word circular message-schedule buffer
// written by LOAD and SHUFFLE.
//
// Register roles:
//   R14                 pointer to the state words
//   SI                  start of the chunk being processed
//   DI                  end of the last full chunk
//   AX, BX, CX, DX, R13 working variables a, b, c, d, e
//   R8, R9, R10         scratch used by the round macros
//
// The per-round rotation of (a, b, c, d, e) is done by rotating the
// macro arguments from one round to the next rather than by moving
// registers, which is why the argument list repeats every five rounds.
//
// NOTE(review): the declared argument size is 32 bytes, but only offsets
// 0-11 are read here — confirm it matches the Go declaration.
TEXT ·block(SB),NOSPLIT,$64-32
        MOVL    dig+0(FP),      R14
        MOVL    p_base+4(FP),   SI
        MOVL    p_len+8(FP),    DX
        // Round the length down to a multiple of 64 (clear the low 6 bits).
        SHRQ    $6,             DX
        SHLQ    $6,             DX
        
        // DI = address one past the last full chunk. DX is free for reuse
        // as working variable d after this point.
        LEAQ    (SI)(DX*1),     DI
        // Load the current state into the working variables.
        MOVL    (0*4)(R14),     AX
        MOVL    (1*4)(R14),     BX
        MOVL    (2*4)(R14),     CX
        MOVL    (3*4)(R14),     DX
        MOVL    (4*4)(R14),     R13
        // Nothing to do when there is no full chunk.
        CMPQ    SI,             DI
        JEQ     end
loop:
        // Within the rounds BP is a preprocessor alias for R13, so the fifth
        // working variable lives in R13 and the hardware BP register is never
        // touched.
        // NOTE(review): presumably this keeps the round list textually the
        // same as a sibling variant that uses the real BP — confirm.
#define BP R13 
        // Rounds 0-15: type 1, message words loaded from the input chunk.
        ROUND1(AX, BX, CX, DX, BP, 0)
        ROUND1(BP, AX, BX, CX, DX, 1)
        ROUND1(DX, BP, AX, BX, CX, 2)
        ROUND1(CX, DX, BP, AX, BX, 3)
        ROUND1(BX, CX, DX, BP, AX, 4)
        ROUND1(AX, BX, CX, DX, BP, 5)
        ROUND1(BP, AX, BX, CX, DX, 6)
        ROUND1(DX, BP, AX, BX, CX, 7)
        ROUND1(CX, DX, BP, AX, BX, 8)
        ROUND1(BX, CX, DX, BP, AX, 9)
        ROUND1(AX, BX, CX, DX, BP, 10)
        ROUND1(BP, AX, BX, CX, DX, 11)
        ROUND1(DX, BP, AX, BX, CX, 12)
        ROUND1(CX, DX, BP, AX, BX, 13)
        ROUND1(BX, CX, DX, BP, AX, 14)
        ROUND1(AX, BX, CX, DX, BP, 15)
        // Rounds 16-19: type 1, message words derived from the schedule buffer.
        ROUND1x(BP, AX, BX, CX, DX, 16)
        ROUND1x(DX, BP, AX, BX, CX, 17)
        ROUND1x(CX, DX, BP, AX, BX, 18)
        ROUND1x(BX, CX, DX, BP, AX, 19)
        
        // Rounds 20-39: type 2.
        ROUND2(AX, BX, CX, DX, BP, 20)
        ROUND2(BP, AX, BX, CX, DX, 21)
        ROUND2(DX, BP, AX, BX, CX, 22)
        ROUND2(CX, DX, BP, AX, BX, 23)
        ROUND2(BX, CX, DX, BP, AX, 24)
        ROUND2(AX, BX, CX, DX, BP, 25)
        ROUND2(BP, AX, BX, CX, DX, 26)
        ROUND2(DX, BP, AX, BX, CX, 27)
        ROUND2(CX, DX, BP, AX, BX, 28)
        ROUND2(BX, CX, DX, BP, AX, 29)
        ROUND2(AX, BX, CX, DX, BP, 30)
        ROUND2(BP, AX, BX, CX, DX, 31)
        ROUND2(DX, BP, AX, BX, CX, 32)
        ROUND2(CX, DX, BP, AX, BX, 33)
        ROUND2(BX, CX, DX, BP, AX, 34)
        ROUND2(AX, BX, CX, DX, BP, 35)
        ROUND2(BP, AX, BX, CX, DX, 36)
        ROUND2(DX, BP, AX, BX, CX, 37)
        ROUND2(CX, DX, BP, AX, BX, 38)
        ROUND2(BX, CX, DX, BP, AX, 39)
        
        // Rounds 40-59: type 3.
        ROUND3(AX, BX, CX, DX, BP, 40)
        ROUND3(BP, AX, BX, CX, DX, 41)
        ROUND3(DX, BP, AX, BX, CX, 42)
        ROUND3(CX, DX, BP, AX, BX, 43)
        ROUND3(BX, CX, DX, BP, AX, 44)
        ROUND3(AX, BX, CX, DX, BP, 45)
        ROUND3(BP, AX, BX, CX, DX, 46)
        ROUND3(DX, BP, AX, BX, CX, 47)
        ROUND3(CX, DX, BP, AX, BX, 48)
        ROUND3(BX, CX, DX, BP, AX, 49)
        ROUND3(AX, BX, CX, DX, BP, 50)
        ROUND3(BP, AX, BX, CX, DX, 51)
        ROUND3(DX, BP, AX, BX, CX, 52)
        ROUND3(CX, DX, BP, AX, BX, 53)
        ROUND3(BX, CX, DX, BP, AX, 54)
        ROUND3(AX, BX, CX, DX, BP, 55)
        ROUND3(BP, AX, BX, CX, DX, 56)
        ROUND3(DX, BP, AX, BX, CX, 57)
        ROUND3(CX, DX, BP, AX, BX, 58)
        ROUND3(BX, CX, DX, BP, AX, 59)
        
        // Rounds 60-79: type 4.
        ROUND4(AX, BX, CX, DX, BP, 60)
        ROUND4(BP, AX, BX, CX, DX, 61)
        ROUND4(DX, BP, AX, BX, CX, 62)
        ROUND4(CX, DX, BP, AX, BX, 63)
        ROUND4(BX, CX, DX, BP, AX, 64)
        ROUND4(AX, BX, CX, DX, BP, 65)
        ROUND4(BP, AX, BX, CX, DX, 66)
        ROUND4(DX, BP, AX, BX, CX, 67)
        ROUND4(CX, DX, BP, AX, BX, 68)
        ROUND4(BX, CX, DX, BP, AX, 69)
        ROUND4(AX, BX, CX, DX, BP, 70)
        ROUND4(BP, AX, BX, CX, DX, 71)
        ROUND4(DX, BP, AX, BX, CX, 72)
        ROUND4(CX, DX, BP, AX, BX, 73)
        ROUND4(BX, CX, DX, BP, AX, 74)
        ROUND4(AX, BX, CX, DX, BP, 75)
        ROUND4(BP, AX, BX, CX, DX, 76)
        ROUND4(DX, BP, AX, BX, CX, 77)
        ROUND4(CX, DX, BP, AX, BX, 78)
        ROUND4(BX, CX, DX, BP, AX, 79)
#undef BP
        // After 80 rounds (a multiple of 5) the working variables are back in
        // their starting registers. Add the previous state to them...
        ADDL    (0*4)(R14), AX
        ADDL    (1*4)(R14), BX
        ADDL    (2*4)(R14), CX
        ADDL    (3*4)(R14), DX
        ADDL    (4*4)(R14), R13
        // ...and store the sums back as the new state. The registers keep the
        // new state too, so the next iteration needs no reload.
        MOVL    AX, (0*4)(R14)
        MOVL    BX, (1*4)(R14)
        MOVL    CX, (2*4)(R14)
        MOVL    DX, (3*4)(R14)
        MOVL    R13, (4*4)(R14)
        // Advance to the next chunk; continue while SI < DI (unsigned).
        ADDQ    $64, SI
        CMPQ    SI, DI
        JB      loop
end:
        RET