root/src/pkg/crypto/sha1/sha1block_amd64p32.s

// Copyright 2013 The Go Authors.  All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#include "../../../cmd/ld/textflag.h"

// SHA1 block routine. See sha1block.go for Go equivalent.
//
// There are 80 rounds of 4 types:
//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
//
// Each round loads or shuffles the data, then computes a per-round
// function of b, c, d, and then mixes the result into and rotates the
// five registers a, b, c, d, e holding the intermediate results.
//
// The register rotation is implemented by rotating the arguments to
// the round macros instead of by explicit move instructions.
//
// amd64p32 version.
// To ensure safety for Native Client, avoids use of BP and R15
// as well as two-register addressing modes.
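//
// For reference, a rough Go sketch of the per-round boolean functions and the
// mixing step that the macros below implement (the names f1, f2, f3 here are
// illustrative only; see sha1block.go for the real Go code):
//
//      func f1(b, c, d uint32) uint32 { return (b & c) | (^b & d) }          // rounds 0-19 (Ch)
//      func f2(b, c, d uint32) uint32 { return b ^ c ^ d }                   // rounds 20-39, 60-79 (parity)
//      func f3(b, c, d uint32) uint32 { return (b & c) | (b & d) | (c & d) } // rounds 40-59 (Maj)
//
// and each round, before the register renaming, computes:
//
//      e += (a<<5 | a>>27) + f(b, c, d) + w[i&0xf] + K
//      b = b<<30 | b>>2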

// LOAD reads message word 'index' from the block at SI, byte-swaps it
// (SHA-1 message words are big-endian), and stores it in the 16-word
// scratch area on the stack.
#define LOAD(index) \
        MOVL    (index*4)(SI), R10; \
        BSWAPL  R10; \
        MOVL    R10, (index*4)(SP)

// SHUFFLE computes message schedule word 'index' in place:
//      w[i&0xf] = rotl1(w[(i-3)&0xf] ^ w[(i-8)&0xf] ^ w[(i-14)&0xf] ^ w[(i-16)&0xf])
// The 16-word window on the stack is a circular buffer, so slot i&0xf still
// holds w[i-16] when the new word is computed.
#define SHUFFLE(index) \
        MOVL    (((index)&0xf)*4)(SP), R10; \
        XORL    (((index-3)&0xf)*4)(SP), R10; \
        XORL    (((index-8)&0xf)*4)(SP), R10; \
        XORL    (((index-14)&0xf)*4)(SP), R10; \
        ROLL    $1, R10; \
        MOVL    R10, (((index)&0xf)*4)(SP)

// FUNC1 leaves Ch(b, c, d) = (b AND c) OR (NOT b AND d) in R9,
// computed as d XOR (b AND (c XOR d)).
#define FUNC1(a, b, c, d, e) \
        MOVL    d, R9; \
        XORL    c, R9; \
        ANDL    b, R9; \
        XORL    d, R9

// FUNC2 leaves the parity function b XOR c XOR d in R9.
#define FUNC2(a, b, c, d, e) \
        MOVL    b, R9; \
        XORL    c, R9; \
        XORL    d, R9

// FUNC3 leaves Maj(b, c, d) = (b AND c) OR (b AND d) OR (c AND d) in R9,
// computed as (b AND c) OR ((b OR c) AND d); R8 is scratch.
#define FUNC3(a, b, c, d, e) \
        MOVL    b, R8; \
        ORL     c, R8; \
        ANDL    d, R8; \
        MOVL    b, R9; \
        ANDL    c, R9; \
        ORL     R8, R9
        
// FUNC4, used in rounds 60-79, reuses the parity function.
#define FUNC4 FUNC2

// MIX folds one round into the state:
//      e += rotl5(a) + R9 (boolean function) + R10 (message word) + const
//      b = rotl30(b)
#define MIX(a, b, c, d, e, const) \
        ROLL    $30, b; \
        ADDL    R9, e; \
        MOVL    a, R8; \
        ROLL    $5, R8; \
        LEAL    const(e)(R10*1), e; \
        ADDL    R8, e

// Each ROUND macro performs one complete round: ROUND1 loads a message word,
// the others recompute one via SHUFFLE, then the round's boolean function and
// MIX are applied with that range's constant.
#define ROUND1(a, b, c, d, e, index) \
        LOAD(index); \
        FUNC1(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x5A827999)

#define ROUND1x(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC1(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x5A827999)

#define ROUND2(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC2(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x6ED9EBA1)

#define ROUND3(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC3(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0x8F1BBCDC)

#define ROUND4(a, b, c, d, e, index) \
        SHUFFLE(index); \
        FUNC4(a, b, c, d, e); \
        MIX(a, b, c, d, e, 0xCA62C1D6)

// func block(dig *digest, p []byte)
//
// 64 bytes of locals hold the 16-word message schedule; the arguments take
// 16 bytes on amd64p32 (4-byte dig pointer plus 12-byte slice header), hence
// the 32-bit loads at offsets 0, 4 and 8 below.
TEXT ·block(SB),NOSPLIT,$64-16
        MOVL    dig+0(FP),      R14
        MOVL    p_base+4(FP),   SI
        MOVL    p_len+8(FP),    DX
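        // Round the length down to a multiple of the 64-byte block size;
        // SI and DI then bound the data to process, and DX is free to be
        // reloaded as the d state word.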
        SHRQ    $6,             DX
        SHLQ    $6,             DX
        
        LEAQ    (SI)(DX*1),     DI
        MOVL    (0*4)(R14),     AX
        MOVL    (1*4)(R14),     BX
        MOVL    (2*4)(R14),     CX
        MOVL    (3*4)(R14),     DX
        MOVL    (4*4)(R14),     R13

        CMPQ    SI,             DI
        JEQ     end

loop:
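// One 64-byte block per iteration; a, b, c, d, e live in AX, BX, CX, DX, R13.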
#define BP R13 /* keep diff from sha1block_amd64.s small */
        ROUND1(AX, BX, CX, DX, BP, 0)
        ROUND1(BP, AX, BX, CX, DX, 1)
        ROUND1(DX, BP, AX, BX, CX, 2)
        ROUND1(CX, DX, BP, AX, BX, 3)
        ROUND1(BX, CX, DX, BP, AX, 4)
        ROUND1(AX, BX, CX, DX, BP, 5)
        ROUND1(BP, AX, BX, CX, DX, 6)
        ROUND1(DX, BP, AX, BX, CX, 7)
        ROUND1(CX, DX, BP, AX, BX, 8)
        ROUND1(BX, CX, DX, BP, AX, 9)
        ROUND1(AX, BX, CX, DX, BP, 10)
        ROUND1(BP, AX, BX, CX, DX, 11)
        ROUND1(DX, BP, AX, BX, CX, 12)
        ROUND1(CX, DX, BP, AX, BX, 13)
        ROUND1(BX, CX, DX, BP, AX, 14)
        ROUND1(AX, BX, CX, DX, BP, 15)

        ROUND1x(BP, AX, BX, CX, DX, 16)
        ROUND1x(DX, BP, AX, BX, CX, 17)
        ROUND1x(CX, DX, BP, AX, BX, 18)
        ROUND1x(BX, CX, DX, BP, AX, 19)
        
        ROUND2(AX, BX, CX, DX, BP, 20)
        ROUND2(BP, AX, BX, CX, DX, 21)
        ROUND2(DX, BP, AX, BX, CX, 22)
        ROUND2(CX, DX, BP, AX, BX, 23)
        ROUND2(BX, CX, DX, BP, AX, 24)
        ROUND2(AX, BX, CX, DX, BP, 25)
        ROUND2(BP, AX, BX, CX, DX, 26)
        ROUND2(DX, BP, AX, BX, CX, 27)
        ROUND2(CX, DX, BP, AX, BX, 28)
        ROUND2(BX, CX, DX, BP, AX, 29)
        ROUND2(AX, BX, CX, DX, BP, 30)
        ROUND2(BP, AX, BX, CX, DX, 31)
        ROUND2(DX, BP, AX, BX, CX, 32)
        ROUND2(CX, DX, BP, AX, BX, 33)
        ROUND2(BX, CX, DX, BP, AX, 34)
        ROUND2(AX, BX, CX, DX, BP, 35)
        ROUND2(BP, AX, BX, CX, DX, 36)
        ROUND2(DX, BP, AX, BX, CX, 37)
        ROUND2(CX, DX, BP, AX, BX, 38)
        ROUND2(BX, CX, DX, BP, AX, 39)
        
        ROUND3(AX, BX, CX, DX, BP, 40)
        ROUND3(BP, AX, BX, CX, DX, 41)
        ROUND3(DX, BP, AX, BX, CX, 42)
        ROUND3(CX, DX, BP, AX, BX, 43)
        ROUND3(BX, CX, DX, BP, AX, 44)
        ROUND3(AX, BX, CX, DX, BP, 45)
        ROUND3(BP, AX, BX, CX, DX, 46)
        ROUND3(DX, BP, AX, BX, CX, 47)
        ROUND3(CX, DX, BP, AX, BX, 48)
        ROUND3(BX, CX, DX, BP, AX, 49)
        ROUND3(AX, BX, CX, DX, BP, 50)
        ROUND3(BP, AX, BX, CX, DX, 51)
        ROUND3(DX, BP, AX, BX, CX, 52)
        ROUND3(CX, DX, BP, AX, BX, 53)
        ROUND3(BX, CX, DX, BP, AX, 54)
        ROUND3(AX, BX, CX, DX, BP, 55)
        ROUND3(BP, AX, BX, CX, DX, 56)
        ROUND3(DX, BP, AX, BX, CX, 57)
        ROUND3(CX, DX, BP, AX, BX, 58)
        ROUND3(BX, CX, DX, BP, AX, 59)
        
        ROUND4(AX, BX, CX, DX, BP, 60)
        ROUND4(BP, AX, BX, CX, DX, 61)
        ROUND4(DX, BP, AX, BX, CX, 62)
        ROUND4(CX, DX, BP, AX, BX, 63)
        ROUND4(BX, CX, DX, BP, AX, 64)
        ROUND4(AX, BX, CX, DX, BP, 65)
        ROUND4(BP, AX, BX, CX, DX, 66)
        ROUND4(DX, BP, AX, BX, CX, 67)
        ROUND4(CX, DX, BP, AX, BX, 68)
        ROUND4(BX, CX, DX, BP, AX, 69)
        ROUND4(AX, BX, CX, DX, BP, 70)
        ROUND4(BP, AX, BX, CX, DX, 71)
        ROUND4(DX, BP, AX, BX, CX, 72)
        ROUND4(CX, DX, BP, AX, BX, 73)
        ROUND4(BX, CX, DX, BP, AX, 74)
        ROUND4(AX, BX, CX, DX, BP, 75)
        ROUND4(BP, AX, BX, CX, DX, 76)
        ROUND4(DX, BP, AX, BX, CX, 77)
        ROUND4(CX, DX, BP, AX, BX, 78)
        ROUND4(BX, CX, DX, BP, AX, 79)
#undef BP
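        // Fold this block's a, b, c, d, e back into the digest state at R14,
        // then advance SI to the next block.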

        ADDL    (0*4)(R14), AX
        ADDL    (1*4)(R14), BX
        ADDL    (2*4)(R14), CX
        ADDL    (3*4)(R14), DX
        ADDL    (4*4)(R14), R13

        MOVL    AX, (0*4)(R14)
        MOVL    BX, (1*4)(R14)
        MOVL    CX, (2*4)(R14)
        MOVL    DX, (3*4)(R14)
        MOVL    R13, (4*4)(R14)

        ADDQ    $64, SI
        CMPQ    SI, DI
        JB      loop

end:
        RET
