/*
 * Copyright 2014-2015 pooler@litecoinpool.org
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version. See COPYING for more details.
 */

#include "cpuminer-config.h"

#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))

#ifdef __APPLE__

#define HI(name) ha16(name)
#define LO(name) lo16(name)

#else

#define HI(name) name@ha
#define LO(name) name@l

#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31

#ifdef __ALTIVEC__
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif

#endif

/*
 * On 32-bit targets, map the 64-bit load/store mnemonics used below for
 * saving and restoring non-volatile registers to their 32-bit equivalents.
 */
#if !(defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || \
      defined(__64BIT__) || defined(_LP64) || defined(__LP64__))
#define ld lwz
#define std stw
#define stdu stwu
#define stdux stwux
#endif


/* SHA-256 initial hash values H0..H7. */
.data
.align 2
sha256_h:
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19

/* SHA-256 round constants K[0..63]. */
.data
.align 2
sha256_k:
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2

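/*
 * sha256_extend_doubleround: computes two message-schedule words per call,
 *   W[i+16] = sigma1(W[i+14]) + W[i+9] + sigma0(W[i+1]) + W[i]
 *   W[i+17] = sigma1(W[i+15]) + W[i+10] + sigma0(W[i+2]) + W[i+1]
 * with sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3) and
 *      sigma1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10).
 * \rw+\wo addresses the W array; \ry/\rz hold W[i+14]/W[i+15], \ra/\rb hold
 * W[i+9]/W[i+10] on entry and W[i+16]/W[i+17] on exit; r11 carries W[i] in
 * and W[i+2] out; r12-r14 are scratch.
 */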
.macro sha256_extend_doubleround i, rw, wo, ra, rb, ry, rz
lwz r14, \wo+(\i+1)*4(\rw)
rotrwi r12, \ry, 17
rotrwi r13, \ry, 19
add r11, r11, \ra
xor r12, r12, r13
srwi r13, \ry, 10
rotrwi \ra, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
add r12, r12, r11
xor \ra, \ra, r13
srwi r13, r14, 3
lwz r11, \wo+(\i+2)*4(\rw)
xor \ra, \ra, r13
rotrwi r13, \rz, 19
add \ra, \ra, r12

rotrwi r12, \rz, 17
add r14, r14, \rb
xor r12, r12, r13
srwi r13, \rz, 10
rotrwi \rb, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
stw \ra, \wo+(\i+16)*4(\rw)
xor \rb, \rb, r13
srwi r13, r11, 3
add r14, r14, r12
xor \rb, \rb, r13
add \rb, \rb, r14
stw \rb, \wo+(\i+17)*4(\rw)
.endm

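/*
 * sha256_main_round: one compression round,
 *   T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i]
 *   T2 = Sigma0(a) + Maj(a,b,c)
 * leaving d + T1 (the new e) in \rh and T1 + T2 (the new a) in \rd.
 * Sigma1(e) = ror(e,6) ^ ror(e,11) ^ ror(e,25) is evaluated as
 * ror(e ^ ror(e,5) ^ ror(e,19), 6), and Sigma0(a) analogously with
 * rotations by 11, 20 and 2.  sha256_main_quadround applies four rounds,
 * rotating the register assignments instead of moving the values.
 */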
.macro sha256_main_round i, rk, rw, wo, ra, rb, rc, rd, re, rf, rg, rh
lwz r12, \wo+(\i)*4(\rw)
and r13, \rf, \re
andc r14, \rg, \re
lwz r15, (\i)*4(\rk)
or r14, r14, r13
rotrwi r13, \re, 5
add \rh, \rh, r14
xor r14, \re, r13
rotrwi r13, \re, 19
add \rh, \rh, r12
xor r14, r14, r13
add \rh, \rh, r15
rotrwi r13, r14, 6
xor r15, \ra, \rb
add \rh, \rh, r13

rotrwi r13, \ra, 11
and r15, r15, \rc
xor r12, \ra, r13
rotrwi r13, \ra, 20
and r14, \ra, \rb
xor r12, r12, r13
xor r14, r14, r15
rotrwi r13, r12, 2
add r15, \rh, r14
add \rh, \rh, \rd
add \rd, r15, r13
.endm

.macro sha256_main_quadround i, rk, rw, wo
sha256_main_round \i+0, \rk, \rw, \wo, r4, r5, r6, r7, r8, r9, r10, r11
sha256_main_round \i+1, \rk, \rw, \wo, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round \i+2, \rk, \rw, \wo, r6, r7, r4, r5, r10, r11, r8, r9
sha256_main_round \i+3, \rk, \rw, \wo, r5, r6, r7, r4, r9, r10, r11, r8
.endm

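/*
 * sha256_transform: compress one 64-byte block into the eight-word state.
 *   r3 = state, r4 = block, r5 = swap flag.
 * If the swap flag is nonzero the message words are loaded byte-reversed
 * (lwbrx); the expanded 64-word schedule W lives on the stack at r1+8*4.
 */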
.text
|
|
.align 2
|
|
.globl sha256_transform
|
|
.globl _sha256_transform
|
|
#ifdef __ELF__
|
|
.type sha256_transform, %function
|
|
#endif
|
|
sha256_transform:
|
|
_sha256_transform:
|
|
stdu r1, -76*4(r1)
|
|
cmpwi 0, r5, 0
|
|
std r13, 2*4(r1)
|
|
std r14, 4*4(r1)
|
|
std r15, 6*4(r1)
|
|
std r16, 72*4(r1)
|
|
|
|
bne 0, sha256_transform_swap
|
|
|
|
lwz r11, 0*4(r4)
|
|
lwz r14, 1*4(r4)
|
|
lwz r15, 2*4(r4)
|
|
lwz r7, 3*4(r4)
|
|
lwz r8, 4*4(r4)
|
|
lwz r9, 5*4(r4)
|
|
lwz r10, 6*4(r4)
|
|
lwz r0, 7*4(r4)
|
|
lwz r12, 8*4(r4)
|
|
lwz r13, 9*4(r4)
|
|
lwz r5, 10*4(r4)
|
|
lwz r6, 11*4(r4)
|
|
stw r11, 8*4+0*4(r1)
|
|
stw r14, 8*4+1*4(r1)
|
|
stw r15, 8*4+2*4(r1)
|
|
stw r7, 8*4+3*4(r1)
|
|
stw r8, 8*4+4*4(r1)
|
|
stw r9, 8*4+5*4(r1)
|
|
stw r10, 8*4+6*4(r1)
|
|
stw r0, 8*4+7*4(r1)
|
|
stw r12, 8*4+8*4(r1)
|
|
stw r13, 8*4+9*4(r1)
|
|
stw r5, 8*4+10*4(r1)
|
|
stw r6, 8*4+11*4(r1)
|
|
lwz r7, 12*4(r4)
|
|
lwz r8, 13*4(r4)
|
|
lwz r9, 14*4(r4)
|
|
lwz r10, 15*4(r4)
|
|
mr r4, r13
|
|
stw r7, 8*4+12*4(r1)
|
|
stw r8, 8*4+13*4(r1)
|
|
stw r9, 8*4+14*4(r1)
|
|
stw r10, 8*4+15*4(r1)
|
|
b sha256_transform_extend
|
|
|
|
sha256_transform_swap:
|
|
li r13, 1*4
|
|
li r14, 2*4
|
|
li r15, 3*4
|
|
lwbrx r11, 0, r4
|
|
lwbrx r7, r4, r13
|
|
lwbrx r8, r4, r14
|
|
lwbrx r9, r4, r15
|
|
addi r4, r4, 4*4
|
|
stw r11, 8*4+0*4(r1)
|
|
stw r7, 8*4+1*4(r1)
|
|
stw r8, 8*4+2*4(r1)
|
|
stw r9, 8*4+3*4(r1)
|
|
lwbrx r7, 0, r4
|
|
lwbrx r8, r4, r13
|
|
lwbrx r9, r4, r14
|
|
lwbrx r10, r4, r15
|
|
addi r4, r4, 4*4
|
|
stw r7, 8*4+4*4(r1)
|
|
stw r8, 8*4+5*4(r1)
|
|
stw r9, 8*4+6*4(r1)
|
|
stw r10, 8*4+7*4(r1)
|
|
lwbrx r8, 0, r4
|
|
lwbrx r12, r4, r13
|
|
lwbrx r5, r4, r14
|
|
lwbrx r6, r4, r15
|
|
addi r4, r4, 4*4
|
|
stw r8, 8*4+8*4(r1)
|
|
stw r12, 8*4+9*4(r1)
|
|
stw r5, 8*4+10*4(r1)
|
|
stw r6, 8*4+11*4(r1)
|
|
lwbrx r7, 0, r4
|
|
lwbrx r8, r4, r13
|
|
lwbrx r9, r4, r14
|
|
lwbrx r10, r4, r15
|
|
mr r4, r12
|
|
stw r7, 8*4+12*4(r1)
|
|
stw r8, 8*4+13*4(r1)
|
|
stw r9, 8*4+14*4(r1)
|
|
stw r10, 8*4+15*4(r1)
|
|
|
|
sha256_transform_extend:
|
|
sha256_extend_doubleround 0, r1, 8*4, r4, r5, r9, r10
|
|
sha256_extend_doubleround 2, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 4, r1, 8*4, r8, r9, r6, r7
|
|
sha256_extend_doubleround 6, r1, 8*4, r10, r4, r8, r9
|
|
sha256_extend_doubleround 8, r1, 8*4, r5, r6, r10, r4
|
|
sha256_extend_doubleround 10, r1, 8*4, r7, r8, r5, r6
|
|
sha256_extend_doubleround 12, r1, 8*4, r9, r10, r7, r8
|
|
sha256_extend_doubleround 14, r1, 8*4, r4, r5, r9, r10
|
|
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
|
|
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
|
|
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
|
|
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
|
|
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
|
|
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
|
|
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
|
|
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
|
|
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
|
|
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
|
|
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
|
|
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
|
|
sha256_extend_doubleround 44, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 46, r1, 8*4, r8, r9, r6, r7
|
|
|
|
lwz r4, 0*4(r3)
|
|
lwz r5, 1*4(r3)
|
|
lwz r6, 2*4(r3)
|
|
lwz r7, 3*4(r3)
|
|
lwz r8, 4*4(r3)
|
|
lwz r9, 5*4(r3)
|
|
lwz r10, 6*4(r3)
|
|
lwz r11, 7*4(r3)
|
|
lis r16, HI(sha256_k)
|
|
addi r16, r16, LO(sha256_k)
|
|
sha256_main_quadround 0, r16, r1, 8*4
|
|
sha256_main_quadround 4, r16, r1, 8*4
|
|
sha256_main_quadround 8, r16, r1, 8*4
|
|
sha256_main_quadround 12, r16, r1, 8*4
|
|
sha256_main_quadround 16, r16, r1, 8*4
|
|
sha256_main_quadround 20, r16, r1, 8*4
|
|
sha256_main_quadround 24, r16, r1, 8*4
|
|
sha256_main_quadround 28, r16, r1, 8*4
|
|
sha256_main_quadround 32, r16, r1, 8*4
|
|
sha256_main_quadround 36, r16, r1, 8*4
|
|
sha256_main_quadround 40, r16, r1, 8*4
|
|
sha256_main_quadround 44, r16, r1, 8*4
|
|
sha256_main_quadround 48, r16, r1, 8*4
|
|
sha256_main_quadround 52, r16, r1, 8*4
|
|
sha256_main_quadround 56, r16, r1, 8*4
|
|
sha256_main_quadround 60, r16, r1, 8*4
|
|
|
|
lwz r12, 0*4(r3)
|
|
lwz r13, 1*4(r3)
|
|
lwz r14, 2*4(r3)
|
|
lwz r15, 3*4(r3)
|
|
add r4, r4, r12
|
|
add r5, r5, r13
|
|
add r6, r6, r14
|
|
add r7, r7, r15
|
|
stw r4, 0*4(r3)
|
|
stw r5, 1*4(r3)
|
|
stw r6, 2*4(r3)
|
|
stw r7, 3*4(r3)
|
|
lwz r12, 4*4(r3)
|
|
lwz r13, 5*4(r3)
|
|
lwz r14, 6*4(r3)
|
|
lwz r15, 7*4(r3)
|
|
add r8, r8, r12
|
|
add r9, r9, r13
|
|
add r10, r10, r14
|
|
add r11, r11, r15
|
|
stw r8, 4*4(r3)
|
|
stw r9, 5*4(r3)
|
|
stw r10, 6*4(r3)
|
|
stw r11, 7*4(r3)
|
|
|
|
ld r13, 2*4(r1)
|
|
ld r14, 4*4(r1)
|
|
ld r15, 6*4(r1)
|
|
ld r16, 72*4(r1)
|
|
addi r1, r1, 76*4
|
|
blr
|
|
|
|
|
|
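/*
 * sha256d_ms: specialized double SHA-256 for mining.
 *   r3 = hash, r4 = data, r5 = midstate, r6 = prehash.
 * The first transform reuses schedule words precomputed in data[] and
 * starts from the prehashed state; the second transform hashes the 32-byte
 * result plus fixed padding, and the reduced final rounds compute only
 * hash[7], which is stored to 7*4(r3).
 */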
.text
|
|
.align 2
|
|
.globl sha256d_ms
|
|
.globl _sha256d_ms
|
|
#ifdef __ELF__
|
|
.type sha256d_ms, %function
|
|
#endif
|
|
sha256d_ms:
|
|
_sha256d_ms:
|
|
stdu r1, -80*4(r1)
|
|
std r13, 2*4(r1)
|
|
std r14, 4*4(r1)
|
|
std r15, 6*4(r1)
|
|
std r16, 72*4(r1)
|
|
std r17, 74*4(r1)
|
|
std r18, 76*4(r1)
|
|
|
|
mr r17, r4
|
|
mr r18, r5
|
|
mr r16, r6
|
|
|
|
lwz r14, 3*4(r17)
|
|
lwz r6, 18*4(r17)
|
|
lwz r7, 19*4(r17)
|
|
|
|
rotrwi r12, r14, 7
|
|
rotrwi r13, r14, 18
|
|
stw r6, 8*4+18*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r14, 3
|
|
stw r7, 8*4+19*4(r1)
|
|
xor r12, r12, r13
|
|
lwz r8, 20*4(r17)
|
|
add r6, r6, r12
|
|
lwz r10, 22*4(r17)
|
|
add r7, r7, r14
|
|
stw r6, 18*4(r17)
|
|
|
|
rotrwi r12, r6, 17
|
|
rotrwi r13, r6, 19
|
|
stw r7, 19*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r6, 10
|
|
stw r8, 8*4+20*4(r1)
|
|
xor r12, r12, r13
|
|
lwz r4, 23*4(r17)
|
|
add r8, r8, r12
|
|
lwz r5, 24*4(r17)
|
|
|
|
rotrwi r9, r7, 17
|
|
rotrwi r13, r7, 19
|
|
stw r8, 20*4(r17)
|
|
xor r9, r9, r13
|
|
srwi r13, r7, 10
|
|
stw r10, 8*4+21*4(r1)
|
|
xor r9, r9, r13
|
|
stw r4, 8*4+22*4(r1)
|
|
|
|
rotrwi r12, r8, 17
|
|
rotrwi r13, r8, 19
|
|
stw r9, 21*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r8, 10
|
|
stw r5, 8*4+23*4(r1)
|
|
xor r12, r12, r13
|
|
rotrwi r14, r9, 17
|
|
rotrwi r13, r9, 19
|
|
add r10, r10, r12
|
|
lwz r11, 30*4(r17)
|
|
|
|
xor r14, r14, r13
|
|
srwi r13, r9, 10
|
|
stw r10, 22*4(r17)
|
|
xor r14, r14, r13
|
|
stw r11, 8*4+24*4(r1)
|
|
add r4, r4, r14
|
|
|
|
rotrwi r12, r10, 17
|
|
rotrwi r13, r10, 19
|
|
stw r4, 23*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r10, 10
|
|
rotrwi r14, r4, 17
|
|
xor r12, r12, r13
|
|
rotrwi r13, r4, 19
|
|
xor r14, r14, r13
|
|
srwi r13, r4, 10
|
|
add r5, r5, r12
|
|
xor r14, r14, r13
|
|
stw r5, 24*4(r17)
|
|
add r6, r6, r14
|
|
|
|
rotrwi r12, r5, 17
|
|
rotrwi r13, r5, 19
|
|
stw r6, 25*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r5, 10
|
|
rotrwi r14, r6, 17
|
|
xor r12, r12, r13
|
|
rotrwi r13, r6, 19
|
|
xor r14, r14, r13
|
|
srwi r13, r6, 10
|
|
add r7, r7, r12
|
|
xor r14, r14, r13
|
|
stw r7, 26*4(r17)
|
|
add r8, r8, r14
|
|
|
|
rotrwi r12, r7, 17
|
|
rotrwi r13, r7, 19
|
|
stw r8, 27*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r7, 10
|
|
rotrwi r14, r8, 17
|
|
xor r12, r12, r13
|
|
rotrwi r13, r8, 19
|
|
xor r14, r14, r13
|
|
srwi r13, r8, 10
|
|
add r9, r9, r12
|
|
xor r14, r14, r13
|
|
stw r9, 28*4(r17)
|
|
add r10, r10, r14
|
|
|
|
lwz r14, 31*4(r17)
|
|
rotrwi r12, r9, 17
|
|
rotrwi r13, r9, 19
|
|
stw r10, 29*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r9, 10
|
|
stw r14, 8*4+25*4(r1)
|
|
xor r12, r12, r13
|
|
add r11, r11, r12
|
|
add r5, r5, r14
|
|
rotrwi r12, r10, 17
|
|
rotrwi r13, r10, 19
|
|
add r4, r4, r11
|
|
|
|
lwz r11, 16*4(r17)
|
|
xor r12, r12, r13
|
|
srwi r13, r10, 10
|
|
stw r4, 30*4(r17)
|
|
xor r12, r12, r13
|
|
add r5, r5, r12
|
|
stw r5, 31*4(r17)
|
|
|
|
sha256_extend_doubleround 16, r17, 0, r6, r7, r4, r5
|
|
sha256_extend_doubleround 18, r17, 0, r8, r9, r6, r7
|
|
sha256_extend_doubleround 20, r17, 0, r10, r4, r8, r9
|
|
sha256_extend_doubleround 22, r17, 0, r5, r6, r10, r4
|
|
sha256_extend_doubleround 24, r17, 0, r7, r8, r5, r6
|
|
sha256_extend_doubleround 26, r17, 0, r9, r10, r7, r8
|
|
sha256_extend_doubleround 28, r17, 0, r4, r5, r9, r10
|
|
sha256_extend_doubleround 30, r17, 0, r6, r7, r4, r5
|
|
sha256_extend_doubleround 32, r17, 0, r8, r9, r6, r7
|
|
sha256_extend_doubleround 34, r17, 0, r10, r4, r8, r9
|
|
sha256_extend_doubleround 36, r17, 0, r5, r6, r10, r4
|
|
sha256_extend_doubleround 38, r17, 0, r7, r8, r5, r6
|
|
sha256_extend_doubleround 40, r17, 0, r9, r10, r7, r8
|
|
sha256_extend_doubleround 42, r17, 0, r4, r5, r9, r10
|
|
sha256_extend_doubleround 44, r17, 0, r6, r7, r4, r5
|
|
sha256_extend_doubleround 46, r17, 0, r8, r9, r6, r7
|
|
|
|
lwz r4, 0*4(r16)
|
|
lwz r9, 1*4(r16)
|
|
lwz r10, 2*4(r16)
|
|
lwz r11, 3*4(r16)
|
|
lwz r8, 4*4(r16)
|
|
lwz r5, 5*4(r16)
|
|
lwz r6, 6*4(r16)
|
|
lwz r7, 7*4(r16)
|
|
lis r16, HI(sha256_k)
|
|
addi r16, r16, LO(sha256_k)
|
|
|
|
sha256_main_round 3, r16, r17, 0, r5, r6, r7, r4, r9, r10, r11, r8
|
|
sha256_main_quadround 4, r16, r17, 0
|
|
sha256_main_quadround 8, r16, r17, 0
|
|
sha256_main_quadround 12, r16, r17, 0
|
|
sha256_main_quadround 16, r16, r17, 0
|
|
sha256_main_quadround 20, r16, r17, 0
|
|
sha256_main_quadround 24, r16, r17, 0
|
|
sha256_main_quadround 28, r16, r17, 0
|
|
sha256_main_quadround 32, r16, r17, 0
|
|
sha256_main_quadround 36, r16, r17, 0
|
|
sha256_main_quadround 40, r16, r17, 0
|
|
sha256_main_quadround 44, r16, r17, 0
|
|
sha256_main_quadround 48, r16, r17, 0
|
|
sha256_main_quadround 52, r16, r17, 0
|
|
sha256_main_quadround 56, r16, r17, 0
|
|
sha256_main_quadround 60, r16, r17, 0
|
|
|
|
lwz r12, 0*4(r18)
|
|
lwz r13, 1*4(r18)
|
|
lwz r14, 2*4(r18)
|
|
lwz r15, 3*4(r18)
|
|
add r4, r4, r12
|
|
add r5, r5, r13
|
|
add r6, r6, r14
|
|
add r7, r7, r15
|
|
stw r4, 8*4+0*4(r1)
|
|
stw r5, 8*4+1*4(r1)
|
|
stw r6, 8*4+2*4(r1)
|
|
stw r7, 8*4+3*4(r1)
|
|
lwz r12, 4*4(r18)
|
|
lwz r13, 5*4(r18)
|
|
lwz r14, 6*4(r18)
|
|
lwz r15, 7*4(r18)
|
|
add r8, r8, r12
|
|
add r9, r9, r13
|
|
add r10, r10, r14
|
|
add r11, r11, r15
|
|
stw r8, 8*4+4*4(r1)
|
|
stw r9, 8*4+5*4(r1)
|
|
stw r10, 8*4+6*4(r1)
|
|
stw r11, 8*4+7*4(r1)
|
|
|
|
lwz r4, 8*4+18*4(r1)
|
|
lwz r5, 8*4+19*4(r1)
|
|
lwz r6, 8*4+20*4(r1)
|
|
lwz r7, 8*4+21*4(r1)
|
|
lwz r8, 8*4+22*4(r1)
|
|
lwz r9, 8*4+23*4(r1)
|
|
lwz r10, 8*4+24*4(r1)
|
|
lwz r11, 8*4+25*4(r1)
|
|
stw r4, 18*4(r17)
|
|
stw r5, 19*4(r17)
|
|
stw r6, 20*4(r17)
|
|
stw r7, 22*4(r17)
|
|
stw r8, 23*4(r17)
|
|
stw r9, 24*4(r17)
|
|
stw r10, 30*4(r17)
|
|
stw r11, 31*4(r17)
|
|
|
|
lis r8, 0x8000
|
|
li r9, 0
|
|
li r10, 0x0100
|
|
|
|
lwz r14, 8*4+1*4(r1)
|
|
lwz r4, 8*4+0*4(r1)
|
|
|
|
lwz r11, 8*4+2*4(r1)
|
|
rotrwi r12, r14, 7
|
|
rotrwi r13, r14, 18
|
|
|
|
stw r8, 8*4+8*4(r1)
|
|
stw r9, 8*4+9*4(r1)
|
|
stw r9, 8*4+10*4(r1)
|
|
stw r9, 8*4+11*4(r1)
|
|
stw r9, 8*4+12*4(r1)
|
|
stw r9, 8*4+13*4(r1)
|
|
stw r9, 8*4+14*4(r1)
|
|
stw r10, 8*4+15*4(r1)
|
|
|
|
xor r12, r12, r13
|
|
srwi r13, r14, 3
|
|
addis r5, r14, 0x00a0
|
|
xor r12, r12, r13
|
|
rotrwi r14, r11, 7
|
|
rotrwi r13, r11, 18
|
|
add r4, r4, r12
|
|
xor r14, r14, r13
|
|
srwi r13, r11, 3
|
|
stw r4, 8*4+16*4(r1)
|
|
xor r14, r14, r13
|
|
rotrwi r12, r4, 17
|
|
rotrwi r13, r4, 19
|
|
add r5, r5, r14
|
|
lwz r14, 8*4+3*4(r1)
|
|
|
|
stw r5, 8*4+17*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r4, 10
|
|
rotrwi r6, r14, 7
|
|
xor r12, r12, r13
|
|
rotrwi r13, r14, 18
|
|
xor r6, r6, r13
|
|
srwi r13, r14, 3
|
|
add r11, r11, r12
|
|
xor r6, r6, r13
|
|
rotrwi r12, r5, 17
|
|
rotrwi r13, r5, 19
|
|
add r6, r6, r11
|
|
lwz r11, 8*4+4*4(r1)
|
|
|
|
stw r6, 8*4+18*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r5, 10
|
|
rotrwi r7, r11, 7
|
|
xor r12, r12, r13
|
|
rotrwi r13, r11, 18
|
|
xor r7, r7, r13
|
|
srwi r13, r11, 3
|
|
add r14, r14, r12
|
|
xor r7, r7, r13
|
|
rotrwi r12, r6, 17
|
|
rotrwi r13, r6, 19
|
|
add r7, r7, r14
|
|
lwz r14, 8*4+5*4(r1)
|
|
|
|
stw r7, 8*4+19*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r6, 10
|
|
rotrwi r8, r14, 7
|
|
xor r12, r12, r13
|
|
rotrwi r13, r14, 18
|
|
xor r8, r8, r13
|
|
srwi r13, r14, 3
|
|
add r11, r11, r12
|
|
xor r8, r8, r13
|
|
rotrwi r12, r7, 17
|
|
rotrwi r13, r7, 19
|
|
add r8, r8, r11
|
|
lwz r11, 8*4+6*4(r1)
|
|
|
|
stw r8, 8*4+20*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r7, 10
|
|
rotrwi r9, r11, 7
|
|
xor r12, r12, r13
|
|
rotrwi r13, r11, 18
|
|
xor r9, r9, r13
|
|
srwi r13, r11, 3
|
|
add r14, r14, r12
|
|
xor r9, r9, r13
|
|
rotrwi r12, r8, 17
|
|
rotrwi r13, r8, 19
|
|
add r9, r9, r14
|
|
lwz r14, 8*4+7*4(r1)
|
|
|
|
stw r9, 8*4+21*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r8, 10
|
|
rotrwi r10, r14, 7
|
|
xor r12, r12, r13
|
|
rotrwi r13, r14, 18
|
|
xor r10, r10, r13
|
|
srwi r13, r14, 3
|
|
add r11, r11, r12
|
|
xor r10, r10, r13
|
|
rotrwi r12, r9, 17
|
|
rotrwi r13, r9, 19
|
|
addi r11, r11, 0x0100
|
|
add r14, r14, r4
|
|
add r10, r10, r11
|
|
|
|
xor r12, r12, r13
|
|
srwi r13, r9, 10
|
|
stw r10, 8*4+22*4(r1)
|
|
addis r14, r14, 0x1100
|
|
xor r12, r12, r13
|
|
add r14, r14, r12
|
|
rotrwi r12, r10, 17
|
|
rotrwi r13, r10, 19
|
|
addi r4, r14, 0x2000
|
|
xor r12, r12, r13
|
|
srwi r13, r10, 10
|
|
stw r4, 8*4+23*4(r1)
|
|
addis r5, r5, 0x8000
|
|
xor r12, r12, r13
|
|
add r5, r5, r12
|
|
|
|
rotrwi r12, r4, 17
|
|
rotrwi r13, r4, 19
|
|
stw r5, 8*4+24*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r4, 10
|
|
rotrwi r11, r5, 17
|
|
xor r12, r12, r13
|
|
rotrwi r13, r5, 19
|
|
xor r11, r11, r13
|
|
srwi r13, r5, 10
|
|
add r6, r6, r12
|
|
xor r11, r11, r13
|
|
stw r6, 8*4+25*4(r1)
|
|
add r7, r7, r11
|
|
|
|
rotrwi r12, r6, 17
|
|
rotrwi r13, r6, 19
|
|
stw r7, 8*4+26*4(r1)
|
|
xor r12, r12, r13
|
|
srwi r13, r6, 10
|
|
rotrwi r11, r7, 17
|
|
xor r12, r12, r13
|
|
rotrwi r13, r7, 19
|
|
xor r11, r11, r13
|
|
srwi r13, r7, 10
|
|
add r8, r8, r12
|
|
xor r11, r11, r13
|
|
stw r8, 8*4+27*4(r1)
|
|
add r9, r9, r11
|
|
|
|
rotrwi r14, r8, 17
|
|
rotrwi r13, r8, 19
|
|
rotrwi r12, r9, 17
|
|
stw r9, 8*4+28*4(r1)
|
|
addis r4, r4, 0x0040
|
|
xor r14, r14, r13
|
|
rotrwi r13, r9, 19
|
|
xor r12, r12, r13
|
|
srwi r13, r8, 10
|
|
xor r14, r14, r13
|
|
srwi r13, r9, 10
|
|
xor r12, r12, r13
|
|
addi r4, r4, 0x0022
|
|
add r10, r10, r14
|
|
add r4, r4, r12
|
|
lwz r11, 8*4+16*4(r1)
|
|
|
|
addi r5, r5, 0x0100
|
|
stw r4, 8*4+30*4(r1)
|
|
rotrwi r14, r11, 7
|
|
stw r10, 8*4+29*4(r1)
|
|
rotrwi r13, r11, 18
|
|
rotrwi r12, r10, 17
|
|
xor r14, r14, r13
|
|
rotrwi r13, r10, 19
|
|
xor r12, r12, r13
|
|
srwi r13, r11, 3
|
|
xor r14, r14, r13
|
|
srwi r13, r10, 10
|
|
xor r12, r12, r13
|
|
add r5, r5, r14
|
|
add r5, r5, r12
|
|
stw r5, 8*4+31*4(r1)
|
|
|
|
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
|
|
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
|
|
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
|
|
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
|
|
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
|
|
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
|
|
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
|
|
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
|
|
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
|
|
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
|
|
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
|
|
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
|
|
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
|
|
|
|
lis r18, HI(sha256_h)
|
|
addi r18, r18, LO(sha256_h)
|
|
|
|
lwz r14, 8*4+(44+1)*4(r1)
|
|
rotrwi r12, r4, 17
|
|
rotrwi r13, r4, 19
|
|
add r15, r11, r6
|
|
rotrwi r6, r14, 7
|
|
rotrwi r11, r14, 18
|
|
xor r12, r12, r13
|
|
xor r6, r6, r11
|
|
|
|
lwz r8, 4*4(r18)
|
|
lwz r9, 5*4(r18)
|
|
lwz r10, 6*4(r18)
|
|
lwz r11, 7*4(r18)
|
|
|
|
srwi r13, r4, 10
|
|
srwi r14, r14, 3
|
|
xor r12, r12, r13
|
|
xor r6, r6, r14
|
|
add r12, r12, r15
|
|
add r6, r6, r12
|
|
stw r6, 8*4+(44+16)*4(r1)
|
|
|
|
lwz r4, 0*4(r18)
|
|
lwz r5, 1*4(r18)
|
|
lwz r6, 2*4(r18)
|
|
lwz r7, 3*4(r18)
|
|
|
|
sha256_main_quadround 0, r16, r1, 8*4
|
|
sha256_main_quadround 4, r16, r1, 8*4
|
|
sha256_main_quadround 8, r16, r1, 8*4
|
|
sha256_main_quadround 12, r16, r1, 8*4
|
|
sha256_main_quadround 16, r16, r1, 8*4
|
|
sha256_main_quadround 20, r16, r1, 8*4
|
|
sha256_main_quadround 24, r16, r1, 8*4
|
|
sha256_main_quadround 28, r16, r1, 8*4
|
|
sha256_main_quadround 32, r16, r1, 8*4
|
|
sha256_main_quadround 36, r16, r1, 8*4
|
|
sha256_main_quadround 40, r16, r1, 8*4
|
|
sha256_main_quadround 44, r16, r1, 8*4
|
|
sha256_main_quadround 48, r16, r1, 8*4
|
|
sha256_main_quadround 52, r16, r1, 8*4
|
|
sha256_main_round 56, r16, r1, 8*4, r4, r5, r6, r7, r8, r9, r10, r11
|
|
|
|
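/*
 * sha256_main_round_red: reduced round for the tail of sha256d_ms.  Only
 * h + d + Sigma1(e) + Ch(e,f,g) + K[i] + W[i] is needed, so the Sigma0 and
 * Maj terms are omitted.
 */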
.macro sha256_main_round_red i, rk, rw, wo, rd, re, rf, rg, rh
|
|
lwz r12, \wo+(\i)*4(\rw)
|
|
and r15, \rf, \re
|
|
andc r14, \rg, \re
|
|
add \rh, \rh, \rd
|
|
or r14, r14, r15
|
|
lwz r15, (\i)*4(\rk)
|
|
rotrwi r13, \re, 5
|
|
add \rh, \rh, r14
|
|
xor r14, \re, r13
|
|
rotrwi r13, \re, 19
|
|
add \rh, \rh, r12
|
|
xor r14, r14, r13
|
|
add \rh, \rh, r15
|
|
rotrwi r13, r14, 6
|
|
add \rh, \rh, r13
|
|
.endm
|
|
|
|
sha256_main_round_red 57, r16, r1, 8*4, r6, r11, r8, r9, r10
|
|
sha256_main_round_red 58, r16, r1, 8*4, r5, r10, r11, r8, r9
|
|
sha256_main_round_red 59, r16, r1, 8*4, r4, r9, r10, r11, r8
|
|
lwz r5, 7*4(r18)
|
|
sha256_main_round_red 60, r16, r1, 8*4, r7, r8, r9, r10, r11
|
|
|
|
add r11, r11, r5
|
|
stw r11, 7*4(r3)
|
|
|
|
ld r13, 2*4(r1)
|
|
ld r14, 4*4(r1)
|
|
ld r15, 6*4(r1)
|
|
ld r16, 72*4(r1)
|
|
ld r17, 74*4(r1)
|
|
ld r18, 76*4(r1)
|
|
addi r1, r1, 80*4
|
|
blr
|
|
|
|
|
|
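/*
 * 4-way SHA-256 using AltiVec: each 32-bit lane of a vector register holds
 * an independent hash, so four interleaved inputs are processed per call.
 */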
#ifdef __ALTIVEC__
|
|
|
|
#ifdef __APPLE__
|
|
.machine ppc7400
|
|
#endif
|
|
|
|
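/* sha256_4h/sha256_4k: initial state and round constants, one copy per 32-bit lane. */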
.data
|
|
.align 4
|
|
sha256_4h:
|
|
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
|
|
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
|
|
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
|
|
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
|
|
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
|
|
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
|
|
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
|
|
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
|
|
|
|
.data
|
|
.align 4
|
|
sha256_4k:
|
|
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
|
|
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
|
|
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
|
|
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
|
|
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
|
|
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
|
|
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
|
|
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
|
|
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
|
|
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
|
|
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
|
|
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
|
|
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
|
|
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
|
|
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
|
|
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
|
|
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
|
|
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
|
|
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
|
|
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
|
|
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
|
|
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
|
|
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
|
|
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
|
|
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
|
|
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
|
|
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
|
|
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
|
|
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
|
|
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
|
|
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
|
|
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
|
|
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
|
|
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
|
|
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
|
|
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
|
|
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
|
|
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
|
|
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
|
|
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
|
|
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
|
|
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
|
|
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
|
|
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
|
|
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
|
|
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
|
|
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
|
|
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
|
|
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
|
|
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
|
|
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
|
|
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
|
|
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
|
|
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
|
|
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
|
|
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
|
|
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
|
|
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
|
|
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
|
|
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
|
|
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
|
|
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
|
|
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
|
|
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
|
|
|
|
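/*
 * sha256d_4preext2: per-lane copies of the constants the scalar sha256d_ms
 * code adds with addis/addi (0x00a00000, 0x11002000, 0x80000000,
 * 0x00400022) when pre-extending the fixed padding words of the second
 * block.
 */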
.data
|
|
.align 4
|
|
sha256d_4preext2:
|
|
.long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
|
|
.long 0x11002000, 0x11002000, 0x11002000, 0x11002000
|
|
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000
|
|
.long 0x00400022, 0x00400022, 0x00400022, 0x00400022
|
|
|
|
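/* vperm control vector that byte-reverses each 32-bit word (the vector counterpart of lwbrx). */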
.data
|
|
.align 4
|
|
br_perm:
|
|
.long 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c
|
|
|
|
|
|
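/*
 * Rotate/shift amounts for the vector message extension.  vrlw rotates
 * left and uses only the low five bits of each element, so for example
 * vspltisw v1, -7 yields a left rotate by 25, i.e. a right rotate by 7.
 */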
.macro sha256_4way_extend_setup
|
|
vspltisw v0, 10
|
|
vspltisw v1, -7
|
|
vspltisw v16, 3
|
|
vspltisw v17, 15
|
|
vspltisw v18, 14
|
|
vspltisw v19, 13
|
|
.endm
|
|
|
|
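/*
 * Vector counterpart of sha256_extend_doubleround: r7/r8 index W[i+1] and
 * W[i+2], r10/r11 index W[i+16] and W[i+17], and \rw advances by two rows
 * (2*16 bytes) per invocation.
 */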
.macro sha256_4way_extend_doubleround i, rw, va, vb, vy, vz
|
|
lvx v14, \rw, r7
|
|
vrlw v12, \vy, v17
|
|
vrlw v13, \vy, v19
|
|
vadduwm v11, v11, \va
|
|
vxor v12, v12, v13
|
|
vsrw v13, \vy, v0
|
|
vrlw \va, v14, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v14, v18
|
|
vadduwm v12, v12, v11
|
|
vxor \va, \va, v13
|
|
vsrw v13, v14, v16
|
|
lvx v11, \rw, r8
|
|
vxor \va, \va, v13
|
|
vrlw v13, \vz, v19
|
|
vadduwm \va, \va, v12
|
|
|
|
vrlw v12, \vz, v17
|
|
vadduwm v14, v14, \vb
|
|
vxor v12, v12, v13
|
|
vsrw v13, \vz, v0
|
|
vrlw \vb, v11, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v11, v18
|
|
stvx \va, \rw, r10
|
|
vxor \vb, \vb, v13
|
|
vsrw v13, v11, v16
|
|
vadduwm v14, v14, v12
|
|
vxor \vb, \vb, v13
|
|
vadduwm \vb, \vb, v14
|
|
stvx \vb, \rw, r11
|
|
addi \rw, \rw, 2*16
|
|
.endm
|
|
|
|
|
|
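/*
 * Rotate amounts for the vector main rounds: v3/v19/v16 give the
 * ror 5/19/6 steps of Sigma1 and v17/v2/v18 the ror 11/20/2 steps of
 * Sigma0, encoded as left rotates for vrlw.  The main_round and
 * main_quadround macros below mirror their scalar counterparts.
 */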
.macro sha256_4way_main_setup
|
|
vspltisw v2, 12
|
|
vspltisw v3, -5
|
|
vspltisw v16, -6
|
|
vspltisw v17, -11
|
|
vspltisw v18, -2
|
|
.endm
|
|
|
|
.macro sha256_4way_main_round i, rk, rw, va, vb, vc, vd, ve, vf, vg, vh
|
|
li r6, (\i)*16
|
|
lvx v12, \rw, r6
|
|
vand v13, \vf, \ve
|
|
vandc v14, \vg, \ve
|
|
lvx v15, \rk, r6
|
|
vor v14, v14, v13
|
|
vrlw v13, \ve, v3
|
|
vadduwm \vh, \vh, v14
|
|
vxor v14, \ve, v13
|
|
vrlw v13, \ve, v19
|
|
vadduwm \vh, \vh, v12
|
|
vxor v14, v14, v13
|
|
vadduwm \vh, \vh, v15
|
|
vrlw v13, v14, v16
|
|
vxor v15, \va, \vb
|
|
vadduwm \vh, \vh, v13
|
|
|
|
vrlw v13, \va, v17
|
|
vand v15, v15, \vc
|
|
vxor v12, \va, v13
|
|
vrlw v13, \va, v2
|
|
vand v14, \va, \vb
|
|
vxor v12, v12, v13
|
|
vxor v14, v14, v15
|
|
vrlw v13, v12, v18
|
|
vadduwm v15, \vh, v14
|
|
vadduwm \vh, \vh, \vd
|
|
vadduwm \vd, v15, v13
|
|
.endm
|
|
|
|
.macro sha256_4way_main_quadround i, rk, rw
|
|
sha256_4way_main_round \i+0, \rk, \rw, v4, v5, v6, v7, v8, v9, v10, v11
|
|
sha256_4way_main_round \i+1, \rk, \rw, v7, v4, v5, v6, v11, v8, v9, v10
|
|
sha256_4way_main_round \i+2, \rk, \rw, v6, v7, v4, v5, v10, v11, v8, v9
|
|
sha256_4way_main_round \i+3, \rk, \rw, v5, v6, v7, v4, v9, v10, v11, v8
|
|
.endm
|
|
|
|
|
|
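/*
 * sha256_init_4way: copies the four-lane initial state sha256_4h into the
 * buffer at r3.  VRSAVE (SPR 256) is updated around the vector usage and
 * restored before returning.
 */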
.text
|
|
.align 2
|
|
.globl sha256_init_4way
|
|
.globl _sha256_init_4way
|
|
#ifdef __ELF__
|
|
.type sha256_init_4way, %function
|
|
#endif
|
|
sha256_init_4way:
|
|
_sha256_init_4way:
|
|
mfspr r0, 256
|
|
oris r12, r0, 0xff00
|
|
mtspr 256, r12
|
|
|
|
lis r4, HI(sha256_4h)
|
|
addi r4, r4, LO(sha256_4h)
|
|
li r5, 1*16
|
|
li r6, 2*16
|
|
li r7, 3*16
|
|
li r8, 4*16
|
|
li r9, 5*16
|
|
li r10, 6*16
|
|
li r11, 7*16
|
|
lvx v0, 0, r4
|
|
lvx v1, r4, r5
|
|
lvx v2, r4, r6
|
|
lvx v3, r4, r7
|
|
lvx v4, r4, r8
|
|
lvx v5, r4, r9
|
|
lvx v6, r4, r10
|
|
lvx v7, r4, r11
|
|
stvx v0, 0, r3
|
|
stvx v1, r3, r5
|
|
stvx v2, r3, r6
|
|
stvx v3, r3, r7
|
|
stvx v4, r3, r8
|
|
stvx v5, r3, r9
|
|
stvx v6, r3, r10
|
|
stvx v7, r3, r11
|
|
|
|
mtspr 256, r0
|
|
blr
|
|
|
|
|
|
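/*
 * sha256_transform_4way: four-lane version of sha256_transform.
 *   r3 = state (8 rows of 4 words), r4 = block (16 rows), r5 = swap flag.
 * A 16-byte-aligned stack frame holds the 64-row W array; when the swap
 * flag is set the message words are byte-reversed with vperm and br_perm.
 */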
.text
|
|
.align 2
|
|
.globl sha256_transform_4way
|
|
.globl _sha256_transform_4way
|
|
#ifdef __ELF__
|
|
.type sha256_transform_4way, %function
|
|
#endif
|
|
sha256_transform_4way:
|
|
_sha256_transform_4way:
|
|
mfspr r0, 256
|
|
oris r12, r0, 0xffff
|
|
ori r12, r12, 0xf000
|
|
mtspr 256, r12
|
|
|
|
andi. r6, r1, 15
|
|
cmpwi 0, r5, 0
|
|
li r7, -(4*4+64*16)
|
|
subf r6, r6, r7
|
|
stdux r1, r1, r6
|
|
|
|
li r7, 1*16
|
|
li r8, 2*16
|
|
li r9, 3*16
|
|
li r10, 4*16
|
|
li r11, 5*16
|
|
li r12, 6*16
|
|
li r6, 7*16
|
|
|
|
bne 0, sha256_transform_4way_swap
|
|
|
|
lvx v11, 0, r4
|
|
lvx v1, r4, r7
|
|
lvx v2, r4, r8
|
|
lvx v3, r4, r9
|
|
lvx v4, r4, r10
|
|
lvx v5, r4, r11
|
|
lvx v6, r4, r12
|
|
lvx v7, r4, r6
|
|
addi r5, r1, 4*4
|
|
stvx v11, 0, r5
|
|
stvx v1, r5, r7
|
|
stvx v2, r5, r8
|
|
stvx v3, r5, r9
|
|
stvx v4, r5, r10
|
|
stvx v5, r5, r11
|
|
stvx v6, r5, r12
|
|
stvx v7, r5, r6
|
|
addi r4, r4, 8*16
|
|
lvx v0, 0, r4
|
|
lvx v4, r4, r7
|
|
lvx v5, r4, r8
|
|
lvx v6, r4, r9
|
|
lvx v7, r4, r10
|
|
lvx v8, r4, r11
|
|
lvx v9, r4, r12
|
|
lvx v10, r4, r6
|
|
addi r4, r1, 4*4+8*16
|
|
stvx v0, 0, r4
|
|
stvx v4, r4, r7
|
|
stvx v5, r4, r8
|
|
stvx v6, r4, r9
|
|
stvx v7, r4, r10
|
|
stvx v8, r4, r11
|
|
stvx v9, r4, r12
|
|
stvx v10, r4, r6
|
|
b sha256_transform_4way_extend
|
|
|
|
sha256_transform_4way_swap:
|
|
lis r5, HI(br_perm)
|
|
addi r5, r5, LO(br_perm)
|
|
lvx v19, 0, r5
|
|
|
|
lvx v11, 0, r4
|
|
lvx v1, r4, r7
|
|
lvx v2, r4, r8
|
|
lvx v3, r4, r9
|
|
lvx v4, r4, r10
|
|
lvx v5, r4, r11
|
|
lvx v6, r4, r12
|
|
lvx v7, r4, r6
|
|
vperm v11, v11, v11, v19
|
|
vperm v1, v1, v1, v19
|
|
vperm v2, v2, v2, v19
|
|
vperm v3, v3, v3, v19
|
|
vperm v4, v4, v4, v19
|
|
vperm v5, v5, v5, v19
|
|
vperm v6, v6, v6, v19
|
|
vperm v7, v7, v7, v19
|
|
addi r5, r1, 4*4
|
|
stvx v11, 0, r5
|
|
stvx v1, r5, r7
|
|
stvx v2, r5, r8
|
|
stvx v3, r5, r9
|
|
stvx v4, r5, r10
|
|
stvx v5, r5, r11
|
|
stvx v6, r5, r12
|
|
stvx v7, r5, r6
|
|
addi r4, r4, 8*16
|
|
lvx v0, 0, r4
|
|
lvx v4, r4, r7
|
|
lvx v5, r4, r8
|
|
lvx v6, r4, r9
|
|
lvx v7, r4, r10
|
|
lvx v8, r4, r11
|
|
lvx v9, r4, r12
|
|
lvx v10, r4, r6
|
|
vperm v0, v0, v0, v19
|
|
vperm v4, v4, v4, v19
|
|
vperm v5, v5, v5, v19
|
|
vperm v6, v6, v6, v19
|
|
vperm v7, v7, v7, v19
|
|
vperm v8, v8, v8, v19
|
|
vperm v9, v9, v9, v19
|
|
vperm v10, v10, v10, v19
|
|
addi r4, r1, 4*4+8*16
|
|
stvx v0, 0, r4
|
|
stvx v4, r4, r7
|
|
stvx v5, r4, r8
|
|
stvx v6, r4, r9
|
|
stvx v7, r4, r10
|
|
stvx v8, r4, r11
|
|
stvx v9, r4, r12
|
|
stvx v10, r4, r6
|
|
|
|
sha256_transform_4way_extend:
|
|
li r10, 16*16
|
|
li r11, 17*16
|
|
sha256_4way_extend_setup
|
|
sha256_4way_extend_doubleround 0, r5, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 2, r5, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 4, r5, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 6, r5, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 8, r5, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 10, r5, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 12, r5, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 14, r5, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 16, r5, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 18, r5, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 20, r5, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 22, r5, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 24, r5, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 26, r5, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 28, r5, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 30, r5, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 32, r5, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 34, r5, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 36, r5, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 38, r5, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 40, r5, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 42, r5, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 44, r5, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 46, r5, v8, v9, v6, v7
|
|
|
|
addi r11, r3, 4*16
|
|
lvx v4, 0, r3
|
|
lvx v5, r3, r7
|
|
lvx v6, r3, r8
|
|
lvx v7, r3, r9
|
|
lvx v8, 0, r11
|
|
lvx v9, r11, r7
|
|
lvx v10, r11, r8
|
|
lvx v11, r11, r9
|
|
lis r12, HI(sha256_4k)
|
|
addi r12, r12, LO(sha256_4k)
|
|
addi r5, r1, 4*4
|
|
sha256_4way_main_setup
|
|
sha256_4way_main_quadround 0, r12, r5
|
|
sha256_4way_main_quadround 4, r12, r5
|
|
sha256_4way_main_quadround 8, r12, r5
|
|
sha256_4way_main_quadround 12, r12, r5
|
|
sha256_4way_main_quadround 16, r12, r5
|
|
sha256_4way_main_quadround 20, r12, r5
|
|
sha256_4way_main_quadround 24, r12, r5
|
|
sha256_4way_main_quadround 28, r12, r5
|
|
sha256_4way_main_quadround 32, r12, r5
|
|
sha256_4way_main_quadround 36, r12, r5
|
|
sha256_4way_main_quadround 40, r12, r5
|
|
sha256_4way_main_quadround 44, r12, r5
|
|
sha256_4way_main_quadround 48, r12, r5
|
|
sha256_4way_main_quadround 52, r12, r5
|
|
sha256_4way_main_quadround 56, r12, r5
|
|
sha256_4way_main_quadround 60, r12, r5
|
|
|
|
lvx v12, 0, r3
|
|
lvx v13, r3, r7
|
|
lvx v14, r3, r8
|
|
lvx v15, r3, r9
|
|
lvx v16, 0, r11
|
|
lvx v17, r11, r7
|
|
lvx v18, r11, r8
|
|
lvx v19, r11, r9
|
|
vadduwm v4, v4, v12
|
|
vadduwm v5, v5, v13
|
|
vadduwm v6, v6, v14
|
|
vadduwm v7, v7, v15
|
|
vadduwm v8, v8, v16
|
|
vadduwm v9, v9, v17
|
|
vadduwm v10, v10, v18
|
|
vadduwm v11, v11, v19
|
|
stvx v4, 0, r3
|
|
stvx v5, r3, r7
|
|
stvx v6, r3, r8
|
|
stvx v7, r3, r9
|
|
stvx v8, 0, r11
|
|
stvx v9, r11, r7
|
|
stvx v10, r11, r8
|
|
stvx v11, r11, r9
|
|
|
|
ld r1, 0(r1)
|
|
mtspr 256, r0
|
|
blr
|
|
|
|
|
|
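/*
 * sha256d_ms_4way: four-lane version of sha256d_ms, with the same argument
 * layout (r3 = hash, r4 = data, r5 = midstate, r6 = prehash) but four
 * 32-bit lanes per word.
 */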
.text
|
|
.align 2
|
|
.globl sha256d_ms_4way
|
|
.globl _sha256d_ms_4way
|
|
#ifdef __ELF__
|
|
.type sha256d_ms_4way, %function
|
|
#endif
|
|
sha256d_ms_4way:
|
|
_sha256d_ms_4way:
|
|
mfspr r0, 256
|
|
oris r12, r0, 0xffff
|
|
ori r12, r12, 0xf000
|
|
mtspr 256, r12
|
|
|
|
andi. r12, r1, 15
|
|
li r11, -(4*4+64*16)
|
|
subf r12, r12, r11
|
|
stdux r1, r1, r12
|
|
|
|
li r7, 1*16
|
|
li r8, 2*16
|
|
li r9, 3*16
|
|
li r10, 16*16
|
|
li r11, 17*16
|
|
|
|
sha256_4way_extend_setup
|
|
|
|
addi r4, r4, 2*16
|
|
addi r12, r1, 4*4+18*16
|
|
lvx v14, r4, r7
|
|
lvx v6, r4, r10
|
|
lvx v7, r4, r11
|
|
|
|
vrlw v12, v14, v1
|
|
vrlw v13, v14, v18
|
|
stvx v6, 0, r12
|
|
vxor v12, v12, v13
|
|
vsrw v13, v14, v16
|
|
stvx v7, r12, r7
|
|
vxor v12, v12, v13
|
|
vadduwm v6, v6, v12
|
|
vadduwm v7, v7, v14
|
|
stvx v6, r4, r10
|
|
|
|
vrlw v12, v6, v17
|
|
vrlw v13, v6, v19
|
|
stvx v7, r4, r11
|
|
addi r4, r4, 18*16
|
|
lvx v8, 0, r4
|
|
vxor v12, v12, v13
|
|
vsrw v13, v6, v0
|
|
stvx v8, r12, r8
|
|
vxor v12, v12, v13
|
|
vadduwm v8, v8, v12
|
|
|
|
vrlw v9, v7, v17
|
|
vrlw v13, v7, v19
|
|
stvx v8, 0, r4
|
|
vxor v9, v9, v13
|
|
vsrw v13, v7, v0
|
|
vxor v9, v9, v13
|
|
|
|
vrlw v12, v8, v17
|
|
vrlw v13, v8, v19
|
|
stvx v9, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v8, v0
|
|
lvx v10, r4, r8
|
|
lvx v4, r4, r9
|
|
vxor v12, v12, v13
|
|
stvx v10, r12, r9
|
|
addi r12, r12, 4*16
|
|
stvx v4, 0, r12
|
|
vrlw v14, v9, v17
|
|
vrlw v13, v9, v19
|
|
vadduwm v10, v10, v12
|
|
|
|
vxor v14, v14, v13
|
|
vsrw v13, v9, v0
|
|
stvx v10, r4, r8
|
|
vxor v14, v14, v13
|
|
vadduwm v4, v4, v14
|
|
|
|
vrlw v12, v10, v17
|
|
vrlw v13, v10, v19
|
|
stvx v4, r4, r9
|
|
vxor v12, v12, v13
|
|
vsrw v13, v10, v0
|
|
vrlw v14, v4, v17
|
|
vxor v12, v12, v13
|
|
vrlw v13, v4, v19
|
|
addi r4, r4, 4*16
|
|
lvx v5, 0, r4
|
|
vxor v14, v14, v13
|
|
stvx v5, r12, r7
|
|
vsrw v13, v4, v0
|
|
vadduwm v5, v5, v12
|
|
vxor v14, v14, v13
|
|
stvx v5, 0, r4
|
|
vadduwm v6, v6, v14
|
|
|
|
vrlw v12, v5, v17
|
|
vrlw v13, v5, v19
|
|
stvx v6, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v5, v0
|
|
vrlw v14, v6, v17
|
|
vxor v12, v12, v13
|
|
vrlw v13, v6, v19
|
|
vxor v14, v14, v13
|
|
vsrw v13, v6, v0
|
|
vadduwm v7, v7, v12
|
|
vxor v14, v14, v13
|
|
stvx v7, r4, r8
|
|
vadduwm v8, v8, v14
|
|
|
|
vrlw v12, v7, v17
|
|
vrlw v13, v7, v19
|
|
stvx v8, r4, r9
|
|
vxor v12, v12, v13
|
|
vsrw v13, v7, v0
|
|
vrlw v14, v8, v17
|
|
vxor v12, v12, v13
|
|
vrlw v13, v8, v19
|
|
vxor v14, v14, v13
|
|
vsrw v13, v8, v0
|
|
vadduwm v9, v9, v12
|
|
vxor v14, v14, v13
|
|
addi r4, r4, 4*16
|
|
stvx v9, 0, r4
|
|
vadduwm v10, v10, v14
|
|
|
|
vrlw v12, v9, v17
|
|
vrlw v13, v9, v19
|
|
stvx v10, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v9, v0
|
|
lvx v11, r4, r8
|
|
lvx v14, r4, r9
|
|
stvx v11, r12, r8
|
|
stvx v14, r12, r9
|
|
vxor v12, v12, v13
|
|
vadduwm v11, v11, v12
|
|
vadduwm v5, v5, v14
|
|
vrlw v12, v10, v17
|
|
vrlw v13, v10, v19
|
|
vadduwm v4, v4, v11
|
|
|
|
vxor v12, v12, v13
|
|
vsrw v13, v10, v0
|
|
stvx v4, r4, r8
|
|
vxor v12, v12, v13
|
|
vadduwm v5, v5, v12
|
|
stvx v5, r4, r9
|
|
addi r4, r4, -12*16
|
|
lvx v11, 0, r4
|
|
|
|
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 44, r4, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 46, r4, v8, v9, v6, v7
|
|
addi r4, r4, -48*16
|
|
|
|
lvx v4, 0, r6
|
|
lvx v9, r6, r7
|
|
lvx v10, r6, r8
|
|
lvx v11, r6, r9
|
|
addi r12, r6, 4*16
|
|
lvx v8, 0, r12
|
|
lvx v5, r12, r7
|
|
lvx v6, r12, r8
|
|
lvx v7, r12, r9
|
|
lis r12, HI(sha256_4k)
|
|
addi r12, r12, LO(sha256_4k)
|
|
sha256_4way_main_setup
|
|
sha256_4way_main_round 3, r12, r4, v5, v6, v7, v4, v9, v10, v11, v8
|
|
sha256_4way_main_quadround 4, r12, r4
|
|
sha256_4way_main_quadround 8, r12, r4
|
|
sha256_4way_main_quadround 12, r12, r4
|
|
sha256_4way_main_quadround 16, r12, r4
|
|
sha256_4way_main_quadround 20, r12, r4
|
|
sha256_4way_main_quadround 24, r12, r4
|
|
sha256_4way_main_quadround 28, r12, r4
|
|
sha256_4way_main_quadround 32, r12, r4
|
|
sha256_4way_main_quadround 36, r12, r4
|
|
sha256_4way_main_quadround 40, r12, r4
|
|
sha256_4way_main_quadround 44, r12, r4
|
|
sha256_4way_main_quadround 48, r12, r4
|
|
sha256_4way_main_quadround 52, r12, r4
|
|
sha256_4way_main_quadround 56, r12, r4
|
|
sha256_4way_main_quadround 60, r12, r4
|
|
|
|
lvx v12, 0, r5
|
|
lvx v13, r5, r7
|
|
lvx v14, r5, r8
|
|
lvx v15, r5, r9
|
|
addi r12, r5, 4*16
|
|
lvx v16, 0, r12
|
|
lvx v17, r12, r7
|
|
lvx v18, r12, r8
|
|
lvx v19, r12, r9
|
|
vadduwm v4, v4, v12
|
|
vadduwm v5, v5, v13
|
|
vadduwm v6, v6, v14
|
|
vadduwm v7, v7, v15
|
|
vadduwm v8, v8, v16
|
|
vadduwm v9, v9, v17
|
|
vadduwm v10, v10, v18
|
|
vadduwm v11, v11, v19
|
|
addi r12, r1, 4*4
|
|
stvx v4, 0, r12
|
|
stvx v5, r12, r7
|
|
stvx v6, r12, r8
|
|
stvx v7, r12, r9
|
|
addi r12, r12, 4*16
|
|
stvx v8, 0, r12
|
|
stvx v9, r12, r7
|
|
stvx v10, r12, r8
|
|
stvx v11, r12, r9
|
|
|
|
addi r12, r1, 4*4+18*16
|
|
lvx v4, 0, r12
|
|
lvx v5, r12, r7
|
|
lvx v6, r12, r8
|
|
lvx v7, r12, r9
|
|
addi r12, r12, 4*16
|
|
lvx v8, 0, r12
|
|
lvx v9, r12, r7
|
|
lvx v10, r12, r8
|
|
lvx v11, r12, r9
|
|
addi r12, r4, 18*16
|
|
stvx v4, 0, r12
|
|
stvx v5, r12, r7
|
|
stvx v6, r12, r8
|
|
addi r12, r4, 22*16
|
|
stvx v7, 0, r12
|
|
stvx v8, r12, r7
|
|
stvx v9, r12, r8
|
|
addi r12, r4, 30*16
|
|
stvx v10, 0, r12
|
|
stvx v11, r12, r7
|
|
|
|
addi r4, r1, 4*4
|
|
|
|
sha256_4way_extend_setup
|
|
|
|
lis r12, HI(sha256d_4preext2)
|
|
addi r12, r12, LO(sha256d_4preext2)
|
|
lvx v2, 0, r12
|
|
|
|
vxor v9, v9, v9
|
|
vspltisw v3, 1
|
|
lvx v4, r12, r8
|
|
vsldoi v3, v3, v3, 1
|
|
addi r5, r1, 4*4+8*16
|
|
stvx v4, 0, r5
|
|
stvx v9, r5, r7
|
|
stvx v9, r5, r8
|
|
stvx v9, r5, r9
|
|
addi r5, r5, 4*16
|
|
stvx v9, 0, r5
|
|
stvx v9, r5, r7
|
|
stvx v9, r5, r8
|
|
stvx v3, r5, r9
|
|
|
|
lvx v4, 0, r4
|
|
lvx v14, r4, r7
|
|
|
|
lvx v11, r4, r8
|
|
vrlw v12, v14, v1
|
|
vrlw v13, v14, v18
|
|
|
|
vxor v12, v12, v13
|
|
vsrw v13, v14, v16
|
|
vadduwm v5, v14, v2
|
|
vxor v12, v12, v13
|
|
vrlw v14, v11, v1
|
|
vrlw v13, v11, v18
|
|
vadduwm v4, v4, v12
|
|
vxor v14, v14, v13
|
|
vsrw v13, v11, v16
|
|
stvx v4, r4, r10
|
|
vxor v14, v14, v13
|
|
vrlw v12, v4, v17
|
|
vrlw v13, v4, v19
|
|
vadduwm v5, v5, v14
|
|
|
|
stvx v5, r4, r11
|
|
addi r4, r4, 2*16
|
|
lvx v14, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v4, v0
|
|
vrlw v6, v14, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v14, v18
|
|
vxor v6, v6, v13
|
|
vsrw v13, v14, v16
|
|
vadduwm v11, v11, v12
|
|
vxor v6, v6, v13
|
|
vrlw v12, v5, v17
|
|
vrlw v13, v5, v19
|
|
vadduwm v6, v6, v11
|
|
lvx v11, r4, r8
|
|
|
|
stvx v6, r4, r10
|
|
vxor v12, v12, v13
|
|
vsrw v13, v5, v0
|
|
vrlw v7, v11, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v11, v18
|
|
vxor v7, v7, v13
|
|
vsrw v13, v11, v16
|
|
vadduwm v14, v14, v12
|
|
vxor v7, v7, v13
|
|
vrlw v12, v6, v17
|
|
vrlw v13, v6, v19
|
|
vadduwm v7, v7, v14
|
|
|
|
stvx v7, r4, r11
|
|
addi r4, r4, 2*16
|
|
lvx v14, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v6, v0
|
|
vrlw v8, v14, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v14, v18
|
|
vxor v8, v8, v13
|
|
vsrw v13, v14, v16
|
|
vadduwm v11, v11, v12
|
|
vxor v8, v8, v13
|
|
vrlw v12, v7, v17
|
|
vrlw v13, v7, v19
|
|
vadduwm v8, v8, v11
|
|
lvx v11, r4, r8
|
|
|
|
stvx v8, r4, r10
|
|
vxor v12, v12, v13
|
|
vsrw v13, v7, v0
|
|
vrlw v9, v11, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v11, v18
|
|
vxor v9, v9, v13
|
|
vsrw v13, v11, v16
|
|
vadduwm v14, v14, v12
|
|
vxor v9, v9, v13
|
|
vrlw v12, v8, v17
|
|
vrlw v13, v8, v19
|
|
vadduwm v9, v9, v14
|
|
|
|
stvx v9, r4, r11
|
|
addi r4, r4, 2*16
|
|
lvx v14, r4, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v8, v0
|
|
vrlw v10, v14, v1
|
|
vxor v12, v12, v13
|
|
vrlw v13, v14, v18
|
|
vxor v10, v10, v13
|
|
vsrw v13, v14, v16
|
|
vadduwm v11, v11, v12
|
|
vxor v10, v10, v13
|
|
vrlw v12, v9, v17
|
|
vrlw v13, v9, v19
|
|
vadduwm v11, v11, v3
|
|
vadduwm v14, v14, v4
|
|
vadduwm v10, v10, v11
|
|
|
|
lvx v2, r12, r7
|
|
vxor v12, v12, v13
|
|
vsrw v13, v9, v0
|
|
stvx v10, r4, r10
|
|
vxor v12, v12, v13
|
|
vadduwm v14, v14, v12
|
|
vrlw v12, v10, v17
|
|
vrlw v13, v10, v19
|
|
vadduwm v4, v14, v2
|
|
lvx v2, r12, r8
|
|
vxor v12, v12, v13
|
|
vsrw v13, v10, v0
|
|
stvx v4, r4, r11
|
|
vadduwm v5, v5, v2
|
|
vxor v12, v12, v13
|
|
vadduwm v5, v5, v12
|
|
|
|
vrlw v12, v4, v17
|
|
vrlw v13, v4, v19
|
|
addi r4, r4, 2*16
|
|
stvx v5, r4, r10
|
|
vxor v12, v12, v13
|
|
vsrw v13, v4, v0
|
|
vrlw v11, v5, v17
|
|
vxor v12, v12, v13
|
|
vrlw v13, v5, v19
|
|
vxor v11, v11, v13
|
|
vsrw v13, v5, v0
|
|
vadduwm v6, v6, v12
|
|
vxor v11, v11, v13
|
|
stvx v6, r4, r11
|
|
vadduwm v7, v7, v11
|
|
|
|
vrlw v12, v6, v17
|
|
vrlw v13, v6, v19
|
|
addi r4, r4, 2*16
|
|
stvx v7, r4, r10
|
|
vxor v12, v12, v13
|
|
vsrw v13, v6, v0
|
|
vrlw v11, v7, v17
|
|
vxor v12, v12, v13
|
|
vrlw v13, v7, v19
|
|
vxor v11, v11, v13
|
|
vsrw v13, v7, v0
|
|
vadduwm v8, v8, v12
|
|
vxor v11, v11, v13
|
|
stvx v8, r4, r11
|
|
vadduwm v9, v9, v11
|
|
|
|
lvx v2, r12, r9
|
|
vrlw v14, v8, v17
|
|
vrlw v13, v8, v19
|
|
vrlw v12, v9, v17
|
|
addi r4, r4, 2*16
|
|
stvx v9, r4, r10
|
|
vxor v14, v14, v13
|
|
vrlw v13, v9, v19
|
|
vxor v12, v12, v13
|
|
vsrw v13, v8, v0
|
|
vxor v14, v14, v13
|
|
vsrw v13, v9, v0
|
|
vxor v12, v12, v13
|
|
vadduwm v4, v4, v2
|
|
vadduwm v10, v10, v14
|
|
vadduwm v4, v4, v12
|
|
stvx v10, r4, r11
|
|
addi r4, r4, 2*16
|
|
lvx v11, r4, r8
|
|
|
|
vadduwm v5, v5, v3
|
|
stvx v4, r4, r10
|
|
vrlw v14, v11, v1
|
|
vrlw v13, v11, v18
|
|
vrlw v12, v10, v17
|
|
vxor v14, v14, v13
|
|
vrlw v13, v10, v19
|
|
vxor v12, v12, v13
|
|
vsrw v13, v11, v16
|
|
vxor v14, v14, v13
|
|
vsrw v13, v10, v0
|
|
vxor v12, v12, v13
|
|
vadduwm v5, v5, v14
|
|
vadduwm v5, v5, v12
|
|
stvx v5, r4, r11
|
|
addi r4, r4, 2*16
|
|
|
|
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
|
|
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
|
|
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
|
|
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
|
|
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
|
|
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
|
|
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
|
|
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
|
|
|
|
lvx v14, r4, r7
|
|
vrlw v12, v4, v17
|
|
vrlw v13, v4, v19
|
|
vadduwm v15, v11, v6
|
|
vrlw v6, v14, v1
|
|
vrlw v11, v14, v18
|
|
vxor v12, v12, v13
|
|
vxor v6, v6, v11
|
|
vsrw v13, v4, v0
|
|
vsrw v14, v14, v16
|
|
vxor v12, v12, v13
|
|
vxor v6, v6, v14
|
|
vadduwm v12, v12, v15
|
|
vadduwm v6, v6, v12
|
|
stvx v6, r4, r10
|
|
addi r4, r4, -44*16
|
|
|
|
lis r5, HI(sha256_4h)
|
|
addi r5, r5, LO(sha256_4h)
|
|
lvx v4, 0, r5
|
|
lvx v5, r5, r7
|
|
lvx v6, r5, r8
|
|
lvx v7, r5, r9
|
|
addi r12, r5, 4*16
|
|
lvx v8, 0, r12
|
|
lvx v9, r12, r7
|
|
lvx v10, r12, r8
|
|
lvx v11, r12, r9
|
|
lis r12, HI(sha256_4k)
|
|
addi r12, r12, LO(sha256_4k)
|
|
sha256_4way_main_setup
|
|
sha256_4way_main_quadround 0, r12, r4
|
|
sha256_4way_main_quadround 4, r12, r4
|
|
sha256_4way_main_quadround 8, r12, r4
|
|
sha256_4way_main_quadround 12, r12, r4
|
|
sha256_4way_main_quadround 16, r12, r4
|
|
sha256_4way_main_quadround 20, r12, r4
|
|
sha256_4way_main_quadround 24, r12, r4
|
|
sha256_4way_main_quadround 28, r12, r4
|
|
sha256_4way_main_quadround 32, r12, r4
|
|
sha256_4way_main_quadround 36, r12, r4
|
|
sha256_4way_main_quadround 40, r12, r4
|
|
sha256_4way_main_quadround 44, r12, r4
|
|
sha256_4way_main_quadround 48, r12, r4
|
|
sha256_4way_main_quadround 52, r12, r4
|
|
sha256_4way_main_round 56, r12, r4, v4, v5, v6, v7, v8, v9, v10, v11
|
|
|
|
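/*
 * sha256_4way_main_round_red: vector version of the reduced final round;
 * only the h term needed for hash[7] is computed.
 */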
.macro sha256_4way_main_round_red i, rk, rw, vd, ve, vf, vg, vh
|
|
li r6, (\i)*16
|
|
vand v15, \vf, \ve
|
|
vandc v14, \vg, \ve
|
|
lvx v12, \rw, r6
|
|
vadduwm \vh, \vh, \vd
|
|
vor v14, v14, v15
|
|
lvx v15, \rk, r6
|
|
vrlw v13, \ve, v3
|
|
vadduwm \vh, \vh, v14
|
|
vxor v14, \ve, v13
|
|
vrlw v13, \ve, v19
|
|
vadduwm \vh, \vh, v12
|
|
vxor v14, v14, v13
|
|
vadduwm \vh, \vh, v15
|
|
vrlw v13, v14, v16
|
|
vadduwm \vh, \vh, v13
|
|
.endm
|
|
|
|
sha256_4way_main_round_red 57, r12, r4, v6, v11, v8, v9, v10
|
|
sha256_4way_main_round_red 58, r12, r4, v5, v10, v11, v8, v9
|
|
sha256_4way_main_round_red 59, r12, r4, v4, v9, v10, v11, v8
|
|
sha256_4way_main_round_red 60, r12, r4, v7, v8, v9, v10, v11
|
|
|
|
li r12, 7*16
|
|
lvx v19, r5, r12
|
|
vadduwm v11, v11, v19
|
|
stvx v11, r3, r12
|
|
|
|
ld r1, 0(r1)
|
|
mtspr 256, r0
|
|
blr
|
|
|
|
|
|
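/*
 * sha256_use_4way: reports that the AltiVec 4-way path is available
 * (always returns 1 when built with __ALTIVEC__).
 */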
.text
|
|
.align 2
|
|
.globl sha256_use_4way
|
|
.globl _sha256_use_4way
|
|
#ifdef __ELF__
|
|
.type sha256_use_4way, %function
|
|
#endif
|
|
sha256_use_4way:
|
|
_sha256_use_4way:
|
|
li r3, 1
|
|
blr
|
|
|
|
#endif /* __ALTIVEC__ */
|
|
|
|
#endif
|