/* cpuminer/sha2-ppc.S */

/*
* Copyright 2014-2015 pooler@litecoinpool.org
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version. See COPYING for more details.
*/
#include "cpuminer-config.h"
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
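/*
 * Darwin's assembler uses ha16()/lo16() for the high/low halves of an
 * address and accepts symbolic register names; ELF assemblers use @ha/@l
 * and expect bare register numbers, hence the numeric defines below.
 */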
#ifdef __APPLE__
#define HI(name) ha16(name)
#define LO(name) lo16(name)
#else
#define HI(name) name@ha
#define LO(name) name@l
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31
#ifdef __ALTIVEC__
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif
#endif
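/*
 * On 32-bit targets, map the 64-bit load/store mnemonics used in the
 * prologues and epilogues below to their word-sized equivalents, so the
 * same stack-handling code assembles for both ABIs.
 */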
#if !(defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) || \
defined(__64BIT__) || defined(_LP64) || defined(__LP64__))
#define ld lwz
#define std stw
#define stdu stwu
#define stdux stwux
#endif
.data
.align 2
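/* SHA-256 initial hash values H0..H7 (FIPS 180-4) */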
sha256_h:
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
.data
.align 2
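/* SHA-256 round constants K0..K63 */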
sha256_k:
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
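/*
 * Message-schedule extension, two words per invocation:
 *   W[i+16] = sigma1(W[i+14]) + W[i+9]  + sigma0(W[i+1]) + W[i]
 *   W[i+17] = sigma1(W[i+15]) + W[i+10] + sigma0(W[i+2]) + W[i+1]
 * where sigma0(x) = ROTR(x,7) ^ ROTR(x,18) ^ SHR(x,3)
 * and   sigma1(x) = ROTR(x,17) ^ ROTR(x,19) ^ SHR(x,10).
 * \wo is the byte offset of the W array within \rw; \ry and \rz hold
 * W[i+14] and W[i+15] from earlier rounds, r11 carries W[i] in, and
 * \ra/\rb enter as W[i+9]/W[i+10] and leave as the two new words.
 */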
.macro sha256_extend_doubleround i, rw, wo, ra, rb, ry, rz
lwz r14, \wo+(\i+1)*4(\rw)
rotrwi r12, \ry, 17
rotrwi r13, \ry, 19
add r11, r11, \ra
xor r12, r12, r13
srwi r13, \ry, 10
rotrwi \ra, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
add r12, r12, r11
xor \ra, \ra, r13
srwi r13, r14, 3
lwz r11, \wo+(\i+2)*4(\rw)
xor \ra, \ra, r13
rotrwi r13, \rz, 19
add \ra, \ra, r12
rotrwi r12, \rz, 17
add r14, r14, \rb
xor r12, r12, r13
srwi r13, \rz, 10
rotrwi \rb, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
stw \ra, \wo+(\i+16)*4(\rw)
xor \rb, \rb, r13
srwi r13, r11, 3
add r14, r14, r12
xor \rb, \rb, r13
add \rb, \rb, r14
stw \rb, \wo+(\i+17)*4(\rw)
.endm
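/*
 * One compression round:
 *   T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i]
 *   d += T1;  h = T1 + Sigma0(a) + Maj(a,b,c)
 * Ch is formed as (f & e) | (g & ~e), and the big sigmas are folded:
 *   Sigma1(e) = ROTR(e ^ ROTR(e,5) ^ ROTR(e,19), 6)
 *   Sigma0(a) = ROTR(a ^ ROTR(a,11) ^ ROTR(a,20), 2)
 * saving one rotate each relative to the three-rotate form.
 */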
.macro sha256_main_round i, rk, rw, wo, ra, rb, rc, rd, re, rf, rg, rh
lwz r12, \wo+(\i)*4(\rw)
and r13, \rf, \re
andc r14, \rg, \re
lwz r15, (\i)*4(\rk)
or r14, r14, r13
rotrwi r13, \re, 5
add \rh, \rh, r14
xor r14, \re, r13
rotrwi r13, \re, 19
add \rh, \rh, r12
xor r14, r14, r13
add \rh, \rh, r15
rotrwi r13, r14, 6
xor r15, \ra, \rb
add \rh, \rh, r13
rotrwi r13, \ra, 11
and r15, r15, \rc
xor r12, \ra, r13
rotrwi r13, \ra, 20
and r14, \ra, \rb
xor r12, r12, r13
xor r14, r14, r15
rotrwi r13, r12, 2
add r15, \rh, r14
add \rh, \rh, \rd
add \rd, r15, r13
.endm
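/*
 * Four rounds with the working variables a..h renamed instead of moved:
 * each round shifts the register assignment one position to the right.
 */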
.macro sha256_main_quadround i, rk, rw, wo
sha256_main_round \i+0, \rk, \rw, \wo, r4, r5, r6, r7, r8, r9, r10, r11
sha256_main_round \i+1, \rk, \rw, \wo, r7, r4, r5, r6, r11, r8, r9, r10
sha256_main_round \i+2, \rk, \rw, \wo, r6, r7, r4, r5, r10, r11, r8, r9
sha256_main_round \i+3, \rk, \rw, \wo, r5, r6, r7, r4, r9, r10, r11, r8
.endm
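/*
 * Assuming the usual cpuminer declaration, roughly
 *   void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
 * r3 = state, r4 = 16-word block, r5 = nonzero to byte-swap the input.
 */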
.text
.align 2
.globl sha256_transform
.globl _sha256_transform
#ifdef __ELF__
.type sha256_transform, %function
#endif
sha256_transform:
_sha256_transform:
stdu r1, -76*4(r1)
cmpwi 0, r5, 0
std r13, 2*4(r1)
std r14, 4*4(r1)
std r15, 6*4(r1)
std r16, 72*4(r1)
bne 0, sha256_transform_swap
lwz r11, 0*4(r4)
lwz r14, 1*4(r4)
lwz r15, 2*4(r4)
lwz r7, 3*4(r4)
lwz r8, 4*4(r4)
lwz r9, 5*4(r4)
lwz r10, 6*4(r4)
lwz r0, 7*4(r4)
lwz r12, 8*4(r4)
lwz r13, 9*4(r4)
lwz r5, 10*4(r4)
lwz r6, 11*4(r4)
stw r11, 8*4+0*4(r1)
stw r14, 8*4+1*4(r1)
stw r15, 8*4+2*4(r1)
stw r7, 8*4+3*4(r1)
stw r8, 8*4+4*4(r1)
stw r9, 8*4+5*4(r1)
stw r10, 8*4+6*4(r1)
stw r0, 8*4+7*4(r1)
stw r12, 8*4+8*4(r1)
stw r13, 8*4+9*4(r1)
stw r5, 8*4+10*4(r1)
stw r6, 8*4+11*4(r1)
lwz r7, 12*4(r4)
lwz r8, 13*4(r4)
lwz r9, 14*4(r4)
lwz r10, 15*4(r4)
mr r4, r13
stw r7, 8*4+12*4(r1)
stw r8, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
b sha256_transform_extend
sha256_transform_swap:
li r13, 1*4
li r14, 2*4
li r15, 3*4
lwbrx r11, 0, r4
lwbrx r7, r4, r13
lwbrx r8, r4, r14
lwbrx r9, r4, r15
addi r4, r4, 4*4
stw r11, 8*4+0*4(r1)
stw r7, 8*4+1*4(r1)
stw r8, 8*4+2*4(r1)
stw r9, 8*4+3*4(r1)
lwbrx r7, 0, r4
lwbrx r8, r4, r13
lwbrx r9, r4, r14
lwbrx r10, r4, r15
addi r4, r4, 4*4
stw r7, 8*4+4*4(r1)
stw r8, 8*4+5*4(r1)
stw r9, 8*4+6*4(r1)
stw r10, 8*4+7*4(r1)
lwbrx r8, 0, r4
lwbrx r12, r4, r13
lwbrx r5, r4, r14
lwbrx r6, r4, r15
addi r4, r4, 4*4
stw r8, 8*4+8*4(r1)
stw r12, 8*4+9*4(r1)
stw r5, 8*4+10*4(r1)
stw r6, 8*4+11*4(r1)
lwbrx r7, 0, r4
lwbrx r8, r4, r13
lwbrx r9, r4, r14
lwbrx r10, r4, r15
mr r4, r12
stw r7, 8*4+12*4(r1)
stw r8, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
sha256_transform_extend:
sha256_extend_doubleround 0, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 2, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 4, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 6, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 8, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 10, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 12, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 14, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 44, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 46, r1, 8*4, r8, r9, r6, r7
lwz r4, 0*4(r3)
lwz r5, 1*4(r3)
lwz r6, 2*4(r3)
lwz r7, 3*4(r3)
lwz r8, 4*4(r3)
lwz r9, 5*4(r3)
lwz r10, 6*4(r3)
lwz r11, 7*4(r3)
lis r16, HI(sha256_k)
addi r16, r16, LO(sha256_k)
sha256_main_quadround 0, r16, r1, 8*4
sha256_main_quadround 4, r16, r1, 8*4
sha256_main_quadround 8, r16, r1, 8*4
sha256_main_quadround 12, r16, r1, 8*4
sha256_main_quadround 16, r16, r1, 8*4
sha256_main_quadround 20, r16, r1, 8*4
sha256_main_quadround 24, r16, r1, 8*4
sha256_main_quadround 28, r16, r1, 8*4
sha256_main_quadround 32, r16, r1, 8*4
sha256_main_quadround 36, r16, r1, 8*4
sha256_main_quadround 40, r16, r1, 8*4
sha256_main_quadround 44, r16, r1, 8*4
sha256_main_quadround 48, r16, r1, 8*4
sha256_main_quadround 52, r16, r1, 8*4
sha256_main_quadround 56, r16, r1, 8*4
sha256_main_quadround 60, r16, r1, 8*4
lwz r12, 0*4(r3)
lwz r13, 1*4(r3)
lwz r14, 2*4(r3)
lwz r15, 3*4(r3)
add r4, r4, r12
add r5, r5, r13
add r6, r6, r14
add r7, r7, r15
stw r4, 0*4(r3)
stw r5, 1*4(r3)
stw r6, 2*4(r3)
stw r7, 3*4(r3)
lwz r12, 4*4(r3)
lwz r13, 5*4(r3)
lwz r14, 6*4(r3)
lwz r15, 7*4(r3)
add r8, r8, r12
add r9, r9, r13
add r10, r10, r14
add r11, r11, r15
stw r8, 4*4(r3)
stw r9, 5*4(r3)
stw r10, 6*4(r3)
stw r11, 7*4(r3)
ld r13, 2*4(r1)
ld r14, 4*4(r1)
ld r15, 6*4(r1)
ld r16, 72*4(r1)
addi r1, r1, 76*4
blr
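/*
 * sha256d_ms: double SHA-256 tail specialized for mining. Assuming the
 * usual cpuminer prototype, roughly
 *   void sha256d_ms(uint32_t *hash, uint32_t *W, const uint32_t *midstate,
 *                   const uint32_t *prehash);
 * only the schedule words that change with the nonce are recomputed, the
 * first three main rounds appear to be precomputed on the C side (hence
 * the round-3 entry below), and padding-derived terms are folded into the
 * addis/addi immediates.
 */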
.text
.align 2
.globl sha256d_ms
.globl _sha256d_ms
#ifdef __ELF__
.type sha256d_ms, %function
#endif
sha256d_ms:
_sha256d_ms:
stdu r1, -80*4(r1)
std r13, 2*4(r1)
std r14, 4*4(r1)
std r15, 6*4(r1)
std r16, 72*4(r1)
std r17, 74*4(r1)
std r18, 76*4(r1)
mr r17, r4
mr r18, r5
mr r16, r6
lwz r14, 3*4(r17)
lwz r6, 18*4(r17)
lwz r7, 19*4(r17)
rotrwi r12, r14, 7
rotrwi r13, r14, 18
stw r6, 8*4+18*4(r1)
xor r12, r12, r13
srwi r13, r14, 3
stw r7, 8*4+19*4(r1)
xor r12, r12, r13
lwz r8, 20*4(r17)
add r6, r6, r12
lwz r10, 22*4(r17)
add r7, r7, r14
stw r6, 18*4(r17)
rotrwi r12, r6, 17
rotrwi r13, r6, 19
stw r7, 19*4(r17)
xor r12, r12, r13
srwi r13, r6, 10
stw r8, 8*4+20*4(r1)
xor r12, r12, r13
lwz r4, 23*4(r17)
add r8, r8, r12
lwz r5, 24*4(r17)
rotrwi r9, r7, 17
rotrwi r13, r7, 19
stw r8, 20*4(r17)
xor r9, r9, r13
srwi r13, r7, 10
stw r10, 8*4+21*4(r1)
xor r9, r9, r13
stw r4, 8*4+22*4(r1)
rotrwi r12, r8, 17
rotrwi r13, r8, 19
stw r9, 21*4(r17)
xor r12, r12, r13
srwi r13, r8, 10
stw r5, 8*4+23*4(r1)
xor r12, r12, r13
rotrwi r14, r9, 17
rotrwi r13, r9, 19
add r10, r10, r12
lwz r11, 30*4(r17)
xor r14, r14, r13
srwi r13, r9, 10
stw r10, 22*4(r17)
xor r14, r14, r13
stw r11, 8*4+24*4(r1)
add r4, r4, r14
rotrwi r12, r10, 17
rotrwi r13, r10, 19
stw r4, 23*4(r17)
xor r12, r12, r13
srwi r13, r10, 10
rotrwi r14, r4, 17
xor r12, r12, r13
rotrwi r13, r4, 19
xor r14, r14, r13
srwi r13, r4, 10
add r5, r5, r12
xor r14, r14, r13
stw r5, 24*4(r17)
add r6, r6, r14
rotrwi r12, r5, 17
rotrwi r13, r5, 19
stw r6, 25*4(r17)
xor r12, r12, r13
srwi r13, r5, 10
rotrwi r14, r6, 17
xor r12, r12, r13
rotrwi r13, r6, 19
xor r14, r14, r13
srwi r13, r6, 10
add r7, r7, r12
xor r14, r14, r13
stw r7, 26*4(r17)
add r8, r8, r14
rotrwi r12, r7, 17
rotrwi r13, r7, 19
stw r8, 27*4(r17)
xor r12, r12, r13
srwi r13, r7, 10
rotrwi r14, r8, 17
xor r12, r12, r13
rotrwi r13, r8, 19
xor r14, r14, r13
srwi r13, r8, 10
add r9, r9, r12
xor r14, r14, r13
stw r9, 28*4(r17)
add r10, r10, r14
lwz r14, 31*4(r17)
rotrwi r12, r9, 17
rotrwi r13, r9, 19
stw r10, 29*4(r17)
xor r12, r12, r13
srwi r13, r9, 10
stw r14, 8*4+25*4(r1)
xor r12, r12, r13
add r11, r11, r12
add r5, r5, r14
rotrwi r12, r10, 17
rotrwi r13, r10, 19
add r4, r4, r11
lwz r11, 16*4(r17)
xor r12, r12, r13
srwi r13, r10, 10
stw r4, 30*4(r17)
xor r12, r12, r13
add r5, r5, r12
stw r5, 31*4(r17)
sha256_extend_doubleround 16, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 18, r17, 0, r8, r9, r6, r7
sha256_extend_doubleround 20, r17, 0, r10, r4, r8, r9
sha256_extend_doubleround 22, r17, 0, r5, r6, r10, r4
sha256_extend_doubleround 24, r17, 0, r7, r8, r5, r6
sha256_extend_doubleround 26, r17, 0, r9, r10, r7, r8
sha256_extend_doubleround 28, r17, 0, r4, r5, r9, r10
sha256_extend_doubleround 30, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 32, r17, 0, r8, r9, r6, r7
sha256_extend_doubleround 34, r17, 0, r10, r4, r8, r9
sha256_extend_doubleround 36, r17, 0, r5, r6, r10, r4
sha256_extend_doubleround 38, r17, 0, r7, r8, r5, r6
sha256_extend_doubleround 40, r17, 0, r9, r10, r7, r8
sha256_extend_doubleround 42, r17, 0, r4, r5, r9, r10
sha256_extend_doubleround 44, r17, 0, r6, r7, r4, r5
sha256_extend_doubleround 46, r17, 0, r8, r9, r6, r7
lwz r4, 0*4(r16)
lwz r9, 1*4(r16)
lwz r10, 2*4(r16)
lwz r11, 3*4(r16)
lwz r8, 4*4(r16)
lwz r5, 5*4(r16)
lwz r6, 6*4(r16)
lwz r7, 7*4(r16)
lis r16, HI(sha256_k)
addi r16, r16, LO(sha256_k)
sha256_main_round 3, r16, r17, 0, r5, r6, r7, r4, r9, r10, r11, r8
sha256_main_quadround 4, r16, r17, 0
sha256_main_quadround 8, r16, r17, 0
sha256_main_quadround 12, r16, r17, 0
sha256_main_quadround 16, r16, r17, 0
sha256_main_quadround 20, r16, r17, 0
sha256_main_quadround 24, r16, r17, 0
sha256_main_quadround 28, r16, r17, 0
sha256_main_quadround 32, r16, r17, 0
sha256_main_quadround 36, r16, r17, 0
sha256_main_quadround 40, r16, r17, 0
sha256_main_quadround 44, r16, r17, 0
sha256_main_quadround 48, r16, r17, 0
sha256_main_quadround 52, r16, r17, 0
sha256_main_quadround 56, r16, r17, 0
sha256_main_quadround 60, r16, r17, 0
lwz r12, 0*4(r18)
lwz r13, 1*4(r18)
lwz r14, 2*4(r18)
lwz r15, 3*4(r18)
add r4, r4, r12
add r5, r5, r13
add r6, r6, r14
add r7, r7, r15
stw r4, 8*4+0*4(r1)
stw r5, 8*4+1*4(r1)
stw r6, 8*4+2*4(r1)
stw r7, 8*4+3*4(r1)
lwz r12, 4*4(r18)
lwz r13, 5*4(r18)
lwz r14, 6*4(r18)
lwz r15, 7*4(r18)
add r8, r8, r12
add r9, r9, r13
add r10, r10, r14
add r11, r11, r15
stw r8, 8*4+4*4(r1)
stw r9, 8*4+5*4(r1)
stw r10, 8*4+6*4(r1)
stw r11, 8*4+7*4(r1)
lwz r4, 8*4+18*4(r1)
lwz r5, 8*4+19*4(r1)
lwz r6, 8*4+20*4(r1)
lwz r7, 8*4+21*4(r1)
lwz r8, 8*4+22*4(r1)
lwz r9, 8*4+23*4(r1)
lwz r10, 8*4+24*4(r1)
lwz r11, 8*4+25*4(r1)
stw r4, 18*4(r17)
stw r5, 19*4(r17)
stw r6, 20*4(r17)
stw r7, 22*4(r17)
stw r8, 23*4(r17)
stw r9, 24*4(r17)
stw r10, 30*4(r17)
stw r11, 31*4(r17)
lis r8, 0x8000
li r9, 0
li r10, 0x0100
lwz r14, 8*4+1*4(r1)
lwz r4, 8*4+0*4(r1)
lwz r11, 8*4+2*4(r1)
rotrwi r12, r14, 7
rotrwi r13, r14, 18
stw r8, 8*4+8*4(r1)
stw r9, 8*4+9*4(r1)
stw r9, 8*4+10*4(r1)
stw r9, 8*4+11*4(r1)
stw r9, 8*4+12*4(r1)
stw r9, 8*4+13*4(r1)
stw r9, 8*4+14*4(r1)
stw r10, 8*4+15*4(r1)
xor r12, r12, r13
srwi r13, r14, 3
addis r5, r14, 0x00a0
xor r12, r12, r13
rotrwi r14, r11, 7
rotrwi r13, r11, 18
add r4, r4, r12
xor r14, r14, r13
srwi r13, r11, 3
stw r4, 8*4+16*4(r1)
xor r14, r14, r13
rotrwi r12, r4, 17
rotrwi r13, r4, 19
add r5, r5, r14
lwz r14, 8*4+3*4(r1)
stw r5, 8*4+17*4(r1)
xor r12, r12, r13
srwi r13, r4, 10
rotrwi r6, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r6, r6, r13
srwi r13, r14, 3
add r11, r11, r12
xor r6, r6, r13
rotrwi r12, r5, 17
rotrwi r13, r5, 19
add r6, r6, r11
lwz r11, 8*4+4*4(r1)
stw r6, 8*4+18*4(r1)
xor r12, r12, r13
srwi r13, r5, 10
rotrwi r7, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
xor r7, r7, r13
srwi r13, r11, 3
add r14, r14, r12
xor r7, r7, r13
rotrwi r12, r6, 17
rotrwi r13, r6, 19
add r7, r7, r14
lwz r14, 8*4+5*4(r1)
stw r7, 8*4+19*4(r1)
xor r12, r12, r13
srwi r13, r6, 10
rotrwi r8, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r8, r8, r13
srwi r13, r14, 3
add r11, r11, r12
xor r8, r8, r13
rotrwi r12, r7, 17
rotrwi r13, r7, 19
add r8, r8, r11
lwz r11, 8*4+6*4(r1)
stw r8, 8*4+20*4(r1)
xor r12, r12, r13
srwi r13, r7, 10
rotrwi r9, r11, 7
xor r12, r12, r13
rotrwi r13, r11, 18
xor r9, r9, r13
srwi r13, r11, 3
add r14, r14, r12
xor r9, r9, r13
rotrwi r12, r8, 17
rotrwi r13, r8, 19
add r9, r9, r14
lwz r14, 8*4+7*4(r1)
stw r9, 8*4+21*4(r1)
xor r12, r12, r13
srwi r13, r8, 10
rotrwi r10, r14, 7
xor r12, r12, r13
rotrwi r13, r14, 18
xor r10, r10, r13
srwi r13, r14, 3
add r11, r11, r12
xor r10, r10, r13
rotrwi r12, r9, 17
rotrwi r13, r9, 19
addi r11, r11, 0x0100
add r14, r14, r4
add r10, r10, r11
xor r12, r12, r13
srwi r13, r9, 10
stw r10, 8*4+22*4(r1)
addis r14, r14, 0x1100
xor r12, r12, r13
add r14, r14, r12
rotrwi r12, r10, 17
rotrwi r13, r10, 19
addi r4, r14, 0x2000
xor r12, r12, r13
srwi r13, r10, 10
stw r4, 8*4+23*4(r1)
addis r5, r5, 0x8000
xor r12, r12, r13
add r5, r5, r12
rotrwi r12, r4, 17
rotrwi r13, r4, 19
stw r5, 8*4+24*4(r1)
xor r12, r12, r13
srwi r13, r4, 10
rotrwi r11, r5, 17
xor r12, r12, r13
rotrwi r13, r5, 19
xor r11, r11, r13
srwi r13, r5, 10
add r6, r6, r12
xor r11, r11, r13
stw r6, 8*4+25*4(r1)
add r7, r7, r11
rotrwi r12, r6, 17
rotrwi r13, r6, 19
stw r7, 8*4+26*4(r1)
xor r12, r12, r13
srwi r13, r6, 10
rotrwi r11, r7, 17
xor r12, r12, r13
rotrwi r13, r7, 19
xor r11, r11, r13
srwi r13, r7, 10
add r8, r8, r12
xor r11, r11, r13
stw r8, 8*4+27*4(r1)
add r9, r9, r11
rotrwi r14, r8, 17
rotrwi r13, r8, 19
rotrwi r12, r9, 17
stw r9, 8*4+28*4(r1)
addis r4, r4, 0x0040
xor r14, r14, r13
rotrwi r13, r9, 19
xor r12, r12, r13
srwi r13, r8, 10
xor r14, r14, r13
srwi r13, r9, 10
xor r12, r12, r13
addi r4, r4, 0x0022
add r10, r10, r14
add r4, r4, r12
lwz r11, 8*4+16*4(r1)
addi r5, r5, 0x0100
stw r4, 8*4+30*4(r1)
rotrwi r14, r11, 7
stw r10, 8*4+29*4(r1)
rotrwi r13, r11, 18
rotrwi r12, r10, 17
xor r14, r14, r13
rotrwi r13, r10, 19
xor r12, r12, r13
srwi r13, r11, 3
xor r14, r14, r13
srwi r13, r10, 10
xor r12, r12, r13
add r5, r5, r14
add r5, r5, r12
stw r5, 8*4+31*4(r1)
sha256_extend_doubleround 16, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 18, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 20, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 22, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 24, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 26, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 28, r1, 8*4, r4, r5, r9, r10
sha256_extend_doubleround 30, r1, 8*4, r6, r7, r4, r5
sha256_extend_doubleround 32, r1, 8*4, r8, r9, r6, r7
sha256_extend_doubleround 34, r1, 8*4, r10, r4, r8, r9
sha256_extend_doubleround 36, r1, 8*4, r5, r6, r10, r4
sha256_extend_doubleround 38, r1, 8*4, r7, r8, r5, r6
sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
lis r18, HI(sha256_h)
addi r18, r18, LO(sha256_h)
lwz r14, 8*4+(44+1)*4(r1)
rotrwi r12, r4, 17
rotrwi r13, r4, 19
add r15, r11, r6
rotrwi r6, r14, 7
rotrwi r11, r14, 18
xor r12, r12, r13
xor r6, r6, r11
lwz r8, 4*4(r18)
lwz r9, 5*4(r18)
lwz r10, 6*4(r18)
lwz r11, 7*4(r18)
srwi r13, r4, 10
srwi r14, r14, 3
xor r12, r12, r13
xor r6, r6, r14
add r12, r12, r15
add r6, r6, r12
stw r6, 8*4+(44+16)*4(r1)
lwz r4, 0*4(r18)
lwz r5, 1*4(r18)
lwz r6, 2*4(r18)
lwz r7, 3*4(r18)
sha256_main_quadround 0, r16, r1, 8*4
sha256_main_quadround 4, r16, r1, 8*4
sha256_main_quadround 8, r16, r1, 8*4
sha256_main_quadround 12, r16, r1, 8*4
sha256_main_quadround 16, r16, r1, 8*4
sha256_main_quadround 20, r16, r1, 8*4
sha256_main_quadround 24, r16, r1, 8*4
sha256_main_quadround 28, r16, r1, 8*4
sha256_main_quadround 32, r16, r1, 8*4
sha256_main_quadround 36, r16, r1, 8*4
sha256_main_quadround 40, r16, r1, 8*4
sha256_main_quadround 44, r16, r1, 8*4
sha256_main_quadround 48, r16, r1, 8*4
sha256_main_quadround 52, r16, r1, 8*4
sha256_main_round 56, r16, r1, 8*4, r4, r5, r6, r7, r8, r9, r10, r11
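/*
 * Reduced final rounds: only state word 7 survives to the output, so the
 * Sigma0/Maj half of the round is dropped.
 */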
.macro sha256_main_round_red i, rk, rw, wo, rd, re, rf, rg, rh
lwz r12, \wo+(\i)*4(\rw)
and r15, \rf, \re
andc r14, \rg, \re
add \rh, \rh, \rd
or r14, r14, r15
lwz r15, (\i)*4(\rk)
rotrwi r13, \re, 5
add \rh, \rh, r14
xor r14, \re, r13
rotrwi r13, \re, 19
add \rh, \rh, r12
xor r14, r14, r13
add \rh, \rh, r15
rotrwi r13, r14, 6
add \rh, \rh, r13
.endm
sha256_main_round_red 57, r16, r1, 8*4, r6, r11, r8, r9, r10
sha256_main_round_red 58, r16, r1, 8*4, r5, r10, r11, r8, r9
sha256_main_round_red 59, r16, r1, 8*4, r4, r9, r10, r11, r8
lwz r5, 7*4(r18)
sha256_main_round_red 60, r16, r1, 8*4, r7, r8, r9, r10, r11
add r11, r11, r5
stw r11, 7*4(r3)
ld r13, 2*4(r1)
ld r14, 4*4(r1)
ld r15, 6*4(r1)
ld r16, 72*4(r1)
ld r17, 74*4(r1)
ld r18, 76*4(r1)
addi r1, r1, 80*4
blr
#ifdef __ALTIVEC__
#ifdef __APPLE__
.machine ppc7400
#endif
.data
.align 4
sha256_4h:
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
.data
.align 4
sha256_4k:
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
.data
.align 4
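/* Padding-derived constants folded into the second hash's message schedule */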
sha256d_4preext2:
.long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
.long 0x11002000, 0x11002000, 0x11002000, 0x11002000
.long 0x80000000, 0x80000000, 0x80000000, 0x80000000
.long 0x00400022, 0x00400022, 0x00400022, 0x00400022
.data
.align 4
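/* vperm control vector that byte-swaps each 32-bit word (endian conversion) */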
br_perm:
.long 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c
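/*
 * Rotate/shift counts for the extend rounds, preloaded once: vrlw rotates
 * left, so a splat of -7 (25 mod 32) yields the ROTR-by-7 of sigma0, and
 * v0/v16 hold the SHR-10/SHR-3 shift amounts.
 */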
.macro sha256_4way_extend_setup
vspltisw v0, 10
vspltisw v1, -7
vspltisw v16, 3
vspltisw v17, 15
vspltisw v18, 14
vspltisw v19, 13
.endm
.macro sha256_4way_extend_doubleround i, rw, va, vb, vy, vz
lvx v14, \rw, r7
vrlw v12, \vy, v17
vrlw v13, \vy, v19
vadduwm v11, v11, \va
vxor v12, v12, v13
vsrw v13, \vy, v0
vrlw \va, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vadduwm v12, v12, v11
vxor \va, \va, v13
vsrw v13, v14, v16
lvx v11, \rw, r8
vxor \va, \va, v13
vrlw v13, \vz, v19
vadduwm \va, \va, v12
vrlw v12, \vz, v17
vadduwm v14, v14, \vb
vxor v12, v12, v13
vsrw v13, \vz, v0
vrlw \vb, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
stvx \va, \rw, r10
vxor \vb, \vb, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor \vb, \vb, v13
vadduwm \vb, \vb, v14
stvx \vb, \rw, r11
addi \rw, \rw, 2*16
.endm
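/* Rotate counts for the folded Sigma0/Sigma1 of the main rounds (see the
   scalar macros above); v19 keeps its ROTR-19 value from the extend setup. */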
.macro sha256_4way_main_setup
vspltisw v2, 12
vspltisw v3, -5
vspltisw v16, -6
vspltisw v17, -11
vspltisw v18, -2
.endm
.macro sha256_4way_main_round i, rk, rw, va, vb, vc, vd, ve, vf, vg, vh
li r6, (\i)*16
lvx v12, \rw, r6
vand v13, \vf, \ve
vandc v14, \vg, \ve
lvx v15, \rk, r6
vor v14, v14, v13
vrlw v13, \ve, v3
vadduwm \vh, \vh, v14
vxor v14, \ve, v13
vrlw v13, \ve, v19
vadduwm \vh, \vh, v12
vxor v14, v14, v13
vadduwm \vh, \vh, v15
vrlw v13, v14, v16
vxor v15, \va, \vb
vadduwm \vh, \vh, v13
vrlw v13, \va, v17
vand v15, v15, \vc
vxor v12, \va, v13
vrlw v13, \va, v2
vand v14, \va, \vb
vxor v12, v12, v13
vxor v14, v14, v15
vrlw v13, v12, v18
vadduwm v15, \vh, v14
vadduwm \vh, \vh, \vd
vadduwm \vd, v15, v13
.endm
.macro sha256_4way_main_quadround i, rk, rw
sha256_4way_main_round \i+0, \rk, \rw, v4, v5, v6, v7, v8, v9, v10, v11
sha256_4way_main_round \i+1, \rk, \rw, v7, v4, v5, v6, v11, v8, v9, v10
sha256_4way_main_round \i+2, \rk, \rw, v6, v7, v4, v5, v10, v11, v8, v9
sha256_4way_main_round \i+3, \rk, \rw, v5, v6, v7, v4, v9, v10, v11, v8
.endm
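/*
 * Assuming the usual prototype, roughly void sha256_init_4way(uint32_t *state),
 * this copies the splatted initial values into the interleaved 4-way state.
 * VRSAVE (SPR 256) is saved in r0 and restored around the vector use.
 */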
.text
.align 2
.globl sha256_init_4way
.globl _sha256_init_4way
#ifdef __ELF__
.type sha256_init_4way, %function
#endif
sha256_init_4way:
_sha256_init_4way:
mfspr r0, 256
oris r12, r0, 0xff00
mtspr 256, r12
lis r4, HI(sha256_4h)
addi r4, r4, LO(sha256_4h)
li r5, 1*16
li r6, 2*16
li r7, 3*16
li r8, 4*16
li r9, 5*16
li r10, 6*16
li r11, 7*16
lvx v0, 0, r4
lvx v1, r4, r5
lvx v2, r4, r6
lvx v3, r4, r7
lvx v4, r4, r8
lvx v5, r4, r9
lvx v6, r4, r10
lvx v7, r4, r11
stvx v0, 0, r3
stvx v1, r3, r5
stvx v2, r3, r6
stvx v3, r3, r7
stvx v4, r3, r8
stvx v5, r3, r9
stvx v6, r3, r10
stvx v7, r3, r11
mtspr 256, r0
blr
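/*
 * sha256_transform_4way: 4-way analogue of sha256_transform above;
 * r3 = interleaved state, r4 = interleaved 16x4-word block, r5 = swap flag.
 * The stack pointer is realigned to 16 bytes for lvx/stvx scratch space.
 */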
.text
.align 2
.globl sha256_transform_4way
.globl _sha256_transform_4way
#ifdef __ELF__
.type sha256_transform_4way, %function
#endif
sha256_transform_4way:
_sha256_transform_4way:
mfspr r0, 256
oris r12, r0, 0xffff
ori r12, r12, 0xf000
mtspr 256, r12
andi. r6, r1, 15
cmpwi 0, r5, 0
li r7, -(4*4+64*16)
subf r6, r6, r7
stdux r1, r1, r6
li r7, 1*16
li r8, 2*16
li r9, 3*16
li r10, 4*16
li r11, 5*16
li r12, 6*16
li r6, 7*16
bne 0, sha256_transform_4way_swap
lvx v11, 0, r4
lvx v1, r4, r7
lvx v2, r4, r8
lvx v3, r4, r9
lvx v4, r4, r10
lvx v5, r4, r11
lvx v6, r4, r12
lvx v7, r4, r6
addi r5, r1, 4*4
stvx v11, 0, r5
stvx v1, r5, r7
stvx v2, r5, r8
stvx v3, r5, r9
stvx v4, r5, r10
stvx v5, r5, r11
stvx v6, r5, r12
stvx v7, r5, r6
addi r4, r4, 8*16
lvx v0, 0, r4
lvx v4, r4, r7
lvx v5, r4, r8
lvx v6, r4, r9
lvx v7, r4, r10
lvx v8, r4, r11
lvx v9, r4, r12
lvx v10, r4, r6
addi r4, r1, 4*4+8*16
stvx v0, 0, r4
stvx v4, r4, r7
stvx v5, r4, r8
stvx v6, r4, r9
stvx v7, r4, r10
stvx v8, r4, r11
stvx v9, r4, r12
stvx v10, r4, r6
b sha256_transform_4way_extend
sha256_transform_4way_swap:
lis r5, HI(br_perm)
addi r5, r5, LO(br_perm)
lvx v19, 0, r5
lvx v11, 0, r4
lvx v1, r4, r7
lvx v2, r4, r8
lvx v3, r4, r9
lvx v4, r4, r10
lvx v5, r4, r11
lvx v6, r4, r12
lvx v7, r4, r6
vperm v11, v11, v11, v19
vperm v1, v1, v1, v19
vperm v2, v2, v2, v19
vperm v3, v3, v3, v19
vperm v4, v4, v4, v19
vperm v5, v5, v5, v19
vperm v6, v6, v6, v19
vperm v7, v7, v7, v19
addi r5, r1, 4*4
stvx v11, 0, r5
stvx v1, r5, r7
stvx v2, r5, r8
stvx v3, r5, r9
stvx v4, r5, r10
stvx v5, r5, r11
stvx v6, r5, r12
stvx v7, r5, r6
addi r4, r4, 8*16
lvx v0, 0, r4
lvx v4, r4, r7
lvx v5, r4, r8
lvx v6, r4, r9
lvx v7, r4, r10
lvx v8, r4, r11
lvx v9, r4, r12
lvx v10, r4, r6
vperm v0, v0, v0, v19
vperm v4, v4, v4, v19
vperm v5, v5, v5, v19
vperm v6, v6, v6, v19
vperm v7, v7, v7, v19
vperm v8, v8, v8, v19
vperm v9, v9, v9, v19
vperm v10, v10, v10, v19
addi r4, r1, 4*4+8*16
stvx v0, 0, r4
stvx v4, r4, r7
stvx v5, r4, r8
stvx v6, r4, r9
stvx v7, r4, r10
stvx v8, r4, r11
stvx v9, r4, r12
stvx v10, r4, r6
sha256_transform_4way_extend:
li r10, 16*16
li r11, 17*16
sha256_4way_extend_setup
sha256_4way_extend_doubleround 0, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 2, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 4, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 6, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 8, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 10, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 12, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 14, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 16, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r5, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r5, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r5, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r5, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r5, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r5, v4, v5, v9, v10
sha256_4way_extend_doubleround 44, r5, v6, v7, v4, v5
sha256_4way_extend_doubleround 46, r5, v8, v9, v6, v7
addi r11, r3, 4*16
lvx v4, 0, r3
lvx v5, r3, r7
lvx v6, r3, r8
lvx v7, r3, r9
lvx v8, 0, r11
lvx v9, r11, r7
lvx v10, r11, r8
lvx v11, r11, r9
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
addi r5, r1, 4*4
sha256_4way_main_setup
sha256_4way_main_quadround 0, r12, r5
sha256_4way_main_quadround 4, r12, r5
sha256_4way_main_quadround 8, r12, r5
sha256_4way_main_quadround 12, r12, r5
sha256_4way_main_quadround 16, r12, r5
sha256_4way_main_quadround 20, r12, r5
sha256_4way_main_quadround 24, r12, r5
sha256_4way_main_quadround 28, r12, r5
sha256_4way_main_quadround 32, r12, r5
sha256_4way_main_quadround 36, r12, r5
sha256_4way_main_quadround 40, r12, r5
sha256_4way_main_quadround 44, r12, r5
sha256_4way_main_quadround 48, r12, r5
sha256_4way_main_quadround 52, r12, r5
sha256_4way_main_quadround 56, r12, r5
sha256_4way_main_quadround 60, r12, r5
lvx v12, 0, r3
lvx v13, r3, r7
lvx v14, r3, r8
lvx v15, r3, r9
lvx v16, 0, r11
lvx v17, r11, r7
lvx v18, r11, r8
lvx v19, r11, r9
vadduwm v4, v4, v12
vadduwm v5, v5, v13
vadduwm v6, v6, v14
vadduwm v7, v7, v15
vadduwm v8, v8, v16
vadduwm v9, v9, v17
vadduwm v10, v10, v18
vadduwm v11, v11, v19
stvx v4, 0, r3
stvx v5, r3, r7
stvx v6, r3, r8
stvx v7, r3, r9
stvx v8, 0, r11
stvx v9, r11, r7
stvx v10, r11, r8
stvx v11, r11, r9
ld r1, 0(r1)
mtspr 256, r0
blr
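/*
 * sha256d_ms_4way: 4-way analogue of sha256d_ms, with what appears to be
 * the same argument order (hash, data, midstate, prehash) in r3..r6.
 */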
.text
.align 2
.globl sha256d_ms_4way
.globl _sha256d_ms_4way
#ifdef __ELF__
.type sha256d_ms_4way, %function
#endif
sha256d_ms_4way:
_sha256d_ms_4way:
mfspr r0, 256
oris r12, r0, 0xffff
ori r12, r12, 0xf000
mtspr 256, r12
andi. r12, r1, 15
li r11, -(4*4+64*16)
subf r12, r12, r11
stdux r1, r1, r12
li r7, 1*16
li r8, 2*16
li r9, 3*16
li r10, 16*16
li r11, 17*16
sha256_4way_extend_setup
addi r4, r4, 2*16
addi r12, r1, 4*4+18*16
lvx v14, r4, r7
lvx v6, r4, r10
lvx v7, r4, r11
vrlw v12, v14, v1
vrlw v13, v14, v18
stvx v6, 0, r12
vxor v12, v12, v13
vsrw v13, v14, v16
stvx v7, r12, r7
vxor v12, v12, v13
vadduwm v6, v6, v12
vadduwm v7, v7, v14
stvx v6, r4, r10
vrlw v12, v6, v17
vrlw v13, v6, v19
stvx v7, r4, r11
addi r4, r4, 18*16
lvx v8, 0, r4
vxor v12, v12, v13
vsrw v13, v6, v0
stvx v8, r12, r8
vxor v12, v12, v13
vadduwm v8, v8, v12
vrlw v9, v7, v17
vrlw v13, v7, v19
stvx v8, 0, r4
vxor v9, v9, v13
vsrw v13, v7, v0
vxor v9, v9, v13
vrlw v12, v8, v17
vrlw v13, v8, v19
stvx v9, r4, r7
vxor v12, v12, v13
vsrw v13, v8, v0
lvx v10, r4, r8
lvx v4, r4, r9
vxor v12, v12, v13
stvx v10, r12, r9
addi r12, r12, 4*16
stvx v4, 0, r12
vrlw v14, v9, v17
vrlw v13, v9, v19
vadduwm v10, v10, v12
vxor v14, v14, v13
vsrw v13, v9, v0
stvx v10, r4, r8
vxor v14, v14, v13
vadduwm v4, v4, v14
vrlw v12, v10, v17
vrlw v13, v10, v19
stvx v4, r4, r9
vxor v12, v12, v13
vsrw v13, v10, v0
vrlw v14, v4, v17
vxor v12, v12, v13
vrlw v13, v4, v19
addi r4, r4, 4*16
lvx v5, 0, r4
vxor v14, v14, v13
stvx v5, r12, r7
vsrw v13, v4, v0
vadduwm v5, v5, v12
vxor v14, v14, v13
stvx v5, 0, r4
vadduwm v6, v6, v14
vrlw v12, v5, v17
vrlw v13, v5, v19
stvx v6, r4, r7
vxor v12, v12, v13
vsrw v13, v5, v0
vrlw v14, v6, v17
vxor v12, v12, v13
vrlw v13, v6, v19
vxor v14, v14, v13
vsrw v13, v6, v0
vadduwm v7, v7, v12
vxor v14, v14, v13
stvx v7, r4, r8
vadduwm v8, v8, v14
vrlw v12, v7, v17
vrlw v13, v7, v19
stvx v8, r4, r9
vxor v12, v12, v13
vsrw v13, v7, v0
vrlw v14, v8, v17
vxor v12, v12, v13
vrlw v13, v8, v19
vxor v14, v14, v13
vsrw v13, v8, v0
vadduwm v9, v9, v12
vxor v14, v14, v13
addi r4, r4, 4*16
stvx v9, 0, r4
vadduwm v10, v10, v14
vrlw v12, v9, v17
vrlw v13, v9, v19
stvx v10, r4, r7
vxor v12, v12, v13
vsrw v13, v9, v0
lvx v11, r4, r8
lvx v14, r4, r9
stvx v11, r12, r8
stvx v14, r12, r9
vxor v12, v12, v13
vadduwm v11, v11, v12
vadduwm v5, v5, v14
vrlw v12, v10, v17
vrlw v13, v10, v19
vadduwm v4, v4, v11
vxor v12, v12, v13
vsrw v13, v10, v0
stvx v4, r4, r8
vxor v12, v12, v13
vadduwm v5, v5, v12
stvx v5, r4, r9
addi r4, r4, -12*16
lvx v11, 0, r4
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 44, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 46, r4, v8, v9, v6, v7
addi r4, r4, -48*16
lvx v4, 0, r6
lvx v9, r6, r7
lvx v10, r6, r8
lvx v11, r6, r9
addi r12, r6, 4*16
lvx v8, 0, r12
lvx v5, r12, r7
lvx v6, r12, r8
lvx v7, r12, r9
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
sha256_4way_main_setup
sha256_4way_main_round 3, r12, r4, v5, v6, v7, v4, v9, v10, v11, v8
sha256_4way_main_quadround 4, r12, r4
sha256_4way_main_quadround 8, r12, r4
sha256_4way_main_quadround 12, r12, r4
sha256_4way_main_quadround 16, r12, r4
sha256_4way_main_quadround 20, r12, r4
sha256_4way_main_quadround 24, r12, r4
sha256_4way_main_quadround 28, r12, r4
sha256_4way_main_quadround 32, r12, r4
sha256_4way_main_quadround 36, r12, r4
sha256_4way_main_quadround 40, r12, r4
sha256_4way_main_quadround 44, r12, r4
sha256_4way_main_quadround 48, r12, r4
sha256_4way_main_quadround 52, r12, r4
sha256_4way_main_quadround 56, r12, r4
sha256_4way_main_quadround 60, r12, r4
lvx v12, 0, r5
lvx v13, r5, r7
lvx v14, r5, r8
lvx v15, r5, r9
addi r12, r5, 4*16
lvx v16, 0, r12
lvx v17, r12, r7
lvx v18, r12, r8
lvx v19, r12, r9
vadduwm v4, v4, v12
vadduwm v5, v5, v13
vadduwm v6, v6, v14
vadduwm v7, v7, v15
vadduwm v8, v8, v16
vadduwm v9, v9, v17
vadduwm v10, v10, v18
vadduwm v11, v11, v19
addi r12, r1, 4*4
stvx v4, 0, r12
stvx v5, r12, r7
stvx v6, r12, r8
stvx v7, r12, r9
addi r12, r12, 4*16
stvx v8, 0, r12
stvx v9, r12, r7
stvx v10, r12, r8
stvx v11, r12, r9
addi r12, r1, 4*4+18*16
lvx v4, 0, r12
lvx v5, r12, r7
lvx v6, r12, r8
lvx v7, r12, r9
addi r12, r12, 4*16
lvx v8, 0, r12
lvx v9, r12, r7
lvx v10, r12, r8
lvx v11, r12, r9
addi r12, r4, 18*16
stvx v4, 0, r12
stvx v5, r12, r7
stvx v6, r12, r8
addi r12, r4, 22*16
stvx v7, 0, r12
stvx v8, r12, r7
stvx v9, r12, r8
addi r12, r4, 30*16
stvx v10, 0, r12
stvx v11, r12, r7
addi r4, r1, 4*4
sha256_4way_extend_setup
lis r12, HI(sha256d_4preext2)
addi r12, r12, LO(sha256d_4preext2)
lvx v2, 0, r12
vxor v9, v9, v9
vspltisw v3, 1
lvx v4, r12, r8
vsldoi v3, v3, v3, 1
addi r5, r1, 4*4+8*16
stvx v4, 0, r5
stvx v9, r5, r7
stvx v9, r5, r8
stvx v9, r5, r9
addi r5, r5, 4*16
stvx v9, 0, r5
stvx v9, r5, r7
stvx v9, r5, r8
stvx v3, r5, r9
lvx v4, 0, r4
lvx v14, r4, r7
lvx v11, r4, r8
vrlw v12, v14, v1
vrlw v13, v14, v18
vxor v12, v12, v13
vsrw v13, v14, v16
vadduwm v5, v14, v2
vxor v12, v12, v13
vrlw v14, v11, v1
vrlw v13, v11, v18
vadduwm v4, v4, v12
vxor v14, v14, v13
vsrw v13, v11, v16
stvx v4, r4, r10
vxor v14, v14, v13
vrlw v12, v4, v17
vrlw v13, v4, v19
vadduwm v5, v5, v14
stvx v5, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v4, v0
vrlw v6, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v6, v6, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v6, v6, v13
vrlw v12, v5, v17
vrlw v13, v5, v19
vadduwm v6, v6, v11
lvx v11, r4, r8
stvx v6, r4, r10
vxor v12, v12, v13
vsrw v13, v5, v0
vrlw v7, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
vxor v7, v7, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor v7, v7, v13
vrlw v12, v6, v17
vrlw v13, v6, v19
vadduwm v7, v7, v14
stvx v7, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v6, v0
vrlw v8, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v8, v8, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v8, v8, v13
vrlw v12, v7, v17
vrlw v13, v7, v19
vadduwm v8, v8, v11
lvx v11, r4, r8
stvx v8, r4, r10
vxor v12, v12, v13
vsrw v13, v7, v0
vrlw v9, v11, v1
vxor v12, v12, v13
vrlw v13, v11, v18
vxor v9, v9, v13
vsrw v13, v11, v16
vadduwm v14, v14, v12
vxor v9, v9, v13
vrlw v12, v8, v17
vrlw v13, v8, v19
vadduwm v9, v9, v14
stvx v9, r4, r11
addi r4, r4, 2*16
lvx v14, r4, r7
vxor v12, v12, v13
vsrw v13, v8, v0
vrlw v10, v14, v1
vxor v12, v12, v13
vrlw v13, v14, v18
vxor v10, v10, v13
vsrw v13, v14, v16
vadduwm v11, v11, v12
vxor v10, v10, v13
vrlw v12, v9, v17
vrlw v13, v9, v19
vadduwm v11, v11, v3
vadduwm v14, v14, v4
vadduwm v10, v10, v11
lvx v2, r12, r7
vxor v12, v12, v13
vsrw v13, v9, v0
stvx v10, r4, r10
vxor v12, v12, v13
vadduwm v14, v14, v12
vrlw v12, v10, v17
vrlw v13, v10, v19
vadduwm v4, v14, v2
lvx v2, r12, r8
vxor v12, v12, v13
vsrw v13, v10, v0
stvx v4, r4, r11
vadduwm v5, v5, v2
vxor v12, v12, v13
vadduwm v5, v5, v12
vrlw v12, v4, v17
vrlw v13, v4, v19
addi r4, r4, 2*16
stvx v5, r4, r10
vxor v12, v12, v13
vsrw v13, v4, v0
vrlw v11, v5, v17
vxor v12, v12, v13
vrlw v13, v5, v19
vxor v11, v11, v13
vsrw v13, v5, v0
vadduwm v6, v6, v12
vxor v11, v11, v13
stvx v6, r4, r11
vadduwm v7, v7, v11
vrlw v12, v6, v17
vrlw v13, v6, v19
addi r4, r4, 2*16
stvx v7, r4, r10
vxor v12, v12, v13
vsrw v13, v6, v0
vrlw v11, v7, v17
vxor v12, v12, v13
vrlw v13, v7, v19
vxor v11, v11, v13
vsrw v13, v7, v0
vadduwm v8, v8, v12
vxor v11, v11, v13
stvx v8, r4, r11
vadduwm v9, v9, v11
lvx v2, r12, r9
vrlw v14, v8, v17
vrlw v13, v8, v19
vrlw v12, v9, v17
addi r4, r4, 2*16
stvx v9, r4, r10
vxor v14, v14, v13
vrlw v13, v9, v19
vxor v12, v12, v13
vsrw v13, v8, v0
vxor v14, v14, v13
vsrw v13, v9, v0
vxor v12, v12, v13
vadduwm v4, v4, v2
vadduwm v10, v10, v14
vadduwm v4, v4, v12
stvx v10, r4, r11
addi r4, r4, 2*16
lvx v11, r4, r8
vadduwm v5, v5, v3
stvx v4, r4, r10
vrlw v14, v11, v1
vrlw v13, v11, v18
vrlw v12, v10, v17
vxor v14, v14, v13
vrlw v13, v10, v19
vxor v12, v12, v13
vsrw v13, v11, v16
vxor v14, v14, v13
vsrw v13, v10, v0
vxor v12, v12, v13
vadduwm v5, v5, v14
vadduwm v5, v5, v12
stvx v5, r4, r11
addi r4, r4, 2*16
sha256_4way_extend_doubleround 16, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 18, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 20, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 22, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 24, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 26, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 28, r4, v4, v5, v9, v10
sha256_4way_extend_doubleround 30, r4, v6, v7, v4, v5
sha256_4way_extend_doubleround 32, r4, v8, v9, v6, v7
sha256_4way_extend_doubleround 34, r4, v10, v4, v8, v9
sha256_4way_extend_doubleround 36, r4, v5, v6, v10, v4
sha256_4way_extend_doubleround 38, r4, v7, v8, v5, v6
sha256_4way_extend_doubleround 40, r4, v9, v10, v7, v8
sha256_4way_extend_doubleround 42, r4, v4, v5, v9, v10
lvx v14, r4, r7
vrlw v12, v4, v17
vrlw v13, v4, v19
vadduwm v15, v11, v6
vrlw v6, v14, v1
vrlw v11, v14, v18
vxor v12, v12, v13
vxor v6, v6, v11
vsrw v13, v4, v0
vsrw v14, v14, v16
vxor v12, v12, v13
vxor v6, v6, v14
vadduwm v12, v12, v15
vadduwm v6, v6, v12
stvx v6, r4, r10
addi r4, r4, -44*16
lis r5, HI(sha256_4h)
addi r5, r5, LO(sha256_4h)
lvx v4, 0, r5
lvx v5, r5, r7
lvx v6, r5, r8
lvx v7, r5, r9
addi r12, r5, 4*16
lvx v8, 0, r12
lvx v9, r12, r7
lvx v10, r12, r8
lvx v11, r12, r9
lis r12, HI(sha256_4k)
addi r12, r12, LO(sha256_4k)
sha256_4way_main_setup
sha256_4way_main_quadround 0, r12, r4
sha256_4way_main_quadround 4, r12, r4
sha256_4way_main_quadround 8, r12, r4
sha256_4way_main_quadround 12, r12, r4
sha256_4way_main_quadround 16, r12, r4
sha256_4way_main_quadround 20, r12, r4
sha256_4way_main_quadround 24, r12, r4
sha256_4way_main_quadround 28, r12, r4
sha256_4way_main_quadround 32, r12, r4
sha256_4way_main_quadround 36, r12, r4
sha256_4way_main_quadround 40, r12, r4
sha256_4way_main_quadround 44, r12, r4
sha256_4way_main_quadround 48, r12, r4
sha256_4way_main_quadround 52, r12, r4
sha256_4way_main_round 56, r12, r4, v4, v5, v6, v7, v8, v9, v10, v11
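/*
 * Reduced final rounds, as in the scalar version: only state word 7 of
 * each lane is produced.
 */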
.macro sha256_4way_main_round_red i, rk, rw, vd, ve, vf, vg, vh
li r6, (\i)*16
vand v15, \vf, \ve
vandc v14, \vg, \ve
lvx v12, \rw, r6
vadduwm \vh, \vh, \vd
vor v14, v14, v15
lvx v15, \rk, r6
vrlw v13, \ve, v3
vadduwm \vh, \vh, v14
vxor v14, \ve, v13
vrlw v13, \ve, v19
vadduwm \vh, \vh, v12
vxor v14, v14, v13
vadduwm \vh, \vh, v15
vrlw v13, v14, v16
vadduwm \vh, \vh, v13
.endm
sha256_4way_main_round_red 57, r12, r4, v6, v11, v8, v9, v10
sha256_4way_main_round_red 58, r12, r4, v5, v10, v11, v8, v9
sha256_4way_main_round_red 59, r12, r4, v4, v9, v10, v11, v8
sha256_4way_main_round_red 60, r12, r4, v7, v8, v9, v10, v11
li r12, 7*16
lvx v19, r5, r12
vadduwm v11, v11, v19
stvx v11, r3, r12
ld r1, 0(r1)
mtspr 256, r0
blr
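/*
 * sha256_use_4way: reports to the C code that the AltiVec 4-way path is
 * compiled in (always 1 when __ALTIVEC__ is defined).
 */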
.text
.align 2
.globl sha256_use_4way
.globl _sha256_use_4way
#ifdef __ELF__
.type sha256_use_4way, %function
#endif
sha256_use_4way:
_sha256_use_4way:
li r3, 1
blr
#endif /* __ALTIVEC__ */
#endif