Add optimized code for ARM11 processors
This commit is contained in:
parent
ff69f18995
commit
023a0f2a12
5 changed files with 965 additions and 2 deletions
|
@ -15,8 +15,8 @@ bin_PROGRAMS = minerd
|
|||
|
||||
minerd_SOURCES = elist.h miner.h compat.h \
|
||||
cpu-miner.c util.c \
|
||||
sha2.c sha2-x86.S sha2-x64.S \
|
||||
scrypt.c scrypt-x86.S scrypt-x64.S
|
||||
sha2.c sha2-arm.S sha2-x86.S sha2-x64.S \
|
||||
scrypt.c scrypt-arm.S scrypt-x86.S scrypt-x64.S
|
||||
minerd_LDFLAGS = $(PTHREAD_FLAGS)
|
||||
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@
|
||||
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
|
||||
|
|
321
scrypt-arm.S
Normal file
321
scrypt-arm.S
Normal file
|
@ -0,0 +1,321 @@
|
|||
/*
|
||||
* Copyright 2012 pooler@litecoinpool.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
|
||||
#if defined(__arm__) && defined(__APCS_32__)
|
||||
|
||||
/*
 * salsa8_core_doubleround: two Salsa20 rounds (a column round followed by a
 * row round) over a 16-word state.
 *
 * Words 0-7 stay in r0-r7. Words 8-15 live in the stack scratch at
 * [sp, #8*4 .. #15*4] and are cycled through r8-r12 and lr as needed.
 * On entry the macro reads r9 = word 9, r10 = word 14, r8 = word 11,
 * lr = word 12, r11 = word 10, r12 = word 15 (the values salsa8_core loads).
 * On exit r11/r12 hold the updated words 10/15, which have NOT yet been
 * written back to the stack; r8 and lr hold leftover sums.
 *
 * "ror #25 / #23 / #19 / #14" are left-rotations by 7 / 9 / 13 / 18 bits.
 */
.macro salsa8_core_doubleround
|
||||
/* Column round, step 1: words 3, 4, 9, 14 ^= rotl(sum, 7) */
add r8, r8, r12
|
||||
add lr, lr, r0
|
||||
eor r3, r3, r8, ror #25
|
||||
eor r4, r4, lr, ror #25
|
||||
add r8, r5, r1
|
||||
add lr, r11, r6
|
||||
eor r9, r9, r8, ror #25
|
||||
eor r10, r10, lr, ror #25
|
||||
str r9, [sp, #9*4]
|
||||
str r10, [sp, #14*4]
|
||||
|
||||
/* Column round, step 2: words 2, 7, 8, 13 ^= rotl(sum, 9) */
ldr r8, [sp, #8*4]
|
||||
ldr lr, [sp, #13*4]
|
||||
add r11, r11, r10
|
||||
add r12, r12, r3
|
||||
eor r2, r2, r11, ror #23
|
||||
eor r7, r7, r12, ror #23
|
||||
add r11, r4, r0
|
||||
add r12, r9, r5
|
||||
eor r8, r8, r11, ror #23
|
||||
eor lr, lr, r12, ror #23
|
||||
str r8, [sp, #8*4]
|
||||
str lr, [sp, #13*4]
|
||||
|
||||
/* Column round, step 3: words 1, 6, 11, 12 ^= rotl(sum, 13) */
ldr r11, [sp, #11*4]
|
||||
ldr r12, [sp, #12*4]
|
||||
add r9, lr, r9
|
||||
add r10, r2, r10
|
||||
eor r1, r1, r9, ror #19
|
||||
eor r6, r6, r10, ror #19
|
||||
add r9, r7, r3
|
||||
add r10, r8, r4
|
||||
eor r11, r11, r9, ror #19
|
||||
eor r12, r12, r10, ror #19
|
||||
|
||||
/* Column round, step 4: words 0, 5, 10, 15 ^= rotl(sum, 18) */
ldr r9, [sp, #10*4]
|
||||
ldr r10, [sp, #15*4]
|
||||
add r8, r12, r8
|
||||
add lr, r1, lr
|
||||
eor r0, r0, r8, ror #14
|
||||
eor r5, r5, lr, ror #14
|
||||
add r8, r6, r2
|
||||
add lr, r11, r7
|
||||
eor r9, r9, r8, ror #14
|
||||
eor r10, r10, lr, ror #14
|
||||
|
||||
/* Row round, step 1: words 11, 12, 1, 6 ^= rotl(sum, 7) */
ldr r8, [sp, #9*4]
|
||||
ldr lr, [sp, #14*4]
|
||||
|
||||
/* spill the words 10 and 15 produced by the column round */
str r9, [sp, #10*4]
|
||||
str r10, [sp, #15*4]
|
||||
|
||||
add r8, r9, r8
|
||||
add lr, r10, lr
|
||||
eor r11, r11, r8, ror #25
|
||||
eor r12, r12, lr, ror #25
|
||||
add r8, r0, r3
|
||||
add lr, r5, r4
|
||||
eor r1, r1, r8, ror #25
|
||||
eor r6, r6, lr, ror #25
|
||||
str r11, [sp, #11*4]
|
||||
str r12, [sp, #12*4]
|
||||
|
||||
/* Row round, step 2: words 8, 13, 2, 7 ^= rotl(sum, 9) */
ldr r8, [sp, #8*4]
|
||||
ldr lr, [sp, #13*4]
|
||||
add r9, r11, r9
|
||||
add r10, r12, r10
|
||||
eor r8, r8, r9, ror #23
|
||||
eor lr, lr, r10, ror #23
|
||||
add r9, r1, r0
|
||||
add r10, r6, r5
|
||||
eor r2, r2, r9, ror #23
|
||||
eor r7, r7, r10, ror #23
|
||||
str r8, [sp, #8*4]
|
||||
str lr, [sp, #13*4]
|
||||
|
||||
/* Row round, step 3: words 9, 14, 3, 4 ^= rotl(sum, 13) */
ldr r9, [sp, #9*4]
|
||||
ldr r10, [sp, #14*4]
|
||||
add r11, r8, r11
|
||||
add r12, lr, r12
|
||||
eor r9, r9, r11, ror #19
|
||||
eor r10, r10, r12, ror #19
|
||||
add r11, r2, r1
|
||||
add r12, r7, r6
|
||||
eor r3, r3, r11, ror #19
|
||||
eor r4, r4, r12, ror #19
|
||||
str r9, [sp, #9*4]
|
||||
str r10, [sp, #14*4]
|
||||
|
||||
/* Row round, step 4: words 10, 15, 0, 5 ^= rotl(sum, 18); 10/15 stay in r11/r12 */
ldr r11, [sp, #10*4]
|
||||
ldr r12, [sp, #15*4]
|
||||
add r8, r9, r8
|
||||
add lr, r10, lr
|
||||
eor r11, r11, r8, ror #14
|
||||
eor r12, r12, lr, ror #14
|
||||
add r8, r3, r2
|
||||
add lr, r4, r7
|
||||
eor r0, r0, r8, ror #14
|
||||
eor r5, r5, lr, ror #14
|
||||
.endm
|
||||
|
||||
/*
 * salsa8_core: runs four double-rounds (eight rounds) in place on the
 * 16-word block stored at [sp, #0*4 .. #15*4].
 *
 * Only the permutation rounds are performed here; adding the result back to
 * the input is done by the callers (scrypt_core_macro2_x4 / macro3_*).
 * Clobbers r0-r12 and lr.
 */
.macro salsa8_core
|
||||
/* words 0-7 -> r0-r7, plus the six upper words the double-round reads first */
ldmia sp, {r0-r7}
|
||||
ldr r9, [sp, #9*4]
|
||||
ldr r10, [sp, #14*4]
|
||||
ldr r8, [sp, #11*4]
|
||||
ldr lr, [sp, #12*4]
|
||||
ldr r11, [sp, #10*4]
|
||||
ldr r12, [sp, #15*4]
|
||||
salsa8_core_doubleround
|
||||
/* between double-rounds: reload words 11/12 and spill words 10/15, which the
   double-round leaves only in r11/r12 */
ldr r8, [sp, #11*4]
|
||||
ldr lr, [sp, #12*4]
|
||||
str r11, [sp, #10*4]
|
||||
str r12, [sp, #15*4]
|
||||
salsa8_core_doubleround
|
||||
ldr r8, [sp, #11*4]
|
||||
ldr lr, [sp, #12*4]
|
||||
str r11, [sp, #10*4]
|
||||
str r12, [sp, #15*4]
|
||||
salsa8_core_doubleround
|
||||
ldr r8, [sp, #11*4]
|
||||
ldr lr, [sp, #12*4]
|
||||
str r11, [sp, #10*4]
|
||||
str r12, [sp, #15*4]
|
||||
salsa8_core_doubleround
|
||||
/* final write-back: words 10/15 from r11/r12, words 0-7 from r0-r7
   (words 8, 9, 11-14 were already stored inside the double-round) */
str r11, [sp, #10*4]
|
||||
str r12, [sp, #15*4]
|
||||
stmia sp, {r0-r7}
|
||||
.endm
|
||||
|
||||
|
||||
/*
 * scrypt_core_macro1a_x4: processes four words of each operand.
 *
 *   a = 4 words at [r0]          (r0 is not advanced by this load)
 *   b = 4 words at [lr]          (lr advances)
 *   [r1] <- a                    (r1 advances)
 *   [r3] <- b                    (r3 advances)
 *   [r0] <- a ^ b, [r12] <- a ^ b   (r0 and r12 advance)
 *
 * In scrypt_core loop 1 this is used with r0 = X, lr = X + 16 words,
 * r1 = V entry, r3 = V entry + 16 words, r12 = stack scratch.
 * Clobbers r4-r11.
 */
.macro scrypt_core_macro1a_x4
|
||||
ldmia r0, {r4-r7}
|
||||
ldmia lr!, {r8-r11}
|
||||
stmia r1!, {r4-r7}
|
||||
stmia r3!, {r8-r11}
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
stmia r0!, {r4-r7}
|
||||
stmia r12!, {r4-r7}
|
||||
.endm
|
||||
|
||||
/*
 * scrypt_core_macro1b_x4: processes four words of each operand.
 *
 *   t    = [r3] ^ [r2]           (r3 advances)
 *   [r2] <- t                    (r2 advances)
 *   u    = [r0] ^ t ^ [r1]       (r1 advances)
 *   [r0] <- u, [r12] <- u        (r0 and r12 advance)
 *
 * In scrypt_core loop 2 this is used with r0 = X, r2 = X + 16 words,
 * r1 = selected V entry, r3 = that entry + 16 words, r12 = stack scratch.
 * Clobbers r4-r11.
 */
.macro scrypt_core_macro1b_x4
|
||||
ldmia r3!, {r8-r11}
|
||||
ldmia r2, {r4-r7}
|
||||
eor r8, r8, r4
|
||||
eor r9, r9, r5
|
||||
eor r10, r10, r6
|
||||
eor r11, r11, r7
|
||||
ldmia r0, {r4-r7}
|
||||
stmia r2!, {r8-r11}
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
ldmia r1!, {r8-r11}
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
stmia r0!, {r4-r7}
|
||||
stmia r12!, {r4-r7}
|
||||
.endm
|
||||
|
||||
/*
 * scrypt_core_macro2_x4: processes four words.
 *
 *   s     = [r12] + [r0]         (word-wise add)
 *   [r0]  <- s                   (r0 advances)
 *   t     = s ^ [r2]
 *   [r2]  <- t, [r12] <- t       (r2 and r12 advance)
 *
 * scrypt_core invokes it after salsa8_core with r12 = stack scratch,
 * r0 = X and r2 = X + 16 words. Clobbers r4-r11.
 */
.macro scrypt_core_macro2_x4
|
||||
ldmia r12, {r4-r7}
|
||||
ldmia r0, {r8-r11}
|
||||
add r4, r4, r8
|
||||
add r5, r5, r9
|
||||
add r6, r6, r10
|
||||
add r7, r7, r11
|
||||
stmia r0!, {r4-r7}
|
||||
ldmia r2, {r8-r11}
|
||||
eor r4, r4, r8
|
||||
eor r5, r5, r9
|
||||
eor r6, r6, r10
|
||||
eor r7, r7, r11
|
||||
stmia r2!, {r4-r7}
|
||||
stmia r12!, {r4-r7}
|
||||
.endm
|
||||
|
||||
/*
 * scrypt_core_macro3_x4: [r0] <- [r0] + [r1] for four words; r0 and r1 both
 * advance by four words. Clobbers r4-r11 only, so r2, r3, r12 and lr survive.
 */
.macro scrypt_core_macro3_x4
|
||||
ldmia r1!, {r4-r7}
|
||||
ldmia r0, {r8-r11}
|
||||
add r4, r4, r8
|
||||
add r5, r5, r9
|
||||
add r6, r6, r10
|
||||
add r7, r7, r11
|
||||
stmia r0!, {r4-r7}
|
||||
.endm
|
||||
|
||||
/*
 * scrypt_core_macro3_x6: [r0] <- [r0] + [r1] for six words; r0 and r1 both
 * advance by six words. Clobbers r2-r12 and lr.
 */
.macro scrypt_core_macro3_x6
|
||||
ldmia r1!, {r2-r7}
|
||||
ldmia r0, {r8-r12, lr}
|
||||
add r2, r2, r8
|
||||
add r3, r3, r9
|
||||
add r4, r4, r10
|
||||
add r5, r5, r11
|
||||
add r6, r6, r12
|
||||
add r7, r7, lr
|
||||
stmia r0!, {r2-r7}
|
||||
.endm
|
||||
|
||||
|
||||
/*
 * void scrypt_core(uint32_t *X, uint32_t *V)
 *
 *   r0 = X: 32-word working block, updated in place.
 *   r1 = V: scratch area of 1024 entries of 32 words (1024*32*4 bytes).
 *
 * Loop 1 stores the current X into successive V entries and mixes X;
 * loop 2 runs 1024 times, each time folding the V entry selected by the low
 * 10 bits of X[16] into X and mixing again.
 *
 * Stack frame (20 words):
 *   [sp, #0*4 .. #15*4]  16-word block that salsa8_core works on in place
 *   [sp, #16*4]          saved X pointer
 *   [sp, #17*4]          V cursor (loop 1) / V base (loop 2)
 *   [sp, #18*4]          end-of-V pointer (loop 1) / remaining count (loop 2)
 *   [sp, #19*4]          not referenced in this routine
 */
.text
|
||||
.code 32
|
||||
.align 2
|
||||
.globl scrypt_core
|
||||
.globl _scrypt_core
|
||||
scrypt_core:
|
||||
_scrypt_core:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, sp, #20*4
|
||||
|
||||
str r0, [sp, #16*4]
|
||||
/* r12 = one past the last V entry; loop 1 terminates when the cursor gets there */
add r12, r1, #1024*32*4
|
||||
str r12, [sp, #18*4]
|
||||
scrypt_core_loop1:
|
||||
/* copy both halves of X into the current V entry; X[0..15] and the stack
   scratch receive X[0..15] ^ X[16..31] */
add lr, r0, #16*4
|
||||
add r3, r1, #16*4
|
||||
mov r12, sp
|
||||
scrypt_core_macro1a_x4
|
||||
scrypt_core_macro1a_x4
|
||||
scrypt_core_macro1a_x4
|
||||
scrypt_core_macro1a_x4
|
||||
/* r1 now points 16 words into the V entry; salsa8_core clobbers it, so save it */
str r1, [sp, #17*4]
|
||||
|
||||
salsa8_core
|
||||
|
||||
/* X[0..15] += salsa output; X[16..31] and the scratch become X[16..31] ^ X[0..15] */
ldr r0, [sp, #16*4]
|
||||
mov r12, sp
|
||||
add r2, r0, #16*4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
|
||||
salsa8_core
|
||||
|
||||
/* X[16..31] += salsa output (6 + 6 + 4 words) */
ldr r0, [sp, #16*4]
|
||||
mov r1, sp
|
||||
add r0, r0, #16*4
|
||||
scrypt_core_macro3_x6
|
||||
scrypt_core_macro3_x6
|
||||
/* reload loop state here: the x6 macro clobbers r3/r12, the x4 macro does not */
ldr r3, [sp, #17*4]
|
||||
ldr r12, [sp, #18*4]
|
||||
scrypt_core_macro3_x4
|
||||
|
||||
/* r1 = next V entry, r0 = X again */
add r1, r3, #16*4
|
||||
sub r0, r0, #32*4
|
||||
cmp r1, r12
|
||||
bne scrypt_core_loop1
|
||||
|
||||
/* rewind r1 to the base of V and keep it in the frame for loop 2 */
sub r1, r1, #1024*32*4
|
||||
str r1, [sp, #17*4]
|
||||
mov r12, #1024
|
||||
scrypt_core_loop2:
|
||||
str r12, [sp, #18*4]
|
||||
|
||||
/* r1 = V + (X[16] & 1023) * 128 bytes: shift left to drop all but the low
   10 bits, then shift right leaving the index scaled by 2^7 */
ldr r4, [r0, #16*4]
|
||||
mov r4, r4, lsl #32-10
|
||||
add r1, r1, r4, lsr #32-10-7
|
||||
|
||||
/* X[16..31] ^= V_j[16..31]; X[0..15] and scratch = X[0..15] ^ V_j[0..15] ^ X[16..31] */
add r2, r0, #16*4
|
||||
add r3, r1, #16*4
|
||||
mov r12, sp
|
||||
scrypt_core_macro1b_x4
|
||||
scrypt_core_macro1b_x4
|
||||
scrypt_core_macro1b_x4
|
||||
scrypt_core_macro1b_x4
|
||||
|
||||
salsa8_core
|
||||
|
||||
ldr r0, [sp, #16*4]
|
||||
mov r12, sp
|
||||
add r2, r0, #16*4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
scrypt_core_macro2_x4
|
||||
|
||||
salsa8_core
|
||||
|
||||
ldr r0, [sp, #16*4]
|
||||
mov r1, sp
|
||||
add r0, r0, #16*4
|
||||
scrypt_core_macro3_x6
|
||||
scrypt_core_macro3_x6
|
||||
scrypt_core_macro3_x4
|
||||
|
||||
/* restore counter, X pointer and V base for the next iteration */
ldr r12, [sp, #18*4]
|
||||
sub r0, r0, #32*4
|
||||
ldr r1, [sp, #17*4]
|
||||
subs r12, r12, #1
|
||||
bne scrypt_core_loop2
|
||||
|
||||
add sp, sp, #20*4
|
||||
/* Return with bx whenever the caller may be running in Thumb state. Both
   __thumb__ (the test used by the SHA-256 routines in sha2-arm.S) and
   __THUMB_INTERWORK__ are honoured, so every configuration that returned
   via bx before still does. */
#if defined(__thumb__) || defined(__THUMB_INTERWORK__)
|
||||
ldmfd sp!, {r4-r11, lr}
|
||||
bx lr
|
||||
#else
|
||||
ldmfd sp!, {r4-r11, pc}
|
||||
#endif
|
||||
|
||||
#endif
|
4
scrypt.c
4
scrypt.c
|
@ -270,6 +270,10 @@ void scrypt_core_3way(uint32_t *X, uint32_t *V);
|
|||
#define scrypt_best_throughput() 1
|
||||
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
|
||||
#elif defined(__arm__) && defined(__APCS_32__)
|
||||
|
||||
/* ARM assembly implementation (scrypt-arm.S): X is a 32-word block updated in
 * place; V must provide room for 1024 entries of 32 words each. */
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
|
||||
#else
|
||||
|
||||
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
|
||||
|
|
621
sha2-arm.S
Normal file
621
sha2-arm.S
Normal file
|
@ -0,0 +1,621 @@
|
|||
/*
|
||||
* Copyright 2012 pooler@litecoinpool.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
|
||||
#if defined(__arm__) && defined(__APCS_32__)
|
||||
|
||||
/*
 * sha256_k: emits, at the point of expansion, a word-aligned table of the 64
 * SHA-256 round constants K[0..63]. Each routine expands its own copy under a
 * local label so sha256_main_round can load entries as "label + i*4".
 */
.macro sha256_k
|
||||
.align 2
|
||||
.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
|
||||
.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
|
||||
.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
|
||||
.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
|
||||
.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
|
||||
.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
|
||||
.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
|
||||
.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
|
||||
.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
|
||||
.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
|
||||
.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
|
||||
.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
|
||||
.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
|
||||
.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
|
||||
.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
|
||||
.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
.endm
|
||||
|
||||
/*
 * sha256_extend_round: one message-schedule step.
 *
 *   W[i+16] = s1(ry) + r11 + ra + s0(W[i+1])
 *     s0(x) = ror(x,7)  ^ ror(x,18) ^ (x >> 3)
 *     s1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)
 *
 * rw is the base of the W array. Call sites pass ra = W[i+9], ry = W[i+14]
 * and have W[i] in r11. The result is left in ra and stored to W[i+16].
 * rb and rz are accepted for signature parity with the double-round macro
 * but are not used. Clobbers r11, r12 and lr (lr ends up holding W[i+1]).
 */
.macro sha256_extend_round i, rw, ra, rb, ry, rz
|
||||
ldr lr, [\rw, #(\i+1)*4]
|
||||
mov r12, \ry, ror #17
|
||||
eor r12, r12, \ry, ror #19
|
||||
eor r12, r12, \ry, lsr #10
|
||||
add r11, r11, r12
|
||||
add r11, r11, \ra
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add \ra, r11, r12
|
||||
str \ra, [\rw, #(\i+16)*4]
|
||||
.endm
|
||||
|
||||
/*
 * sha256_extend_doubleround: two consecutive message-schedule steps.
 *
 *   W[i+16] = s1(ry) + r11 + ra + s0(W[i+1])     -> ra, stored
 *   W[i+17] = s1(rz) + W[i+1] + rb + s0(W[i+2])  -> rb, stored
 *     s0(x) = ror(x,7)  ^ ror(x,18) ^ (x >> 3)
 *     s1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10)
 *
 * Call sites pass ra = W[i+9], rb = W[i+10], ry = W[i+14], rz = W[i+15] and
 * have W[i] in r11. On exit r11 holds W[i+2], i.e. it is already set up as the
 * r11 input of the next double-round at i+2. Clobbers r12 and lr.
 */
.macro sha256_extend_doubleround i, rw, ra, rb, ry, rz
|
||||
ldr lr, [\rw, #(\i+1)*4]
|
||||
mov r12, \ry, ror #17
|
||||
eor r12, r12, \ry, ror #19
|
||||
eor r12, r12, \ry, lsr #10
|
||||
add r11, r11, r12
|
||||
add r11, r11, \ra
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add \ra, r11, r12
|
||||
str \ra, [\rw, #(\i+16)*4]
|
||||
|
||||
/* second step: lr still holds W[i+1]; r11 is reloaded with W[i+2] */
ldr r11, [\rw, #(\i+2)*4]
|
||||
mov r12, \rz, ror #17
|
||||
eor r12, r12, \rz, ror #19
|
||||
eor r12, r12, \rz, lsr #10
|
||||
add lr, lr, r12
|
||||
add lr, lr, \rb
|
||||
mov r12, r11, ror #7
|
||||
eor r12, r12, r11, ror #18
|
||||
eor r12, r12, r11, lsr #3
|
||||
add \rb, lr, r12
|
||||
str \rb, [\rw, #(\i+17)*4]
|
||||
.endm
|
||||
|
||||
/*
 * sha256_main_round: one SHA-256 compression round.
 *
 *   i        round index (selects W[i] and K[i])
 *   ka       label of a sha256_k table, read with a PC-relative ldr
 *   rw       base register of the W array
 *   ra..rh   registers currently holding working variables a..h
 *
 *   t1 = W[i] + Ch(e,f,g) + h + K[i] + S1(e)
 *   rh <- d + t1                      (the next round's "e")
 *   rd <- t1 + Maj(a,b,c) + S0(a)     (the next round's "a")
 *
 * The other working registers are left untouched; callers rotate the register
 * roles from round to round instead of moving values.
 * Clobbers r3, r12 and lr.
 */
.macro sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh
|
||||
ldr r12, [\rw, #(\i)*4]
|
||||
/* Ch(e,f,g) = (f & e) | (g & ~e) */
and r3, \rf, \re
|
||||
bic lr, \rg, \re
|
||||
orr lr, lr, r3
|
||||
ldr r3, \ka + (\i)*4
|
||||
add r12, r12, lr
|
||||
/* (e ^ ror(e,5) ^ ror(e,19)) rotated right by 6 == ror(e,6) ^ ror(e,11) ^ ror(e,25) */
eor lr, \re, \re, ror #5
|
||||
eor lr, lr, \re, ror #19
|
||||
add r12, r12, \rh
|
||||
add r12, r12, r3
|
||||
add r12, r12, lr, ror #6
|
||||
add \rh, \rd, r12
|
||||
|
||||
/* Maj(a,b,c) = ((a ^ b) & c) ^ (a & b) */
eor lr, \ra, \rb
|
||||
and lr, lr, \rc
|
||||
and r3, \ra, \rb
|
||||
eor lr, lr, r3
|
||||
/* (a ^ ror(a,11) ^ ror(a,20)) rotated right by 2 == ror(a,2) ^ ror(a,13) ^ ror(a,22) */
eor r3, \ra, \ra, ror #11
|
||||
eor r3, r3, \ra, ror #20
|
||||
add r12, r12, lr
|
||||
add \rd, r12, r3, ror #2
|
||||
.endm
|
||||
|
||||
/*
 * sha256_main_quadround: rounds i .. i+3 with a..h starting in r4..r11.
 * Each round shifts the register roles by one position, so after four rounds
 * a..d are back in r4..r7 and e..h in r8..r11.
 */
.macro sha256_main_quadround i, ka, rw
|
||||
sha256_main_round \i+0, \ka, \rw, r4, r5, r6, r7, r8, r9, r10, r11
|
||||
sha256_main_round \i+1, \ka, \rw, r7, r4, r5, r6, r11, r8, r9, r10
|
||||
sha256_main_round \i+2, \ka, \rw, r6, r7, r4, r5, r10, r11, r8, r9
|
||||
sha256_main_round \i+3, \ka, \rw, r5, r6, r7, r4, r9, r10, r11, r8
|
||||
.endm
|
||||
|
||||
|
||||
/*
 * void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
 *
 *   r0 = state (8 words, updated in place)
 *   r1 = block (16 words)
 *   r2 = swap  (non-zero: byte-swap every block word while copying)
 *
 * A 64-word message schedule W is built on the stack at [sp, #0 .. #63*4].
 */
.text
|
||||
.code 32
|
||||
.align 2
|
||||
.globl sha256_transform
|
||||
.globl _sha256_transform
|
||||
sha256_transform:
|
||||
_sha256_transform:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
/* flags from this compare are consumed by the bne below; sub does not alter them */
cmp r2, #0
|
||||
sub sp, sp, #64*4
|
||||
bne sha256_transform_swap
|
||||
|
||||
/* swap == 0: plain copy of the 16 block words into W[0..15] */
ldmia r1!, {r4-r11}
|
||||
stmia sp, {r4-r11}
|
||||
add r3, sp, #8*4
|
||||
ldmia r1, {r4-r11}
|
||||
stmia r3, {r4-r11}
|
||||
b sha256_transform_extend
|
||||
|
||||
/*
 * bswap rd, rn: rd = rn with its four bytes reversed, using only
 * rotate/shift/logical instructions. rd may be the same register as rn.
 * Clobbers r12.
 */
.macro bswap rd, rn
|
||||
eor r12, \rn, \rn, ror #16
|
||||
bic r12, r12, #0x00ff0000
|
||||
mov \rd, \rn, ror #8
|
||||
eor \rd, \rd, r12, lsr #8
|
||||
.endm
|
||||
|
||||
/* swap != 0: copy the 16 block words into W[0..15], byte-swapping each one.
   Done in two batches of eight words through r4-r11. */
sha256_transform_swap:
|
||||
ldmia r1!, {r4-r11}
|
||||
bswap r4, r4
|
||||
bswap r5, r5
|
||||
bswap r6, r6
|
||||
bswap r7, r7
|
||||
bswap r8, r8
|
||||
bswap r9, r9
|
||||
bswap r10, r10
|
||||
bswap r11, r11
|
||||
stmia sp, {r4-r11}
|
||||
add r3, sp, #8*4
|
||||
ldmia r1, {r4-r11}
|
||||
bswap r4, r4
|
||||
bswap r5, r5
|
||||
bswap r6, r6
|
||||
bswap r7, r7
|
||||
bswap r8, r8
|
||||
bswap r9, r9
|
||||
bswap r10, r10
|
||||
bswap r11, r11
|
||||
stmia r3, {r4-r11}
|
||||
|
||||
/*
 * Message-schedule expansion, the 64 rounds and the final state update.
 * Entered with W[0..15] on the stack and r0 = state.
 */
sha256_transform_extend:
|
||||
/* prime the sliding window: r11 = W[0], r4-r10 = W[9..15] */
add r12, sp, #9*4
|
||||
ldr r11, [sp, #0*4]
|
||||
ldmia r12, {r4-r10}
|
||||
/* 24 double-rounds produce W[16..63]; the seven window registers r4-r10
   rotate roles by two positions per line */
sha256_extend_doubleround 0, sp, r4, r5, r9, r10
|
||||
sha256_extend_doubleround 2, sp, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 4, sp, r8, r9, r6, r7
|
||||
sha256_extend_doubleround 6, sp, r10, r4, r8, r9
|
||||
sha256_extend_doubleround 8, sp, r5, r6, r10, r4
|
||||
sha256_extend_doubleround 10, sp, r7, r8, r5, r6
|
||||
sha256_extend_doubleround 12, sp, r9, r10, r7, r8
|
||||
sha256_extend_doubleround 14, sp, r4, r5, r9, r10
|
||||
sha256_extend_doubleround 16, sp, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 18, sp, r8, r9, r6, r7
|
||||
sha256_extend_doubleround 20, sp, r10, r4, r8, r9
|
||||
sha256_extend_doubleround 22, sp, r5, r6, r10, r4
|
||||
sha256_extend_doubleround 24, sp, r7, r8, r5, r6
|
||||
sha256_extend_doubleround 26, sp, r9, r10, r7, r8
|
||||
sha256_extend_doubleround 28, sp, r4, r5, r9, r10
|
||||
sha256_extend_doubleround 30, sp, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 32, sp, r8, r9, r6, r7
|
||||
sha256_extend_doubleround 34, sp, r10, r4, r8, r9
|
||||
sha256_extend_doubleround 36, sp, r5, r6, r10, r4
|
||||
sha256_extend_doubleround 38, sp, r7, r8, r5, r6
|
||||
sha256_extend_doubleround 40, sp, r9, r10, r7, r8
|
||||
sha256_extend_doubleround 42, sp, r4, r5, r9, r10
|
||||
sha256_extend_doubleround 44, sp, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 46, sp, r8, r9, r6, r7
|
||||
|
||||
/* working variables a..h = state[0..7] in r4..r11 */
ldmia r0, {r4-r11}
|
||||
sha256_main_quadround 0, sha256_transform_k, sp
|
||||
sha256_main_quadround 4, sha256_transform_k, sp
|
||||
sha256_main_quadround 8, sha256_transform_k, sp
|
||||
sha256_main_quadround 12, sha256_transform_k, sp
|
||||
sha256_main_quadround 16, sha256_transform_k, sp
|
||||
sha256_main_quadround 20, sha256_transform_k, sp
|
||||
sha256_main_quadround 24, sha256_transform_k, sp
|
||||
sha256_main_quadround 28, sha256_transform_k, sp
|
||||
/* The constant table sits in the middle of the round code and is jumped
   over. NOTE(review): presumably so every PC-relative "ldr r3, ka + i*4" in
   rounds 0-63 stays within ldr's offset range - confirm. */
b sha256_transform_k_over
|
||||
sha256_transform_k:
|
||||
sha256_k
|
||||
sha256_transform_k_over:
|
||||
sha256_main_quadround 32, sha256_transform_k, sp
|
||||
sha256_main_quadround 36, sha256_transform_k, sp
|
||||
sha256_main_quadround 40, sha256_transform_k, sp
|
||||
sha256_main_quadround 44, sha256_transform_k, sp
|
||||
sha256_main_quadround 48, sha256_transform_k, sp
|
||||
sha256_main_quadround 52, sha256_transform_k, sp
|
||||
sha256_main_quadround 56, sha256_transform_k, sp
|
||||
sha256_main_quadround 60, sha256_transform_k, sp
|
||||
|
||||
/* state[0..3] += a..d, then state[4..7] += e..h (r0 is advanced in between) */
ldmia r0, {r1, r2, r3, r12}
|
||||
add r4, r4, r1
|
||||
add r5, r5, r2
|
||||
add r6, r6, r3
|
||||
add r7, r7, r12
|
||||
stmia r0!, {r4-r7}
|
||||
ldmia r0, {r1, r2, r3, r12}
|
||||
add r8, r8, r1
|
||||
add r9, r9, r2
|
||||
add r10, r10, r3
|
||||
add r11, r11, r12
|
||||
stmia r0, {r8-r11}
|
||||
|
||||
/* drop the W buffer and return; bx is used when built for Thumb callers */
add sp, sp, #64*4
|
||||
#ifdef __thumb__
|
||||
ldmfd sp!, {r4-r11, lr}
|
||||
bx lr
|
||||
#else
|
||||
ldmfd sp!, {r4-r11, pc}
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * void sha256d_ms(uint32_t *hash, uint32_t *W,
 *                 const uint32_t *midstate, const uint32_t *prehash)
 *
 *   r0 = hash, r1 = W, r2 = midstate, r3 = prehash
 *
 * Performs two chained SHA-256 compressions. The first uses the caller's W
 * buffer, starts at round 3 from the working variables in prehash and adds
 * midstate at the end; the second hashes that 8-word result (plus padding)
 * from the initial SHA-256 state. Only hash[7] is written.
 *
 * The Z flag is used as a "first pass" marker: "cmp r0, r0" sets it here and
 * no flag-setting instruction is executed until the "movs r1, sp" that
 * starts the second pass, which clears it.
 *
 * Stack frame: 64 words. [sp, #18*4 .. #25*4] temporarily hold the caller's
 * W[18], W[19], W[20], W[22], W[23], W[24], W[30], W[31], which this routine
 * overwrites in place and restores after the first pass.
 */
.text
|
||||
.code 32
|
||||
.align 2
|
||||
.globl sha256d_ms
|
||||
.globl _sha256d_ms
|
||||
sha256d_ms:
|
||||
_sha256d_ms:
|
||||
stmfd sp!, {r4-r11, lr}
|
||||
sub sp, sp, #64*4
|
||||
|
||||
/* Z := 1, marking the first pass */
cmp r0, r0
|
||||
|
||||
/* lr = W[3]; load and save the W words that are about to be modified */
ldr lr, [r1, #3*4]
|
||||
ldr r6, [r1, #18*4]
|
||||
ldr r7, [r1, #19*4]
|
||||
ldr r8, [r1, #20*4]
|
||||
ldr r10, [r1, #22*4]
|
||||
ldr r4, [r1, #23*4]
|
||||
ldr r5, [r1, #24*4]
|
||||
ldr r11, [r1, #30*4]
|
||||
str r6, [sp, #18*4]
|
||||
str r7, [sp, #19*4]
|
||||
str r8, [sp, #20*4]
|
||||
str r10, [sp, #21*4]
|
||||
str r4, [sp, #22*4]
|
||||
str r5, [sp, #23*4]
|
||||
str r11, [sp, #24*4]
|
||||
|
||||
/* W[18] += s0(W[3]) */
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add r6, r6, r12
|
||||
str r6, [r1, #18*4]
|
||||
|
||||
/* W[19] += W[3] */
add r7, r7, lr
|
||||
str r7, [r1, #19*4]
|
||||
|
||||
/* W[20] += s1(W[18]) */
mov r12, r6, ror #17
|
||||
eor r12, r12, r6, ror #19
|
||||
eor r12, r12, r6, lsr #10
|
||||
add r8, r8, r12
|
||||
str r8, [r1, #20*4]
|
||||
|
||||
/* W[21] = s1(W[19]) */
mov r12, r7, ror #17
|
||||
eor r12, r12, r7, ror #19
|
||||
eor r9, r12, r7, lsr #10
|
||||
str r9, [r1, #21*4]
|
||||
|
||||
/* W[22] += s1(W[20]) */
mov r12, r8, ror #17
|
||||
eor r12, r12, r8, ror #19
|
||||
eor r12, r12, r8, lsr #10
|
||||
add r10, r10, r12
|
||||
str r10, [r1, #22*4]
|
||||
|
||||
/* W[23] += s1(W[21]) */
mov r12, r9, ror #17
|
||||
eor r12, r12, r9, ror #19
|
||||
eor r12, r12, r9, lsr #10
|
||||
add r4, r4, r12
|
||||
str r4, [r1, #23*4]
|
||||
|
||||
/* W[24] += s1(W[22]) */
mov r12, r10, ror #17
|
||||
eor r12, r12, r10, ror #19
|
||||
eor r12, r12, r10, lsr #10
|
||||
add r5, r5, r12
|
||||
str r5, [r1, #24*4]
|
||||
|
||||
/* W[25] = W[18] + s1(W[23]) */
mov r12, r4, ror #17
|
||||
eor r12, r12, r4, ror #19
|
||||
eor r12, r12, r4, lsr #10
|
||||
add r6, r6, r12
|
||||
str r6, [r1, #25*4]
|
||||
|
||||
/* W[26] = W[19] + s1(W[24]) */
mov r12, r5, ror #17
|
||||
eor r12, r12, r5, ror #19
|
||||
eor r12, r12, r5, lsr #10
|
||||
add r7, r7, r12
|
||||
str r7, [r1, #26*4]
|
||||
|
||||
/* W[27] = W[20] + s1(W[25]) */
mov r12, r6, ror #17
|
||||
eor r12, r12, r6, ror #19
|
||||
eor r12, r12, r6, lsr #10
|
||||
add r8, r8, r12
|
||||
str r8, [r1, #27*4]
|
||||
|
||||
/* W[28] = W[21] + s1(W[26]) */
mov r12, r7, ror #17
|
||||
eor r12, r12, r7, ror #19
|
||||
eor r12, r12, r7, lsr #10
|
||||
add r9, r9, r12
|
||||
str r9, [r1, #28*4]
|
||||
|
||||
/* W[29] = W[22] + s1(W[27]) */
mov r12, r8, ror #17
|
||||
eor r12, r12, r8, ror #19
|
||||
eor r12, r12, r8, lsr #10
|
||||
add r10, r10, r12
|
||||
str r10, [r1, #29*4]
|
||||
|
||||
/* W[30] = W[23] + (caller's W[30]) + s1(W[28]); lr picks up the caller's W[31] */
ldr lr, [r1, #31*4]
|
||||
mov r12, r9, ror #17
|
||||
eor r12, r12, r9, ror #19
|
||||
eor r12, r12, r9, lsr #10
|
||||
add r11, r11, r12
|
||||
add r4, r4, r11
|
||||
str r4, [r1, #30*4]
|
||||
|
||||
/* save the caller's W[31]; W[31] = W[24] + (caller's W[31]) + s1(W[29]).
   r11 = W[16] is the r11 input expected by the extension loop that follows. */
str lr, [sp, #25*4]
|
||||
ldr r11, [r1, #16*4]
|
||||
mov r12, r10, ror #17
|
||||
eor r12, r12, r10, ror #19
|
||||
eor r12, r12, r10, lsr #10
|
||||
add lr, lr, r12
|
||||
add r5, r5, lr
|
||||
str r5, [r1, #31*4]
|
||||
|
||||
/*
 * Shared tail of the message-schedule expansion, executed once per pass with
 * r1 = W base, r11 = W[16], r6-r10 = W[25..29], r4 = W[30], r5 = W[31].
 * Produces W[32..59] for both passes.
 */
sha256d_ms_extend_loop2:
|
||||
sha256_extend_doubleround 16, r1, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 18, r1, r8, r9, r6, r7
|
||||
sha256_extend_doubleround 20, r1, r10, r4, r8, r9
|
||||
sha256_extend_doubleround 22, r1, r5, r6, r10, r4
|
||||
sha256_extend_doubleround 24, r1, r7, r8, r5, r6
|
||||
sha256_extend_doubleround 26, r1, r9, r10, r7, r8
|
||||
sha256_extend_doubleround 28, r1, r4, r5, r9, r10
|
||||
sha256_extend_doubleround 30, r1, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 32, r1, r8, r9, r6, r7
|
||||
sha256_extend_doubleround 34, r1, r10, r4, r8, r9
|
||||
sha256_extend_doubleround 36, r1, r5, r6, r10, r4
|
||||
sha256_extend_doubleround 38, r1, r7, r8, r5, r6
|
||||
sha256_extend_doubleround 40, r1, r9, r10, r7, r8
|
||||
sha256_extend_doubleround 42, r1, r4, r5, r9, r10
|
||||
/* Z clear = second pass: only W[60] is still needed, handled in the coda */
bne sha256d_ms_extend_coda2
|
||||
/* first pass: finish W[60..63] */
sha256_extend_doubleround 44, r1, r6, r7, r4, r5
|
||||
sha256_extend_doubleround 46, r1, r8, r9, r6, r7
|
||||
|
||||
/* Load the eight prehash words into the registers that round 3 uses for
   its working variables, then enter the round sequence at round 3. */
ldr r4, [r3, #0*4]
|
||||
ldr r9, [r3, #1*4]
|
||||
ldr r10, [r3, #2*4]
|
||||
ldr r11, [r3, #3*4]
|
||||
ldr r8, [r3, #4*4]
|
||||
ldr r5, [r3, #5*4]
|
||||
ldr r6, [r3, #6*4]
|
||||
ldr r7, [r3, #7*4]
|
||||
b sha256d_ms_main_loop1
|
||||
|
||||
/*
 * Rounds 0-31. The second pass enters at sha256d_ms_main_loop2 (round 0);
 * the first pass enters at sha256d_ms_main_loop1 (round 3).
 */
sha256d_ms_main_loop2:
|
||||
sha256_main_round 0, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
|
||||
sha256_main_round 1, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
|
||||
sha256_main_round 2, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
|
||||
sha256d_ms_main_loop1:
|
||||
sha256_main_round 3, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
|
||||
sha256_main_quadround 4, sha256d_ms_k, r1
|
||||
sha256_main_quadround 8, sha256d_ms_k, r1
|
||||
sha256_main_quadround 12, sha256d_ms_k, r1
|
||||
sha256_main_quadround 16, sha256d_ms_k, r1
|
||||
sha256_main_quadround 20, sha256d_ms_k, r1
|
||||
sha256_main_quadround 24, sha256d_ms_k, r1
|
||||
sha256_main_quadround 28, sha256d_ms_k, r1
|
||||
/* the constant table is embedded here and jumped over */
b sha256d_ms_k_over
|
||||
sha256d_ms_k:
|
||||
sha256_k
|
||||
/*
 * Rounds 32-63, then (first pass only) finalisation of the first hash and
 * set-up of the second-pass message block on the stack.
 */
sha256d_ms_k_over:
|
||||
sha256_main_quadround 32, sha256d_ms_k, r1
|
||||
sha256_main_quadround 36, sha256d_ms_k, r1
|
||||
sha256_main_quadround 40, sha256d_ms_k, r1
|
||||
sha256_main_quadround 44, sha256d_ms_k, r1
|
||||
sha256_main_quadround 48, sha256d_ms_k, r1
|
||||
sha256_main_quadround 52, sha256d_ms_k, r1
|
||||
sha256_main_round 56, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
|
||||
/* Z clear = second pass: switch to the reduced rounds that only track "e" */
bne sha256d_ms_finish
|
||||
sha256_main_round 57, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
|
||||
sha256_main_round 58, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
|
||||
sha256_main_round 59, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
|
||||
sha256_main_quadround 60, sha256d_ms_k, r1
|
||||
|
||||
/* first hash = working variables + midstate, written to [sp, #0 .. #7*4]
   where it becomes W[0..7] of the second pass */
ldmia r2!, {r3, r12, lr}
|
||||
add r4, r4, r3
|
||||
add r5, r5, r12
|
||||
add r6, r6, lr
|
||||
stmia sp, {r4-r6}
|
||||
ldmia r2, {r3, r4, r5, r6, r12}
|
||||
add lr, sp, #3*4
|
||||
add r7, r7, r3
|
||||
add r8, r8, r4
|
||||
add r9, r9, r5
|
||||
add r10, r10, r6
|
||||
add r11, r11, r12
|
||||
add r12, sp, #18*4
|
||||
stmia lr!, {r7-r11}
|
||||
|
||||
/* restore the caller's W words that were saved at entry */
ldmia r12, {r4-r11}
|
||||
str r4, [r1, #18*4]
|
||||
str r5, [r1, #19*4]
|
||||
str r6, [r1, #20*4]
|
||||
str r7, [r1, #22*4]
|
||||
str r8, [r1, #23*4]
|
||||
str r9, [r1, #24*4]
|
||||
str r10, [r1, #30*4]
|
||||
str r11, [r1, #31*4]
|
||||
|
||||
/* lr now points at [sp, #8*4]: second-pass W[8..15] = 0x80000000, 0 x 6,
   0x00000100 */
mov r3, #0x80000000
|
||||
mov r4, #0
|
||||
mov r5, #0
|
||||
mov r6, #0
|
||||
mov r7, #0
|
||||
mov r8, #0
|
||||
mov r9, #0
|
||||
mov r10, #0x00000100
|
||||
stmia lr, {r3-r10}
|
||||
|
||||
/* From here on W lives on the stack. movs also clears Z (sp is non-zero),
   which marks the second pass for the two bne tests. */
ldr lr, [sp, #1*4]
|
||||
movs r1, sp
|
||||
ldr r4, [sp, #0*4]
|
||||
|
||||
/* Second-pass schedule with the constant words folded into immediates.
   W[16] = W[0] + s0(W[1]) */
ldr r11, [sp, #2*4]
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add r4, r4, r12
|
||||
str r4, [sp, #16*4]
|
||||
|
||||
/* W[17] = W[1] + 0x00a00000 + s0(W[2]); 0x00a00000 == s1(0x00000100) */
add lr, lr, #0x00a00000
|
||||
mov r12, r11, ror #7
|
||||
eor r12, r12, r11, ror #18
|
||||
eor r12, r12, r11, lsr #3
|
||||
add r5, lr, r12
|
||||
str r5, [sp, #17*4]
|
||||
|
||||
/* W[18] = W[2] + s1(W[16]) + s0(W[3]) */
ldr lr, [sp, #3*4]
|
||||
mov r12, r4, ror #17
|
||||
eor r12, r12, r4, ror #19
|
||||
eor r12, r12, r4, lsr #10
|
||||
add r11, r11, r12
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add r6, r11, r12
|
||||
str r6, [sp, #18*4]
|
||||
|
||||
/* W[19] = W[3] + s1(W[17]) + s0(W[4]) */
ldr r11, [sp, #4*4]
|
||||
mov r12, r5, ror #17
|
||||
eor r12, r12, r5, ror #19
|
||||
eor r12, r12, r5, lsr #10
|
||||
add lr, lr, r12
|
||||
mov r12, r11, ror #7
|
||||
eor r12, r12, r11, ror #18
|
||||
eor r12, r12, r11, lsr #3
|
||||
add r7, lr, r12
|
||||
str r7, [sp, #19*4]
|
||||
|
||||
/* W[20] = W[4] + s1(W[18]) + s0(W[5]) */
ldr lr, [sp, #5*4]
|
||||
mov r12, r6, ror #17
|
||||
eor r12, r12, r6, ror #19
|
||||
eor r12, r12, r6, lsr #10
|
||||
add r11, r11, r12
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add r8, r11, r12
|
||||
str r8, [sp, #20*4]
|
||||
|
||||
/* W[21] = W[5] + s1(W[19]) + s0(W[6]) */
ldr r11, [sp, #6*4]
|
||||
mov r12, r7, ror #17
|
||||
eor r12, r12, r7, ror #19
|
||||
eor r12, r12, r7, lsr #10
|
||||
add lr, lr, r12
|
||||
mov r12, r11, ror #7
|
||||
eor r12, r12, r11, ror #18
|
||||
eor r12, r12, r11, lsr #3
|
||||
add r9, lr, r12
|
||||
str r9, [sp, #21*4]
|
||||
|
||||
/* W[22] = W[6] + s1(W[20]) + 0x00000100 + s0(W[7]); 0x100 is the constant W[15] */
ldr lr, [sp, #7*4]
|
||||
mov r12, r8, ror #17
|
||||
eor r12, r12, r8, ror #19
|
||||
eor r12, r12, r8, lsr #10
|
||||
add r11, r11, r12
|
||||
add r11, r11, #0x00000100
|
||||
mov r12, lr, ror #7
|
||||
eor r12, r12, lr, ror #18
|
||||
eor r12, r12, lr, lsr #3
|
||||
add r10, r11, r12
|
||||
str r10, [sp, #22*4]
|
||||
|
||||
/* W[23] = W[7] + s1(W[21]) + W[16] + 0x11002000; 0x11002000 == s0(0x80000000),
   added as two immediates */
mov r12, r9, ror #17
|
||||
eor r12, r12, r9, ror #19
|
||||
eor r12, r12, r9, lsr #10
|
||||
add lr, lr, r12
|
||||
add lr, lr, r4
|
||||
add lr, lr, #0x11000000
|
||||
add r4, lr, #0x00002000
|
||||
str r4, [sp, #23*4]
|
||||
|
||||
/* W[24] = W[17] + s1(W[22]) + 0x80000000 (the constant W[8]) */
mov r12, r10, ror #17
|
||||
eor r12, r12, r10, ror #19
|
||||
eor r12, r12, r10, lsr #10
|
||||
add r5, r5, r12
|
||||
add r5, r5, #0x80000000
|
||||
str r5, [sp, #24*4]
|
||||
|
||||
/* W[25] = W[18] + s1(W[23]) */
mov r12, r4, ror #17
|
||||
eor r12, r12, r4, ror #19
|
||||
eor r12, r12, r4, lsr #10
|
||||
add r6, r6, r12
|
||||
str r6, [sp, #25*4]
|
||||
|
||||
/* W[26] = W[19] + s1(W[24]) */
mov r12, r5, ror #17
|
||||
eor r12, r12, r5, ror #19
|
||||
eor r12, r12, r5, lsr #10
|
||||
add r7, r7, r12
|
||||
str r7, [sp, #26*4]
|
||||
|
||||
/* W[27] = W[20] + s1(W[25]) */
mov r12, r6, ror #17
|
||||
eor r12, r12, r6, ror #19
|
||||
eor r12, r12, r6, lsr #10
|
||||
add r8, r8, r12
|
||||
str r8, [sp, #27*4]
|
||||
|
||||
/* W[28] = W[21] + s1(W[26]) */
mov r12, r7, ror #17
|
||||
eor r12, r12, r7, ror #19
|
||||
eor r12, r12, r7, lsr #10
|
||||
add r9, r9, r12
|
||||
str r9, [sp, #28*4]
|
||||
|
||||
/* W[29] = W[22] + s1(W[27]) */
mov r12, r8, ror #17
|
||||
eor r12, r12, r8, ror #19
|
||||
eor r12, r12, r8, lsr #10
|
||||
add r10, r10, r12
|
||||
str r10, [sp, #29*4]
|
||||
|
||||
/* W[30] = W[23] + s1(W[28]) + 0x00400022; 0x00400022 == s0(0x00000100) */
mov r12, r9, ror #17
|
||||
eor r12, r12, r9, ror #19
|
||||
eor r12, r12, r9, lsr #10
|
||||
add r4, r4, r12
|
||||
add r4, r4, #0x00400000
|
||||
add r4, r4, #0x00000022
|
||||
str r4, [sp, #30*4]
|
||||
|
||||
/* W[31] = W[24] + s1(W[29]) + 0x00000100 + s0(W[16]); r11 = W[16] is also the
   r11 input the extension loop expects */
ldr r11, [sp, #16*4]
|
||||
mov r12, r10, ror #17
|
||||
eor r12, r12, r10, ror #19
|
||||
eor r12, r12, r10, lsr #10
|
||||
add lr, r12, #0x00000100
|
||||
add lr, lr, r5
|
||||
mov r12, r11, ror #7
|
||||
eor r12, r12, r11, ror #18
|
||||
eor r12, r12, r11, lsr #3
|
||||
add r5, lr, r12
|
||||
str r5, [sp, #31*4]
|
||||
|
||||
b sha256d_ms_extend_loop2
|
||||
|
||||
/*
 * Second-pass end of the schedule expansion: compute only W[60] (the last
 * word read by the reduced rounds in sha256d_ms_finish), then load the
 * initial hash state into r4-r11 and run the rounds from round 0.
 * r2 is left pointing at sha256d_ms_h for the final addition.
 */
sha256d_ms_extend_coda2:
|
||||
sha256_extend_round 44, r1, r6, r7, r4, r5
|
||||
|
||||
adr r2, sha256d_ms_h
|
||||
ldmia r2, {r4-r11}
|
||||
b sha256d_ms_main_loop2
|
||||
|
||||
/* SHA-256 initial hash value H[0..7] (same values as sha256_h in sha2.c),
   kept next to the code so it can be addressed with adr */
sha256d_ms_h:
|
||||
.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
|
||||
.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
|
||||
|
||||
/*
 * sha256_main_round_red: reduced compression round that updates only the
 * "e" chain.
 *
 *   t1 = W[i] + Ch(e,f,g) + h + K[i] + S1(e)
 *   rh <- d + t1
 *
 * Identical to the first half of sha256_main_round; the Maj/S0 half that
 * would produce the new "a" is omitted. Clobbers r3, r12 and lr.
 */
.macro sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh
|
||||
ldr r12, [\rw, #(\i)*4]
|
||||
and r3, \rf, \re
|
||||
bic lr, \rg, \re
|
||||
orr lr, lr, r3
|
||||
ldr r3, \ka + (\i)*4
|
||||
add r12, r12, lr
|
||||
eor lr, \re, \re, ror #5
|
||||
eor lr, lr, \re, ror #19
|
||||
add r12, r12, \rh
|
||||
add r12, r12, r3
|
||||
add r12, r12, lr, ror #6
|
||||
add \rh, \rd, r12
|
||||
.endm
|
||||
|
||||
/*
 * Second-pass tail, reached after round 56. Rounds 57-60 are run in reduced
 * form: the "e" produced by round 60 is the value that three further rounds
 * would simply shift into "h", so rounds 61-63 are not executed. That value
 * plus H[7] is stored as hash[7], the only output word written.
 */
sha256d_ms_finish:
|
||||
sha256_main_round_red 57, sha256d_ms_k, r1, r6, r11, r8, r9, r10
|
||||
sha256_main_round_red 58, sha256d_ms_k, r1, r5, r10, r11, r8, r9
|
||||
sha256_main_round_red 59, sha256d_ms_k, r1, r4, r9, r10, r11, r8
|
||||
/* r5 is free after round 58; r2 still points at sha256d_ms_h, so r5 = H[7] */
ldr r5, [r2, #7*4]
|
||||
sha256_main_round_red 60, sha256d_ms_k, r1, r7, r8, r9, r10, r11
|
||||
|
||||
add r11, r11, r5
|
||||
str r11, [r0, #7*4]
|
||||
|
||||
/* drop the 64-word frame and return; bx is used when built for Thumb callers */
add sp, sp, #64*4
|
||||
#ifdef __thumb__
|
||||
ldmfd sp!, {r4-r11, lr}
|
||||
bx lr
|
||||
#else
|
||||
ldmfd sp!, {r4-r11, pc}
|
||||
#endif
|
||||
|
||||
#endif
|
17
sha2.c
17
sha2.c
|
@ -13,6 +13,10 @@
|
|||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* On 32-bit ARM, sha256_transform() and sha256d_ms() are provided by
 * sha2-arm.S; EXTERN_SHA256 compiles out the C versions in this file. */
#if defined(__arm__) && defined(__APCS_32__)
|
||||
#define EXTERN_SHA256
|
||||
#endif
|
||||
|
||||
static const uint32_t sha256_h[8] = {
|
||||
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
|
||||
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
|
||||
|
@ -68,6 +72,8 @@ void sha256_init(uint32_t *state)
|
|||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
||||
W[i] + sha256_k[i])
|
||||
|
||||
#ifndef EXTERN_SHA256
|
||||
|
||||
/*
|
||||
* SHA256 block compression function. The 256-bit state is transformed via
|
||||
* the 512-bit input block to produce a new state.
|
||||
|
@ -164,6 +170,8 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
|
|||
state[i] += S[i];
|
||||
}
|
||||
|
||||
#endif /* EXTERN_SHA256 */
|
||||
|
||||
|
||||
static const uint32_t sha256d_hash1[16] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
|
@ -212,6 +220,13 @@ static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
|
|||
RNDr(S, W, 2);
|
||||
}
|
||||
|
||||
#ifdef EXTERN_SHA256
|
||||
|
||||
/* External (assembly) implementation. The ARM version in sha2-arm.S stores
 * only hash[7]; it modifies W in place while running, restoring
 * W[18..20], W[22..24], W[30] and W[31] before it returns. */
void sha256d_ms(uint32_t *hash, uint32_t *W,
|
||||
const uint32_t *midstate, const uint32_t *prehash);
|
||||
|
||||
#else
|
||||
|
||||
static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
||||
const uint32_t *midstate, const uint32_t *prehash)
|
||||
{
|
||||
|
@ -417,6 +432,8 @@ static inline void sha256d_ms(uint32_t *hash, uint32_t *W,
|
|||
+ sha256_h[7];
|
||||
}
|
||||
|
||||
#endif /* EXTERN_SHA256 */
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
|
||||
void sha256d_ms_4way(uint32_t *hash, uint32_t *data,
|
||||
|
|
Loading…
Reference in a new issue