Precompute the first few SHA-256d rounds

commit 18a34a72ab (parent e52982ab7f)

2 changed files with 161 additions and 91 deletions
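The idea behind this commit, in scalar terms: while scanning, only the nonce pdata[19] changes between attempts, and it lands in word W[3] of the second 16-word block. Every message-schedule word and every round whose formula never touches W[3] is therefore loop-invariant and can be computed once per work unit. A minimal sketch of the split, assuming nothing beyond the standard SHA-256 small-sigma functions (sig0/sig1 correspond to the s0/s1 macros in sha2.c; the helper names are hypothetical):

#include <stdint.h>

static inline uint32_t rotr(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }
static inline uint32_t sig0(uint32_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3); }
static inline uint32_t sig1(uint32_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10); }

/* W[16] = sig1(W[14]) + W[9] + sig0(W[1]) + W[0] never references W[3],
 * so it is identical for every nonce and can be hoisted out of the loop. */
static uint32_t w16_once_per_work_unit(const uint32_t *W)
{
	return sig1(W[14]) + W[9] + sig0(W[1]) + W[0];
}

/* W[19] = sig1(W[17]) + W[12] + sig0(W[4]) + W[3] has exactly one
 * nonce-dependent term, so three of its four terms can be precomputed
 * (W[17] assumed already extended) and only "+ nonce" stays inside. */
static uint32_t w19_nonce_free_part(const uint32_t *W)	/* once */
{
	return sig1(W[17]) + W[12] + sig0(W[4]);
}

static uint32_t w19_finish(uint32_t pre, uint32_t nonce)	/* per nonce */
{
	return pre + nonce;
}

This is exactly the shape of the new sha256d_preextend (full or partial words W[16..31]) and of the W[18..31] patch-up at the top of the rewritten sha256d below.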

sha2-x64.S (157 lines changed)
@@ -128,8 +128,8 @@ _sha256_init_4way:
 .macro sha256_sse2_extend_round i
-	movdqa	(\i-15)*16(%rcx), %xmm0
-	movdqa	(\i-14)*16(%rcx), %xmm4
+	movdqa	(\i-15)*16(%rax), %xmm0
+	movdqa	(\i-14)*16(%rax), %xmm4
 	movdqa	%xmm0, %xmm2
 	movdqa	%xmm4, %xmm6
 	psrld	$3, %xmm0
@@ -153,10 +153,10 @@ _sha256_init_4way:
 	pxor	%xmm2, %xmm0
 	pxor	%xmm6, %xmm4

-	movdqa	(\i-2)*16(%rcx), %xmm3
-	movdqa	(\i-1)*16(%rcx), %xmm7
-	paddd	(\i-16)*16(%rcx), %xmm0
-	paddd	(\i-15)*16(%rcx), %xmm4
+	movdqa	(\i-2)*16(%rax), %xmm3
+	movdqa	(\i-1)*16(%rax), %xmm7
+	paddd	(\i-16)*16(%rax), %xmm0
+	paddd	(\i-15)*16(%rax), %xmm4

 	movdqa	%xmm3, %xmm2
 	movdqa	%xmm7, %xmm6
@@ -165,14 +165,14 @@ _sha256_init_4way:
 	movdqa	%xmm3, %xmm1
 	movdqa	%xmm7, %xmm5

-	paddd	(\i-7)*16(%rcx), %xmm0
+	paddd	(\i-7)*16(%rax), %xmm0

 	pslld	$13, %xmm2
 	pslld	$13, %xmm6
 	psrld	$7, %xmm1
 	psrld	$7, %xmm5

-	paddd	(\i-6)*16(%rcx), %xmm4
+	paddd	(\i-6)*16(%rax), %xmm4

 	pxor	%xmm1, %xmm3
 	pxor	%xmm5, %xmm7
@@ -189,14 +189,15 @@ _sha256_init_4way:

 	paddd	%xmm3, %xmm0
 	paddd	%xmm7, %xmm4
-	movdqa	%xmm0, \i*16(%rcx)
-	movdqa	%xmm4, (\i+1)*16(%rcx)
+	movdqa	%xmm0, \i*16(%rax)
+	movdqa	%xmm4, (\i+1)*16(%rax)
 .endm

 	.text
 	.p2align 6
 sha256_sse2_extend_loop:
 	sha256_sse2_extend_round 0
+sha256_sse2_extend_loop_pre:
 	sha256_sse2_extend_round 2
 	sha256_sse2_extend_round 4
 	sha256_sse2_extend_round 6
@@ -293,6 +294,7 @@ sha256_sse2_main_loop:
 	sha256_sse2_main_round 0
 	sha256_sse2_main_round 1
 	sha256_sse2_main_round 2
+sha256_sse2_main_loop_pre:
 	sha256_sse2_main_round 3
 	sha256_sse2_main_round 4
 	sha256_sse2_main_round 5
@@ -360,8 +362,8 @@ sha256_sse2_main_loop:
 #if defined(USE_AVX)

 .macro sha256_avx_extend_round i
-	vmovdqa	(\i-15)*16(%rcx), %xmm0
-	vmovdqa	(\i-14)*16(%rcx), %xmm4
+	vmovdqa	(\i-15)*16(%rax), %xmm0
+	vmovdqa	(\i-14)*16(%rax), %xmm4
 	vpslld	$14, %xmm0, %xmm2
 	vpslld	$14, %xmm4, %xmm6
 	vpsrld	$3, %xmm0, %xmm0
@@ -381,22 +383,22 @@ sha256_sse2_main_loop:
 	vpxor	%xmm2, %xmm0, %xmm0
 	vpxor	%xmm6, %xmm4, %xmm4

-	vmovdqa	(\i-2)*16(%rcx), %xmm3
-	vmovdqa	(\i-1)*16(%rcx), %xmm7
-	vpaddd	(\i-16)*16(%rcx), %xmm0, %xmm0
-	vpaddd	(\i-15)*16(%rcx), %xmm4, %xmm4
+	vmovdqa	(\i-2)*16(%rax), %xmm3
+	vmovdqa	(\i-1)*16(%rax), %xmm7
+	vpaddd	(\i-16)*16(%rax), %xmm0, %xmm0
+	vpaddd	(\i-15)*16(%rax), %xmm4, %xmm4

 	vpslld	$13, %xmm3, %xmm2
 	vpslld	$13, %xmm7, %xmm6
 	vpsrld	$10, %xmm3, %xmm3
 	vpsrld	$10, %xmm7, %xmm7

-	vpaddd	(\i-7)*16(%rcx), %xmm0, %xmm0
+	vpaddd	(\i-7)*16(%rax), %xmm0, %xmm0

 	vpsrld	$7, %xmm3, %xmm1
 	vpsrld	$7, %xmm7, %xmm5

-	vpaddd	(\i-6)*16(%rcx), %xmm4, %xmm4
+	vpaddd	(\i-6)*16(%rax), %xmm4, %xmm4

 	vpxor	%xmm1, %xmm3, %xmm3
 	vpxor	%xmm5, %xmm7, %xmm7
@@ -413,14 +415,15 @@ sha256_sse2_main_loop:

 	vpaddd	%xmm3, %xmm0, %xmm0
 	vpaddd	%xmm7, %xmm4, %xmm4
-	vmovdqa	%xmm0, \i*16(%rcx)
-	vmovdqa	%xmm4, (\i+1)*16(%rcx)
+	vmovdqa	%xmm0, \i*16(%rax)
+	vmovdqa	%xmm4, (\i+1)*16(%rax)
 .endm

 	.text
 	.p2align 6
 sha256_avx_extend_loop:
 	sha256_avx_extend_round 0
+sha256_avx_extend_loop_pre:
 	sha256_avx_extend_round 2
 	sha256_avx_extend_round 4
 	sha256_avx_extend_round 6
@@ -501,7 +504,11 @@ sha256_avx_extend_loop:
 	.text
 	.p2align 6
 sha256_avx_main_loop:
-	sha256_avx_main_quadround 0
+	sha256_avx_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
+	sha256_avx_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
+	sha256_avx_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
+sha256_avx_main_loop_pre:
+	sha256_avx_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
 	sha256_avx_main_quadround 4
 	sha256_avx_main_quadround 8
 	sha256_avx_main_quadround 12
@@ -525,8 +532,8 @@ sha256_avx_main_loop:
 #if defined(USE_XOP)

 .macro sha256_xop_extend_round i
-	vmovdqa	(\i-15)*16(%rcx), %xmm0
-	vmovdqa	(\i-14)*16(%rcx), %xmm4
+	vmovdqa	(\i-15)*16(%rax), %xmm0
+	vmovdqa	(\i-14)*16(%rax), %xmm4
 	vprotd	$25, %xmm0, %xmm1
 	vprotd	$25, %xmm4, %xmm5
 	vprotd	$14, %xmm0, %xmm2
@@ -538,10 +545,10 @@ sha256_avx_main_loop:
 	vpxor	%xmm2, %xmm0, %xmm0
 	vpxor	%xmm6, %xmm4, %xmm4

-	vmovdqa	(\i-2)*16(%rcx), %xmm3
-	vmovdqa	(\i-1)*16(%rcx), %xmm7
-	vpaddd	(\i-16)*16(%rcx), %xmm0, %xmm0
-	vpaddd	(\i-15)*16(%rcx), %xmm4, %xmm4
+	vmovdqa	(\i-2)*16(%rax), %xmm3
+	vmovdqa	(\i-1)*16(%rax), %xmm7
+	vpaddd	(\i-16)*16(%rax), %xmm0, %xmm0
+	vpaddd	(\i-15)*16(%rax), %xmm4, %xmm4

 	vprotd	$15, %xmm3, %xmm1
 	vprotd	$15, %xmm7, %xmm5
@@ -550,8 +557,8 @@ sha256_avx_main_loop:
 	vpxor	%xmm1, %xmm2, %xmm2
 	vpxor	%xmm5, %xmm6, %xmm6

-	vpaddd	(\i-7)*16(%rcx), %xmm0, %xmm0
-	vpaddd	(\i-6)*16(%rcx), %xmm4, %xmm4
+	vpaddd	(\i-7)*16(%rax), %xmm0, %xmm0
+	vpaddd	(\i-6)*16(%rax), %xmm4, %xmm4

 	vpsrld	$10, %xmm3, %xmm3
 	vpsrld	$10, %xmm7, %xmm7
@@ -560,14 +567,15 @@ sha256_avx_main_loop:

 	vpaddd	%xmm3, %xmm0, %xmm0
 	vpaddd	%xmm7, %xmm4, %xmm4
-	vmovdqa	%xmm0, \i*16(%rcx)
-	vmovdqa	%xmm4, (\i+1)*16(%rcx)
+	vmovdqa	%xmm0, \i*16(%rax)
+	vmovdqa	%xmm4, (\i+1)*16(%rax)
 .endm

 	.text
 	.p2align 6
 sha256_xop_extend_loop:
 	sha256_xop_extend_round 0
+sha256_xop_extend_loop_pre:
 	sha256_xop_extend_round 2
 	sha256_xop_extend_round 4
 	sha256_xop_extend_round 6
|
@ -636,7 +644,11 @@ sha256_xop_extend_loop:
|
|||
.text
|
||||
.p2align 6
|
||||
sha256_xop_main_loop:
|
||||
sha256_xop_main_quadround 0
|
||||
sha256_xop_main_round 0, %xmm10, %xmm9, %xmm8, %xmm0, %xmm3, %xmm4, %xmm5, %xmm7
|
||||
sha256_xop_main_round 1, %xmm9, %xmm8, %xmm0, %xmm10, %xmm4, %xmm5, %xmm7, %xmm3
|
||||
sha256_xop_main_round 2, %xmm8, %xmm0, %xmm10, %xmm9, %xmm5, %xmm7, %xmm3, %xmm4
|
||||
sha256_xop_main_loop_pre:
|
||||
sha256_xop_main_round 3, %xmm0, %xmm10, %xmm9, %xmm8, %xmm7, %xmm3, %xmm4, %xmm5
|
||||
sha256_xop_main_quadround 4
|
||||
sha256_xop_main_quadround 8
|
||||
sha256_xop_main_quadround 12
|
||||
|
@@ -810,11 +822,12 @@ sha256_transform_4way_sse2_main_loop:
 	jne sha256_transform_4way_sse2_main_loop
 	jmp sha256_transform_4way_finish

+
 #if defined(USE_AVX)
 	.text
 	.p2align 6
 sha256_transform_4way_core_avx:
-	leaq	256(%rsp), %rcx
+	leaq	256(%rsp), %rax
 	call	sha256_avx_extend_loop
 	movdqu	0(%rdi), %xmm7
 	movdqu	16(%rdi), %xmm5
@@ -830,11 +843,12 @@ sha256_transform_4way_core_avx:
 	jmp sha256_transform_4way_finish
 #endif /* USE_AVX */

+
 #if defined(USE_XOP)
 	.text
 	.p2align 6
 sha256_transform_4way_core_xop:
-	leaq	256(%rsp), %rcx
+	leaq	256(%rsp), %rax
 	call	sha256_xop_extend_loop
 	movdqu	0(%rdi), %xmm7
 	movdqu	16(%rdi), %xmm5
@@ -1019,24 +1033,25 @@ sha256d_4way_sse2:
 	movq	%rcx, %rdi
 	movq	%rdx, %rsi
 	movq	%r8, %rdx
 	movq	%r9, %rcx
 #endif
 	subq	$1032, %rsp

-	leaq	256(%rsi), %rcx
-	call	sha256_sse2_extend_loop
+	leaq	256(%rsi), %rax
+	call	sha256_sse2_extend_loop_pre

-	movdqa	0(%rdx), %xmm7
-	movdqa	16(%rdx), %xmm5
-	movdqa	32(%rdx), %xmm4
-	movdqa	48(%rdx), %xmm3
-	movdqa	64(%rdx), %xmm0
-	movdqa	80(%rdx), %xmm8
-	movdqa	96(%rdx), %xmm9
-	movdqa	112(%rdx), %xmm10
+	movdqa	0(%rcx), %xmm3
+	movdqa	16(%rcx), %xmm0
+	movdqa	32(%rcx), %xmm8
+	movdqa	48(%rcx), %xmm9
+	movdqa	64(%rcx), %xmm10
+	movdqa	80(%rcx), %xmm7
+	movdqa	96(%rcx), %xmm5
+	movdqa	112(%rcx), %xmm4

 	movq	%rsi, %rax
 	leaq	sha256_4k(%rip), %rcx
-	call	sha256_sse2_main_loop
+	call	sha256_sse2_main_loop_pre

 	paddd	0(%rdx), %xmm7
 	paddd	16(%rdx), %xmm5
@@ -1070,7 +1085,7 @@ sha256d_4way_sse2:
 	movdqa	%xmm0, 224(%rsp)
 	movdqa	%xmm1, 240(%rsp)

-	leaq	256(%rsp), %rcx
+	leaq	256(%rsp), %rax
 	call	sha256_sse2_extend_loop

 	movdqa	sha256_4h+0(%rip), %xmm7
@@ -1134,24 +1149,25 @@ sha256d_4way_avx:
 	movq	%rcx, %rdi
 	movq	%rdx, %rsi
 	movq	%r8, %rdx
 	movq	%r9, %rcx
 #endif
 	subq	$1032, %rsp

-	leaq	256(%rsi), %rcx
-	call	sha256_avx_extend_loop
+	leaq	256(%rsi), %rax
+	call	sha256_avx_extend_loop_pre

-	movdqa	0(%rdx), %xmm7
-	movdqa	16(%rdx), %xmm5
-	movdqa	32(%rdx), %xmm4
-	movdqa	48(%rdx), %xmm3
-	movdqa	64(%rdx), %xmm0
-	movdqa	80(%rdx), %xmm8
-	movdqa	96(%rdx), %xmm9
-	movdqa	112(%rdx), %xmm10
+	movdqa	0(%rcx), %xmm7
+	movdqa	16(%rcx), %xmm8
+	movdqa	32(%rcx), %xmm9
+	movdqa	48(%rcx), %xmm10
+	movdqa	64(%rcx), %xmm0
+	movdqa	80(%rcx), %xmm5
+	movdqa	96(%rcx), %xmm4
+	movdqa	112(%rcx), %xmm3

 	movq	%rsi, %rax
 	leaq	sha256_4k(%rip), %rcx
-	call	sha256_avx_main_loop
+	call	sha256_avx_main_loop_pre

 	paddd	0(%rdx), %xmm7
 	paddd	16(%rdx), %xmm5
@@ -1185,7 +1201,7 @@ sha256d_4way_avx:
 	movdqa	%xmm0, 224(%rsp)
 	movdqa	%xmm1, 240(%rsp)

-	leaq	256(%rsp), %rcx
+	leaq	256(%rsp), %rax
 	call	sha256_avx_extend_loop

 	movdqa	sha256_4h+0(%rip), %xmm7
@@ -1251,24 +1267,25 @@ sha256d_4way_xop:
 	movq	%rcx, %rdi
 	movq	%rdx, %rsi
 	movq	%r8, %rdx
 	movq	%r9, %rcx
 #endif
 	subq	$1032, %rsp

-	leaq	256(%rsi), %rcx
-	call	sha256_xop_extend_loop
+	leaq	256(%rsi), %rax
+	call	sha256_xop_extend_loop_pre

-	movdqa	0(%rdx), %xmm7
-	movdqa	16(%rdx), %xmm5
-	movdqa	32(%rdx), %xmm4
-	movdqa	48(%rdx), %xmm3
-	movdqa	64(%rdx), %xmm0
-	movdqa	80(%rdx), %xmm8
-	movdqa	96(%rdx), %xmm9
-	movdqa	112(%rdx), %xmm10
+	movdqa	0(%rcx), %xmm7
+	movdqa	16(%rcx), %xmm8
+	movdqa	32(%rcx), %xmm9
+	movdqa	48(%rcx), %xmm10
+	movdqa	64(%rcx), %xmm0
+	movdqa	80(%rcx), %xmm5
+	movdqa	96(%rcx), %xmm4
+	movdqa	112(%rcx), %xmm3

 	movq	%rsi, %rax
 	leaq	sha256_4k(%rip), %rcx
-	call	sha256_xop_main_loop
+	call	sha256_xop_main_loop_pre

 	paddd	0(%rdx), %xmm7
 	paddd	16(%rdx), %xmm5
@@ -1302,7 +1319,7 @@ sha256d_4way_xop:
 	movdqa	%xmm0, 224(%rsp)
 	movdqa	%xmm1, 240(%rsp)

-	leaq	256(%rsp), %rcx
+	leaq	256(%rsp), %rax
 	call	sha256_xop_extend_loop

 	movdqa	sha256_4h+0(%rip), %xmm7
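End of the sha2-x64.S hunks. Two changes repeat throughout the file: the 4-way extend loops now take the schedule pointer in %rax rather than %rcx (judging by the new movdqa 0(%rcx) loads above, this frees %rcx to carry the fourth argument, the prehash pointer, into the sha256d_4way_* entry code), and each loop gains a *_loop_pre entry point placed just past the work that is now done once per work unit: the extend round producing W[16..17], and main-loop rounds 0-2. A scalar C model of the two-entry-point pattern; the sketch and its names are illustrative, not part of the commit:

#include <stdint.h>

static inline uint32_t rotr(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }
static inline uint32_t sig0(uint32_t x) { return rotr(x, 7) ^ rotr(x, 18) ^ (x >> 3); }
static inline uint32_t sig1(uint32_t x) { return rotr(x, 17) ^ rotr(x, 19) ^ (x >> 10); }

static void extend_from(uint32_t *W, int first)
{
	for (int i = first; i < 64; i++)
		W[i] = sig1(W[i - 2]) + W[i - 7] + sig0(W[i - 15]) + W[i - 16];
}

/* models sha256_*_extend_loop: full schedule, used for the second hash */
static void extend_loop(uint32_t *W)     { extend_from(W, 16); }
/* models sha256_*_extend_loop_pre: skips W[16..17], already computed
 * per work unit because those words do not depend on the nonce W[3]   */
static void extend_loop_pre(uint32_t *W) { extend_from(W, 18); }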

sha2.c (95 lines changed)
@@ -164,12 +164,6 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
 		state[i] += S[i];
 }

-#ifdef HAVE_SHA256_4WAY
-#define SHA256D_MAX_WAYS 4
-void sha256d_4way(uint32_t *hash, uint32_t *data, const uint32_t *midstate);
-#else
-#define SHA256D_MAX_WAYS 1
-#endif

 static const uint32_t sha256d_hash1[16] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -178,23 +172,64 @@ static const uint32_t sha256d_hash1[16] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000100
 };

+static inline void sha256d_preextend(uint32_t *W)
+{
+	W[16] = s1(W[14]) + W[ 9] + s0(W[ 1]) + W[ 0];
+	W[17] = s1(W[15]) + W[10] + s0(W[ 2]) + W[ 1];
+	W[18] = s1(W[16]) + W[11] + W[ 2];
+	W[19] = s1(W[17]) + W[12] + s0(W[ 4]);
+	W[20] = W[13] + s0(W[ 5]) + W[ 4];
+	W[21] = W[14] + s0(W[ 6]) + W[ 5];
+	W[22] = W[15] + s0(W[ 7]) + W[ 6];
+	W[23] = W[16] + s0(W[ 8]) + W[ 7];
+	W[24] = W[17] + s0(W[ 9]) + W[ 8];
+	W[25] = s0(W[10]) + W[ 9];
+	W[26] = s0(W[11]) + W[10];
+	W[27] = s0(W[12]) + W[11];
+	W[28] = s0(W[13]) + W[12];
+	W[29] = s0(W[14]) + W[13];
+	W[30] = s0(W[15]) + W[14];
+	W[31] = s0(W[16]) + W[15];
+}
+
+static inline void sha256d_prehash(uint32_t *S, const uint32_t *W)
+{
+	uint32_t t0, t1;
+	RNDr(S, W, 0);
+	RNDr(S, W, 1);
+	RNDr(S, W, 2);
+}
+
 static inline void sha256d(uint32_t *hash, uint32_t *W,
-	const uint32_t *midstate)
+	const uint32_t *midstate, const uint32_t *prehash)
 {
 	uint32_t S[64];
+	uint32_t E[14];
 	uint32_t t0, t1;
 	int i;

-	for (i = 16; i < 64; i += 2) {
+	memcpy(E, W + 18, sizeof(E));
+	W[18] += s0(W[3]);
+	W[19] += W[3];
+	W[20] += s1(W[18]);
+	W[21] += s1(W[19]);
+	W[22] += s1(W[20]);
+	W[23] += s1(W[21]);
+	W[24] += s1(W[22]);
+	W[25] += s1(W[23]) + W[18];
+	W[26] += s1(W[24]) + W[19];
+	W[27] += s1(W[25]) + W[20];
+	W[28] += s1(W[26]) + W[21];
+	W[29] += s1(W[27]) + W[22];
+	W[30] += s1(W[28]) + W[23];
+	W[31] += s1(W[29]) + W[24];
+	for (i = 32; i < 64; i += 2) {
 		W[i]   = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
 		W[i+1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
 	}

-	memcpy(S, midstate, 32);
+	memcpy(S, prehash, 32);

-	RNDr(S, W, 0);
-	RNDr(S, W, 1);
-	RNDr(S, W, 2);
 	RNDr(S, W, 3);
 	RNDr(S, W, 4);
 	RNDr(S, W, 5);
@@ -260,6 +295,8 @@ static inline void sha256d(uint32_t *hash, uint32_t *W,
 	for (i = 0; i < 8; i++)
 		S[i] += midstate[i];

+	memcpy(W + 18, E, sizeof(E));
+
 	memcpy(S + 8, sha256d_hash1 + 8, 32);
 	for (i = 16; i < 64; i += 2) {
 		S[i] = s1(S[i - 2]) + S[i - 7] + s0(S[i - 15]) + S[i - 16];
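The memcpy added here pairs with the one at the top of sha256d in the previous hunk: the function saves the 14 partially extended words W[18..31], patches them in place with the nonce-dependent terms, and restores the pristine precomputed values before returning, so the next nonce starts from the same baseline. A skeleton of that save/patch/restore pattern, with the patch step elided since the diff above shows it in full:

#include <stdint.h>
#include <string.h>

void per_nonce(uint32_t *W)	/* W[16..31] preextended once per work unit */
{
	uint32_t E[14];
	memcpy(E, W + 18, sizeof(E));	/* save W[18..31]                     */
	/* ... W[18] += s0(W[3]); W[19] += W[3]; ...; run the rounds ...      */
	memcpy(W + 18, E, sizeof(E));	/* restore for the next nonce         */
}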
@@ -337,12 +374,21 @@ static inline void sha256d(uint32_t *hash, uint32_t *W,
 		hash[i] += sha256_h[i];
 }

+#ifdef HAVE_SHA256_4WAY
+#define SHA256D_MAX_WAYS 4
+void sha256d_4way(uint32_t *hash, uint32_t *data,
+	const uint32_t *midstate, const uint32_t *prehash);
+#else
+#define SHA256D_MAX_WAYS 1
+#endif
+
 int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	uint32_t max_nonce, unsigned long *hashes_done)
 {
 	uint32_t data[SHA256D_MAX_WAYS * 64] __attribute__((aligned(128)));
 	uint32_t hash[SHA256D_MAX_WAYS * 8] __attribute__((aligned(32)));
 	uint32_t midstate[SHA256D_MAX_WAYS * 8] __attribute__((aligned(32)));
+	uint32_t prehash[SHA256D_MAX_WAYS * 8] __attribute__((aligned(32)));
 	uint32_t n = pdata[19] - 1;
 	const uint32_t Htarg = ptarget[7];
 #ifdef HAVE_SHA256_4WAY
@@ -352,15 +398,22 @@ int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 #endif
 	int i, j;

-	for (i = 15; i >= 0; i--)
+	memcpy(data, pdata + 16, 64);
+	sha256d_preextend(data);
+	for (i = 31; i >= 0; i--)
 		for (j = 0; j < ways; j++)
-			data[i * ways + j] = pdata[16 + i];
+			data[i * ways + j] = data[i];

 	sha256_init(midstate);
 	sha256_transform(midstate, pdata, 0);
-	for (i = 7; i >= 0; i--)
-		for (j = 0; j < ways; j++)
-			midstate[i * ways + j] = midstate[i];
+	memcpy(prehash, midstate, 32);
+	sha256d_prehash(prehash, pdata + 16);
+	for (i = 7; i >= 0; i--) {
+		for (j = 0; j < ways; j++) {
+			midstate[i * ways + j] = midstate[i];
+			prehash[i * ways + j] = prehash[i];
+		}
+	}

 #ifdef HAVE_SHA256_4WAY
 	if (ways == 4)
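The replication loops above broadcast each scalar word into `ways` adjacent slots so the 4-way SIMD code sees one copy per lane; iterating downward lets the array be interleaved in place. A self-contained sketch of the trick (hypothetical function name):

#include <stdint.h>

/* In-place lane broadcast: safe in descending order because the write
 * index i * ways + j is always >= i, so a write can never land on a
 * slot that a smaller, still-unprocessed i has yet to read. */
static void broadcast_lanes(uint32_t *a, int n, int ways)
{
	for (int i = n - 1; i >= 0; i--)
		for (int j = 0; j < ways; j++)
			a[i * ways + j] = a[i];
}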
@@ -368,7 +421,7 @@ int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 		for (i = 0; i < 4; i++)
 			data[4 * 3 + i] = ++n;

-		sha256d_4way(hash, data, midstate);
+		sha256d_4way(hash, data, midstate, prehash);

 		for (i = 0; i < 4; i++) {
 			if (hash[4 * 7 + i] <= Htarg) {
@@ -386,12 +439,12 @@ int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	else
 #endif
 	do {
-		data[3 + i] = ++n;
-		sha256d(hash, data, midstate);
-		if (hash[7 + i] <= Htarg) {
+		data[3] = ++n;
+		sha256d(hash, data, midstate, prehash);
+		if (hash[7] <= Htarg) {
 			if (fulltest(hash, ptarget)) {
 				*hashes_done = n - pdata[19] + 1;
-				pdata[19] = data[3 + i];
+				pdata[19] = data[3];
 				return 1;
 			}
 		}
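Taken together, the scalar path now does all nonce-independent work above the loop, leaving only the nonce store and one trimmed double hash per attempt: the prehash removes 3 of the roughly 128 compression rounds each sha256d call performs, on top of the saved schedule arithmetic. A condensed, compilable restatement of the new flow; the prototypes are declared extern here purely for illustration (in sha2.c these functions are static):

#include <stdint.h>
#include <string.h>

void sha256_init(uint32_t *state);
void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
void sha256d_preextend(uint32_t *W);
void sha256d_prehash(uint32_t *S, const uint32_t *W);
void sha256d(uint32_t *hash, uint32_t *W,
	const uint32_t *midstate, const uint32_t *prehash);

uint32_t scan(uint32_t *pdata, uint32_t htarg, uint32_t max_nonce)
{
	uint32_t data[64], hash[8], midstate[8], prehash[8];
	uint32_t n = pdata[19] - 1;

	/* once per work unit */
	memcpy(data, pdata + 16, 64);
	sha256d_preextend(data);		/* partial W[16..31]           */
	sha256_init(midstate);
	sha256_transform(midstate, pdata, 0);	/* first 64-byte block         */
	memcpy(prehash, midstate, 32);
	sha256d_prehash(prehash, pdata + 16);	/* rounds 0-2 of block two     */

	/* per nonce: one store, one trimmed double hash */
	do {
		data[3] = ++n;			/* the nonce is W[3]           */
		sha256d(hash, data, midstate, prehash);
		if (hash[7] <= htarg)
			return n;		/* candidate found             */
	} while (n < max_nonce);
	return 0;
}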