Drop unused 2-way scrypt
This commit is contained in:
parent
8af4ed77e6
commit
e52982ab7f
5 changed files with 82 additions and 512 deletions
3
miner.h
3
miner.h
|
@ -113,7 +113,8 @@ void sha256_init(uint32_t *state);
|
|||
void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
|
||||
|
||||
#if defined(__x86_64__)
|
||||
#define SHA256_4WAY 1
|
||||
#define HAVE_SHA256_4WAY 1
|
||||
int sha256_use_4way();
|
||||
void sha256_init_4way(uint32_t *state);
|
||||
void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
|
||||
#endif
|
||||
|
|
407
scrypt-x64.S
407
scrypt-x64.S
|
@ -903,413 +903,6 @@ scrypt_core_xmm_loop2:
|
|||
ret
|
||||
|
||||
|
||||
/*
 * salsa8_core_2way_xmm_doubleround
 *
 * One double round (of the Salsa20 core, going by the macro name) applied
 * to two independent states whose instructions are interleaved:
 *   state A: %xmm0-%xmm3,  scratch %xmm4 / %xmm5
 *   state B: %xmm8-%xmm11, scratch %xmm6 / %xmm7
 *
 * Every step has the same shape, per 32-bit lane:
 *   t = a + b;  target ^= (t << n) ^ (t >> (32 - n))
 * i.e. the sum rotated left by n is XORed into the target, with
 * n = 7, 9, 13, 18.  pshufd ($0x93 / $0x4e / $0x39) permutes the lanes of
 * a row between steps.
 *
 * Takes no macro arguments.  Clobbers %xmm4-%xmm7.
 */
.macro salsa8_core_2way_xmm_doubleround
|
||||
/* First half: targets are %xmm3, %xmm2, %xmm1, %xmm0 (B: %xmm11..%xmm8). */
movdqa %xmm1, %xmm4
|
||||
movdqa %xmm9, %xmm6
|
||||
paddd %xmm0, %xmm4
|
||||
paddd %xmm8, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $7, %xmm4
|
||||
pslld $7, %xmm6
|
||||
psrld $25, %xmm5
|
||||
psrld $25, %xmm7
|
||||
pxor %xmm4, %xmm3
|
||||
pxor %xmm6, %xmm11
|
||||
pxor %xmm5, %xmm3
|
||||
pxor %xmm7, %xmm11
|
||||
movdqa %xmm0, %xmm4
|
||||
movdqa %xmm8, %xmm6
|
||||
|
||||
paddd %xmm3, %xmm4
|
||||
paddd %xmm11, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $9, %xmm4
|
||||
pslld $9, %xmm6
|
||||
psrld $23, %xmm5
|
||||
psrld $23, %xmm7
|
||||
pxor %xmm4, %xmm2
|
||||
pxor %xmm6, %xmm10
|
||||
movdqa %xmm3, %xmm4
|
||||
movdqa %xmm11, %xmm6
|
||||
pshufd $0x93, %xmm3, %xmm3
|
||||
pshufd $0x93, %xmm11, %xmm11
|
||||
pxor %xmm5, %xmm2
|
||||
pxor %xmm7, %xmm10
|
||||
|
||||
paddd %xmm2, %xmm4
|
||||
paddd %xmm10, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $13, %xmm4
|
||||
pslld $13, %xmm6
|
||||
psrld $19, %xmm5
|
||||
psrld $19, %xmm7
|
||||
pxor %xmm4, %xmm1
|
||||
pxor %xmm6, %xmm9
|
||||
movdqa %xmm2, %xmm4
|
||||
movdqa %xmm10, %xmm6
|
||||
pshufd $0x4e, %xmm2, %xmm2
|
||||
pshufd $0x4e, %xmm10, %xmm10
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm7, %xmm9
|
||||
|
||||
paddd %xmm1, %xmm4
|
||||
paddd %xmm9, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $18, %xmm4
|
||||
pslld $18, %xmm6
|
||||
psrld $14, %xmm5
|
||||
psrld $14, %xmm7
|
||||
pxor %xmm4, %xmm0
|
||||
pxor %xmm6, %xmm8
|
||||
pshufd $0x39, %xmm1, %xmm1
|
||||
pshufd $0x39, %xmm9, %xmm9
|
||||
pxor %xmm5, %xmm0
|
||||
pxor %xmm7, %xmm8
|
||||
/* Second half: targets are %xmm1, %xmm2, %xmm3, %xmm0 (B: %xmm9, %xmm10, %xmm11, %xmm8). */
movdqa %xmm3, %xmm4
|
||||
movdqa %xmm11, %xmm6
|
||||
|
||||
paddd %xmm0, %xmm4
|
||||
paddd %xmm8, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $7, %xmm4
|
||||
pslld $7, %xmm6
|
||||
psrld $25, %xmm5
|
||||
psrld $25, %xmm7
|
||||
pxor %xmm4, %xmm1
|
||||
pxor %xmm6, %xmm9
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm7, %xmm9
|
||||
movdqa %xmm0, %xmm4
|
||||
movdqa %xmm8, %xmm6
|
||||
|
||||
paddd %xmm1, %xmm4
|
||||
paddd %xmm9, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $9, %xmm4
|
||||
pslld $9, %xmm6
|
||||
psrld $23, %xmm5
|
||||
psrld $23, %xmm7
|
||||
pxor %xmm4, %xmm2
|
||||
pxor %xmm6, %xmm10
|
||||
movdqa %xmm1, %xmm4
|
||||
movdqa %xmm9, %xmm6
|
||||
pshufd $0x93, %xmm1, %xmm1
|
||||
pshufd $0x93, %xmm9, %xmm9
|
||||
pxor %xmm5, %xmm2
|
||||
pxor %xmm7, %xmm10
|
||||
|
||||
paddd %xmm2, %xmm4
|
||||
paddd %xmm10, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $13, %xmm4
|
||||
pslld $13, %xmm6
|
||||
psrld $19, %xmm5
|
||||
psrld $19, %xmm7
|
||||
pxor %xmm4, %xmm3
|
||||
pxor %xmm6, %xmm11
|
||||
movdqa %xmm2, %xmm4
|
||||
movdqa %xmm10, %xmm6
|
||||
pshufd $0x4e, %xmm2, %xmm2
|
||||
pshufd $0x4e, %xmm10, %xmm10
|
||||
pxor %xmm5, %xmm3
|
||||
pxor %xmm7, %xmm11
|
||||
|
||||
paddd %xmm3, %xmm4
|
||||
paddd %xmm11, %xmm6
|
||||
movdqa %xmm4, %xmm5
|
||||
movdqa %xmm6, %xmm7
|
||||
pslld $18, %xmm4
|
||||
pslld $18, %xmm6
|
||||
psrld $14, %xmm5
|
||||
psrld $14, %xmm7
|
||||
pxor %xmm4, %xmm0
|
||||
pxor %xmm6, %xmm8
|
||||
pshufd $0x39, %xmm3, %xmm3
|
||||
pshufd $0x39, %xmm11, %xmm11
|
||||
pxor %xmm5, %xmm0
|
||||
pxor %xmm7, %xmm8
|
||||
.endm
|
||||
|
||||
/*
 * salsa8_core_2way_xmm
 *
 * Expands salsa8_core_2way_xmm_doubleround four times back to back, i.e.
 * eight rounds on each of the two states held in %xmm0-%xmm3 and
 * %xmm8-%xmm11.  Takes no arguments; inherits the double-round macro's
 * use of %xmm4-%xmm7 as scratch.
 */
.macro salsa8_core_2way_xmm
|
||||
salsa8_core_2way_xmm_doubleround
|
||||
salsa8_core_2way_xmm_doubleround
|
||||
salsa8_core_2way_xmm_doubleround
|
||||
salsa8_core_2way_xmm_doubleround
|
||||
.endm
|
||||
|
||||
|
||||
/*
 * void scrypt_core_2way(uint32_t *X, uint32_t *V)
 *
 * Processes two interleaved 128-byte states ("ways") stored back to back
 * in the 256-byte buffer X, using V as the 1024-entry table (256 bytes per
 * entry: bytes 0..127 for way 1, bytes 128..255 for way 2).
 *
 * In:    %rdi = X, %rsi = V.  When WIN64 is defined the arguments arrive
 *        in %rcx / %rdx and are moved into %rdi / %rsi.
 * Out:   X is overwritten with the result; V is overwritten (262144 bytes).
 * Saved: %rbx, %rbp; with WIN64 also %xmm6-%xmm15, %rdi, %rsi.
 * Stack: 264 bytes of locals holding a working copy of X.
 * Note:  V is accessed with movdqa, so it must be 16-byte aligned.
 *
 * Register roles inside the loops:
 *   way 1: first 64 bytes in %xmm0-%xmm3 / 0..63(%rsp),
 *          second 64 bytes in 64..127(%rsp) (loaded to %xmm4-%xmm7, which
 *          the salsa macro then clobbers)
 *   way 2: first 64 bytes in %xmm8-%xmm11 / 128..191(%rsp),
 *          second 64 bytes kept live in %xmm12-%xmm15
 *
 * scrypt_shuffle is a macro defined outside this excerpt; here it is used
 * to copy 64-byte chunks between X and the stack copy.
 */
.text
|
||||
.p2align 6
|
||||
.globl scrypt_core_2way
|
||||
.globl _scrypt_core_2way
|
||||
scrypt_core_2way:
|
||||
_scrypt_core_2way:
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
#if defined(WIN64)
|
||||
/* Spill %xmm6-%xmm15 and %rdi/%rsi, then remap the arguments to %rdi/%rsi. */
subq $176, %rsp
|
||||
movdqa %xmm6, 8(%rsp)
|
||||
movdqa %xmm7, 24(%rsp)
|
||||
movdqa %xmm8, 40(%rsp)
|
||||
movdqa %xmm9, 56(%rsp)
|
||||
movdqa %xmm10, 72(%rsp)
|
||||
movdqa %xmm11, 88(%rsp)
|
||||
movdqa %xmm12, 104(%rsp)
|
||||
movdqa %xmm13, 120(%rsp)
|
||||
movdqa %xmm14, 136(%rsp)
|
||||
movdqa %xmm15, 152(%rsp)
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
#endif
|
||||
/* 264 = 256 bytes of state + 8, which leaves %rsp 16-byte aligned for movdqa. */
subq $264, %rsp
|
||||
|
||||
/* Copy X (4 x 64 bytes) into the stack working area. */
scrypt_shuffle %rdi, 0, %rsp, 0
|
||||
scrypt_shuffle %rdi, 64, %rsp, 64
|
||||
scrypt_shuffle %rdi, 128, %rsp, 128
|
||||
scrypt_shuffle %rdi, 192, %rsp, 192
|
||||
|
||||
/* Second half of way 2 stays in %xmm12-%xmm15 for the whole function. */
movdqa 192(%rsp), %xmm12
|
||||
movdqa 208(%rsp), %xmm13
|
||||
movdqa 224(%rsp), %xmm14
|
||||
movdqa 240(%rsp), %xmm15
|
||||
|
||||
/* Loop 1: %rbp walks V in 256-byte steps up to V + 262144 (1024 entries). */
movq %rsi, %rbp
|
||||
leaq 262144(%rsi), %rcx
|
||||
scrypt_core_2way_loop1:
|
||||
movdqa 0(%rsp), %xmm0
|
||||
movdqa 16(%rsp), %xmm1
|
||||
movdqa 32(%rsp), %xmm2
|
||||
movdqa 48(%rsp), %xmm3
|
||||
movdqa 64(%rsp), %xmm4
|
||||
movdqa 80(%rsp), %xmm5
|
||||
movdqa 96(%rsp), %xmm6
|
||||
movdqa 112(%rsp), %xmm7
|
||||
movdqa 128(%rsp), %xmm8
|
||||
movdqa 144(%rsp), %xmm9
|
||||
movdqa 160(%rsp), %xmm10
|
||||
movdqa 176(%rsp), %xmm11
|
||||
/* Way 1: first half ^= second half; the XORed value (not the raw first half) is what gets stored in V. */
pxor %xmm4, %xmm0
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm6, %xmm2
|
||||
pxor %xmm7, %xmm3
|
||||
movdqa %xmm0, 0(%rbp)
|
||||
movdqa %xmm1, 16(%rbp)
|
||||
movdqa %xmm2, 32(%rbp)
|
||||
movdqa %xmm3, 48(%rbp)
|
||||
movdqa %xmm4, 64(%rbp)
|
||||
movdqa %xmm5, 80(%rbp)
|
||||
movdqa %xmm6, 96(%rbp)
|
||||
movdqa %xmm7, 112(%rbp)
|
||||
/* Way 2: same treatment, stored at offset 128 of the current V entry. */
pxor %xmm12, %xmm8
|
||||
pxor %xmm13, %xmm9
|
||||
pxor %xmm14, %xmm10
|
||||
pxor %xmm15, %xmm11
|
||||
movdqa %xmm8, 128(%rbp)
|
||||
movdqa %xmm9, 144(%rbp)
|
||||
movdqa %xmm10, 160(%rbp)
|
||||
movdqa %xmm11, 176(%rbp)
|
||||
movdqa %xmm12, 192(%rbp)
|
||||
movdqa %xmm13, 208(%rbp)
|
||||
movdqa %xmm14, 224(%rbp)
|
||||
movdqa %xmm15, 240(%rbp)
|
||||
|
||||
/* First salsa pass; the input just written to V is added back (feed-forward). */
salsa8_core_2way_xmm
|
||||
paddd 0(%rbp), %xmm0
|
||||
paddd 16(%rbp), %xmm1
|
||||
paddd 32(%rbp), %xmm2
|
||||
paddd 48(%rbp), %xmm3
|
||||
paddd 128(%rbp), %xmm8
|
||||
paddd 144(%rbp), %xmm9
|
||||
paddd 160(%rbp), %xmm10
|
||||
paddd 176(%rbp), %xmm11
|
||||
movdqa %xmm0, 0(%rsp)
|
||||
movdqa %xmm1, 16(%rsp)
|
||||
movdqa %xmm2, 32(%rsp)
|
||||
movdqa %xmm3, 48(%rsp)
|
||||
movdqa %xmm8, 128(%rsp)
|
||||
movdqa %xmm9, 144(%rsp)
|
||||
movdqa %xmm10, 160(%rsp)
|
||||
movdqa %xmm11, 176(%rsp)
|
||||
|
||||
/* Second salsa pass: input = new first half ^ old second half; the input is kept (stack / %xmm12-15) for the feed-forward add. */
pxor 64(%rsp), %xmm0
|
||||
pxor 80(%rsp), %xmm1
|
||||
pxor 96(%rsp), %xmm2
|
||||
pxor 112(%rsp), %xmm3
|
||||
pxor %xmm12, %xmm8
|
||||
pxor %xmm13, %xmm9
|
||||
pxor %xmm14, %xmm10
|
||||
pxor %xmm15, %xmm11
|
||||
movdqa %xmm0, 64(%rsp)
|
||||
movdqa %xmm1, 80(%rsp)
|
||||
movdqa %xmm2, 96(%rsp)
|
||||
movdqa %xmm3, 112(%rsp)
|
||||
movdqa %xmm8, %xmm12
|
||||
movdqa %xmm9, %xmm13
|
||||
movdqa %xmm10, %xmm14
|
||||
movdqa %xmm11, %xmm15
|
||||
salsa8_core_2way_xmm
|
||||
paddd 64(%rsp), %xmm0
|
||||
paddd 80(%rsp), %xmm1
|
||||
paddd 96(%rsp), %xmm2
|
||||
paddd 112(%rsp), %xmm3
|
||||
paddd %xmm8, %xmm12
|
||||
paddd %xmm9, %xmm13
|
||||
paddd %xmm10, %xmm14
|
||||
paddd %xmm11, %xmm15
|
||||
movdqa %xmm0, 64(%rsp)
|
||||
movdqa %xmm1, 80(%rsp)
|
||||
movdqa %xmm2, 96(%rsp)
|
||||
movdqa %xmm3, 112(%rsp)
|
||||
|
||||
addq $256, %rbp
|
||||
cmpq %rcx, %rbp
|
||||
jne scrypt_core_2way_loop1
|
||||
|
||||
/* Loop 2: 1024 iterations, %rcx counts down. */
movq $1024, %rcx
|
||||
scrypt_core_2way_loop2:
|
||||
movdqa 0(%rsp), %xmm0
|
||||
movdqa 16(%rsp), %xmm1
|
||||
movdqa 32(%rsp), %xmm2
|
||||
movdqa 48(%rsp), %xmm3
|
||||
movdqa 64(%rsp), %xmm4
|
||||
movdqa 80(%rsp), %xmm5
|
||||
movdqa 96(%rsp), %xmm6
|
||||
movdqa 112(%rsp), %xmm7
|
||||
movdqa 128(%rsp), %xmm8
|
||||
movdqa 144(%rsp), %xmm9
|
||||
movdqa 160(%rsp), %xmm10
|
||||
movdqa 176(%rsp), %xmm11
|
||||
/* Way 1 table offset: (low dword of the second half & 1023) * 256, in %rbp. */
movd %xmm4, %ebp
|
||||
andl $1023, %ebp
|
||||
shll $8, %ebp
|
||||
pxor 0(%rsi, %rbp), %xmm0
|
||||
pxor 16(%rsi, %rbp), %xmm1
|
||||
pxor 32(%rsi, %rbp), %xmm2
|
||||
pxor 48(%rsi, %rbp), %xmm3
|
||||
/* Way 2 table offset: same computation from %xmm12, plus 128 to reach way 2's half of the entry; kept in %rbx. */
movd %xmm12, %ebx
|
||||
andl $1023, %ebx
|
||||
shll $8, %ebx
|
||||
addl $128, %ebx
|
||||
pxor 0(%rsi, %rbx), %xmm8
|
||||
pxor 16(%rsi, %rbx), %xmm9
|
||||
pxor 32(%rsi, %rbx), %xmm10
|
||||
pxor 48(%rsi, %rbx), %xmm11
|
||||
|
||||
/* XOR in the current second halves and save the salsa input for the feed-forward add. */
pxor %xmm4, %xmm0
|
||||
pxor %xmm5, %xmm1
|
||||
pxor %xmm6, %xmm2
|
||||
pxor %xmm7, %xmm3
|
||||
pxor %xmm12, %xmm8
|
||||
pxor %xmm13, %xmm9
|
||||
pxor %xmm14, %xmm10
|
||||
pxor %xmm15, %xmm11
|
||||
movdqa %xmm0, 0(%rsp)
|
||||
movdqa %xmm1, 16(%rsp)
|
||||
movdqa %xmm2, 32(%rsp)
|
||||
movdqa %xmm3, 48(%rsp)
|
||||
movdqa %xmm8, 128(%rsp)
|
||||
movdqa %xmm9, 144(%rsp)
|
||||
movdqa %xmm10, 160(%rsp)
|
||||
movdqa %xmm11, 176(%rsp)
|
||||
salsa8_core_2way_xmm
|
||||
paddd 0(%rsp), %xmm0
|
||||
paddd 16(%rsp), %xmm1
|
||||
paddd 32(%rsp), %xmm2
|
||||
paddd 48(%rsp), %xmm3
|
||||
paddd 128(%rsp), %xmm8
|
||||
paddd 144(%rsp), %xmm9
|
||||
paddd 160(%rsp), %xmm10
|
||||
paddd 176(%rsp), %xmm11
|
||||
movdqa %xmm0, 0(%rsp)
|
||||
movdqa %xmm1, 16(%rsp)
|
||||
movdqa %xmm2, 32(%rsp)
|
||||
movdqa %xmm3, 48(%rsp)
|
||||
movdqa %xmm8, 128(%rsp)
|
||||
movdqa %xmm9, 144(%rsp)
|
||||
movdqa %xmm10, 160(%rsp)
|
||||
movdqa %xmm11, 176(%rsp)
|
||||
|
||||
/* Second pass: XOR in the table entry's second 64 bytes (same %rbp / %rbx offsets) and the current second halves. */
pxor 64(%rsi, %rbp), %xmm0
|
||||
pxor 80(%rsi, %rbp), %xmm1
|
||||
pxor 96(%rsi, %rbp), %xmm2
|
||||
pxor 112(%rsi, %rbp), %xmm3
|
||||
pxor 64(%rsi, %rbx), %xmm8
|
||||
pxor 80(%rsi, %rbx), %xmm9
|
||||
pxor 96(%rsi, %rbx), %xmm10
|
||||
pxor 112(%rsi, %rbx), %xmm11
|
||||
pxor 64(%rsp), %xmm0
|
||||
pxor 80(%rsp), %xmm1
|
||||
pxor 96(%rsp), %xmm2
|
||||
pxor 112(%rsp), %xmm3
|
||||
pxor %xmm12, %xmm8
|
||||
pxor %xmm13, %xmm9
|
||||
pxor %xmm14, %xmm10
|
||||
pxor %xmm15, %xmm11
|
||||
movdqa %xmm0, 64(%rsp)
|
||||
movdqa %xmm1, 80(%rsp)
|
||||
movdqa %xmm2, 96(%rsp)
|
||||
movdqa %xmm3, 112(%rsp)
|
||||
movdqa %xmm8, %xmm12
|
||||
movdqa %xmm9, %xmm13
|
||||
movdqa %xmm10, %xmm14
|
||||
movdqa %xmm11, %xmm15
|
||||
salsa8_core_2way_xmm
|
||||
paddd 64(%rsp), %xmm0
|
||||
paddd 80(%rsp), %xmm1
|
||||
paddd 96(%rsp), %xmm2
|
||||
paddd 112(%rsp), %xmm3
|
||||
paddd %xmm8, %xmm12
|
||||
paddd %xmm9, %xmm13
|
||||
paddd %xmm10, %xmm14
|
||||
paddd %xmm11, %xmm15
|
||||
movdqa %xmm0, 64(%rsp)
|
||||
movdqa %xmm1, 80(%rsp)
|
||||
movdqa %xmm2, 96(%rsp)
|
||||
movdqa %xmm3, 112(%rsp)
|
||||
|
||||
/* ja after subq: loop again while the decremented counter is non-zero. */
subq $1, %rcx
|
||||
ja scrypt_core_2way_loop2
|
||||
|
||||
/* Flush way 2's second half to the stack copy, then write the whole state back to X. */
movdqa %xmm12, 192(%rsp)
|
||||
movdqa %xmm13, 208(%rsp)
|
||||
movdqa %xmm14, 224(%rsp)
|
||||
movdqa %xmm15, 240(%rsp)
|
||||
|
||||
scrypt_shuffle %rsp, 0, %rdi, 0
|
||||
scrypt_shuffle %rsp, 64, %rdi, 64
|
||||
scrypt_shuffle %rsp, 128, %rdi, 128
|
||||
scrypt_shuffle %rsp, 192, %rdi, 192
|
||||
|
||||
addq $264, %rsp
|
||||
#if defined(WIN64)
|
||||
/* Restore in reverse order of the WIN64 prologue. */
popq %rsi
|
||||
popq %rdi
|
||||
movdqa 8(%rsp), %xmm6
|
||||
movdqa 24(%rsp), %xmm7
|
||||
movdqa 40(%rsp), %xmm8
|
||||
movdqa 56(%rsp), %xmm9
|
||||
movdqa 72(%rsp), %xmm10
|
||||
movdqa 88(%rsp), %xmm11
|
||||
movdqa 104(%rsp), %xmm12
|
||||
movdqa 120(%rsp), %xmm13
|
||||
movdqa 136(%rsp), %xmm14
|
||||
movdqa 152(%rsp), %xmm15
|
||||
addq $176, %rsp
|
||||
#endif
|
||||
popq %rbp
|
||||
popq %rbx
|
||||
ret
|
||||
|
||||
|
||||
#if defined(USE_AVX)
|
||||
.macro salsa8_core_3way_avx_doubleround
|
||||
vpaddd %xmm0, %xmm1, %xmm4
|
||||
|
|
54
scrypt.c
54
scrypt.c
|
@ -119,7 +119,7 @@ static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
|
|||
}
|
||||
|
||||
|
||||
#ifdef SHA256_4WAY
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
|
||||
static const uint32_t keypad_4way[4 * 12] = {
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
|
@ -253,15 +253,15 @@ static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
|
|||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
#endif /* SHA256_4WAY */
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 3
|
||||
#define HAVE_SCRYPT_3WAY 1
|
||||
int scrypt_best_throughput();
|
||||
void scrypt_core(uint32_t *X, uint32_t *V);
|
||||
void scrypt_core_2way(uint32_t *X, uint32_t *V);
|
||||
void scrypt_core_3way(uint32_t *X, uint32_t *V);
|
||||
|
||||
#elif defined(__i386__)
|
||||
|
@ -370,7 +370,7 @@ unsigned char *scrypt_buffer_alloc()
|
|||
return malloc(SCRYPT_BUFFER_SIZE);
|
||||
}
|
||||
|
||||
static void scrypt_1024_1_1_256_sp(const uint32_t *input, uint32_t *output,
|
||||
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||
uint32_t *midstate, unsigned char *scratchpad)
|
||||
{
|
||||
uint32_t tstate[8], ostate[8];
|
||||
|
@ -388,33 +388,8 @@ static void scrypt_1024_1_1_256_sp(const uint32_t *input, uint32_t *output,
|
|||
PBKDF2_SHA256_128_32(tstate, ostate, X, output);
|
||||
}
|
||||
|
||||
#if SCRYPT_MAX_WAYS >= 2
|
||||
/*
 * Two-way variant: processes two inputs in one call.
 *
 *   input      - first input at input[0..], second at input + 20
 *                (20 words = 80 bytes further on)
 *   output     - first result written at output, second at output + 8
 *   midstate   - 32 bytes copied into both tstate1 and tstate2
 *   scratchpad - raw buffer; rounded up to a 64-byte boundary to obtain V
 *
 * X holds the first 32-word state and Y (= X + 32) the second, so the
 * pair is contiguous when handed to scrypt_core_2way().  The HMAC/PBKDF2
 * helpers are defined elsewhere in the file.
 */
static void scrypt_1024_1_1_256_sp_2way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
|
||||
{
|
||||
uint32_t tstate1[8], tstate2[8];
|
||||
uint32_t ostate1[8], ostate2[8];
|
||||
uint32_t X[2 * 32], *Y = X + 32;
|
||||
uint32_t *V;
|
||||
|
||||
/* Round scratchpad up to the next multiple of 64 bytes. */
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||
|
||||
memcpy(tstate1, midstate, 32);
|
||||
memcpy(tstate2, midstate, 32);
|
||||
HMAC_SHA256_80_init(input, tstate1, ostate1);
|
||||
HMAC_SHA256_80_init(input + 20, tstate2, ostate2);
|
||||
PBKDF2_SHA256_80_128(tstate1, ostate1, input, X);
|
||||
PBKDF2_SHA256_80_128(tstate2, ostate2, input + 20, Y);
|
||||
|
||||
/* Both states are mixed together in one pass over V. */
scrypt_core_2way(X, V);
|
||||
|
||||
PBKDF2_SHA256_128_32(tstate1, ostate1, X, output);
|
||||
PBKDF2_SHA256_128_32(tstate2, ostate2, Y, output + 8);
|
||||
}
|
||||
#endif /* SCRYPT_MAX_WAYS >= 2 */
|
||||
|
||||
#if SCRYPT_MAX_WAYS >= 3
|
||||
static void scrypt_1024_1_1_256_sp_3way(const uint32_t *input,
|
||||
#ifdef HAVE_SCRYPT_3WAY
|
||||
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
|
||||
{
|
||||
uint32_t tstate[4 * 8] __attribute__((aligned(128)));
|
||||
|
@ -456,7 +431,7 @@ static void scrypt_1024_1_1_256_sp_3way(const uint32_t *input,
|
|||
output[i + 16] = W[4 * i + 2];
|
||||
}
|
||||
}
|
||||
#endif /* SCRYPT_MAX_WAYS >= 3 */
|
||||
#endif /* HAVE_SCRYPT_3WAY */
|
||||
|
||||
int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
|
@ -469,6 +444,10 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
|||
const int throughput = scrypt_best_throughput();
|
||||
int i;
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
sha256_use_4way();
|
||||
#endif
|
||||
|
||||
for (i = 0; i < throughput; i++)
|
||||
memcpy(data + i * 20, pdata, 80);
|
||||
|
||||
|
@ -479,17 +458,12 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
|||
for (i = 0; i < throughput; i++)
|
||||
data[i * 20 + 19] = ++n;
|
||||
|
||||
#if SCRYPT_MAX_WAYS >= 3
|
||||
#ifdef HAVE_SCRYPT_3WAY
|
||||
if (throughput == 3)
|
||||
scrypt_1024_1_1_256_sp_3way(data, hash, midstate, scratchbuf);
|
||||
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
#if SCRYPT_MAX_WAYS >= 2
|
||||
if (throughput == 2)
|
||||
scrypt_1024_1_1_256_sp_2way(data, hash, midstate, scratchbuf);
|
||||
else
|
||||
#endif
|
||||
scrypt_1024_1_1_256_sp(data, hash, midstate, scratchbuf);
|
||||
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
|
||||
|
||||
for (i = 0; i < throughput; i++) {
|
||||
if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
|
||||
|
|
49
sha2-x64.S
49
sha2-x64.S
|
@ -881,9 +881,6 @@ sha256_transform_4way_core_addr:
|
|||
.globl _sha256_transform_4way
|
||||
sha256_transform_4way:
|
||||
_sha256_transform_4way:
|
||||
movq sha256_transform_4way_core_addr(%rip), %rax
|
||||
testq %rax, %rax
|
||||
jz sha256_transform_4way_init
|
||||
#if defined(WIN64)
|
||||
pushq %rdi
|
||||
subq $96, %rsp
|
||||
|
@ -903,20 +900,8 @@ _sha256_transform_4way:
|
|||
andq $-128, %rsp
|
||||
|
||||
testq %rdx, %rdx
|
||||
jz sha256_transform_4way_block_copy
|
||||
jnz sha256_transform_4way_swap
|
||||
|
||||
p2bswap_rsi_rsp 0
|
||||
p2bswap_rsi_rsp 2
|
||||
p2bswap_rsi_rsp 4
|
||||
p2bswap_rsi_rsp 6
|
||||
p2bswap_rsi_rsp 8
|
||||
p2bswap_rsi_rsp 10
|
||||
p2bswap_rsi_rsp 12
|
||||
p2bswap_rsi_rsp 14
|
||||
jmp *%rax
|
||||
|
||||
.p2align 6
|
||||
sha256_transform_4way_block_copy:
|
||||
movdqu 0*16(%rsi), %xmm0
|
||||
movdqu 1*16(%rsi), %xmm1
|
||||
movdqu 2*16(%rsi), %xmm2
|
||||
|
@ -949,11 +934,19 @@ sha256_transform_4way_block_copy:
|
|||
movdqa %xmm5, 13*16(%rsp)
|
||||
movdqa %xmm6, 14*16(%rsp)
|
||||
movdqa %xmm7, 15*16(%rsp)
|
||||
jmp *%rax
|
||||
jmp *sha256_transform_4way_core_addr(%rip)
|
||||
|
||||
sha256_transform_4way_init:
|
||||
call sha2_4way_init
|
||||
jmp sha256_transform_4way
|
||||
.p2align 6
|
||||
sha256_transform_4way_swap:
|
||||
p2bswap_rsi_rsp 0
|
||||
p2bswap_rsi_rsp 2
|
||||
p2bswap_rsi_rsp 4
|
||||
p2bswap_rsi_rsp 6
|
||||
p2bswap_rsi_rsp 8
|
||||
p2bswap_rsi_rsp 10
|
||||
p2bswap_rsi_rsp 12
|
||||
p2bswap_rsi_rsp 14
|
||||
jmp *sha256_transform_4way_core_addr(%rip)
|
||||
|
||||
.p2align 6
|
||||
sha256_transform_4way_finish:
|
||||
|
@ -1009,14 +1002,7 @@ sha256d_4way_addr:
|
|||
.globl _sha256d_4way
|
||||
sha256d_4way:
|
||||
_sha256d_4way:
|
||||
movq sha256d_4way_addr(%rip), %rax
|
||||
testq %rax, %rax
|
||||
jz sha256d_4way_init
|
||||
jmp *%rax
|
||||
|
||||
sha256d_4way_init:
|
||||
call sha2_4way_init
|
||||
jmp sha256d_4way
|
||||
jmp *sha256d_4way_addr(%rip)
|
||||
|
||||
|
||||
.p2align 6
|
||||
|
@ -1366,8 +1352,12 @@ sha256d_4way_xop:
|
|||
#endif /* USE_XOP */
|
||||
|
||||
|
||||
.text
|
||||
.p2align 6
|
||||
sha2_4way_init:
|
||||
.globl sha256_use_4way
|
||||
.globl _sha256_use_4way
|
||||
sha256_use_4way:
|
||||
_sha256_use_4way:
|
||||
pushq %rbx
|
||||
pushq %rcx
|
||||
pushq %rdx
|
||||
|
@ -1414,6 +1404,7 @@ sha2_4way_init_done:
|
|||
popq %rdx
|
||||
popq %rcx
|
||||
popq %rbx
|
||||
movl $1, %eax
|
||||
ret
|
||||
|
||||
#endif
|
||||
|
|
65
sha2.c
65
sha2.c
|
@ -164,15 +164,12 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
|
|||
state[i] += S[i];
|
||||
}
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
#define SHA256D_WAYS 4
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
#define SHA256D_MAX_WAYS 4
|
||||
void sha256d_4way(uint32_t *hash, uint32_t *data, const uint32_t *midstate);
|
||||
|
||||
#else
|
||||
|
||||
#define SHA256D_WAYS 1
|
||||
#define SHA256D_MAX_WAYS 1
|
||||
#endif
|
||||
|
||||
static const uint32_t sha256d_hash1[16] = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
|
@ -340,51 +337,65 @@ static inline void sha256d(uint32_t *hash, uint32_t *W,
|
|||
hash[i] += sha256_h[i];
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[SHA256D_WAYS * 64] __attribute__((aligned(128)));
|
||||
uint32_t hash[SHA256D_WAYS * 8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[SHA256D_WAYS * 8] __attribute__((aligned(32)));
|
||||
uint32_t tmp[8];
|
||||
uint32_t data[SHA256D_MAX_WAYS * 64] __attribute__((aligned(128)));
|
||||
uint32_t hash[SHA256D_MAX_WAYS * 8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[SHA256D_MAX_WAYS * 8] __attribute__((aligned(32)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
const int ways = sha256_use_4way() ? 4 : 1;
|
||||
#else
|
||||
const int ways = 1;
|
||||
#endif
|
||||
int i, j;
|
||||
|
||||
for (i = 15; i >= 0; i--)
|
||||
for (j = 0; j < SHA256D_WAYS; j++)
|
||||
data[i * SHA256D_WAYS + j] = pdata[16 + i];
|
||||
for (j = 0; j < ways; j++)
|
||||
data[i * ways + j] = pdata[16 + i];
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, pdata, 0);
|
||||
for (i = 7; i >= 0; i--)
|
||||
for (j = 0; j < SHA256D_WAYS; j++)
|
||||
midstate[i * SHA256D_WAYS + j] = midstate[i];
|
||||
for (j = 0; j < ways; j++)
|
||||
midstate[i * ways + j] = midstate[i];
|
||||
|
||||
#ifdef HAVE_SHA256_4WAY
|
||||
if (ways == 4)
|
||||
do {
|
||||
for (i = 0; i < SHA256D_WAYS; i++)
|
||||
data[SHA256D_WAYS * 3 + i] = ++n;
|
||||
for (i = 0; i < 4; i++)
|
||||
data[4 * 3 + i] = ++n;
|
||||
|
||||
#if SHA256D_WAYS == 4
|
||||
sha256d_4way(hash, data, midstate);
|
||||
#else
|
||||
sha256d(hash, data, midstate);
|
||||
#endif
|
||||
|
||||
for (i = 0; i < SHA256D_WAYS; i++) {
|
||||
if (hash[SHA256D_WAYS * 7 + i] <= Htarg) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
if (hash[4 * 7 + i] <= Htarg) {
|
||||
uint32_t tmp[8];
|
||||
for (j = 0; j < 8; j++)
|
||||
tmp[j] = hash[SHA256D_WAYS * j + i];
|
||||
tmp[j] = hash[4 * j + i];
|
||||
if (fulltest(tmp, ptarget)) {
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = data[SHA256D_WAYS * 3 + i];
|
||||
pdata[19] = data[4 * 3 + i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
else
|
||||
#endif
|
||||
do {
|
||||
data[3 + i] = ++n;
|
||||
sha256d(hash, data, midstate);
|
||||
if (hash[7 + i] <= Htarg) {
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = data[3 + i];
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - pdata[19] + 1;
|
||||
pdata[19] = n;
|
||||
|
|
Loading…
Reference in a new issue