Add support for scrypt(N, 1, 1)

commit be1b725270
parent a988337f52
Author: pooler
Date:   2014-05-25 17:21:36 +02:00

7 changed files with 235 additions and 159 deletions

View file

@@ -100,7 +100,7 @@ struct workio_cmd {
 	} u;
 };
 
-enum sha256_algos {
+enum algos {
 	ALGO_SCRYPT,		/* scrypt(1024,1,1) */
 	ALGO_SHA256D,		/* SHA-256d */
 };
@@ -128,7 +128,8 @@ static int opt_fail_pause = 30;
 int opt_timeout = 0;
 static int opt_scantime = 5;
 static const bool opt_time = true;
-static enum sha256_algos opt_algo = ALGO_SCRYPT;
+static enum algos opt_algo = ALGO_SCRYPT;
+static int opt_scrypt_n = 1024;
 static int opt_n_threads;
 static int num_processors;
 static char *rpc_url;
@@ -170,6 +171,7 @@ Usage: " PROGRAM_NAME " [OPTIONS]\n\
 Options:\n\
   -a, --algo=ALGO       specify the algorithm to use\n\
                           scrypt    scrypt(1024, 1, 1) (default)\n\
+                          scrypt:N  scrypt(N, 1, 1)\n\
                           sha256d   SHA-256d\n\
   -o, --url=URL         URL of mining server\n\
   -O, --userpass=U:P    username:password pair for mining server\n\
@@ -1080,9 +1082,13 @@ static void *miner_thread(void *userdata)
 		affine_to_cpu(thr_id, thr_id % num_processors);
 	}
 
-	if (opt_algo == ALGO_SCRYPT)
-	{
-		scratchbuf = scrypt_buffer_alloc();
+	if (opt_algo == ALGO_SCRYPT) {
+		scratchbuf = scrypt_buffer_alloc(opt_scrypt_n);
+		if (!scratchbuf) {
+			applog(LOG_ERR, "scrypt buffer allocation failed");
+			pthread_mutex_lock(&applog_lock);
+			exit(1);
+		}
 	}
 
 	while (1) {
@@ -1133,8 +1139,16 @@ static void *miner_thread(void *userdata)
 			max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime)
 			      - time(NULL);
 		max64 *= thr_hashrates[thr_id];
-		if (max64 <= 0)
-			max64 = opt_algo == ALGO_SCRYPT ? 0xfffLL : 0x1fffffLL;
+		if (max64 <= 0) {
+			switch (opt_algo) {
+			case ALGO_SCRYPT:
+				max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n;
+				break;
+			case ALGO_SHA256D:
+				max64 = 0x1fffff;
+				break;
+			}
+		}
 		if (work.data[19] + max64 > end_nonce)
 			max_nonce = end_nonce;
 		else
@@ -1147,7 +1161,7 @@ static void *miner_thread(void *userdata)
 		switch (opt_algo) {
 		case ALGO_SCRYPT:
 			rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target,
-			                     max_nonce, &hashes_done);
+			                     max_nonce, &hashes_done, opt_scrypt_n);
 			break;
 
 		case ALGO_SHA256D:
@@ -1471,11 +1485,22 @@ static void parse_arg(int key, char *arg, char *pname)
 	switch(key) {
 	case 'a':
 		for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
-			if (algo_names[i] &&
-			    !strcmp(arg, algo_names[i])) {
-				opt_algo = i;
-				break;
+			v = strlen(algo_names[i]);
+			if (!strncmp(arg, algo_names[i], v)) {
+				if (arg[v] == '\0') {
+					opt_algo = i;
+					break;
+				}
+				if (arg[v] == ':' && i == ALGO_SCRYPT) {
+					char *ep;
+					v = strtol(arg+v+1, &ep, 10);
+					if (*ep || v & (v-1) || v < 2)
+						continue;
+					opt_algo = i;
+					opt_scrypt_n = v;
+					break;
+				}
 			}
 		}
 		if (i == ARRAY_SIZE(algo_names)) {
			fprintf(stderr, "%s: unknown algorithm -- '%s'\n",
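
A note on the new work-size fallback in miner_thread() above: one scrypt(N, 1, 1) hash costs on the order of N scratchpad writes plus N reads, so when no hashrate estimate is available yet the default number of nonces to scan is made inversely proportional to N; for N = 1024 the quotient 0x3fffff / 1024 is 0xfff, the same fallback the old code used for scrypt. A minimal sketch of that heuristic (the function name is illustrative only, not part of the patch):

#include <stdint.h>

/* Fallback nonce budget when no hashrate estimate exists yet.
 * Small N means cheap hashes, so a fixed larger budget is used;
 * otherwise the budget shrinks in proportion to the per-hash cost. */
static int64_t default_scrypt_scan_size(int scrypt_n)
{
	if (scrypt_n < 16)
		return 0x3ffffLL;
	return 0x3fffffLL / scrypt_n;   /* N = 1024  ->  0xfff nonces */
}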

View file

@@ -154,10 +154,10 @@ void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
 extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done);
 
-extern unsigned char *scrypt_buffer_alloc();
+extern unsigned char *scrypt_buffer_alloc(int N);
 extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
 	unsigned char *scratchbuf, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done);
+	uint32_t max_nonce, unsigned long *hashes_done, int N);
 
 struct thr_info {
 	int id;

View file

@@ -72,6 +72,9 @@ Possible values are:
 .B scrypt
 scrypt(1024, 1, 1) (used by Litecoin)
 .TP
+.B scrypt:\fIN\fR
+scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
+.TP
 .B sha256d
 SHA-256d (used by Bitcoin)
 .RE
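
The constraint stated in the man page entry above (N must be a power of 2 greater than 1) is what the parse_arg() change enforces: `v & (v - 1)` is non-zero exactly when v is not a power of two. A standalone sketch of the same validation, with a hypothetical helper name:

#include <stdlib.h>
#include <string.h>

/* Parse "scrypt" or "scrypt:N".  Returns N on success (1024 for plain
 * "scrypt", matching the default), or -1 if the spec is malformed or
 * N is not a power of two >= 2. */
static long parse_scrypt_n(const char *arg)
{
	char *ep;
	long n;

	if (strncmp(arg, "scrypt", 6))
		return -1;
	if (arg[6] == '\0')
		return 1024;
	if (arg[6] != ':')
		return -1;
	n = strtol(arg + 7, &ep, 10);
	if (*ep || n < 2 || (n & (n - 1)))
		return -1;
	return n;
}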

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 pooler@litecoinpool.org
+ * Copyright 2012, 2014 pooler@litecoinpool.org
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -472,14 +472,16 @@ scrypt_core:
 _scrypt_core:
 	stmfd sp!, {r4-r11, lr}
 	mov r12, sp
-	sub sp, sp, #21*4
+	sub sp, sp, #22*4
 	bic sp, sp, #63
 	str r12, [sp, #20*4]
+	str r2, [sp, #21*4]
 
 	scrypt_shuffle
 
+	ldr r2, [sp, #21*4]
 	str r0, [sp, #16*4]
-	add r12, r1, #1024*32*4
+	add r12, r1, r2, lsl #7
 	str r12, [sp, #18*4]
 scrypt_core_loop1:
 	add lr, r0, #16*4
@@ -517,12 +519,14 @@ scrypt_core_loop1:
 	cmp r1, r12
 	bne scrypt_core_loop1
 
+	ldr r12, [sp, #21*4]
 	ldr r4, [r0, #16*4]
-	sub r1, r1, #1024*32*4
+	sub r2, r12, #1
+	str r2, [sp, #21*4]
+	sub r1, r1, r12, lsl #7
 	str r1, [sp, #17*4]
-	mov r4, r4, lsl #32-10
-	mov r12, #1024
-	add r1, r1, r4, lsr #32-10-7
+	and r4, r4, r2
+	add r1, r1, r4, lsl #7
 scrypt_core_loop2:
 	add r2, r0, #16*4
 	add r3, r1, #16*4
@@ -553,9 +557,10 @@ scrypt_core_loop2:
 	mov r1, sp
 	ldr r3, [sp, #17*4]
 	add r0, r0, #16*4
+	ldr r2, [sp, #21*4]
 	scrypt_core_macro3_x4
-	mov r4, r4, lsl #32-10
-	add r3, r3, r4, lsr #32-10-7
+	and r4, r4, r2
+	add r3, r3, r4, lsl #7
 	str r3, [sp, #19*4]
 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
 	pld [r3, #16*4]
@@ -794,10 +799,11 @@ _scrypt_core_3way:
 	mov r12, sp
 	sub sp, sp, #24*16
 	bic sp, sp, #63
-	str r12, [sp, #4*16+3*4]
+	str r2, [sp, #4*16+3*4]
+	str r12, [sp, #4*16+4*4]
 
-	mov r2, r0
-	vldmia r2!, {q8-q15}
+	mov r3, r0
+	vldmia r3!, {q8-q15}
 	vmov.u64 q0, #0xffffffff
 	vmov.u32 q1, q8
 	vmov.u32 q2, q12
@@ -809,7 +815,7 @@ _scrypt_core_3way:
 	vbif.u32 q14, q15, q0
 	vbif.u32 q11, q1, q0
 	vbif.u32 q15, q2, q0
-	vldmia r2!, {q0-q7}
+	vldmia r3!, {q0-q7}
 	vswp.u32 d17, d21
 	vswp.u32 d25, d29
 	vswp.u32 d18, d22
@@ -826,7 +832,7 @@ _scrypt_core_3way:
 	vbif.u32 q6, q7, q8
 	vbif.u32 q3, q9, q8
 	vbif.u32 q7, q10, q8
-	vldmia r2, {q8-q15}
+	vldmia r3, {q8-q15}
 	vswp.u32 d1, d5
 	vswp.u32 d9, d13
 	vswp.u32 d2, d6
@@ -852,7 +858,7 @@ _scrypt_core_3way:
 	add lr, sp, #128
 	vldmia lr, {q0-q7}
 
-	add r2, r1, #1024*32*4
+	add r2, r1, r2, lsl #7
 	str r0, [sp, #4*16+0*4]
 	str r2, [sp, #4*16+2*4]
 scrypt_core_3way_loop1:
@@ -863,12 +869,13 @@ scrypt_core_3way_loop1:
 	scrypt_core_macro1a_x4
 	scrypt_core_macro1a_x4
 	scrypt_core_macro1a_x4
+	ldr r2, [sp, #4*16+3*4]
 	scrypt_core_macro1a_x4
 	sub r1, r1, #4*16
 
-	add r1, r1, #1024*32*4
+	add r1, r1, r2, lsl #7
 	vstmia r1, {q0-q7}
-	add r3, r1, #1024*32*4
+	add r3, r1, r2, lsl #7
 	vstmia r3, {q8-q15}
 
 	add lr, sp, #128
@@ -957,20 +964,22 @@ scrypt_core_3way_loop1:
 	cmp r1, r2
 	bne scrypt_core_3way_loop1
 
+	ldr r2, [sp, #4*16+3*4]
 	add r5, sp, #256+4*16
 	vstmia r5, {q12-q15}
 
-	sub r1, r1, #1024*32*4
+	sub r1, r1, r2, lsl #7
 	str r1, [sp, #4*16+1*4]
-	mov r2, #1024
 scrypt_core_3way_loop2:
 	str r2, [sp, #4*16+2*4]
 
 	ldr r0, [sp, #4*16+0*4]
 	ldr r1, [sp, #4*16+1*4]
+	ldr r2, [sp, #4*16+3*4]
 	ldr r4, [r0, #16*4]
-	mov r4, r4, lsl #32-10
-	add r1, r1, r4, lsr #32-10-7
+	sub r2, r2, #1
+	and r4, r4, r2
+	add r1, r1, r4, lsl #7
 	add r2, r0, #16*4
 	add r3, r1, #16*4
 	mov r12, sp
@@ -980,11 +989,13 @@ scrypt_core_3way_loop2:
 	scrypt_core_macro1b_x4
 
 	ldr r1, [sp, #4*16+1*4]
-	add r1, r1, #1024*32*4
-	add r3, r1, #1024*32*4
+	ldr r2, [sp, #4*16+3*4]
+	add r1, r1, r2, lsl #7
+	add r3, r1, r2, lsl #7
+	sub r2, r2, #1
 	vmov r6, r7, d8
-	mov r6, r6, lsl #32-10
-	add r6, r1, r6, lsr #32-10-7
+	and r6, r6, r2
+	add r6, r1, r6, lsl #7
 	vmov r7, r8, d24
 	add lr, sp, #128
 	vldmia lr, {q0-q3}
@@ -993,8 +1004,8 @@ scrypt_core_3way_loop2:
 	pld [r6, #16*4]
 	pld [r6, #24*4]
 	vldmia r6, {q8-q15}
-	mov r7, r7, lsl #32-10
-	add r7, r3, r7, lsr #32-10-7
+	and r7, r7, r2
+	add r7, r3, r7, lsl #7
 	veor.u32 q8, q8, q0
 	veor.u32 q9, q9, q1
 	veor.u32 q10, q10, q2
@@ -1079,11 +1090,13 @@ scrypt_core_3way_loop2:
 
 	ldr r0, [sp, #4*16+0*4]
 	ldr r3, [sp, #4*16+1*4]
+	ldr r2, [sp, #4*16+3*4]
 	mov r1, sp
 	add r0, r0, #16*4
+	sub r2, r2, #1
 	scrypt_core_macro3_x4
-	mov r4, r4, lsl #32-10
-	add r3, r3, r4, lsr #32-10-7
+	and r4, r4, r2
+	add r3, r3, r4, lsl #7
 	pld [r3, #16*4]
 	pld [r3]
 	pld [r3, #24*4]
@@ -1164,7 +1177,7 @@ scrypt_core_3way_loop2:
 	vswp.u32 d26, d30
 	vstmia r0, {q8-q15}
 
-	ldr sp, [sp, #4*16+3*4]
+	ldr sp, [sp, #4*16+4*4]
 	vpop {q4-q7}
 	ldmfd sp!, {r4-r11, pc}
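
Throughout the ARM changes above, the old hard-coded constants encode N = 1024: `#1024*32*4` is the scratchpad size in bytes (1024 elements of 32 words), and the `lsl #32-10` / `lsr #32-10-7` pair reduced X[16] modulo 1024 and scaled it to a 128-byte element offset in one step. With N passed in as the extra argument, the size becomes N << 7 and the reduction becomes an AND with N - 1, which is why N has to be a power of two. A C model of the new address arithmetic, using hypothetical names:

#include <stddef.h>
#include <stdint.h>

/* Byte offset of the scratchpad element selected by X[16] in the
 * single-way core: each of the N elements is 32 uint32_t = 128 bytes,
 * so the whole scratchpad spans N << 7 bytes. */
static size_t scratch_offset(uint32_t x16, uint32_t n)
{
	uint32_t j = x16 & (n - 1);   /* requires n to be a power of two */
	return (size_t)j << 7;        /* j * 128 bytes */
}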

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2013 pooler@litecoinpool.org
+ * Copyright 2011-2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -113,38 +113,38 @@ scrypt_best_throughput_exit:
 
 .macro scrypt_shuffle src, so, dest, do
-	movl \so+60(\src), %r8d
-	movl \so+44(\src), %r9d
-	movl \so+28(\src), %r10d
-	movl \so+12(\src), %r11d
-	movl %r8d, \do+12(\dest)
-	movl %r9d, \do+28(\dest)
-	movl %r10d, \do+44(\dest)
-	movl %r11d, \do+60(\dest)
+	movl \so+60(\src), %eax
+	movl \so+44(\src), %ebx
+	movl \so+28(\src), %ecx
+	movl \so+12(\src), %edx
+	movl %eax, \do+12(\dest)
+	movl %ebx, \do+28(\dest)
+	movl %ecx, \do+44(\dest)
+	movl %edx, \do+60(\dest)
 
-	movl \so+40(\src), %r8d
-	movl \so+8(\src), %r9d
-	movl \so+48(\src), %r10d
-	movl \so+16(\src), %r11d
-	movl %r8d, \do+8(\dest)
-	movl %r9d, \do+40(\dest)
-	movl %r10d, \do+16(\dest)
-	movl %r11d, \do+48(\dest)
+	movl \so+40(\src), %eax
+	movl \so+8(\src), %ebx
+	movl \so+48(\src), %ecx
+	movl \so+16(\src), %edx
+	movl %eax, \do+8(\dest)
+	movl %ebx, \do+40(\dest)
+	movl %ecx, \do+16(\dest)
+	movl %edx, \do+48(\dest)
 
-	movl \so+20(\src), %r8d
-	movl \so+4(\src), %r9d
-	movl \so+52(\src), %r10d
-	movl \so+36(\src), %r11d
-	movl %r8d, \do+4(\dest)
-	movl %r9d, \do+20(\dest)
-	movl %r10d, \do+36(\dest)
-	movl %r11d, \do+52(\dest)
+	movl \so+20(\src), %eax
+	movl \so+4(\src), %ebx
+	movl \so+52(\src), %ecx
+	movl \so+36(\src), %edx
+	movl %eax, \do+4(\dest)
+	movl %ebx, \do+20(\dest)
+	movl %ecx, \do+36(\dest)
+	movl %edx, \do+52(\dest)
 
-	movl \so+0(\src), %r8d
-	movl \so+24(\src), %r9d
-	movl \so+32(\src), %r10d
-	movl \so+56(\src), %r11d
-	movl %r8d, \do+0(\dest)
-	movl %r9d, \do+24(\dest)
-	movl %r10d, \do+32(\dest)
-	movl %r11d, \do+56(\dest)
+	movl \so+0(\src), %eax
+	movl \so+24(\src), %ebx
+	movl \so+32(\src), %ecx
+	movl \so+56(\src), %edx
+	movl %eax, \do+0(\dest)
+	movl %ebx, \do+24(\dest)
+	movl %ecx, \do+32(\dest)
+	movl %edx, \do+56(\dest)
 .endm
@@ -384,6 +384,8 @@ _scrypt_core:
 	pushq %rsi
 	movq %rcx, %rdi
 	movq %rdx, %rsi
+#else
+	movq %rdx, %r8
 #endif
 
 .macro scrypt_core_cleanup
@@ -432,7 +434,10 @@ scrypt_core_gen:
 	movdqa 96(%rdi), %xmm14
 	movdqa 112(%rdi), %xmm15
 
-	leaq 131072(%rsi), %rcx
+	movq %r8, %rcx
+	shlq $7, %rcx
+	addq %rsi, %rcx
+	movq %r8, 96(%rsp)
 	movq %rdi, 104(%rsp)
 	movq %rsi, 112(%rsp)
 	movq %rcx, 120(%rsp)
@@ -481,11 +486,14 @@ scrypt_core_gen_loop1:
 	cmpq %rcx, %rsi
 	jne scrypt_core_gen_loop1
 
-	movq $1024, %rcx
+	movq 96(%rsp), %r8
+	movq %r8, %rcx
+	subl $1, %r8d
+	movq %r8, 96(%rsp)
 	movd %xmm12, %edx
 scrypt_core_gen_loop2:
 	movq 112(%rsp), %rsi
-	andl $1023, %edx
+	andl %r8d, %edx
 	shll $7, %edx
 	addq %rsi, %rdx
 	movdqa 0(%rdx), %xmm0
@@ -529,6 +537,7 @@ scrypt_core_gen_loop2:
 	movdqa %xmm14, 32(%rsp)
 	movdqa %xmm15, 48(%rsp)
 	call salsa8_core_gen
+	movq 96(%rsp), %r8
 	movq 128(%rsp), %rcx
 	addl 0(%rsp), %edx
 	paddd %xmm0, %xmm12
@@ -691,7 +700,9 @@ scrypt_core_xmm:
 	punpckhqdq %xmm0, %xmm13
 
 	movq %rsi, %rdx
-	leaq 131072(%rsi), %rcx
+	movq %r8, %rcx
+	shlq $7, %rcx
+	addq %rsi, %rcx
 scrypt_core_xmm_loop1:
 	pxor %xmm12, %xmm8
 	pxor %xmm13, %xmm9
@@ -734,10 +745,11 @@ scrypt_core_xmm_loop1:
 	cmpq %rcx, %rdx
 	jne scrypt_core_xmm_loop1
 
-	movq $1024, %rcx
+	movq %r8, %rcx
+	subl $1, %r8d
 scrypt_core_xmm_loop2:
 	movd %xmm12, %edx
-	andl $1023, %edx
+	andl %r8d, %edx
 	shll $7, %edx
 	pxor 0(%rsi, %rdx), %xmm8
 	pxor 16(%rsi, %rdx), %xmm9
@@ -1019,6 +1031,8 @@ _scrypt_core_3way:
 	pushq %rsi
 	movq %rcx, %rdi
 	movq %rdx, %rsi
+#else
+	movq %rdx, %r8
 #endif
 	subq $392, %rsp
@@ -1088,7 +1102,9 @@ scrypt_core_3way_avx:
 	movdqa 256+112(%rsp), %xmm15
 
 	movq %rsi, %rbx
-	leaq 3*131072(%rsi), %rax
+	leaq (%r8, %r8, 2), %rax
+	shlq $7, %rax
+	addq %rsi, %rax
 scrypt_core_3way_avx_loop1:
 	movdqa %xmm0, 64(%rbx)
 	movdqa %xmm1, 80(%rbx)
@@ -1208,7 +1224,8 @@ scrypt_core_3way_avx_loop1:
 	movdqa %xmm14, 256+96(%rsp)
 	movdqa %xmm15, 256+112(%rsp)
 
-	movq $1024, %rcx
+	movq %r8, %rcx
+	subq $1, %r8
 scrypt_core_3way_avx_loop2:
 	movd %xmm0, %ebp
 	movd %xmm8, %ebx
@@ -1225,13 +1242,13 @@ scrypt_core_3way_avx_loop2:
 	pxor 256+16(%rsp), %xmm13
 	pxor 256+32(%rsp), %xmm14
 	pxor 256+48(%rsp), %xmm15
-	andl $1023, %ebp
+	andl %r8d, %ebp
 	leaq (%rbp, %rbp, 2), %rbp
 	shll $7, %ebp
-	andl $1023, %ebx
+	andl %r8d, %ebx
 	leaq 1(%rbx, %rbx, 2), %rbx
 	shll $7, %ebx
-	andl $1023, %eax
+	andl %r8d, %eax
 	leaq 2(%rax, %rax, 2), %rax
 	shll $7, %eax
 	pxor 0(%rsi, %rbp), %xmm0
@@ -1491,7 +1508,9 @@ scrypt_core_3way_xop:
 	movdqa 256+112(%rsp), %xmm15
 
 	movq %rsi, %rbx
-	leaq 3*131072(%rsi), %rax
+	leaq (%r8, %r8, 2), %rax
+	shlq $7, %rax
+	addq %rsi, %rax
 scrypt_core_3way_xop_loop1:
 	movdqa %xmm0, 64(%rbx)
 	movdqa %xmm1, 80(%rbx)
@@ -1611,7 +1630,8 @@ scrypt_core_3way_xop_loop1:
 	movdqa %xmm14, 256+96(%rsp)
 	movdqa %xmm15, 256+112(%rsp)
 
-	movq $1024, %rcx
+	movq %r8, %rcx
+	subq $1, %r8
 scrypt_core_3way_xop_loop2:
 	movd %xmm0, %ebp
 	movd %xmm8, %ebx
@@ -1628,13 +1648,13 @@ scrypt_core_3way_xop_loop2:
 	pxor 256+16(%rsp), %xmm13
 	pxor 256+32(%rsp), %xmm14
 	pxor 256+48(%rsp), %xmm15
-	andl $1023, %ebp
+	andl %r8d, %ebp
 	leaq (%rbp, %rbp, 2), %rbp
 	shll $7, %ebp
-	andl $1023, %ebx
+	andl %r8d, %ebx
 	leaq 1(%rbx, %rbx, 2), %rbx
 	shll $7, %ebx
-	andl $1023, %eax
+	andl %r8d, %eax
 	leaq 2(%rax, %rax, 2), %rax
 	shll $7, %eax
 	pxor 0(%rsi, %rbp), %xmm0
@@ -1991,7 +2011,9 @@ scrypt_core_3way_xmm:
 	movdqa 256+112(%rsp), %xmm15
 
 	movq %rsi, %rbx
-	leaq 3*131072(%rsi), %rax
+	leaq (%r8, %r8, 2), %rax
+	shlq $7, %rax
+	addq %rsi, %rax
 scrypt_core_3way_xmm_loop1:
 	movdqa %xmm0, 64(%rbx)
 	movdqa %xmm1, 80(%rbx)
@@ -2111,7 +2133,8 @@ scrypt_core_3way_xmm_loop1:
 	movdqa %xmm14, 256+96(%rsp)
 	movdqa %xmm15, 256+112(%rsp)
 
-	movq $1024, %rcx
+	movq %r8, %rcx
+	subq $1, %r8
 scrypt_core_3way_xmm_loop2:
 	movd %xmm0, %ebp
 	movd %xmm8, %ebx
@@ -2128,13 +2151,13 @@ scrypt_core_3way_xmm_loop2:
 	pxor 256+16(%rsp), %xmm13
 	pxor 256+32(%rsp), %xmm14
 	pxor 256+48(%rsp), %xmm15
-	andl $1023, %ebp
+	andl %r8d, %ebp
 	leaq (%rbp, %rbp, 2), %rbp
 	shll $7, %ebp
-	andl $1023, %ebx
+	andl %r8d, %ebx
 	leaq 1(%rbx, %rbx, 2), %rbx
 	shll $7, %ebx
-	andl $1023, %eax
+	andl %r8d, %eax
 	leaq 2(%rax, %rax, 2), %rax
 	shll $7, %eax
 	pxor 0(%rsi, %rbp), %xmm0
@@ -2445,6 +2468,8 @@ _scrypt_core_6way:
 	pushq %rsi
 	movq %rcx, %rdi
 	movq %rdx, %rsi
+#else
+	movq %rdx, %r8
 #endif
 	movq %rsp, %rdx
 	subq $768, %rsp
@@ -2539,7 +2564,9 @@ scrypt_core_6way_avx2:
 	vmovdqa 2*256+7*32(%rsp), %ymm15
 
 	movq %rsi, %rbx
-	leaq 6*131072(%rsi), %rax
+	leaq (%r8, %r8, 2), %rax
+	shlq $8, %rax
+	addq %rsi, %rax
 scrypt_core_6way_avx2_loop1:
 	vmovdqa %ymm0, 0*256+4*32(%rbx)
 	vmovdqa %ymm1, 0*256+5*32(%rbx)
@@ -2659,7 +2686,8 @@ scrypt_core_6way_avx2_loop1:
 	vmovdqa %ymm14, 2*256+6*32(%rsp)
 	vmovdqa %ymm15, 2*256+7*32(%rsp)
 
-	movq $1024, %rcx
+	movq %r8, %rcx
+	leaq -1(%r8), %r11
 scrypt_core_6way_avx2_loop2:
 	vmovd %xmm0, %ebp
 	vmovd %xmm8, %ebx
@@ -2682,22 +2710,22 @@ scrypt_core_6way_avx2_loop2:
 	vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
 	vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
 	vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
-	andl $1023, %ebp
+	andl %r11d, %ebp
 	leaq 0(%rbp, %rbp, 2), %rbp
 	shll $8, %ebp
-	andl $1023, %ebx
+	andl %r11d, %ebx
 	leaq 1(%rbx, %rbx, 2), %rbx
 	shll $8, %ebx
-	andl $1023, %eax
+	andl %r11d, %eax
 	leaq 2(%rax, %rax, 2), %rax
 	shll $8, %eax
-	andl $1023, %r8d
+	andl %r11d, %r8d
 	leaq 0(%r8, %r8, 2), %r8
 	shll $8, %r8d
-	andl $1023, %r9d
+	andl %r11d, %r9d
 	leaq 1(%r9, %r9, 2), %r9
 	shll $8, %r9d
-	andl $1023, %r10d
+	andl %r11d, %r10d
 	leaq 2(%r10, %r10, 2), %r10
 	shll $8, %r10d
 	vmovdqa 0*32(%rsi, %rbp), %xmm4
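
In the x86-64 changes above the same substitution happens for the multi-way cores, which keep the lanes' scratchpads interleaved: after masking with N - 1, element j of a given lane is reached at byte offset (3*j + lane) * 128 in the 3-way kernels, which is what the `leaq lane(%reg, %reg, 2)` / `shll $7` pairs compute, and the loop bound changes from the constant 3*131072 to 3 * N * 128. The AVX2 6-way kernel appears to pack two lanes per 256-byte record, hence the `shll $8` and the 3 * N * 256 bound. A C model of the 3-way indexing, with hypothetical names:

#include <stddef.h>
#include <stdint.h>

/* Byte offset of element j of one lane in the interleaved 3-way
 * scratchpad: lanes are stored back to back in 128-byte records,
 * so the buffer holds 3 * N * 128 bytes in total. */
static size_t scratch3_offset(uint32_t j, unsigned lane, uint32_t n)
{
	j &= n - 1;                        /* replaces the old "andl $1023" */
	return ((size_t)3 * j + lane) << 7;
}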

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2012 pooler@litecoinpool.org
+ * Copyright 2011-2012, 2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -415,6 +415,7 @@ _scrypt_core:
 scrypt_core_gen:
 	movl 20(%esp), %edi
 	movl 24(%esp), %esi
+	movl 28(%esp), %ecx
 	subl $72, %esp
 
 .macro scrypt_core_macro1a p, q
@@ -453,7 +454,8 @@ scrypt_core_gen:
 	movl %eax, \q(%edi)
 .endm
 
-	leal 131072(%esi), %ecx
+	shll $7, %ecx
+	addl %esi, %ecx
 scrypt_core_gen_loop1:
 	movl %esi, 64(%esp)
 	movl %ecx, 68(%esp)
@@ -522,12 +524,15 @@ scrypt_core_gen_loop1:
 	jne scrypt_core_gen_loop1
 
 	movl 96(%esp), %esi
-	movl $1024, %ecx
+	movl 100(%esp), %ecx
+	movl %ecx, %eax
+	subl $1, %eax
+	movl %eax, 100(%esp)
 scrypt_core_gen_loop2:
 	movl %ecx, 68(%esp)
 
 	movl 64(%edi), %edx
-	andl $1023, %edx
+	andl 100(%esp), %edx
 	shll $7, %edx
 	scrypt_core_macro1b 0, 64
@@ -694,7 +699,9 @@ scrypt_core_sse2:
 	movdqa 112(%esp), %xmm7
 
 	movl %esi, %edx
-	leal 131072(%esi), %ecx
+	movl 28(%ebp), %ecx
+	shll $7, %ecx
+	addl %esi, %ecx
 scrypt_core_sse2_loop1:
 	movdqa 0(%esp), %xmm0
 	movdqa 16(%esp), %xmm1
@@ -748,14 +755,16 @@ scrypt_core_sse2_loop1:
 	movdqa 64(%esp), %xmm4
 	movdqa 80(%esp), %xmm5
 
-	movl $1024, %ecx
+	movl 28(%ebp), %ecx
+	movl %ecx, %eax
+	subl $1, %eax
 scrypt_core_sse2_loop2:
 	movd %xmm4, %edx
 	movdqa 0(%esp), %xmm0
 	movdqa 16(%esp), %xmm1
 	movdqa 32(%esp), %xmm2
 	movdqa 48(%esp), %xmm3
-	andl $1023, %edx
+	andl %eax, %edx
 	shll $7, %edx
 	pxor 0(%esi, %edx), %xmm0
 	pxor 16(%esi, %edx), %xmm1

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -383,30 +383,30 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
 #define SCRYPT_MAX_WAYS 12
 #define HAVE_SCRYPT_3WAY 1
 int scrypt_best_throughput();
-void scrypt_core(uint32_t *X, uint32_t *V);
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #if defined(USE_AVX2)
 #undef SCRYPT_MAX_WAYS
 #define SCRYPT_MAX_WAYS 24
 #define HAVE_SCRYPT_6WAY 1
-void scrypt_core_6way(uint32_t *X, uint32_t *V);
+void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #elif defined(USE_ASM) && defined(__i386__)
 
 #define SCRYPT_MAX_WAYS 4
 #define scrypt_best_throughput() 1
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 
 #elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
 
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 #if defined(__ARM_NEON__)
 #undef HAVE_SHA256_4WAY
 #define SCRYPT_MAX_WAYS 3
 #define HAVE_SCRYPT_3WAY 1
 #define scrypt_best_throughput() 3
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #else
@@ -479,17 +479,17 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
 	B[15] += x15;
 }
 
-static inline void scrypt_core(uint32_t *X, uint32_t *V)
+static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
 {
 	uint32_t i, j, k;
 
-	for (i = 0; i < 1024; i++) {
+	for (i = 0; i < N; i++) {
 		memcpy(&V[i * 32], X, 128);
 		xor_salsa8(&X[0], &X[16]);
 		xor_salsa8(&X[16], &X[0]);
 	}
-	for (i = 0; i < 1024; i++) {
-		j = 32 * (X[16] & 1023);
+	for (i = 0; i < N; i++) {
+		j = 32 * (X[16] & (N - 1));
 		for (k = 0; k < 32; k++)
 			X[k] ^= V[j + k];
 		xor_salsa8(&X[0], &X[16]);
@@ -504,15 +504,13 @@ static inline void scrypt_core(uint32_t *X, uint32_t *V)
 #define scrypt_best_throughput() 1
 #endif
 
-#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)
-
-unsigned char *scrypt_buffer_alloc()
+unsigned char *scrypt_buffer_alloc(int N)
 {
-	return malloc(SCRYPT_BUFFER_SIZE);
+	return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
 }
 
 static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
-	uint32_t *midstate, unsigned char *scratchpad)
+	uint32_t *midstate, unsigned char *scratchpad, int N)
 {
 	uint32_t tstate[8], ostate[8];
 	uint32_t X[32];
@@ -524,14 +522,14 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
 	HMAC_SHA256_80_init(input, tstate, ostate);
 	PBKDF2_SHA256_80_128(tstate, ostate, input, X);
 
-	scrypt_core(X, V);
+	scrypt_core(X, V, N);
 
 	PBKDF2_SHA256_128_32(tstate, ostate, X, output);
 }
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_4way(const uint32_t *input,
-	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
 	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
 	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
@@ -553,10 +551,10 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
 	for (i = 0; i < 32; i++)
 		for (k = 0; k < 4; k++)
 			X[k * 32 + i] = W[4 * i + k];
-	scrypt_core(X + 0 * 32, V);
-	scrypt_core(X + 1 * 32, V);
-	scrypt_core(X + 2 * 32, V);
-	scrypt_core(X + 3 * 32, V);
+	scrypt_core(X + 0 * 32, V, N);
+	scrypt_core(X + 1 * 32, V, N);
+	scrypt_core(X + 2 * 32, V, N);
+	scrypt_core(X + 3 * 32, V, N);
 	for (i = 0; i < 32; i++)
 		for (k = 0; k < 4; k++)
 			W[4 * i + k] = X[k * 32 + i];
@@ -570,7 +568,7 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
 
 #ifdef HAVE_SCRYPT_3WAY
 static void scrypt_1024_1_1_256_3way(const uint32_t *input,
-	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
 	uint32_t tstate[3 * 8], ostate[3 * 8];
 	uint32_t X[3 * 32] __attribute__((aligned(64)));
@@ -588,7 +586,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
 	PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
 	PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
 
-	scrypt_core_3way(X, V);
+	scrypt_core_3way(X, V, N);
 
 	PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
 	PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
@@ -597,7 +595,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_12way(const uint32_t *input,
-	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
 	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
 	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
@@ -626,10 +624,10 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
 		for (i = 0; i < 32; i++)
 			for (k = 0; k < 4; k++)
 				X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
-	scrypt_core_3way(X + 0 * 96, V);
-	scrypt_core_3way(X + 1 * 96, V);
-	scrypt_core_3way(X + 2 * 96, V);
-	scrypt_core_3way(X + 3 * 96, V);
+	scrypt_core_3way(X + 0 * 96, V, N);
+	scrypt_core_3way(X + 1 * 96, V, N);
+	scrypt_core_3way(X + 2 * 96, V, N);
+	scrypt_core_3way(X + 3 * 96, V, N);
 	for (j = 0; j < 3; j++)
 		for (i = 0; i < 32; i++)
 			for (k = 0; k < 4; k++)
@@ -648,7 +646,7 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
 
 #ifdef HAVE_SCRYPT_6WAY
 static void scrypt_1024_1_1_256_24way(const uint32_t *input,
-	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
 	uint32_t tstate[24 * 8] __attribute__((aligned(128)));
 	uint32_t ostate[24 * 8] __attribute__((aligned(128)));
@@ -677,10 +675,10 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
 		for (i = 0; i < 32; i++)
 			for (k = 0; k < 8; k++)
 				X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
-	scrypt_core_6way(X + 0 * 32, V);
-	scrypt_core_6way(X + 6 * 32, V);
-	scrypt_core_6way(X + 12 * 32, V);
-	scrypt_core_6way(X + 18 * 32, V);
+	scrypt_core_6way(X + 0 * 32, V, N);
+	scrypt_core_6way(X + 6 * 32, V, N);
+	scrypt_core_6way(X + 12 * 32, V, N);
+	scrypt_core_6way(X + 18 * 32, V, N);
 	for (j = 0; j < 3; j++)
 		for (i = 0; i < 32; i++)
 			for (k = 0; k < 8; k++)
@@ -697,7 +695,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
 
 int scanhash_scrypt(int thr_id, uint32_t *pdata,
 	unsigned char *scratchbuf, const uint32_t *ptarget,
-	uint32_t max_nonce, unsigned long *hashes_done)
+	uint32_t max_nonce, unsigned long *hashes_done, int N)
 {
 	uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
 	uint32_t midstate[8];
@@ -723,25 +721,25 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
 #if defined(HAVE_SHA256_4WAY)
 		if (throughput == 4)
-			scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
+			scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
 		else
 #endif
 #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
 		if (throughput == 12)
-			scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
+			scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
 		else
 #endif
 #if defined(HAVE_SCRYPT_6WAY)
 		if (throughput == 24)
-			scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
+			scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
 		else
 #endif
 #if defined(HAVE_SCRYPT_3WAY)
 		if (throughput == 3)
-			scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
+			scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
 		else
 #endif
-			scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
+			scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
 
 		for (i = 0; i < throughput; i++) {
			if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
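
Finally, scrypt_buffer_alloc() now sizes the scratchpad from N instead of the removed SCRYPT_BUFFER_SIZE constant: each way needs N 128-byte elements, and the extra 63 bytes leave room for the callers to round the pointer up to a 64-byte boundary; for N = 1024 this is exactly the old SCRYPT_MAX_WAYS * 131072 + 63. A small sketch of that sizing and alignment, with illustrative names only:

#include <stdint.h>
#include <stdlib.h>

/* Allocate a scratchpad for `ways` parallel lanes of scrypt(N, 1, 1)
 * and also return a 64-byte-aligned view of it, mirroring how the
 * callers round the malloc()ed pointer before use. */
static unsigned char *alloc_scratchpad(int n, int ways, uint32_t **aligned)
{
	unsigned char *buf = malloc((size_t)n * ways * 128 + 63);
	if (buf && aligned)
		*aligned = (uint32_t *)(((uintptr_t)buf + 63) & ~(uintptr_t)63);
	return buf;
}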