Add support for scrypt(N, 1, 1)

pooler 2014-05-25 17:21:36 +02:00
parent a988337f52
commit be1b725270
7 changed files with 235 additions and 159 deletions

View file

@@ -100,7 +100,7 @@ struct workio_cmd {
     } u;
 };
 
-enum sha256_algos {
+enum algos {
     ALGO_SCRYPT,        /* scrypt(1024,1,1) */
     ALGO_SHA256D,       /* SHA-256d */
 };
@@ -128,7 +128,8 @@ static int opt_fail_pause = 30;
 int opt_timeout = 0;
 static int opt_scantime = 5;
 static const bool opt_time = true;
-static enum sha256_algos opt_algo = ALGO_SCRYPT;
+static enum algos opt_algo = ALGO_SCRYPT;
+static int opt_scrypt_n = 1024;
 static int opt_n_threads;
 static int num_processors;
 static char *rpc_url;
@@ -170,6 +171,7 @@ Usage: " PROGRAM_NAME " [OPTIONS]\n\
 Options:\n\
   -a, --algo=ALGO       specify the algorithm to use\n\
                           scrypt    scrypt(1024, 1, 1) (default)\n\
+                          scrypt:N  scrypt(N, 1, 1)\n\
                           sha256d   SHA-256d\n\
   -o, --url=URL         URL of mining server\n\
   -O, --userpass=U:P    username:password pair for mining server\n\
@@ -1080,9 +1082,13 @@ static void *miner_thread(void *userdata)
         affine_to_cpu(thr_id, thr_id % num_processors);
     }
 
-    if (opt_algo == ALGO_SCRYPT)
-    {
-        scratchbuf = scrypt_buffer_alloc();
+    if (opt_algo == ALGO_SCRYPT) {
+        scratchbuf = scrypt_buffer_alloc(opt_scrypt_n);
+        if (!scratchbuf) {
+            applog(LOG_ERR, "scrypt buffer allocation failed");
+            pthread_mutex_lock(&applog_lock);
+            exit(1);
+        }
     }
 
     while (1) {
@@ -1133,8 +1139,16 @@ static void *miner_thread(void *userdata)
         max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime)
               - time(NULL);
         max64 *= thr_hashrates[thr_id];
-        if (max64 <= 0)
-            max64 = opt_algo == ALGO_SCRYPT ? 0xfffLL : 0x1fffffLL;
+        if (max64 <= 0) {
+            switch (opt_algo) {
+            case ALGO_SCRYPT:
+                max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n;
+                break;
+            case ALGO_SHA256D:
+                max64 = 0x1fffff;
+                break;
+            }
+        }
         if (work.data[19] + max64 > end_nonce)
             max_nonce = end_nonce;
         else
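Note on the new default scan size: at the standard parameters the formula reduces to the old constant, since 0x3fffff / 1024 = 0xfff, which is exactly the value the removed line hard-coded for scrypt(1024, 1, 1). A minimal C rendering of the heuristic (the helper name is illustrative; the rationale, that per-hash cost grows roughly linearly in N, is an assumption):

    /* Default number of nonces to scan per work unit when no hashrate
     * estimate is available yet.  At N = 1024 this yields 0xfff, the
     * previous hard-coded scrypt default; very small N is capped. */
    static long long default_scan_size(int scrypt_n)
    {
        return scrypt_n < 16 ? 0x3ffffLL : 0x3fffffLL / scrypt_n;
    }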
@@ -1147,7 +1161,7 @@ static void *miner_thread(void *userdata)
         switch (opt_algo) {
         case ALGO_SCRYPT:
             rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target,
-                                 max_nonce, &hashes_done);
+                                 max_nonce, &hashes_done, opt_scrypt_n);
             break;
 
         case ALGO_SHA256D:
@@ -1471,10 +1485,21 @@ static void parse_arg(int key, char *arg, char *pname)
     switch(key) {
     case 'a':
         for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
-            if (algo_names[i] &&
-                !strcmp(arg, algo_names[i])) {
-                opt_algo = i;
-                break;
+            v = strlen(algo_names[i]);
+            if (!strncmp(arg, algo_names[i], v)) {
+                if (arg[v] == '\0') {
+                    opt_algo = i;
+                    break;
+                }
+                if (arg[v] == ':' && i == ALGO_SCRYPT) {
+                    char *ep;
+                    v = strtol(arg+v+1, &ep, 10);
+                    if (*ep || v & (v-1) || v < 2)
+                        continue;
+                    opt_algo = i;
+                    opt_scrypt_n = v;
+                    break;
+                }
             }
         }
         if (i == ARRAY_SIZE(algo_names)) {
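The validation of the parsed value is worth a note: `v & (v - 1)` is zero only when v is a power of two (a power of two has a single set bit), and `v < 2` rejects 0, 1, and negative values, so together they enforce the "power of 2 greater than 1" rule stated in the man page below. A standalone sketch of the same check (the helper name is illustrative, not part of the patch):

    #include <stdlib.h>

    /* Parse the text after "scrypt:"; returns N, or 0 if the text
     * is not a power of two greater than 1. */
    static long parse_scrypt_n(const char *s)
    {
        char *ep;
        long v = strtol(s, &ep, 10);

        if (*ep != '\0')        /* trailing junk after the digits */
            return 0;
        if (v < 2)              /* rejects 0, 1 and negative values */
            return 0;
        if (v & (v - 1))        /* more than one bit set: not a power of two */
            return 0;
        return v;
    }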

View file

@@ -154,10 +154,10 @@ void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
 extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done);
 
-extern unsigned char *scrypt_buffer_alloc();
+extern unsigned char *scrypt_buffer_alloc(int N);
 extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
     unsigned char *scratchbuf, const uint32_t *ptarget,
-    uint32_t max_nonce, unsigned long *hashes_done);
+    uint32_t max_nonce, unsigned long *hashes_done, int N);
 
 struct thr_info {
     int id;

View file

@@ -72,6 +72,9 @@ Possible values are:
 .B scrypt
 scrypt(1024, 1, 1) (used by Litecoin)
 .TP
+.B scrypt:\fIN\fR
+scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
+.TP
 .B sha256d
 SHA-256d (used by Bitcoin)
 .RE

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 pooler@litecoinpool.org
+ * Copyright 2012, 2014 pooler@litecoinpool.org
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -472,14 +472,16 @@ scrypt_core:
 _scrypt_core:
     stmfd sp!, {r4-r11, lr}
     mov r12, sp
-    sub sp, sp, #21*4
+    sub sp, sp, #22*4
     bic sp, sp, #63
     str r12, [sp, #20*4]
+    str r2, [sp, #21*4]
 
     scrypt_shuffle
 
+    ldr r2, [sp, #21*4]
     str r0, [sp, #16*4]
-    add r12, r1, #1024*32*4
+    add r12, r1, r2, lsl #7
     str r12, [sp, #18*4]
 scrypt_core_loop1:
     add lr, r0, #16*4
@@ -517,12 +519,14 @@ scrypt_core_loop1:
     cmp r1, r12
     bne scrypt_core_loop1
 
+    ldr r12, [sp, #21*4]
     ldr r4, [r0, #16*4]
-    sub r1, r1, #1024*32*4
+    sub r2, r12, #1
+    str r2, [sp, #21*4]
+    sub r1, r1, r12, lsl #7
     str r1, [sp, #17*4]
-    mov r4, r4, lsl #32-10
-    mov r12, #1024
-    add r1, r1, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r1, r1, r4, lsl #7
 scrypt_core_loop2:
     add r2, r0, #16*4
     add r3, r1, #16*4
@@ -553,9 +557,10 @@ scrypt_core_loop2:
     mov r1, sp
     ldr r3, [sp, #17*4]
     add r0, r0, #16*4
+    ldr r2, [sp, #21*4]
     scrypt_core_macro3_x4
-    mov r4, r4, lsl #32-10
-    add r3, r3, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r3, r3, r4, lsl #7
     str r3, [sp, #19*4]
 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
     pld [r3, #16*4]
@@ -794,10 +799,11 @@ _scrypt_core_3way:
     mov r12, sp
     sub sp, sp, #24*16
     bic sp, sp, #63
-    str r12, [sp, #4*16+3*4]
+    str r2, [sp, #4*16+3*4]
+    str r12, [sp, #4*16+4*4]
 
-    mov r2, r0
-    vldmia r2!, {q8-q15}
+    mov r3, r0
+    vldmia r3!, {q8-q15}
     vmov.u64 q0, #0xffffffff
     vmov.u32 q1, q8
     vmov.u32 q2, q12
@@ -809,7 +815,7 @@ _scrypt_core_3way:
     vbif.u32 q14, q15, q0
     vbif.u32 q11, q1, q0
     vbif.u32 q15, q2, q0
-    vldmia r2!, {q0-q7}
+    vldmia r3!, {q0-q7}
     vswp.u32 d17, d21
     vswp.u32 d25, d29
     vswp.u32 d18, d22
@@ -826,7 +832,7 @@ _scrypt_core_3way:
     vbif.u32 q6, q7, q8
     vbif.u32 q3, q9, q8
     vbif.u32 q7, q10, q8
-    vldmia r2, {q8-q15}
+    vldmia r3, {q8-q15}
     vswp.u32 d1, d5
     vswp.u32 d9, d13
     vswp.u32 d2, d6
@@ -852,7 +858,7 @@ _scrypt_core_3way:
     add lr, sp, #128
     vldmia lr, {q0-q7}
 
-    add r2, r1, #1024*32*4
+    add r2, r1, r2, lsl #7
     str r0, [sp, #4*16+0*4]
     str r2, [sp, #4*16+2*4]
 scrypt_core_3way_loop1:
@@ -863,12 +869,13 @@ scrypt_core_3way_loop1:
     scrypt_core_macro1a_x4
     scrypt_core_macro1a_x4
     scrypt_core_macro1a_x4
+    ldr r2, [sp, #4*16+3*4]
     scrypt_core_macro1a_x4
     sub r1, r1, #4*16
 
-    add r1, r1, #1024*32*4
+    add r1, r1, r2, lsl #7
     vstmia r1, {q0-q7}
-    add r3, r1, #1024*32*4
+    add r3, r1, r2, lsl #7
     vstmia r3, {q8-q15}
 
     add lr, sp, #128
@@ -957,20 +964,22 @@ scrypt_core_3way_loop1:
     cmp r1, r2
     bne scrypt_core_3way_loop1
 
+    ldr r2, [sp, #4*16+3*4]
     add r5, sp, #256+4*16
     vstmia r5, {q12-q15}
 
-    sub r1, r1, #1024*32*4
+    sub r1, r1, r2, lsl #7
     str r1, [sp, #4*16+1*4]
-    mov r2, #1024
 scrypt_core_3way_loop2:
     str r2, [sp, #4*16+2*4]
 
     ldr r0, [sp, #4*16+0*4]
     ldr r1, [sp, #4*16+1*4]
+    ldr r2, [sp, #4*16+3*4]
     ldr r4, [r0, #16*4]
-    mov r4, r4, lsl #32-10
-    add r1, r1, r4, lsr #32-10-7
+    sub r2, r2, #1
+    and r4, r4, r2
+    add r1, r1, r4, lsl #7
     add r2, r0, #16*4
     add r3, r1, #16*4
     mov r12, sp
@@ -980,29 +989,31 @@ scrypt_core_3way_loop2:
     scrypt_core_macro1b_x4
 
     ldr r1, [sp, #4*16+1*4]
-    add r1, r1, #1024*32*4
-    add r3, r1, #1024*32*4
+    ldr r2, [sp, #4*16+3*4]
+    add r1, r1, r2, lsl #7
+    add r3, r1, r2, lsl #7
+    sub r2, r2, #1
     vmov r6, r7, d8
-    mov r6, r6, lsl #32-10
-    add r6, r1, r6, lsr #32-10-7
+    and r6, r6, r2
+    add r6, r1, r6, lsl #7
     vmov r7, r8, d24
     add lr, sp, #128
     vldmia lr, {q0-q3}
     pld [r6]
     pld [r6, #8*4]
     pld [r6, #16*4]
     pld [r6, #24*4]
     vldmia r6, {q8-q15}
-    mov r7, r7, lsl #32-10
-    add r7, r3, r7, lsr #32-10-7
+    and r7, r7, r2
+    add r7, r3, r7, lsl #7
     veor.u32 q8, q8, q0
     veor.u32 q9, q9, q1
     veor.u32 q10, q10, q2
     veor.u32 q11, q11, q3
     pld [r7]
     pld [r7, #8*4]
     pld [r7, #16*4]
     pld [r7, #24*4]
     veor.u32 q12, q12, q4
     veor.u32 q13, q13, q5
     veor.u32 q14, q14, q6
@@ -1079,15 +1090,17 @@ scrypt_core_3way_loop2:
 
     ldr r0, [sp, #4*16+0*4]
     ldr r3, [sp, #4*16+1*4]
+    ldr r2, [sp, #4*16+3*4]
     mov r1, sp
     add r0, r0, #16*4
+    sub r2, r2, #1
     scrypt_core_macro3_x4
-    mov r4, r4, lsl #32-10
-    add r3, r3, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r3, r3, r4, lsl #7
     pld [r3, #16*4]
     pld [r3]
     pld [r3, #24*4]
     pld [r3, #8*4]
 
     scrypt_core_macro3_x6
     scrypt_core_macro3_x6
@@ -1164,7 +1177,7 @@ scrypt_core_3way_loop2:
     vswp.u32 d26, d30
     vstmia r0, {q8-q15}
 
-    ldr sp, [sp, #4*16+3*4]
+    ldr sp, [sp, #4*16+4*4]
     vpop {q4-q7}
     ldmfd sp!, {r4-r11, pc}
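What all of the assembly changes in this file amount to is the same index computation as in the C fallback: the scratchpad offset is (X[16] mod N) entries of 128 bytes each. With N fixed at 1024, the old code folded the modulo and the scaling into two shifts (lsl #32-10 followed by lsr #32-10-7); with N a run-time argument it instead keeps N-1 in a register and uses an AND followed by a left shift by 7. A C equivalent of the two forms (illustrative only, not code from the patch):

    #include <stdint.h>

    /* Byte offset of one scratchpad lookup; each of the N entries is
     * 32 uint32_t = 128 bytes.  Both forms assume N is a power of two. */
    static uint32_t offset_fixed(uint32_t x16)
    {
        return (x16 % 1024) * 128;       /* old: N hard-coded as 1024 */
    }

    static uint32_t offset_param(uint32_t x16, uint32_t n)
    {
        return (x16 & (n - 1)) << 7;     /* new: mask with N - 1 */
    }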

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2013 pooler@litecoinpool.org
+ * Copyright 2011-2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -113,38 +113,38 @@ scrypt_best_throughput_exit:
 .macro scrypt_shuffle src, so, dest, do
-    movl \so+60(\src), %r8d
-    movl \so+44(\src), %r9d
-    movl \so+28(\src), %r10d
-    movl \so+12(\src), %r11d
-    movl %r8d, \do+12(\dest)
-    movl %r9d, \do+28(\dest)
-    movl %r10d, \do+44(\dest)
-    movl %r11d, \do+60(\dest)
+    movl \so+60(\src), %eax
+    movl \so+44(\src), %ebx
+    movl \so+28(\src), %ecx
+    movl \so+12(\src), %edx
+    movl %eax, \do+12(\dest)
+    movl %ebx, \do+28(\dest)
+    movl %ecx, \do+44(\dest)
+    movl %edx, \do+60(\dest)
 
-    movl \so+40(\src), %r8d
-    movl \so+8(\src), %r9d
-    movl \so+48(\src), %r10d
-    movl \so+16(\src), %r11d
-    movl %r8d, \do+8(\dest)
-    movl %r9d, \do+40(\dest)
-    movl %r10d, \do+16(\dest)
-    movl %r11d, \do+48(\dest)
+    movl \so+40(\src), %eax
+    movl \so+8(\src), %ebx
+    movl \so+48(\src), %ecx
+    movl \so+16(\src), %edx
+    movl %eax, \do+8(\dest)
+    movl %ebx, \do+40(\dest)
+    movl %ecx, \do+16(\dest)
+    movl %edx, \do+48(\dest)
 
-    movl \so+20(\src), %r8d
-    movl \so+4(\src), %r9d
-    movl \so+52(\src), %r10d
-    movl \so+36(\src), %r11d
-    movl %r8d, \do+4(\dest)
-    movl %r9d, \do+20(\dest)
-    movl %r10d, \do+36(\dest)
-    movl %r11d, \do+52(\dest)
+    movl \so+20(\src), %eax
+    movl \so+4(\src), %ebx
+    movl \so+52(\src), %ecx
+    movl \so+36(\src), %edx
+    movl %eax, \do+4(\dest)
+    movl %ebx, \do+20(\dest)
+    movl %ecx, \do+36(\dest)
+    movl %edx, \do+52(\dest)
 
-    movl \so+0(\src), %r8d
-    movl \so+24(\src), %r9d
-    movl \so+32(\src), %r10d
-    movl \so+56(\src), %r11d
-    movl %r8d, \do+0(\dest)
-    movl %r9d, \do+24(\dest)
-    movl %r10d, \do+32(\dest)
-    movl %r11d, \do+56(\dest)
+    movl \so+0(\src), %eax
+    movl \so+24(\src), %ebx
+    movl \so+32(\src), %ecx
+    movl \so+56(\src), %edx
+    movl %eax, \do+0(\dest)
+    movl %ebx, \do+24(\dest)
+    movl %ecx, \do+32(\dest)
+    movl %edx, \do+56(\dest)
 
 .endm
@@ -384,6 +384,8 @@ _scrypt_core:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
 
 .macro scrypt_core_cleanup
@@ -432,7 +434,10 @@ scrypt_core_gen:
     movdqa 96(%rdi), %xmm14
     movdqa 112(%rdi), %xmm15
 
-    leaq 131072(%rsi), %rcx
+    movq %r8, %rcx
+    shlq $7, %rcx
+    addq %rsi, %rcx
+    movq %r8, 96(%rsp)
     movq %rdi, 104(%rsp)
     movq %rsi, 112(%rsp)
     movq %rcx, 120(%rsp)
@@ -481,11 +486,14 @@ scrypt_core_gen_loop1:
     cmpq %rcx, %rsi
     jne scrypt_core_gen_loop1
 
-    movq $1024, %rcx
+    movq 96(%rsp), %r8
+    movq %r8, %rcx
+    subl $1, %r8d
+    movq %r8, 96(%rsp)
     movd %xmm12, %edx
 scrypt_core_gen_loop2:
     movq 112(%rsp), %rsi
-    andl $1023, %edx
+    andl %r8d, %edx
     shll $7, %edx
     addq %rsi, %rdx
     movdqa 0(%rdx), %xmm0
@@ -529,6 +537,7 @@ scrypt_core_gen_loop2:
     movdqa %xmm14, 32(%rsp)
     movdqa %xmm15, 48(%rsp)
     call salsa8_core_gen
+    movq 96(%rsp), %r8
     movq 128(%rsp), %rcx
     addl 0(%rsp), %edx
     paddd %xmm0, %xmm12
@@ -691,7 +700,9 @@ scrypt_core_xmm:
     punpckhqdq %xmm0, %xmm13
 
     movq %rsi, %rdx
-    leaq 131072(%rsi), %rcx
+    movq %r8, %rcx
+    shlq $7, %rcx
+    addq %rsi, %rcx
 scrypt_core_xmm_loop1:
     pxor %xmm12, %xmm8
     pxor %xmm13, %xmm9
@@ -734,10 +745,11 @@ scrypt_core_xmm_loop1:
     cmpq %rcx, %rdx
     jne scrypt_core_xmm_loop1
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subl $1, %r8d
 scrypt_core_xmm_loop2:
     movd %xmm12, %edx
-    andl $1023, %edx
+    andl %r8d, %edx
     shll $7, %edx
     pxor 0(%rsi, %rdx), %xmm8
     pxor 16(%rsi, %rdx), %xmm9
@@ -1019,6 +1031,8 @@ _scrypt_core_3way:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
 
     subq $392, %rsp
@@ -1088,7 +1102,9 @@ scrypt_core_3way_avx:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_avx_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -1208,7 +1224,8 @@ scrypt_core_3way_avx_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_avx_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -1225,13 +1242,13 @@ scrypt_core_3way_avx_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -1491,7 +1508,9 @@ scrypt_core_3way_xop:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_xop_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -1611,7 +1630,8 @@ scrypt_core_3way_xop_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_xop_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -1628,13 +1648,13 @@ scrypt_core_3way_xop_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -1991,7 +2011,9 @@ scrypt_core_3way_xmm:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_xmm_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -2111,7 +2133,8 @@ scrypt_core_3way_xmm_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_xmm_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -2128,13 +2151,13 @@ scrypt_core_3way_xmm_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -2445,6 +2468,8 @@ _scrypt_core_6way:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
     movq %rsp, %rdx
     subq $768, %rsp
@@ -2539,7 +2564,9 @@ scrypt_core_6way_avx2:
     vmovdqa 2*256+7*32(%rsp), %ymm15
 
     movq %rsi, %rbx
-    leaq 6*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $8, %rax
+    addq %rsi, %rax
 scrypt_core_6way_avx2_loop1:
     vmovdqa %ymm0, 0*256+4*32(%rbx)
     vmovdqa %ymm1, 0*256+5*32(%rbx)
@@ -2659,7 +2686,8 @@ scrypt_core_6way_avx2_loop1:
     vmovdqa %ymm14, 2*256+6*32(%rsp)
     vmovdqa %ymm15, 2*256+7*32(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    leaq -1(%r8), %r11
 scrypt_core_6way_avx2_loop2:
     vmovd %xmm0, %ebp
     vmovd %xmm8, %ebx
@@ -2682,22 +2710,22 @@ scrypt_core_6way_avx2_loop2:
     vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
     vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
     vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
-    andl $1023, %ebp
+    andl %r11d, %ebp
     leaq 0(%rbp, %rbp, 2), %rbp
     shll $8, %ebp
-    andl $1023, %ebx
+    andl %r11d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $8, %ebx
-    andl $1023, %eax
+    andl %r11d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $8, %eax
-    andl $1023, %r8d
+    andl %r11d, %r8d
     leaq 0(%r8, %r8, 2), %r8
     shll $8, %r8d
-    andl $1023, %r9d
+    andl %r11d, %r9d
     leaq 1(%r9, %r9, 2), %r9
     shll $8, %r9d
-    andl $1023, %r10d
+    andl %r11d, %r10d
     leaq 2(%r10, %r10, 2), %r10
     shll $8, %r10d
     vmovdqa 0*32(%rsi, %rbp), %xmm4
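The added `movq %rdx, %r8` stubs are about calling conventions: the branch that already shuffles %rcx/%rdx into %rdi/%rsi is presumably the Win64 path, where the third argument of scrypt_core(X, V, N) arrives in %r8 anyway, while on the System V x86-64 ABI it arrives in %rdx and must be copied into %r8 so the rest of the code (and the N-1 mask kept in %r11 by the 6-way AVX2 loop) can assume one register layout. The C-level signatures these entry points implement, as declared in the header change above:

    #include <stdint.h>

    /* Assembly entry points after this change; N is the third argument. */
    void scrypt_core(uint32_t *X, uint32_t *V, int N);
    void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
    void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);   /* AVX2 build only */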

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2012 pooler@litecoinpool.org
+ * Copyright 2011-2012, 2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -415,6 +415,7 @@ _scrypt_core:
 scrypt_core_gen:
     movl 20(%esp), %edi
     movl 24(%esp), %esi
+    movl 28(%esp), %ecx
     subl $72, %esp
 
 .macro scrypt_core_macro1a p, q
@@ -453,7 +454,8 @@ scrypt_core_gen:
     movl %eax, \q(%edi)
 .endm
 
-    leal 131072(%esi), %ecx
+    shll $7, %ecx
+    addl %esi, %ecx
 scrypt_core_gen_loop1:
     movl %esi, 64(%esp)
     movl %ecx, 68(%esp)
@@ -522,12 +524,15 @@ scrypt_core_gen_loop1:
     jne scrypt_core_gen_loop1
 
     movl 96(%esp), %esi
-    movl $1024, %ecx
+    movl 100(%esp), %ecx
+    movl %ecx, %eax
+    subl $1, %eax
+    movl %eax, 100(%esp)
 scrypt_core_gen_loop2:
     movl %ecx, 68(%esp)
 
     movl 64(%edi), %edx
-    andl $1023, %edx
+    andl 100(%esp), %edx
     shll $7, %edx
 
     scrypt_core_macro1b 0, 64
@@ -694,7 +699,9 @@ scrypt_core_sse2:
     movdqa 112(%esp), %xmm7
 
     movl %esi, %edx
-    leal 131072(%esi), %ecx
+    movl 28(%ebp), %ecx
+    shll $7, %ecx
+    addl %esi, %ecx
 scrypt_core_sse2_loop1:
     movdqa 0(%esp), %xmm0
     movdqa 16(%esp), %xmm1
@@ -748,14 +755,16 @@ scrypt_core_sse2_loop1:
     movdqa 64(%esp), %xmm4
     movdqa 80(%esp), %xmm5
 
-    movl $1024, %ecx
+    movl 28(%ebp), %ecx
+    movl %ecx, %eax
+    subl $1, %eax
 scrypt_core_sse2_loop2:
     movd %xmm4, %edx
     movdqa 0(%esp), %xmm0
     movdqa 16(%esp), %xmm1
     movdqa 32(%esp), %xmm2
     movdqa 48(%esp), %xmm3
-    andl $1023, %edx
+    andl %eax, %edx
     shll $7, %edx
     pxor 0(%esi, %edx), %xmm0
     pxor 16(%esi, %edx), %xmm1

View file

@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -383,30 +383,30 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
 #define SCRYPT_MAX_WAYS 12
 #define HAVE_SCRYPT_3WAY 1
 int scrypt_best_throughput();
-void scrypt_core(uint32_t *X, uint32_t *V);
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #if defined(USE_AVX2)
 #undef SCRYPT_MAX_WAYS
 #define SCRYPT_MAX_WAYS 24
 #define HAVE_SCRYPT_6WAY 1
-void scrypt_core_6way(uint32_t *X, uint32_t *V);
+void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #elif defined(USE_ASM) && defined(__i386__)
 
 #define SCRYPT_MAX_WAYS 4
 #define scrypt_best_throughput() 1
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 
 #elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
 
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 #if defined(__ARM_NEON__)
 #undef HAVE_SHA256_4WAY
 #define SCRYPT_MAX_WAYS 3
 #define HAVE_SCRYPT_3WAY 1
 #define scrypt_best_throughput() 3
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #else
@@ -479,17 +479,17 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
     B[15] += x15;
 }
 
-static inline void scrypt_core(uint32_t *X, uint32_t *V)
+static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
 {
     uint32_t i, j, k;
 
-    for (i = 0; i < 1024; i++) {
+    for (i = 0; i < N; i++) {
         memcpy(&V[i * 32], X, 128);
         xor_salsa8(&X[0], &X[16]);
         xor_salsa8(&X[16], &X[0]);
     }
-    for (i = 0; i < 1024; i++) {
-        j = 32 * (X[16] & 1023);
+    for (i = 0; i < N; i++) {
+        j = 32 * (X[16] & (N - 1));
         for (k = 0; k < 32; k++)
             X[k] ^= V[j + k];
         xor_salsa8(&X[0], &X[16]);
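In the generic C core, the second loop now selects a scratchpad entry with `X[16] & (N - 1)` instead of a modulo; the two agree only when N is a power of two, which is exactly the constraint the new command-line parser and the man page impose. A self-contained check of that equivalence (illustrative, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t n = 2048;                        /* any power of two >= 2 */

        for (uint32_t x = 0; x < 1000000; x++)
            assert((x & (n - 1)) == (x % n));     /* mask == modulo */
        return 0;
    }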
@@ -504,15 +504,13 @@ static inline void scrypt_core(uint32_t *X, uint32_t *V)
 #define scrypt_best_throughput() 1
 #endif
 
-#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)
-
-unsigned char *scrypt_buffer_alloc()
+unsigned char *scrypt_buffer_alloc(int N)
 {
-    return malloc(SCRYPT_BUFFER_SIZE);
+    return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
 }
 
 static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
-    uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[8], ostate[8];
     uint32_t X[32];
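The new allocation size follows from the core loop: each lane needs an N-entry scratchpad of 32 uint32_t (128 bytes) per entry, the buffer is shared by up to SCRYPT_MAX_WAYS lanes, and the extra 63 bytes presumably leave room to round the working pointer up to a 64-byte boundary (the removed SCRYPT_BUFFER_SIZE carried the same +63 slack). A sketch of the sizing plus such a rounding step (the alignment helper is an assumption, not shown in this diff):

    #include <stdint.h>
    #include <stdlib.h>

    #define SCRYPT_MAX_WAYS 12   /* x86-64 value from the #if block above */

    /* N entries * 128 bytes per lane, SCRYPT_MAX_WAYS lanes, plus slack
     * for 64-byte alignment of the working pointer. */
    static unsigned char *buffer_alloc_sketch(int N)
    {
        return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
    }

    /* Hypothetical consumer: round the raw pointer up to 64 bytes. */
    static uint32_t *align64(unsigned char *p)
    {
        return (uint32_t *)(((uintptr_t)p + 63) & ~(uintptr_t)63);
    }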
@@ -524,14 +522,14 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
     HMAC_SHA256_80_init(input, tstate, ostate);
     PBKDF2_SHA256_80_128(tstate, ostate, input, X);
 
-    scrypt_core(X, V);
+    scrypt_core(X, V, N);
 
     PBKDF2_SHA256_128_32(tstate, ostate, X, output);
 }
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_4way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[4 * 8] __attribute__((aligned(128)));
     uint32_t ostate[4 * 8] __attribute__((aligned(128)));
@@ -553,10 +551,10 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
     for (i = 0; i < 32; i++)
         for (k = 0; k < 4; k++)
             X[k * 32 + i] = W[4 * i + k];
-    scrypt_core(X + 0 * 32, V);
-    scrypt_core(X + 1 * 32, V);
-    scrypt_core(X + 2 * 32, V);
-    scrypt_core(X + 3 * 32, V);
+    scrypt_core(X + 0 * 32, V, N);
+    scrypt_core(X + 1 * 32, V, N);
+    scrypt_core(X + 2 * 32, V, N);
+    scrypt_core(X + 3 * 32, V, N);
     for (i = 0; i < 32; i++)
         for (k = 0; k < 4; k++)
             W[4 * i + k] = X[k * 32 + i];
@@ -570,7 +568,7 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
 
 #ifdef HAVE_SCRYPT_3WAY
 static void scrypt_1024_1_1_256_3way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[3 * 8], ostate[3 * 8];
     uint32_t X[3 * 32] __attribute__((aligned(64)));
@@ -588,7 +586,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
     PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
     PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
 
-    scrypt_core_3way(X, V);
+    scrypt_core_3way(X, V, N);
 
     PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
     PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
@@ -597,7 +595,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_12way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[12 * 8] __attribute__((aligned(128)));
     uint32_t ostate[12 * 8] __attribute__((aligned(128)));
@@ -626,10 +624,10 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
         for (i = 0; i < 32; i++)
             for (k = 0; k < 4; k++)
                 X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
-    scrypt_core_3way(X + 0 * 96, V);
-    scrypt_core_3way(X + 1 * 96, V);
-    scrypt_core_3way(X + 2 * 96, V);
-    scrypt_core_3way(X + 3 * 96, V);
+    scrypt_core_3way(X + 0 * 96, V, N);
+    scrypt_core_3way(X + 1 * 96, V, N);
+    scrypt_core_3way(X + 2 * 96, V, N);
+    scrypt_core_3way(X + 3 * 96, V, N);
     for (j = 0; j < 3; j++)
         for (i = 0; i < 32; i++)
             for (k = 0; k < 4; k++)
@@ -648,7 +646,7 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
 
 #ifdef HAVE_SCRYPT_6WAY
 static void scrypt_1024_1_1_256_24way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[24 * 8] __attribute__((aligned(128)));
     uint32_t ostate[24 * 8] __attribute__((aligned(128)));
@@ -677,10 +675,10 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
         for (i = 0; i < 32; i++)
             for (k = 0; k < 8; k++)
                 X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
-    scrypt_core_6way(X + 0 * 32, V);
-    scrypt_core_6way(X + 6 * 32, V);
-    scrypt_core_6way(X + 12 * 32, V);
-    scrypt_core_6way(X + 18 * 32, V);
+    scrypt_core_6way(X + 0 * 32, V, N);
+    scrypt_core_6way(X + 6 * 32, V, N);
+    scrypt_core_6way(X + 12 * 32, V, N);
+    scrypt_core_6way(X + 18 * 32, V, N);
     for (j = 0; j < 3; j++)
         for (i = 0; i < 32; i++)
             for (k = 0; k < 8; k++)
@@ -697,7 +695,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
 
 int scanhash_scrypt(int thr_id, uint32_t *pdata,
     unsigned char *scratchbuf, const uint32_t *ptarget,
-    uint32_t max_nonce, unsigned long *hashes_done)
+    uint32_t max_nonce, unsigned long *hashes_done, int N)
 {
     uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
     uint32_t midstate[8];
@@ -723,25 +721,25 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
 
 #if defined(HAVE_SHA256_4WAY)
         if (throughput == 4)
-            scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
         else
 #endif
 #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
         if (throughput == 12)
-            scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
         else
 #endif
 #if defined(HAVE_SCRYPT_6WAY)
         if (throughput == 24)
-            scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
         else
 #endif
 #if defined(HAVE_SCRYPT_3WAY)
        if (throughput == 3)
-            scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
         else
 #endif
-        scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
+        scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
 
         for (i = 0; i < throughput; i++) {
             if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {