Add support for scrypt(N, 1, 1)
commit be1b725270 (parent a988337f52)
7 changed files with 235 additions and 159 deletions
cpu-miner.c (45 lines changed)

@@ -100,7 +100,7 @@ struct workio_cmd {
     } u;
 };
 
-enum sha256_algos {
+enum algos {
     ALGO_SCRYPT,      /* scrypt(1024,1,1) */
     ALGO_SHA256D,     /* SHA-256d */
 };
@@ -128,7 +128,8 @@ static int opt_fail_pause = 30;
 int opt_timeout = 0;
 static int opt_scantime = 5;
 static const bool opt_time = true;
-static enum sha256_algos opt_algo = ALGO_SCRYPT;
+static enum algos opt_algo = ALGO_SCRYPT;
+static int opt_scrypt_n = 1024;
 static int opt_n_threads;
 static int num_processors;
 static char *rpc_url;
@@ -170,6 +171,7 @@ Usage: " PROGRAM_NAME " [OPTIONS]\n\
 Options:\n\
   -a, --algo=ALGO       specify the algorithm to use\n\
                           scrypt    scrypt(1024, 1, 1) (default)\n\
+                          scrypt:N  scrypt(N, 1, 1)\n\
                           sha256d   SHA-256d\n\
   -o, --url=URL         URL of mining server\n\
   -O, --userpass=U:P    username:password pair for mining server\n\
@@ -1080,9 +1082,13 @@ static void *miner_thread(void *userdata)
         affine_to_cpu(thr_id, thr_id % num_processors);
     }
 
-    if (opt_algo == ALGO_SCRYPT)
-    {
-        scratchbuf = scrypt_buffer_alloc();
+    if (opt_algo == ALGO_SCRYPT) {
+        scratchbuf = scrypt_buffer_alloc(opt_scrypt_n);
+        if (!scratchbuf) {
+            applog(LOG_ERR, "scrypt buffer allocation failed");
+            pthread_mutex_lock(&applog_lock);
+            exit(1);
+        }
     }
 
     while (1) {
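
Note the behavioral change beyond the new parameter: the malloc result was previously unchecked, and with a user-chosen N the scratchpad can be large enough for allocation to realistically fail, so the thread now logs an error and exits instead of dereferencing a null buffer.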

@@ -1133,8 +1139,16 @@ static void *miner_thread(void *userdata)
             max64 = g_work_time + (have_longpoll ? LP_SCANTIME : opt_scantime)
                   - time(NULL);
         max64 *= thr_hashrates[thr_id];
-        if (max64 <= 0)
-            max64 = opt_algo == ALGO_SCRYPT ? 0xfffLL : 0x1fffffLL;
+        if (max64 <= 0) {
+            switch (opt_algo) {
+            case ALGO_SCRYPT:
+                max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n;
+                break;
+            case ALGO_SHA256D:
+                max64 = 0x1fffff;
+                break;
+            }
+        }
         if (work.data[19] + max64 > end_nonce)
             max_nonce = end_nonce;
         else
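
A sanity check on the new fallback: one scrypt hash costs O(N) work, so the default nonce range scales as 0x3fffff / N. At N = 1024 this gives 0x3fffff / 1024 = 0xfff, exactly the old hard-coded scrypt constant, and the 0x3ffff cap keeps the range bounded for very small N.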

@@ -1147,7 +1161,7 @@ static void *miner_thread(void *userdata)
         switch (opt_algo) {
         case ALGO_SCRYPT:
             rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target,
-                                 max_nonce, &hashes_done);
+                                 max_nonce, &hashes_done, opt_scrypt_n);
             break;
 
         case ALGO_SHA256D:

@@ -1471,11 +1485,22 @@ static void parse_arg(int key, char *arg, char *pname)
     switch(key) {
     case 'a':
         for (i = 0; i < ARRAY_SIZE(algo_names); i++) {
-            if (algo_names[i] &&
-                !strcmp(arg, algo_names[i])) {
-                opt_algo = i;
-                break;
+            v = strlen(algo_names[i]);
+            if (!strncmp(arg, algo_names[i], v)) {
+                if (arg[v] == '\0') {
+                    opt_algo = i;
+                    break;
+                }
+                if (arg[v] == ':' && i == ALGO_SCRYPT) {
+                    char *ep;
+                    v = strtol(arg+v+1, &ep, 10);
+                    if (*ep || v & (v-1) || v < 2)
+                        continue;
+                    opt_algo = i;
+                    opt_scrypt_n = v;
+                    break;
+                }
             }
         }
         if (i == ARRAY_SIZE(algo_names)) {
             fprintf(stderr, "%s: unknown algorithm -- '%s'\n",
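
For readers skimming the diff: `v & (v - 1)` is zero only for powers of two, and `*ep` rejects trailing characters after the number. A standalone sketch of the same validation (the helper name is ours, not cpuminer's):

    #include <stdlib.h>

    /* Hypothetical standalone version of the "scrypt:N" suffix check
     * added to parse_arg() above; returns N, or -1 if invalid. */
    static long parse_scrypt_n(const char *suffix)
    {
        char *ep;
        long v = strtol(suffix, &ep, 10);

        /* reject trailing characters, N < 2, and non-powers-of-two */
        if (*ep || v < 2 || (v & (v - 1)))
            return -1;
        return v;
    }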

miner.h (4 lines changed)

@@ -154,10 +154,10 @@ void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
 extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
     const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done);
 
-extern unsigned char *scrypt_buffer_alloc();
+extern unsigned char *scrypt_buffer_alloc(int N);
 extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
     unsigned char *scratchbuf, const uint32_t *ptarget,
-    uint32_t max_nonce, unsigned long *hashes_done);
+    uint32_t max_nonce, unsigned long *hashes_done, int N);
 
 struct thr_info {
     int id;

minerd.1 (3 lines changed)

@@ -72,6 +72,9 @@ Possible values are:
 .B scrypt
 scrypt(1024, 1, 1) (used by Litecoin)
 .TP
+.B scrypt:\fIN\fR
+scrypt(\fIN\fR, 1, 1) (\fIN\fR must be a power of 2 greater than 1)
+.TP
 .B sha256d
 SHA-256d (used by Bitcoin)
 .RE
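
In use, the new form is passed straight on the command line, for example (pool URL and credentials are placeholders, not part of this commit):

    minerd --algo=scrypt:2048 --url=stratum+tcp://pool.example.com:3333 --userpass=user:pass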

scrypt-arm.S (73 lines changed)

@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 pooler@litecoinpool.org
+ * Copyright 2012, 2014 pooler@litecoinpool.org
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the Free
@@ -472,14 +472,16 @@ scrypt_core:
 _scrypt_core:
     stmfd sp!, {r4-r11, lr}
     mov r12, sp
-    sub sp, sp, #21*4
+    sub sp, sp, #22*4
     bic sp, sp, #63
     str r12, [sp, #20*4]
+    str r2, [sp, #21*4]
 
     scrypt_shuffle
 
+    ldr r2, [sp, #21*4]
     str r0, [sp, #16*4]
-    add r12, r1, #1024*32*4
+    add r12, r1, r2, lsl #7
     str r12, [sp, #18*4]
 scrypt_core_loop1:
     add lr, r0, #16*4
@@ -517,12 +519,14 @@ scrypt_core_loop1:
     cmp r1, r12
     bne scrypt_core_loop1
 
+    ldr r12, [sp, #21*4]
     ldr r4, [r0, #16*4]
-    sub r1, r1, #1024*32*4
+    sub r2, r12, #1
+    str r2, [sp, #21*4]
+    sub r1, r1, r12, lsl #7
     str r1, [sp, #17*4]
-    mov r4, r4, lsl #32-10
-    mov r12, #1024
-    add r1, r1, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r1, r1, r4, lsl #7
 scrypt_core_loop2:
     add r2, r0, #16*4
     add r3, r1, #16*4
@@ -553,9 +557,10 @@ scrypt_core_loop2:
     mov r1, sp
     ldr r3, [sp, #17*4]
     add r0, r0, #16*4
+    ldr r2, [sp, #21*4]
     scrypt_core_macro3_x4
-    mov r4, r4, lsl #32-10
-    add r3, r3, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r3, r3, r4, lsl #7
     str r3, [sp, #19*4]
 #ifdef __ARM_ARCH_5E_OR_6_OR_7__
     pld [r3, #16*4]
@@ -794,10 +799,11 @@ _scrypt_core_3way:
     mov r12, sp
     sub sp, sp, #24*16
     bic sp, sp, #63
-    str r12, [sp, #4*16+3*4]
+    str r2, [sp, #4*16+3*4]
+    str r12, [sp, #4*16+4*4]
 
-    mov r2, r0
-    vldmia r2!, {q8-q15}
+    mov r3, r0
+    vldmia r3!, {q8-q15}
     vmov.u64 q0, #0xffffffff
     vmov.u32 q1, q8
     vmov.u32 q2, q12
@@ -809,7 +815,7 @@ _scrypt_core_3way:
     vbif.u32 q14, q15, q0
     vbif.u32 q11, q1, q0
     vbif.u32 q15, q2, q0
-    vldmia r2!, {q0-q7}
+    vldmia r3!, {q0-q7}
     vswp.u32 d17, d21
     vswp.u32 d25, d29
     vswp.u32 d18, d22
@@ -826,7 +832,7 @@ _scrypt_core_3way:
     vbif.u32 q6, q7, q8
     vbif.u32 q3, q9, q8
     vbif.u32 q7, q10, q8
-    vldmia r2, {q8-q15}
+    vldmia r3, {q8-q15}
     vswp.u32 d1, d5
     vswp.u32 d9, d13
     vswp.u32 d2, d6
@@ -852,7 +858,7 @@ _scrypt_core_3way:
 
     add lr, sp, #128
     vldmia lr, {q0-q7}
-    add r2, r1, #1024*32*4
+    add r2, r1, r2, lsl #7
     str r0, [sp, #4*16+0*4]
     str r2, [sp, #4*16+2*4]
 scrypt_core_3way_loop1:
@@ -863,12 +869,13 @@ scrypt_core_3way_loop1:
     scrypt_core_macro1a_x4
     scrypt_core_macro1a_x4
     scrypt_core_macro1a_x4
+    ldr r2, [sp, #4*16+3*4]
     scrypt_core_macro1a_x4
     sub r1, r1, #4*16
 
-    add r1, r1, #1024*32*4
+    add r1, r1, r2, lsl #7
     vstmia r1, {q0-q7}
-    add r3, r1, #1024*32*4
+    add r3, r1, r2, lsl #7
     vstmia r3, {q8-q15}
 
     add lr, sp, #128
@@ -957,20 +964,22 @@ scrypt_core_3way_loop1:
     cmp r1, r2
     bne scrypt_core_3way_loop1
 
+    ldr r2, [sp, #4*16+3*4]
     add r5, sp, #256+4*16
     vstmia r5, {q12-q15}
 
-    sub r1, r1, #1024*32*4
+    sub r1, r1, r2, lsl #7
     str r1, [sp, #4*16+1*4]
-    mov r2, #1024
 scrypt_core_3way_loop2:
     str r2, [sp, #4*16+2*4]
 
     ldr r0, [sp, #4*16+0*4]
     ldr r1, [sp, #4*16+1*4]
+    ldr r2, [sp, #4*16+3*4]
     ldr r4, [r0, #16*4]
-    mov r4, r4, lsl #32-10
-    add r1, r1, r4, lsr #32-10-7
+    sub r2, r2, #1
+    and r4, r4, r2
+    add r1, r1, r4, lsl #7
     add r2, r0, #16*4
     add r3, r1, #16*4
     mov r12, sp
@@ -980,11 +989,13 @@ scrypt_core_3way_loop2:
     scrypt_core_macro1b_x4
 
     ldr r1, [sp, #4*16+1*4]
-    add r1, r1, #1024*32*4
-    add r3, r1, #1024*32*4
+    ldr r2, [sp, #4*16+3*4]
+    add r1, r1, r2, lsl #7
+    add r3, r1, r2, lsl #7
+    sub r2, r2, #1
     vmov r6, r7, d8
-    mov r6, r6, lsl #32-10
-    add r6, r1, r6, lsr #32-10-7
+    and r6, r6, r2
+    add r6, r1, r6, lsl #7
     vmov r7, r8, d24
     add lr, sp, #128
     vldmia lr, {q0-q3}
@@ -993,8 +1004,8 @@ scrypt_core_3way_loop2:
     pld [r6, #16*4]
     pld [r6, #24*4]
     vldmia r6, {q8-q15}
-    mov r7, r7, lsl #32-10
-    add r7, r3, r7, lsr #32-10-7
+    and r7, r7, r2
+    add r7, r3, r7, lsl #7
     veor.u32 q8, q8, q0
     veor.u32 q9, q9, q1
     veor.u32 q10, q10, q2
@@ -1079,11 +1090,13 @@ scrypt_core_3way_loop2:
 
     ldr r0, [sp, #4*16+0*4]
     ldr r3, [sp, #4*16+1*4]
+    ldr r2, [sp, #4*16+3*4]
     mov r1, sp
     add r0, r0, #16*4
+    sub r2, r2, #1
     scrypt_core_macro3_x4
-    mov r4, r4, lsl #32-10
-    add r3, r3, r4, lsr #32-10-7
+    and r4, r4, r2
+    add r3, r3, r4, lsl #7
     pld [r3, #16*4]
     pld [r3]
     pld [r3, #24*4]
@@ -1164,7 +1177,7 @@ scrypt_core_3way_loop2:
     vswp.u32 d26, d30
     vstmia r0, {q8-q15}
 
-    ldr sp, [sp, #4*16+3*4]
+    ldr sp, [sp, #4*16+4*4]
     vpop {q4-q7}
     ldmfd sp!, {r4-r11, pc}
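
The recurring pattern in the assembly changes above is scrypt's Integerify step. For N = 1024 the old code could extract `X[16] mod N` and multiply by the 128-byte row size with a pair of constant shifts; with N in a register it becomes a mask with N - 1 (valid because N is a power of two) followed by a left shift. In C terms, a sketch (helper names ours):

    #include <stdint.h>

    /* Old, N = 1024 only: (x << 22) >> 15 == (x & 1023) * 128.
     * Each V row is 32 uint32_t words, i.e. 128 bytes, hence << 7. */
    static uint32_t v_byte_offset_1024(uint32_t x)
    {
        return (x << (32 - 10)) >> (32 - 10 - 7);
    }

    /* New, any power-of-two N kept in a register or stack slot. */
    static uint32_t v_byte_offset(uint32_t x, uint32_t n)
    {
        return (x & (n - 1)) << 7;
    }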

scrypt-x64.S (152 lines changed)

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2013 pooler@litecoinpool.org
+ * Copyright 2011-2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -113,38 +113,38 @@ scrypt_best_throughput_exit:
 
 
 .macro scrypt_shuffle src, so, dest, do
-    movl \so+60(\src), %r8d
-    movl \so+44(\src), %r9d
-    movl \so+28(\src), %r10d
-    movl \so+12(\src), %r11d
-    movl %r8d, \do+12(\dest)
-    movl %r9d, \do+28(\dest)
-    movl %r10d, \do+44(\dest)
-    movl %r11d, \do+60(\dest)
-    movl \so+40(\src), %r8d
-    movl \so+8(\src), %r9d
-    movl \so+48(\src), %r10d
-    movl \so+16(\src), %r11d
-    movl %r8d, \do+8(\dest)
-    movl %r9d, \do+40(\dest)
-    movl %r10d, \do+16(\dest)
-    movl %r11d, \do+48(\dest)
-    movl \so+20(\src), %r8d
-    movl \so+4(\src), %r9d
-    movl \so+52(\src), %r10d
-    movl \so+36(\src), %r11d
-    movl %r8d, \do+4(\dest)
-    movl %r9d, \do+20(\dest)
-    movl %r10d, \do+36(\dest)
-    movl %r11d, \do+52(\dest)
-    movl \so+0(\src), %r8d
-    movl \so+24(\src), %r9d
-    movl \so+32(\src), %r10d
-    movl \so+56(\src), %r11d
-    movl %r8d, \do+0(\dest)
-    movl %r9d, \do+24(\dest)
-    movl %r10d, \do+32(\dest)
-    movl %r11d, \do+56(\dest)
+    movl \so+60(\src), %eax
+    movl \so+44(\src), %ebx
+    movl \so+28(\src), %ecx
+    movl \so+12(\src), %edx
+    movl %eax, \do+12(\dest)
+    movl %ebx, \do+28(\dest)
+    movl %ecx, \do+44(\dest)
+    movl %edx, \do+60(\dest)
+    movl \so+40(\src), %eax
+    movl \so+8(\src), %ebx
+    movl \so+48(\src), %ecx
+    movl \so+16(\src), %edx
+    movl %eax, \do+8(\dest)
+    movl %ebx, \do+40(\dest)
+    movl %ecx, \do+16(\dest)
+    movl %edx, \do+48(\dest)
+    movl \so+20(\src), %eax
+    movl \so+4(\src), %ebx
+    movl \so+52(\src), %ecx
+    movl \so+36(\src), %edx
+    movl %eax, \do+4(\dest)
+    movl %ebx, \do+20(\dest)
+    movl %ecx, \do+36(\dest)
+    movl %edx, \do+52(\dest)
+    movl \so+0(\src), %eax
+    movl \so+24(\src), %ebx
+    movl \so+32(\src), %ecx
+    movl \so+56(\src), %edx
+    movl %eax, \do+0(\dest)
+    movl %ebx, \do+24(\dest)
+    movl %ecx, \do+32(\dest)
+    movl %edx, \do+56(\dest)
 .endm
 
 
@@ -384,6 +384,8 @@ _scrypt_core:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
 
 .macro scrypt_core_cleanup
@@ -432,7 +434,10 @@ scrypt_core_gen:
     movdqa 96(%rdi), %xmm14
     movdqa 112(%rdi), %xmm15
 
-    leaq 131072(%rsi), %rcx
+    movq %r8, %rcx
+    shlq $7, %rcx
+    addq %rsi, %rcx
+    movq %r8, 96(%rsp)
     movq %rdi, 104(%rsp)
     movq %rsi, 112(%rsp)
     movq %rcx, 120(%rsp)
@@ -481,11 +486,14 @@ scrypt_core_gen_loop1:
     cmpq %rcx, %rsi
    jne scrypt_core_gen_loop1
 
-    movq $1024, %rcx
+    movq 96(%rsp), %r8
+    movq %r8, %rcx
+    subl $1, %r8d
+    movq %r8, 96(%rsp)
     movd %xmm12, %edx
 scrypt_core_gen_loop2:
     movq 112(%rsp), %rsi
-    andl $1023, %edx
+    andl %r8d, %edx
     shll $7, %edx
     addq %rsi, %rdx
     movdqa 0(%rdx), %xmm0
@@ -529,6 +537,7 @@ scrypt_core_gen_loop2:
     movdqa %xmm14, 32(%rsp)
     movdqa %xmm15, 48(%rsp)
     call salsa8_core_gen
+    movq 96(%rsp), %r8
     movq 128(%rsp), %rcx
     addl 0(%rsp), %edx
     paddd %xmm0, %xmm12
@@ -691,7 +700,9 @@ scrypt_core_xmm:
     punpckhqdq %xmm0, %xmm13
 
     movq %rsi, %rdx
-    leaq 131072(%rsi), %rcx
+    movq %r8, %rcx
+    shlq $7, %rcx
+    addq %rsi, %rcx
 scrypt_core_xmm_loop1:
     pxor %xmm12, %xmm8
     pxor %xmm13, %xmm9
@@ -734,10 +745,11 @@ scrypt_core_xmm_loop1:
     cmpq %rcx, %rdx
     jne scrypt_core_xmm_loop1
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subl $1, %r8d
 scrypt_core_xmm_loop2:
     movd %xmm12, %edx
-    andl $1023, %edx
+    andl %r8d, %edx
     shll $7, %edx
     pxor 0(%rsi, %rdx), %xmm8
     pxor 16(%rsi, %rdx), %xmm9
@@ -1019,6 +1031,8 @@ _scrypt_core_3way:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
     subq $392, %rsp
 
@@ -1088,7 +1102,9 @@ scrypt_core_3way_avx:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_avx_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -1208,7 +1224,8 @@ scrypt_core_3way_avx_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_avx_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -1225,13 +1242,13 @@ scrypt_core_3way_avx_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -1491,7 +1508,9 @@ scrypt_core_3way_xop:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_xop_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -1611,7 +1630,8 @@ scrypt_core_3way_xop_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_xop_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -1628,13 +1648,13 @@ scrypt_core_3way_xop_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -1991,7 +2011,9 @@ scrypt_core_3way_xmm:
     movdqa 256+112(%rsp), %xmm15
 
     movq %rsi, %rbx
-    leaq 3*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $7, %rax
+    addq %rsi, %rax
 scrypt_core_3way_xmm_loop1:
     movdqa %xmm0, 64(%rbx)
     movdqa %xmm1, 80(%rbx)
@@ -2111,7 +2133,8 @@ scrypt_core_3way_xmm_loop1:
     movdqa %xmm14, 256+96(%rsp)
     movdqa %xmm15, 256+112(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    subq $1, %r8
 scrypt_core_3way_xmm_loop2:
     movd %xmm0, %ebp
     movd %xmm8, %ebx
@@ -2128,13 +2151,13 @@ scrypt_core_3way_xmm_loop2:
     pxor 256+16(%rsp), %xmm13
     pxor 256+32(%rsp), %xmm14
     pxor 256+48(%rsp), %xmm15
-    andl $1023, %ebp
+    andl %r8d, %ebp
     leaq (%rbp, %rbp, 2), %rbp
     shll $7, %ebp
-    andl $1023, %ebx
+    andl %r8d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $7, %ebx
-    andl $1023, %eax
+    andl %r8d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $7, %eax
     pxor 0(%rsi, %rbp), %xmm0
@@ -2445,6 +2468,8 @@ _scrypt_core_6way:
     pushq %rsi
     movq %rcx, %rdi
     movq %rdx, %rsi
+#else
+    movq %rdx, %r8
 #endif
     movq %rsp, %rdx
     subq $768, %rsp
@@ -2539,7 +2564,9 @@ scrypt_core_6way_avx2:
     vmovdqa 2*256+7*32(%rsp), %ymm15
 
     movq %rsi, %rbx
-    leaq 6*131072(%rsi), %rax
+    leaq (%r8, %r8, 2), %rax
+    shlq $8, %rax
+    addq %rsi, %rax
 scrypt_core_6way_avx2_loop1:
     vmovdqa %ymm0, 0*256+4*32(%rbx)
     vmovdqa %ymm1, 0*256+5*32(%rbx)
@@ -2659,7 +2686,8 @@ scrypt_core_6way_avx2_loop1:
     vmovdqa %ymm14, 2*256+6*32(%rsp)
     vmovdqa %ymm15, 2*256+7*32(%rsp)
 
-    movq $1024, %rcx
+    movq %r8, %rcx
+    leaq -1(%r8), %r11
 scrypt_core_6way_avx2_loop2:
     vmovd %xmm0, %ebp
     vmovd %xmm8, %ebx
@@ -2682,22 +2710,22 @@ scrypt_core_6way_avx2_loop2:
     vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
     vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
     vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
-    andl $1023, %ebp
+    andl %r11d, %ebp
     leaq 0(%rbp, %rbp, 2), %rbp
     shll $8, %ebp
-    andl $1023, %ebx
+    andl %r11d, %ebx
     leaq 1(%rbx, %rbx, 2), %rbx
     shll $8, %ebx
-    andl $1023, %eax
+    andl %r11d, %eax
     leaq 2(%rax, %rax, 2), %rax
     shll $8, %eax
-    andl $1023, %r8d
+    andl %r11d, %r8d
     leaq 0(%r8, %r8, 2), %r8
     shll $8, %r8d
-    andl $1023, %r9d
+    andl %r11d, %r9d
     leaq 1(%r9, %r9, 2), %r9
     shll $8, %r9d
-    andl $1023, %r10d
+    andl %r11d, %r10d
     leaq 2(%r10, %r10, 2), %r10
     shll $8, %r10d
     vmovdqa 0*32(%rsi, %rbp), %xmm4
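
On x86-64, N arrives as the third argument: %rdx in the SysV ABI (hence the new `movq %rdx, %r8` in the `#else` branch), while the Win64 prologue already leaves it in %r8. The hard-coded bound `131072(%rsi)` was 1024 × 128 bytes; the replacements compute `V + ways * N * 128` at run time. A C sketch of that arithmetic (helper name ours):

    #include <stddef.h>
    #include <stdint.h>

    /* scrypt_core:      end = V + N * 128      movq %r8,%rcx; shlq $7
     * scrypt_core_3way: end = V + 3*N * 128    leaq (%r8,%r8,2); shlq $7
     * scrypt_core_6way: end = V + 6*N * 128    leaq (%r8,%r8,2); shlq $8 */
    static uint32_t *scratch_end(uint32_t *V, size_t N, size_t ways)
    {
        return (uint32_t *)((unsigned char *)V + ways * N * 128);
    }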

scrypt-x86.S (23 lines changed)

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2012 pooler@litecoinpool.org
+ * Copyright 2011-2012, 2014 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -415,6 +415,7 @@ _scrypt_core:
 scrypt_core_gen:
     movl 20(%esp), %edi
     movl 24(%esp), %esi
+    movl 28(%esp), %ecx
     subl $72, %esp
 
 .macro scrypt_core_macro1a p, q
@@ -453,7 +454,8 @@ scrypt_core_gen:
     movl %eax, \q(%edi)
 .endm
 
-    leal 131072(%esi), %ecx
+    shll $7, %ecx
+    addl %esi, %ecx
 scrypt_core_gen_loop1:
     movl %esi, 64(%esp)
     movl %ecx, 68(%esp)
@@ -522,12 +524,15 @@ scrypt_core_gen_loop1:
     jne scrypt_core_gen_loop1
 
     movl 96(%esp), %esi
-    movl $1024, %ecx
+    movl 100(%esp), %ecx
+    movl %ecx, %eax
+    subl $1, %eax
+    movl %eax, 100(%esp)
 scrypt_core_gen_loop2:
     movl %ecx, 68(%esp)
 
     movl 64(%edi), %edx
-    andl $1023, %edx
+    andl 100(%esp), %edx
     shll $7, %edx
 
     scrypt_core_macro1b 0, 64
@@ -694,7 +699,9 @@ scrypt_core_sse2:
     movdqa 112(%esp), %xmm7
 
     movl %esi, %edx
-    leal 131072(%esi), %ecx
+    movl 28(%ebp), %ecx
+    shll $7, %ecx
+    addl %esi, %ecx
 scrypt_core_sse2_loop1:
     movdqa 0(%esp), %xmm0
     movdqa 16(%esp), %xmm1
@@ -748,14 +755,16 @@ scrypt_core_sse2_loop1:
     movdqa 64(%esp), %xmm4
     movdqa 80(%esp), %xmm5
 
-    movl $1024, %ecx
+    movl 28(%ebp), %ecx
+    movl %ecx, %eax
+    subl $1, %eax
 scrypt_core_sse2_loop2:
     movd %xmm4, %edx
     movdqa 0(%esp), %xmm0
     movdqa 16(%esp), %xmm1
     movdqa 32(%esp), %xmm2
     movdqa 48(%esp), %xmm3
-    andl $1023, %edx
+    andl %eax, %edx
     shll $7, %edx
     pxor 0(%esi, %edx), %xmm0
     pxor 16(%esi, %edx), %xmm1
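
On i386 there is no call-clobbered register to spare for N across the Salsa20/8 rounds, so the generic path loads it from the stack argument (28(%esp), which after the `subl $72, %esp` adjustment is re-read as 100(%esp), where N - 1 is also spilled for the second loop), while the SSE2 path re-reads 28(%ebp) and keeps the N - 1 mask in %eax.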

scrypt.c (78 lines changed)

@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2014 pooler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -383,30 +383,30 @@ static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
 #define SCRYPT_MAX_WAYS 12
 #define HAVE_SCRYPT_3WAY 1
 int scrypt_best_throughput();
-void scrypt_core(uint32_t *X, uint32_t *V);
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #if defined(USE_AVX2)
 #undef SCRYPT_MAX_WAYS
 #define SCRYPT_MAX_WAYS 24
 #define HAVE_SCRYPT_6WAY 1
-void scrypt_core_6way(uint32_t *X, uint32_t *V);
+void scrypt_core_6way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #elif defined(USE_ASM) && defined(__i386__)
 
 #define SCRYPT_MAX_WAYS 4
 #define scrypt_best_throughput() 1
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 
 #elif defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
 
-void scrypt_core(uint32_t *X, uint32_t *V);
+void scrypt_core(uint32_t *X, uint32_t *V, int N);
 #if defined(__ARM_NEON__)
 #undef HAVE_SHA256_4WAY
 #define SCRYPT_MAX_WAYS 3
 #define HAVE_SCRYPT_3WAY 1
 #define scrypt_best_throughput() 3
-void scrypt_core_3way(uint32_t *X, uint32_t *V);
+void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
 #endif
 
 #else
@@ -479,17 +479,17 @@ static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
     B[15] += x15;
 }
 
-static inline void scrypt_core(uint32_t *X, uint32_t *V)
+static inline void scrypt_core(uint32_t *X, uint32_t *V, int N)
 {
     uint32_t i, j, k;
 
-    for (i = 0; i < 1024; i++) {
+    for (i = 0; i < N; i++) {
         memcpy(&V[i * 32], X, 128);
         xor_salsa8(&X[0], &X[16]);
         xor_salsa8(&X[16], &X[0]);
     }
-    for (i = 0; i < 1024; i++) {
-        j = 32 * (X[16] & 1023);
+    for (i = 0; i < N; i++) {
+        j = 32 * (X[16] & (N - 1));
         for (k = 0; k < 32; k++)
             X[k] ^= V[j + k];
         xor_salsa8(&X[0], &X[16]);
@@ -504,15 +504,13 @@ static inline void scrypt_core(uint32_t *X, uint32_t *V)
 #define scrypt_best_throughput() 1
 #endif
 
-#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)
-
-unsigned char *scrypt_buffer_alloc()
+unsigned char *scrypt_buffer_alloc(int N)
 {
-    return malloc(SCRYPT_BUFFER_SIZE);
+    return malloc((size_t)N * SCRYPT_MAX_WAYS * 128 + 63);
 }
 
 static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
-    uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[8], ostate[8];
     uint32_t X[32];
@@ -524,14 +522,14 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
     HMAC_SHA256_80_init(input, tstate, ostate);
     PBKDF2_SHA256_80_128(tstate, ostate, input, X);
 
-    scrypt_core(X, V);
+    scrypt_core(X, V, N);
 
     PBKDF2_SHA256_128_32(tstate, ostate, X, output);
 }
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_4way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[4 * 8] __attribute__((aligned(128)));
     uint32_t ostate[4 * 8] __attribute__((aligned(128)));
@@ -553,10 +551,10 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
     for (i = 0; i < 32; i++)
         for (k = 0; k < 4; k++)
             X[k * 32 + i] = W[4 * i + k];
-    scrypt_core(X + 0 * 32, V);
-    scrypt_core(X + 1 * 32, V);
-    scrypt_core(X + 2 * 32, V);
-    scrypt_core(X + 3 * 32, V);
+    scrypt_core(X + 0 * 32, V, N);
+    scrypt_core(X + 1 * 32, V, N);
+    scrypt_core(X + 2 * 32, V, N);
+    scrypt_core(X + 3 * 32, V, N);
     for (i = 0; i < 32; i++)
         for (k = 0; k < 4; k++)
             W[4 * i + k] = X[k * 32 + i];
@@ -570,7 +568,7 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
 #ifdef HAVE_SCRYPT_3WAY
 
 static void scrypt_1024_1_1_256_3way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[3 * 8], ostate[3 * 8];
     uint32_t X[3 * 32] __attribute__((aligned(64)));
@@ -588,7 +586,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
     PBKDF2_SHA256_80_128(tstate + 8, ostate + 8, input + 20, X + 32);
     PBKDF2_SHA256_80_128(tstate + 16, ostate + 16, input + 40, X + 64);
 
-    scrypt_core_3way(X, V);
+    scrypt_core_3way(X, V, N);
 
     PBKDF2_SHA256_128_32(tstate + 0, ostate + 0, X + 0, output + 0);
     PBKDF2_SHA256_128_32(tstate + 8, ostate + 8, X + 32, output + 8);
@@ -597,7 +595,7 @@ static void scrypt_1024_1_1_256_3way(const uint32_t *input,
 
 #ifdef HAVE_SHA256_4WAY
 static void scrypt_1024_1_1_256_12way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[12 * 8] __attribute__((aligned(128)));
     uint32_t ostate[12 * 8] __attribute__((aligned(128)));
@@ -626,10 +624,10 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
     for (i = 0; i < 32; i++)
         for (k = 0; k < 4; k++)
             X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
-    scrypt_core_3way(X + 0 * 96, V);
-    scrypt_core_3way(X + 1 * 96, V);
-    scrypt_core_3way(X + 2 * 96, V);
-    scrypt_core_3way(X + 3 * 96, V);
+    scrypt_core_3way(X + 0 * 96, V, N);
+    scrypt_core_3way(X + 1 * 96, V, N);
+    scrypt_core_3way(X + 2 * 96, V, N);
+    scrypt_core_3way(X + 3 * 96, V, N);
     for (j = 0; j < 3; j++)
         for (i = 0; i < 32; i++)
             for (k = 0; k < 4; k++)
@@ -648,7 +646,7 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
 
 #ifdef HAVE_SCRYPT_6WAY
 static void scrypt_1024_1_1_256_24way(const uint32_t *input,
-    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
+    uint32_t *output, uint32_t *midstate, unsigned char *scratchpad, int N)
 {
     uint32_t tstate[24 * 8] __attribute__((aligned(128)));
     uint32_t ostate[24 * 8] __attribute__((aligned(128)));
@@ -677,10 +675,10 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
     for (i = 0; i < 32; i++)
         for (k = 0; k < 8; k++)
             X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
-    scrypt_core_6way(X + 0 * 32, V);
-    scrypt_core_6way(X + 6 * 32, V);
-    scrypt_core_6way(X + 12 * 32, V);
-    scrypt_core_6way(X + 18 * 32, V);
+    scrypt_core_6way(X + 0 * 32, V, N);
+    scrypt_core_6way(X + 6 * 32, V, N);
+    scrypt_core_6way(X + 12 * 32, V, N);
+    scrypt_core_6way(X + 18 * 32, V, N);
     for (j = 0; j < 3; j++)
         for (i = 0; i < 32; i++)
             for (k = 0; k < 8; k++)
@@ -697,7 +695,7 @@ static void scrypt_1024_1_1_256_24way(const uint32_t *input,
 
 int scanhash_scrypt(int thr_id, uint32_t *pdata,
     unsigned char *scratchbuf, const uint32_t *ptarget,
-    uint32_t max_nonce, unsigned long *hashes_done)
+    uint32_t max_nonce, unsigned long *hashes_done, int N)
 {
     uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
     uint32_t midstate[8];
@@ -723,25 +721,25 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
 
 #if defined(HAVE_SHA256_4WAY)
         if (throughput == 4)
-            scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf, N);
         else
 #endif
 #if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
         if (throughput == 12)
-            scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf, N);
         else
#endif
 #if defined(HAVE_SCRYPT_6WAY)
         if (throughput == 24)
-            scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf, N);
         else
 #endif
 #if defined(HAVE_SCRYPT_3WAY)
         if (throughput == 3)
-            scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf, N);
         else
 #endif
-            scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
+            scrypt_1024_1_1_256(data, hash, midstate, scratchbuf, N);
 
         for (i = 0; i < throughput; i++) {
             if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
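
The new size expression is equivalent to the old one at the default: N × SCRYPT_MAX_WAYS × 128 + 63 equals SCRYPT_MAX_WAYS × 131072 + 63 when N = 1024. Each scrypt lane needs N rows of 128 bytes, and the extra 63 bytes leave room for the caller to round the pointer up to a 64-byte boundary, roughly as in this sketch (a hypothetical helper, not verbatim from the file):

    #include <stdint.h>
    #include <stdlib.h>

    /* Mirrors scrypt_buffer_alloc() plus the alignment step done by its
     * caller; the raw pointer must be kept separately if the buffer is
     * ever to be freed. */
    static uint32_t *aligned_scratchpad(int N, int max_ways)
    {
        unsigned char *buf = malloc((size_t)N * max_ways * 128 + 63);
        if (!buf)
            return NULL;
        return (uint32_t *)(((uintptr_t)buf + 63) & ~(uintptr_t)63);
    }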