/*
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2012 pooler
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* This file was originally written by Colin Percival as part of the Tarsnap
* online backup system.
*/
#include "cpuminer-config.h"
#include "miner.h"
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
/*
 * SHA-256 padding for the tail of an 80-byte message: the 0x80 terminator
 * word, zero fill, and the message length in bits (0x280 = 640 bits =
 * 80 bytes).  Copied behind the last four key words in
 * HMAC_SHA256_80_init().
 */
static const uint32_t keypad[12] = {
	0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000280
};
/*
 * SHA-256 padding for the inner hash of PBKDF2_SHA256_80_128(): terminator
 * word, zero fill, and a bit length of 0x4a0 (1184 bits = 148 bytes, i.e.
 * one 64-byte pad block + 80-byte salt + 4-byte block counter).
 * Unlisted elements are zero.
 */
static const uint32_t innerpad[11] = {
	[0]  = 0x80000000,
	[10] = 0x000004a0
};
/*
 * SHA-256 padding for the outer HMAC hash: terminator word, zero fill, and
 * a bit length of 0x300 (768 bits = 96 bytes, i.e. one 64-byte pad block +
 * the 32-byte inner digest).  Unlisted elements are zero.
 */
static const uint32_t outerpad[8] = {
	[0] = 0x80000000,
	[7] = 0x00000300
};
/*
 * Last inner-hash block used by PBKDF2_SHA256_128_32(): the block counter
 * (1), the SHA-256 terminator word, zero fill, and a bit length of 0x620
 * (1568 bits = 196 bytes, i.e. one 64-byte pad block + 128-byte salt +
 * 4-byte counter).  Unlisted elements are zero.
 */
static const uint32_t finalblk[16] = {
	[0]  = 0x00000001,
	[1]  = 0x80000000,
	[15] = 0x00000620
};
static inline void HMAC_SHA256_80_init(const uint32_t *key,
uint32_t *tstate, uint32_t *ostate)
2011-09-20 12:20:26 +02:00
{
uint32_t ihash[8];
uint32_t pad[16];
int i;
2011-09-20 12:20:26 +02:00
/* tstate is assumed to contain the midstate of key */
memcpy(pad, key + 16, 16);
memcpy(pad + 4, keypad, 48);
2012-03-21 23:07:56 +01:00
sha256_transform(tstate, pad, 0);
memcpy(ihash, tstate, 32);
2012-03-12 13:32:11 +01:00
sha256_init(ostate);
2011-12-24 12:22:06 +01:00
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x5c5c5c5c;
for (; i < 16; i++)
pad[i] = 0x5c5c5c5c;
2012-03-12 13:32:11 +01:00
sha256_transform(ostate, pad, 0);
2011-12-24 12:22:06 +01:00
2012-03-12 13:32:11 +01:00
sha256_init(tstate);
for (i = 0; i < 8; i++)
pad[i] = ihash[i] ^ 0x36363636;
for (; i < 16; i++)
pad[i] = 0x36363636;
2012-03-12 13:32:11 +01:00
sha256_transform(tstate, pad, 0);
2011-12-24 12:22:06 +01:00
}
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
2011-12-24 12:22:06 +01:00
{
uint32_t istate[8], ostate2[8];
uint32_t ibuf[16], obuf[16];
2012-03-12 13:32:11 +01:00
int i, j;
2011-12-24 12:22:06 +01:00
memcpy(istate, tstate, 32);
2012-03-21 23:07:56 +01:00
sha256_transform(istate, salt, 0);
2012-03-21 23:07:56 +01:00
memcpy(ibuf, salt + 16, 16);
memcpy(ibuf + 5, innerpad, 44);
memcpy(obuf + 8, outerpad, 32);
for (i = 0; i < 4; i++) {
memcpy(obuf, istate, 32);
ibuf[4] = i + 1;
2012-03-12 13:32:11 +01:00
sha256_transform(obuf, ibuf, 0);
memcpy(ostate2, ostate, 32);
2012-03-12 13:32:11 +01:00
sha256_transform(ostate2, obuf, 0);
for (j = 0; j < 8; j++)
output[8 * i + j] = swab32(ostate2[j]);
}
2011-09-20 12:20:26 +02:00
}
static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
const uint32_t *salt, uint32_t *output)
2011-09-20 12:20:26 +02:00
{
uint32_t buf[16];
2012-03-12 13:32:11 +01:00
int i;
2012-03-12 13:32:11 +01:00
sha256_transform(tstate, salt, 1);
sha256_transform(tstate, salt + 16, 1);
sha256_transform(tstate, finalblk, 0);
memcpy(buf, tstate, 32);
memcpy(buf + 8, outerpad, 32);
2011-09-20 12:20:26 +02:00
2012-03-12 13:32:11 +01:00
sha256_transform(ostate, buf, 0);
for (i = 0; i < 8; i++)
output[i] = swab32(ostate[i]);
2011-09-20 12:20:26 +02:00
}
2012-03-23 16:35:21 +01:00
#ifdef HAVE_SHA256_4WAY
/*
 * 4-lane interleaved padding for the tail of an 80-byte message: each of
 * the 12 padding words is repeated once per lane.  Word 0 is the 0x80
 * terminator, word 11 the bit length (0x280 = 640 bits); the words in
 * between are zero.
 */
static const uint32_t keypad_4way[4 * 12] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 11] = 0x00000280, 0x00000280, 0x00000280, 0x00000280
};
/*
 * 4-lane interleaved inner-hash padding: each of the 11 padding words is
 * repeated once per lane.  Word 0 is the 0x80 terminator, word 10 the bit
 * length (0x4a0 = 1184 bits); the words in between are zero.
 */
static const uint32_t innerpad_4way[4 * 11] = {
	[4 * 0]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 10] = 0x000004a0, 0x000004a0, 0x000004a0, 0x000004a0
};
/*
 * 4-lane interleaved outer-hash padding: each of the 8 padding words is
 * repeated once per lane.  Word 0 is the 0x80 terminator, word 7 the bit
 * length (0x300 = 768 bits); the words in between are zero.
 */
static const uint32_t outerpad_4way[4 * 8] = {
	[4 * 0] = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 7] = 0x00000300, 0x00000300, 0x00000300, 0x00000300
};
/*
 * 4-lane interleaved final inner-hash block for
 * PBKDF2_SHA256_128_32_4way(): each of the 16 words is repeated once per
 * lane.  Word 0 is the block counter (1), word 1 the 0x80 terminator,
 * word 15 the bit length (0x620 = 1568 bits); the rest are zero.
 * 16-byte aligned because it is passed directly to
 * sha256_transform_4way().
 */
static const uint32_t finalblk_4way[4 * 16] __attribute__((aligned(16))) = {
	[4 * 0]  = 0x00000001, 0x00000001, 0x00000001, 0x00000001,
	[4 * 1]  = 0x80000000, 0x80000000, 0x80000000, 0x80000000,
	[4 * 15] = 0x00000620, 0x00000620, 0x00000620, 0x00000620
};
/*
 * 4-lane version of the HMAC-SHA256 state setup for 80-byte keys.
 * All buffers hold four lanes interleaved word by word (word w of lane l
 * is at index 4 * w + l).
 *
 * key:    4 x 20 interleaved key words; only words 16..19 are read here.
 * tstate: on entry, the per-lane midstate of the first 64 key bytes;
 *         on return, the per-lane inner state (0x36-padded key block).
 * ostate: on return, the per-lane outer state (0x5c-padded key block).
 */
static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
	uint32_t *tstate, uint32_t *ostate)
{
	uint32_t ihash[4 * 8] __attribute__((aligned(16)));
	uint32_t pad[4 * 16] __attribute__((aligned(16)));
	int i;

	/* tstate is assumed to contain the midstate of key */
	memcpy(pad, key + 4 * 16, 4 * 16);
	memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
	sha256_transform_4way(tstate, pad, 0);
	memcpy(ihash, tstate, 4 * 32);

	/* Outer state: hashed key XOR 0x5c, padded with 0x5c to a full block. */
	sha256_init_4way(ostate);
	for (i = 0; i < 4 * 8; i++)
		pad[i] = ihash[i] ^ 0x5c5c5c5c;
	for (; i < 4 * 16; i++)
		pad[i] = 0x5c5c5c5c;
	sha256_transform_4way(ostate, pad, 0);

	/* Inner state: hashed key XOR 0x36, padded with 0x36 to a full block. */
	sha256_init_4way(tstate);
	for (i = 0; i < 4 * 8; i++)
		pad[i] = ihash[i] ^ 0x36363636;
	for (; i < 4 * 16; i++)
		pad[i] = 0x36363636;
	sha256_transform_4way(tstate, pad, 0);
}
/*
 * 4-lane single-iteration PBKDF2-HMAC-SHA256 with an 80-byte salt and a
 * 128-byte result per lane.  All buffers are lane-interleaved (word w of
 * lane l at index 4 * w + l).
 *
 * tstate: per-lane HMAC inner state (not modified).
 * ostate: per-lane HMAC outer state (not modified).
 * salt:   4 x 20 interleaved salt words.
 * output: receives 4 x 32 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first write to output, so callers in
 * this file pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
	const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t istate[4 * 8] __attribute__((aligned(16)));
	uint32_t ostate2[4 * 8] __attribute__((aligned(16)));
	uint32_t ibuf[4 * 16] __attribute__((aligned(16)));
	uint32_t obuf[4 * 16] __attribute__((aligned(16)));
	int i, j;

	/* Absorb the first 64 salt bytes once; they are shared by all blocks. */
	memcpy(istate, tstate, 4 * 32);
	sha256_transform_4way(istate, salt, 0);

	/* ibuf = last 16 salt bytes | block counter (set per block) | padding */
	memcpy(ibuf, salt + 4 * 16, 4 * 16);
	memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
	/* obuf = inner digest (set per block) | padding */
	memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);

	for (i = 0; i < 4; i++) {
		memcpy(obuf, istate, 4 * 32);
		/* 1-based block counter, replicated into every lane */
		ibuf[4 * 4 + 0] = i + 1;
		ibuf[4 * 4 + 1] = i + 1;
		ibuf[4 * 4 + 2] = i + 1;
		ibuf[4 * 4 + 3] = i + 1;
		sha256_transform_4way(obuf, ibuf, 0);

		memcpy(ostate2, ostate, 4 * 32);
		sha256_transform_4way(ostate2, obuf, 0);
		for (j = 0; j < 4 * 8; j++)
			output[4 * 8 * i + j] = swab32(ostate2[j]);
	}
}
/*
 * 4-lane single-iteration PBKDF2-HMAC-SHA256 with a 128-byte salt and a
 * 32-byte result per lane.  All buffers are lane-interleaved (word w of
 * lane l at index 4 * w + l).
 *
 * tstate: per-lane HMAC inner state; overwritten while hashing.
 * ostate: per-lane HMAC outer state; overwritten with the un-swapped result.
 * salt:   4 x 32 interleaved salt words.
 * output: receives 4 x 8 interleaved words, each passed through swab32().
 *
 * salt is fully consumed before the first write to output, so callers in
 * this file pass the same buffer for both.
 */
static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
	uint32_t *ostate, const uint32_t *salt, uint32_t *output)
{
	uint32_t buf[4 * 16] __attribute__((aligned(16)));
	int i;

	sha256_transform_4way(tstate, salt, 1);
	sha256_transform_4way(tstate, salt + 4 * 16, 1);
	sha256_transform_4way(tstate, finalblk_4way, 0);

	/* buf = inner digest | outer padding */
	memcpy(buf, tstate, 4 * 32);
	memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);

	sha256_transform_4way(ostate, buf, 0);
	for (i = 0; i < 4 * 8; i++)
		output[i] = swab32(ostate[i]);
}
#endif /* HAVE_SHA256_4WAY */
/*
 * Per-architecture selection of the scrypt core.
 *
 * SCRYPT_MAX_WAYS sizes the per-thread data/hash arrays and the scratch
 * buffer; scrypt_best_throughput() gives the number of hashes the core
 * processes per call (before any 4-way SHA-256 multiplier).  On the
 * architectures listed here scrypt_core()/scrypt_core_3way() are only
 * declared, not defined, in this file.
 */
#if defined(__x86_64__)

#define SCRYPT_MAX_WAYS 12
#define HAVE_SCRYPT_3WAY 1
int scrypt_best_throughput();
void scrypt_core(uint32_t *X, uint32_t *V);
void scrypt_core_3way(uint32_t *X, uint32_t *V);

#elif defined(__i386__)

#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V);

#elif defined(__arm__) && defined(__APCS_32__)

void scrypt_core(uint32_t *X, uint32_t *V);
#if defined(__ARM_NEON__)
/* With NEON the 3-way core is used and the 4-way SHA-256 path is disabled. */
#undef HAVE_SHA256_4WAY
#define SCRYPT_MAX_WAYS 3
#define HAVE_SCRYPT_3WAY 1
#define scrypt_best_throughput() 3
void scrypt_core_3way(uint32_t *X, uint32_t *V);
#endif

#else
/*
 * B ^= Bx, then B += Salsa20/8(B): XOR the 16-word block Bx into B, run
 * eight Salsa20 rounds (four column/row double rounds) on a copy of the
 * result, and add that copy back into B word by word.
 *
 * B:  16-word block, updated in place.
 * Bx: 16-word block, read only.
 */
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
{
	uint32_t x00,x01,x02,x03,x04,x05,x06,x07,x08,x09,x10,x11,x12,x13,x14,x15;
	int i;

	x00 = (B[ 0] ^= Bx[ 0]);
	x01 = (B[ 1] ^= Bx[ 1]);
	x02 = (B[ 2] ^= Bx[ 2]);
	x03 = (B[ 3] ^= Bx[ 3]);
	x04 = (B[ 4] ^= Bx[ 4]);
	x05 = (B[ 5] ^= Bx[ 5]);
	x06 = (B[ 6] ^= Bx[ 6]);
	x07 = (B[ 7] ^= Bx[ 7]);
	x08 = (B[ 8] ^= Bx[ 8]);
	x09 = (B[ 9] ^= Bx[ 9]);
	x10 = (B[10] ^= Bx[10]);
	x11 = (B[11] ^= Bx[11]);
	x12 = (B[12] ^= Bx[12]);
	x13 = (B[13] ^= Bx[13]);
	x14 = (B[14] ^= Bx[14]);
	x15 = (B[15] ^= Bx[15]);

/* 32-bit left rotate; b is always a constant in 1..31 here. */
#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))
	for (i = 0; i < 8; i += 2) {
		/* Operate on columns. */
		x04 ^= R(x00+x12, 7);	x09 ^= R(x05+x01, 7);
		x14 ^= R(x10+x06, 7);	x03 ^= R(x15+x11, 7);

		x08 ^= R(x04+x00, 9);	x13 ^= R(x09+x05, 9);
		x02 ^= R(x14+x10, 9);	x07 ^= R(x03+x15, 9);

		x12 ^= R(x08+x04,13);	x01 ^= R(x13+x09,13);
		x06 ^= R(x02+x14,13);	x11 ^= R(x07+x03,13);

		x00 ^= R(x12+x08,18);	x05 ^= R(x01+x13,18);
		x10 ^= R(x06+x02,18);	x15 ^= R(x11+x07,18);

		/* Operate on rows. */
		x01 ^= R(x00+x03, 7);	x06 ^= R(x05+x04, 7);
		x11 ^= R(x10+x09, 7);	x12 ^= R(x15+x14, 7);

		x02 ^= R(x01+x00, 9);	x07 ^= R(x06+x05, 9);
		x08 ^= R(x11+x10, 9);	x13 ^= R(x12+x15, 9);

		x03 ^= R(x02+x01,13);	x04 ^= R(x07+x06,13);
		x09 ^= R(x08+x11,13);	x14 ^= R(x13+x12,13);

		x00 ^= R(x03+x02,18);	x05 ^= R(x04+x07,18);
		x10 ^= R(x09+x08,18);	x15 ^= R(x14+x13,18);
	}
#undef R

	/* Feed-forward: add the permuted words onto the XORed input. */
	B[ 0] += x00;
	B[ 1] += x01;
	B[ 2] += x02;
	B[ 3] += x03;
	B[ 4] += x04;
	B[ 5] += x05;
	B[ 6] += x06;
	B[ 7] += x07;
	B[ 8] += x08;
	B[ 9] += x09;
	B[10] += x10;
	B[11] += x11;
	B[12] += x12;
	B[13] += x13;
	B[14] += x14;
	B[15] += x15;
}
/*
 * Portable scrypt core for one 128-byte state with 1024 scratch entries.
 *
 * X: 32-word state, transformed in place.
 * V: scratch area of at least 1024 * 32 words; fully overwritten.
 *
 * Phase 1 stores each successive state in V while mixing X; phase 2
 * mixes X with entries of V selected by the low 10 bits of X[16].
 */
static inline void scrypt_core(uint32_t *X, uint32_t *V)
{
	uint32_t i, j, k;

	/* Phase 1: fill V with the sequence of states. */
	for (i = 0; i < 1024; i++) {
		memcpy(&V[i * 32], X, 128);
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}

	/* Phase 2: data-dependent reads back from V. */
	for (i = 0; i < 1024; i++) {
		j = 32 * (X[16] & 1023);
		for (k = 0; k < 32; k++)
			X[k] ^= V[j + k];
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
}
#endif
/* Defaults when no architecture-specific configuration was selected. */
#ifndef SCRYPT_MAX_WAYS
#define SCRYPT_MAX_WAYS 1
#define scrypt_best_throughput() 1
#endif

/*
 * Scratch buffer size in bytes: 131072 (1024 entries of 128 bytes) per
 * way, plus 63 bytes of slack so users can round the start address up to
 * a 64-byte boundary.
 */
#define SCRYPT_BUFFER_SIZE (SCRYPT_MAX_WAYS * 131072 + 63)
unsigned char *scrypt_buffer_alloc()
{
2011-12-24 12:22:06 +01:00
return malloc(SCRYPT_BUFFER_SIZE);
}
/*
 * Compute one scrypt hash of an 80-byte input, 32 bytes out.
 *
 * input:      20 words of input; used as both HMAC key and PBKDF2 salt.
 * output:     receives 8 words.
 * midstate:   SHA-256 midstate of the first 64 input bytes (not modified).
 * scratchpad: scratch memory; the working area starts at the next
 *             64-byte boundary and needs 128 KiB from there.
 */
static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
	uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[8], ostate[8];
	uint32_t X[32];
	uint32_t *V;

	/* Round the scratchpad address up to a multiple of 64. */
	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	memcpy(tstate, midstate, 32);
	HMAC_SHA256_80_init(input, tstate, ostate);
	PBKDF2_SHA256_80_128(tstate, ostate, input, X);

	scrypt_core(X, V);

	PBKDF2_SHA256_128_32(tstate, ostate, X, output);
}
#ifdef HAVE_SHA256_4WAY
/*
 * Compute four scrypt hashes, using the 4-way SHA-256 routines for the
 * PBKDF2 stages and four sequential scrypt_core() calls in between.
 *
 * input:      4 x 20 words, one 80-byte input after another.
 * output:     receives 4 x 8 words, one hash after another.
 * midstate:   SHA-256 midstate shared by all four inputs (not modified).
 * scratchpad: scratch memory; the working area starts at the next
 *             64-byte boundary and is reused by each scrypt_core() call.
 */
static void scrypt_1024_1_1_256_4way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[4 * 8] __attribute__((aligned(128)));
	uint32_t ostate[4 * 8] __attribute__((aligned(128)));
	uint32_t W[4 * 32] __attribute__((aligned(128)));
	uint32_t X[4 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int i;

	/* Round the scratchpad address up to a multiple of 64. */
	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave the four inputs word by word for the 4-way SHA-256. */
	for (i = 0; i < 20; i++) {
		W[4 * i + 0] = input[0 * 20 + i];
		W[4 * i + 1] = input[1 * 20 + i];
		W[4 * i + 2] = input[2 * 20 + i];
		W[4 * i + 3] = input[3 * 20 + i];
	}
	/* Every lane starts from the same midstate. */
	for (i = 0; i < 8; i++) {
		tstate[4 * i + 0] = midstate[i];
		tstate[4 * i + 1] = midstate[i];
		tstate[4 * i + 2] = midstate[i];
		tstate[4 * i + 3] = midstate[i];
	}

	HMAC_SHA256_80_init_4way(W, tstate, ostate);
	PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);

	/* De-interleave into four contiguous 32-word states. */
	for (i = 0; i < 32; i++) {
		X[0 * 32 + i] = W[4 * i + 0];
		X[1 * 32 + i] = W[4 * i + 1];
		X[2 * 32 + i] = W[4 * i + 2];
		X[3 * 32 + i] = W[4 * i + 3];
	}

	scrypt_core(X + 0 * 32, V);
	scrypt_core(X + 1 * 32, V);
	scrypt_core(X + 2 * 32, V);
	scrypt_core(X + 3 * 32, V);

	/* Re-interleave for the final 4-way PBKDF2. */
	for (i = 0; i < 32; i++) {
		W[4 * i + 0] = X[0 * 32 + i];
		W[4 * i + 1] = X[1 * 32 + i];
		W[4 * i + 2] = X[2 * 32 + i];
		W[4 * i + 3] = X[3 * 32 + i];
	}

	PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);

	/* De-interleave the four 8-word results. */
	for (i = 0; i < 8; i++) {
		output[0 * 8 + i] = W[4 * i + 0];
		output[1 * 8 + i] = W[4 * i + 1];
		output[2 * 8 + i] = W[4 * i + 2];
		output[3 * 8 + i] = W[4 * i + 3];
	}
}
#endif /* HAVE_SHA256_4WAY */
#ifdef HAVE_SCRYPT_3WAY
/*
 * Compute three scrypt hashes with the scalar SHA-256 helpers and a
 * single scrypt_core_3way() call.
 *
 * input:      3 x 20 words, one 80-byte input after another.
 * output:     receives 3 x 8 words, one hash after another.
 * midstate:   SHA-256 midstate shared by all three inputs (not modified).
 * scratchpad: scratch memory; the working area starts at the next
 *             64-byte boundary.
 */
static void scrypt_1024_1_1_256_3way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[3 * 8], ostate[3 * 8];
	uint32_t X[3 * 32] __attribute__((aligned(64)));
	uint32_t *V;
	int lane;

	/* Round the scratchpad address up to a multiple of 64. */
	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Each stage is completed for all lanes before the next one starts. */
	for (lane = 0; lane < 3; lane++)
		memcpy(tstate + 8 * lane, midstate, 32);

	for (lane = 0; lane < 3; lane++)
		HMAC_SHA256_80_init(input + 20 * lane,
			tstate + 8 * lane, ostate + 8 * lane);

	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_80_128(tstate + 8 * lane, ostate + 8 * lane,
			input + 20 * lane, X + 32 * lane);

	scrypt_core_3way(X, V);

	for (lane = 0; lane < 3; lane++)
		PBKDF2_SHA256_128_32(tstate + 8 * lane, ostate + 8 * lane,
			X + 32 * lane, output + 8 * lane);
}
#ifdef HAVE_SHA256_4WAY
/*
 * Compute twelve scrypt hashes: three groups of four lanes go through the
 * 4-way SHA-256 PBKDF2 stages, and the twelve resulting states go through
 * four scrypt_core_3way() calls.
 *
 * input:      12 x 20 words, one 80-byte input after another.
 * output:     receives 12 x 8 words, one hash after another.
 * midstate:   SHA-256 midstate shared by all inputs (not modified).
 * scratchpad: scratch memory; the working area starts at the next
 *             64-byte boundary and is reused by each core call.
 */
static void scrypt_1024_1_1_256_12way(const uint32_t *input,
	uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
{
	uint32_t tstate[12 * 8] __attribute__((aligned(128)));
	uint32_t ostate[12 * 8] __attribute__((aligned(128)));
	uint32_t W[12 * 32] __attribute__((aligned(128)));
	uint32_t X[12 * 32] __attribute__((aligned(128)));
	uint32_t *V;
	int i, j;

	/* Round the scratchpad address up to a multiple of 64. */
	V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

	/* Interleave each group of four inputs word by word. */
	for (j = 0; j < 3; j++) {
		for (i = 0; i < 20; i++) {
			W[128 * j + 4 * i + 0] = input[80 * j + 0 * 20 + i];
			W[128 * j + 4 * i + 1] = input[80 * j + 1 * 20 + i];
			W[128 * j + 4 * i + 2] = input[80 * j + 2 * 20 + i];
			W[128 * j + 4 * i + 3] = input[80 * j + 3 * 20 + i];
		}
	}
	/* Every lane of every group starts from the same midstate. */
	for (j = 0; j < 3; j++) {
		for (i = 0; i < 8; i++) {
			tstate[32 * j + 4 * i + 0] = midstate[i];
			tstate[32 * j + 4 * i + 1] = midstate[i];
			tstate[32 * j + 4 * i + 2] = midstate[i];
			tstate[32 * j + 4 * i + 3] = midstate[i];
		}
	}

	for (j = 0; j < 3; j++)
		HMAC_SHA256_80_init_4way(W + 128 * j,
			tstate + 32 * j, ostate + 32 * j);
	for (j = 0; j < 3; j++)
		PBKDF2_SHA256_80_128_4way(tstate + 32 * j, ostate + 32 * j,
			W + 128 * j, W + 128 * j);

	/* De-interleave into twelve contiguous 32-word states. */
	for (j = 0; j < 3; j++) {
		for (i = 0; i < 32; i++) {
			X[128 * j + 0 * 32 + i] = W[128 * j + 4 * i + 0];
			X[128 * j + 1 * 32 + i] = W[128 * j + 4 * i + 1];
			X[128 * j + 2 * 32 + i] = W[128 * j + 4 * i + 2];
			X[128 * j + 3 * 32 + i] = W[128 * j + 4 * i + 3];
		}
	}

	/* Twelve states = four batches of three (96 words each). */
	for (j = 0; j < 4; j++)
		scrypt_core_3way(X + j * 96, V);

	/* Re-interleave for the final 4-way PBKDF2. */
	for (j = 0; j < 3; j++) {
		for (i = 0; i < 32; i++) {
			W[128 * j + 4 * i + 0] = X[128 * j + 0 * 32 + i];
			W[128 * j + 4 * i + 1] = X[128 * j + 1 * 32 + i];
			W[128 * j + 4 * i + 2] = X[128 * j + 2 * 32 + i];
			W[128 * j + 4 * i + 3] = X[128 * j + 3 * 32 + i];
		}
	}

	for (j = 0; j < 3; j++)
		PBKDF2_SHA256_128_32_4way(tstate + 32 * j, ostate + 32 * j,
			W + 128 * j, W + 128 * j);

	/* De-interleave the twelve 8-word results. */
	for (j = 0; j < 3; j++) {
		for (i = 0; i < 8; i++) {
			output[32 * j + 0 * 8 + i] = W[128 * j + 4 * i + 0];
			output[32 * j + 1 * 8 + i] = W[128 * j + 4 * i + 1];
			output[32 * j + 2 * 8 + i] = W[128 * j + 4 * i + 2];
			output[32 * j + 3 * 8 + i] = W[128 * j + 4 * i + 3];
		}
	}
}
#endif /* HAVE_SHA256_4WAY */
#endif /* HAVE_SCRYPT_3WAY */
2011-12-24 12:22:06 +01:00
2012-03-10 13:37:33 +01:00
int scanhash_scrypt(int thr_id, uint32_t *pdata,
unsigned char *scratchbuf, const uint32_t *ptarget,
uint32_t max_nonce, unsigned long *hashes_done)
2011-09-20 12:20:26 +02:00
{
uint32_t data[SCRYPT_MAX_WAYS * 20], hash[SCRYPT_MAX_WAYS * 8];
uint32_t midstate[8];
2012-03-10 13:37:33 +01:00
uint32_t n = pdata[19] - 1;
const uint32_t Htarg = ptarget[7];
int throughput = scrypt_best_throughput();
2011-12-29 19:03:18 +01:00
int i;
2011-09-20 12:20:26 +02:00
2012-03-23 16:35:21 +01:00
#ifdef HAVE_SHA256_4WAY
if (sha256_use_4way())
throughput *= 4;
2012-03-23 16:35:21 +01:00
#endif
2012-03-10 13:37:33 +01:00
for (i = 0; i < throughput; i++)
memcpy(data + i * 20, pdata, 80);
2011-09-20 12:20:26 +02:00
2012-03-12 13:32:11 +01:00
sha256_init(midstate);
2012-03-21 23:07:56 +01:00
sha256_transform(midstate, data, 0);
2012-01-21 00:31:51 +01:00
do {
for (i = 0; i < throughput; i++)
2012-03-10 13:37:33 +01:00
data[i * 20 + 19] = ++n;
#if defined(HAVE_SHA256_4WAY)
if (throughput == 4)
scrypt_1024_1_1_256_4way(data, hash, midstate, scratchbuf);
else
#endif
#if defined(HAVE_SCRYPT_3WAY) && defined(HAVE_SHA256_4WAY)
if (throughput == 12)
scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
else
#endif
#if defined(HAVE_SCRYPT_3WAY)
if (throughput == 3)
scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);
else
#endif
2012-03-23 16:35:21 +01:00
scrypt_1024_1_1_256(data, hash, midstate, scratchbuf);
for (i = 0; i < throughput; i++) {
2012-03-10 13:37:33 +01:00
if (hash[i * 8 + 7] <= Htarg && fulltest(hash + i * 8, ptarget)) {
*hashes_done = n - pdata[19] + 1;
pdata[19] = data[i * 20 + 19];
return 1;
2011-12-24 12:22:06 +01:00
}
2011-09-20 12:20:26 +02:00
}
2012-03-10 13:37:33 +01:00
} while (n < max_nonce && !work_restart[thr_id].restart);
2012-01-21 00:31:51 +01:00
2012-03-10 13:37:33 +01:00
*hashes_done = n - pdata[19] + 1;
pdata[19] = n;
return 0;
2011-09-20 12:20:26 +02:00
}