Move SHA-2 code to separate files
This commit is contained in:
parent
40fc3d06f9
commit
b961766f4d
6 changed files with 619 additions and 606 deletions
|
@ -14,7 +14,9 @@ INCLUDES = $(PTHREAD_FLAGS) -fno-strict-aliasing $(JANSSON_INCLUDES)
|
|||
bin_PROGRAMS = minerd
|
||||
|
||||
minerd_SOURCES = elist.h miner.h compat.h \
|
||||
cpu-miner.c util.c scrypt.c scrypt-x86.S scrypt-x64.S
|
||||
cpu-miner.c util.c \
|
||||
sha2.c sha2-x64.S \
|
||||
scrypt.c scrypt-x86.S scrypt-x64.S
|
||||
minerd_LDFLAGS = $(PTHREAD_FLAGS)
|
||||
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@
|
||||
minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@
|
||||
|
|
44
miner.h
44
miner.h
|
@ -36,34 +36,6 @@ void *alloca (size_t);
|
|||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Byte-swap support selection.
 *
 * GCC 4.3 and later: only WANT_BUILTIN_BSWAP is defined here; no bswap_*
 * macros are provided by this block in that case.
 * Otherwise bswap_16/32/64 come from <byteswap.h>, <sys/endian.h>, the
 * Apple OSByteOrder helpers, or the portable fallback macros below.
 *
 * NOTE: the fallback macros evaluate their argument more than once, so
 * arguments must not have side effects.
 */
#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define WANT_BUILTIN_BSWAP
#else
#if HAVE_BYTESWAP_H
#include <byteswap.h>
#elif defined(USE_SYS_ENDIAN_H)
#include <sys/endian.h>
#elif defined(__APPLE__)
#include <libkern/OSByteOrder.h>
#define bswap_16 OSSwapInt16
#define bswap_32 OSSwapInt32
#define bswap_64 OSSwapInt64
#else
/* Both halves are masked so operands wider than 16 bits cannot leak
 * high bits into the result. */
#define bswap_16(value) \
	((uint16_t)((((value) & 0xff) << 8) | (((value) >> 8) & 0xff)))

#define bswap_32(value) \
	(((uint32_t)bswap_16((uint16_t)((value) & 0xffff)) << 16) | \
	(uint32_t)bswap_16((uint16_t)((value) >> 16)))

#define bswap_64(value) \
	(((uint64_t)bswap_32((uint32_t)((value) & 0xffffffff)) \
	<< 32) | \
	(uint64_t)bswap_32((uint32_t)((value) >> 32)))
#endif
#endif /* GCC >= 4.3 */
|
||||
|
||||
#ifdef HAVE_SYSLOG_H
|
||||
#include <syslog.h>
|
||||
#else
|
||||
|
@ -89,6 +61,13 @@ enum {
|
|||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
#endif
|
||||
|
||||
/*
 * Byte-swap support: GCC 4.3 and later only get WANT_BUILTIN_BSWAP;
 * other compilers get a portable bswap_32() macro.
 *
 * The operand is converted to uint32_t before shifting so that narrow or
 * signed arguments are never left-shifted as a signed int.
 * NOTE: the macro evaluates x four times; do not pass expressions with
 * side effects.
 */
#if ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
#define WANT_BUILTIN_BSWAP
#else
#define bswap_32(x) \
	((((uint32_t)(x) << 24) & 0xff000000u) | (((uint32_t)(x) << 8) & 0x00ff0000u) \
	| (((uint32_t)(x) >> 8) & 0x0000ff00u) | (((uint32_t)(x) >> 24) & 0x000000ffu))
#endif
|
||||
|
||||
static inline uint32_t swab32(uint32_t v)
|
||||
{
|
||||
#ifdef WANT_BUILTIN_BSWAP
|
||||
|
@ -130,6 +109,15 @@ static inline void le32enc(void *pp, uint32_t x)
|
|||
p[3] = (x >> 24) & 0xff;
|
||||
}
|
||||
|
||||
/*
 * SHA-256 primitives shared by the hashing code.
 *
 * sha256_init:      state points to 8 words that receive the initial hash.
 * sha256_transform: compresses one 16-word block into state; a non-zero
 *                   swap byte-swaps every block word first.
 */
void sha256_init(uint32_t *state);
void sha256_transform(uint32_t *state, const uint32_t *block, int swap);

/*
 * Four-lane variants, available on x86-64 only (SHA256_4WAY is defined
 * when they exist).  state holds 8 x 4 words and block 16 x 4 words, with
 * the four lanes of each word stored contiguously.
 */
#if defined(__x86_64__)
#define SHA256_4WAY
void sha256_init_4way(uint32_t *state);
void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
#endif
|
||||
|
||||
/* Takes no arguments; declared with (void) so the compiler checks calls.
 * NOTE(review): presumably returns the scratch buffer later passed to
 * scanhash_scrypt() as scratchbuf - confirm against scrypt.c. */
extern unsigned char *scrypt_buffer_alloc(void);
|
||||
extern int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
|
|
390
scrypt-x64.S
390
scrypt-x64.S
|
@ -31,396 +31,6 @@
|
|||
#endif
|
||||
|
||||
#if defined(__x86_64__)
|
||||
.data
|
||||
.p2align 6
|
||||
sha256_4h:
|
||||
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
|
||||
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
|
||||
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
|
||||
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
|
||||
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
|
||||
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
|
||||
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
|
||||
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
|
||||
|
||||
.data
|
||||
.p2align 6
|
||||
sha256_4k:
|
||||
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
|
||||
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
|
||||
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
|
||||
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
|
||||
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
|
||||
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
|
||||
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
|
||||
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
|
||||
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
|
||||
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
|
||||
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
|
||||
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
|
||||
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
|
||||
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
|
||||
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
|
||||
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
|
||||
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
|
||||
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
|
||||
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
|
||||
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
|
||||
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
|
||||
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
|
||||
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
|
||||
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
|
||||
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
|
||||
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
|
||||
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
|
||||
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
|
||||
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
|
||||
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
|
||||
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
|
||||
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
|
||||
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
|
||||
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
|
||||
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
|
||||
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
|
||||
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
|
||||
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
|
||||
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
|
||||
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
|
||||
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
|
||||
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
|
||||
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
|
||||
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
|
||||
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
|
||||
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
|
||||
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
|
||||
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
|
||||
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
|
||||
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
|
||||
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
|
||||
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
|
||||
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
|
||||
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
|
||||
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
|
||||
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
|
||||
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
|
||||
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
|
||||
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
|
||||
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
|
||||
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
|
||||
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
|
||||
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
|
||||
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
|
||||
|
||||
/*
 * void SHA256_InitState_4way(uint32_t *state)
 *
 * Copies the 128-byte sha256_4h table (the eight initial hash words, each
 * replicated across four 32-bit lanes) to the buffer addressed by the
 * first argument.  The destination is written with unaligned stores, so
 * it needs no particular alignment.
 *
 * On WIN64 the first argument arrives in %rcx and %rdi must be preserved
 * for the caller, hence the push/move/pop around the body.
 */
	.text
	.p2align 6
	.globl SHA256_InitState_4way
	.globl _SHA256_InitState_4way
SHA256_InitState_4way:
_SHA256_InitState_4way:
#if defined(WIN64)
	pushq %rdi
	movq %rcx, %rdi
#endif
	/* state words 0..3 */
	movdqa sha256_4h+0(%rip), %xmm0
	movdqa sha256_4h+16(%rip), %xmm1
	movdqa sha256_4h+32(%rip), %xmm2
	movdqa sha256_4h+48(%rip), %xmm3
	movdqu %xmm0, 0(%rdi)
	movdqu %xmm1, 16(%rdi)
	movdqu %xmm2, 32(%rdi)
	movdqu %xmm3, 48(%rdi)
	/* state words 4..7 */
	movdqa sha256_4h+64(%rip), %xmm0
	movdqa sha256_4h+80(%rip), %xmm1
	movdqa sha256_4h+96(%rip), %xmm2
	movdqa sha256_4h+112(%rip), %xmm3
	movdqu %xmm0, 64(%rdi)
	movdqu %xmm1, 80(%rdi)
	movdqu %xmm2, 96(%rdi)
	movdqu %xmm3, 112(%rdi)
#if defined(WIN64)
	popq %rdi
#endif
	ret
|
||||
|
||||
/*
 * p2bswap_rsi_rsp i
 *
 * Loads the 16-byte vectors at index i and i+1 from the buffer at %rsi,
 * reverses the byte order of every 32-bit word in them, and stores the
 * results at the same indices relative to %rsp (aligned stores).
 *
 * The swap is done in two steps: pshuflw/pshufhw with 0xb1 exchange the
 * two 16-bit halves of each 32-bit word, then the shift-by-8 pair and the
 * xor exchange the two bytes inside each 16-bit half.
 * Clobbers %xmm0-%xmm3.
 */
.macro p2bswap_rsi_rsp i
	movdqu \i*16(%rsi), %xmm0
	movdqu (\i+1)*16(%rsi), %xmm2
	pshuflw $0xb1, %xmm0, %xmm0
	pshuflw $0xb1, %xmm2, %xmm2
	pshufhw $0xb1, %xmm0, %xmm0
	pshufhw $0xb1, %xmm2, %xmm2
	movdqa %xmm0, %xmm1
	movdqa %xmm2, %xmm3
	psrlw $8, %xmm1
	psrlw $8, %xmm3
	psllw $8, %xmm0
	psllw $8, %xmm2
	pxor %xmm1, %xmm0
	pxor %xmm3, %xmm2
	movdqa %xmm0, \i*16(%rsp)
	movdqa %xmm2, (\i+1)*16(%rsp)
.endm
|
||||
|
||||
/*
 * void SHA256_Transform_4way(uint32_t *state, const uint32_t *block, int swap)
 *
 * Four-lane SHA-256 compression.  state is 8 x 16 bytes and block is
 * 16 x 16 bytes; each 16-byte vector carries the same word for four
 * independent lanes.  A non-zero swap byte-swaps every block word while it
 * is copied into the message schedule.
 *
 * Register use after the WIN64 prologue: %rdi = state, %rsi = block,
 * %rdx = swap.  The 64-entry message schedule lives at (%rsp).
 * On WIN64, %rdi, %rsi and %xmm6-%xmm11 are saved and restored.
 */
	.text
	.p2align 6
	.globl SHA256_Transform_4way
	.globl _SHA256_Transform_4way
SHA256_Transform_4way:
_SHA256_Transform_4way:
#if defined(WIN64)
	pushq %rdi
	subq $96, %rsp
	movdqa %xmm6, 0(%rsp)
	movdqa %xmm7, 16(%rsp)
	movdqa %xmm8, 32(%rsp)
	movdqa %xmm9, 48(%rsp)
	movdqa %xmm10, 64(%rsp)
	movdqa %xmm11, 80(%rsp)
	pushq %rsi
	movq %rcx, %rdi
	movq %rdx, %rsi
	movq %r8, %rdx
#endif
	/* 64 schedule vectors (1024 bytes) plus 8 bytes; the extra 8
	 * presumably restores 16-byte stack alignment for movdqa. */
	subq $1032, %rsp

	testq %rdx, %rdx
	jz sha256_transform_4way_block_copy

	/* swap != 0: copy the block into W[0..15] with byte-swapped words */
	p2bswap_rsi_rsp 0
	p2bswap_rsi_rsp 2
	p2bswap_rsi_rsp 4
	p2bswap_rsi_rsp 6
	p2bswap_rsi_rsp 8
	p2bswap_rsi_rsp 10
	p2bswap_rsi_rsp 12
	p2bswap_rsi_rsp 14
	jmp sha256_transform_4way_extend

	.p2align 6
sha256_transform_4way_block_copy:
	/* swap == 0: copy the block into W[0..15] unchanged */
	movdqu 0*16(%rsi), %xmm0
	movdqu 1*16(%rsi), %xmm1
	movdqu 2*16(%rsi), %xmm2
	movdqu 3*16(%rsi), %xmm3
	movdqu 4*16(%rsi), %xmm4
	movdqu 5*16(%rsi), %xmm5
	movdqu 6*16(%rsi), %xmm6
	movdqu 7*16(%rsi), %xmm7
	movdqa %xmm0, 0*16(%rsp)
	movdqa %xmm1, 1*16(%rsp)
	movdqa %xmm2, 2*16(%rsp)
	movdqa %xmm3, 3*16(%rsp)
	movdqa %xmm4, 4*16(%rsp)
	movdqa %xmm5, 5*16(%rsp)
	movdqa %xmm6, 6*16(%rsp)
	movdqa %xmm7, 7*16(%rsp)
	movdqu 8*16(%rsi), %xmm0
	movdqu 9*16(%rsi), %xmm1
	movdqu 10*16(%rsi), %xmm2
	movdqu 11*16(%rsi), %xmm3
	movdqu 12*16(%rsi), %xmm4
	movdqu 13*16(%rsi), %xmm5
	movdqu 14*16(%rsi), %xmm6
	movdqu 15*16(%rsi), %xmm7
	movdqa %xmm0, 8*16(%rsp)
	movdqa %xmm1, 9*16(%rsp)
	movdqa %xmm2, 10*16(%rsp)
	movdqa %xmm3, 11*16(%rsp)
	movdqa %xmm4, 12*16(%rsp)
	movdqa %xmm5, 13*16(%rsp)
	movdqa %xmm6, 14*16(%rsp)
	movdqa %xmm7, 15*16(%rsp)

sha256_transform_4way_extend:
	/* Extend the schedule: %rcx walks from W[16] to W[64] (%rax), two
	 * entries per iteration:
	 *   W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16] */
	leaq 256(%rsp), %rcx
	leaq 48*16(%rcx), %rax
sha256_transform_4way_extend_loop:
	/* s0 of W[i-15] (xmm0) and W[i-14] (xmm4):
	 * shifts right by 3, 7, 18 and left by 14, 25, xored together */
	movdqa -15*16(%rcx), %xmm0
	movdqa -14*16(%rcx), %xmm4
	movdqa %xmm0, %xmm2
	movdqa %xmm4, %xmm6
	psrld $3, %xmm0
	psrld $3, %xmm4
	movdqa %xmm0, %xmm1
	movdqa %xmm4, %xmm5
	pslld $14, %xmm2
	pslld $14, %xmm6
	psrld $4, %xmm1
	psrld $4, %xmm5
	pxor %xmm1, %xmm0
	pxor %xmm5, %xmm4
	psrld $11, %xmm1
	psrld $11, %xmm5
	pxor %xmm2, %xmm0
	pxor %xmm6, %xmm4
	pslld $11, %xmm2
	pslld $11, %xmm6
	pxor %xmm1, %xmm0
	pxor %xmm5, %xmm4
	pxor %xmm2, %xmm0
	pxor %xmm6, %xmm4

	movdqa -2*16(%rcx), %xmm3
	movdqa -1*16(%rcx), %xmm7
	paddd -16*16(%rcx), %xmm0
	paddd -15*16(%rcx), %xmm4

	/* s1 of W[i-2] (xmm3) and W[i-1] (xmm7):
	 * shifts right by 10, 17, 19 and left by 13, 15, xored together;
	 * the W[i-7] / W[i-6] additions are interleaved */
	movdqa %xmm3, %xmm2
	movdqa %xmm7, %xmm6
	psrld $10, %xmm3
	psrld $10, %xmm7
	movdqa %xmm3, %xmm1
	movdqa %xmm7, %xmm5

	paddd -7*16(%rcx), %xmm0

	pslld $13, %xmm2
	pslld $13, %xmm6
	psrld $7, %xmm1
	psrld $7, %xmm5

	paddd -6*16(%rcx), %xmm4

	pxor %xmm1, %xmm3
	pxor %xmm5, %xmm7
	psrld $2, %xmm1
	psrld $2, %xmm5
	pxor %xmm2, %xmm3
	pxor %xmm6, %xmm7
	pslld $2, %xmm2
	pslld $2, %xmm6
	pxor %xmm1, %xmm3
	pxor %xmm5, %xmm7
	pxor %xmm2, %xmm3
	pxor %xmm6, %xmm7

	paddd %xmm3, %xmm0
	paddd %xmm7, %xmm4
	movdqa %xmm0, (%rcx)
	movdqa %xmm4, 16(%rcx)
	addq $2*16, %rcx
	cmpq %rcx, %rax
	jne sha256_transform_4way_extend_loop

	/* Load the working variables from state:
	 * a=xmm7 b=xmm5 c=xmm4 d=xmm3 e=xmm0 f=xmm8 g=xmm9 h=xmm10 */
	movdqu 0(%rdi), %xmm7
	movdqu 16(%rdi), %xmm5
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm3
	movdqu 64(%rdi), %xmm0
	movdqu 80(%rdi), %xmm8
	movdqu 96(%rdi), %xmm9
	movdqu 112(%rdi), %xmm10

	/* 64 rounds; %rax is the byte offset into both W and sha256_4k */
	leaq sha256_4k(%rip), %rcx
	xorq %rax, %rax
sha256_transform_4way_main_loop:
	/* xmm6 = W[i] + K[i] + h */
	movdqa (%rsp, %rax), %xmm6
	paddd (%rcx, %rax), %xmm6
	paddd %xmm10, %xmm6

	/* xmm1 = (~e & g) ^ (e & f); meanwhile rotate g->h, f->g, e->f */
	movdqa %xmm0, %xmm1
	movdqa %xmm9, %xmm2
	pandn %xmm2, %xmm1

	movdqa %xmm2, %xmm10
	movdqa %xmm8, %xmm2
	movdqa %xmm2, %xmm9

	pand %xmm0, %xmm2
	pxor %xmm2, %xmm1
	movdqa %xmm0, %xmm8

	paddd %xmm1, %xmm6

	/* xmm0 = e rotated right by 6, 11 and 25, xored together */
	movdqa %xmm0, %xmm1
	psrld $6, %xmm0
	movdqa %xmm0, %xmm2
	pslld $7, %xmm1
	psrld $5, %xmm2
	pxor %xmm1, %xmm0
	pxor %xmm2, %xmm0
	pslld $14, %xmm1
	psrld $14, %xmm2
	pxor %xmm1, %xmm0
	pxor %xmm2, %xmm0
	pslld $5, %xmm1
	pxor %xmm1, %xmm0
	paddd %xmm0, %xmm6

	/* new e = d + t0 */
	movdqa %xmm3, %xmm0
	paddd %xmm6, %xmm0

	/* xmm1 = majority of a, b, c; meanwhile rotate c->d, b->c, a->b */
	movdqa %xmm5, %xmm1
	movdqa %xmm4, %xmm3
	movdqa %xmm4, %xmm2
	pand %xmm5, %xmm2
	pand %xmm7, %xmm4
	pand %xmm7, %xmm1
	pxor %xmm4, %xmm1
	movdqa %xmm5, %xmm4
	movdqa %xmm7, %xmm5
	pxor %xmm2, %xmm1
	paddd %xmm1, %xmm6

	/* xmm7 = a rotated right by 2, 13 and 22, xored together;
	 * new a = that value + t0 + majority */
	movdqa %xmm7, %xmm2
	psrld $2, %xmm7
	movdqa %xmm7, %xmm1
	pslld $10, %xmm2
	psrld $11, %xmm1
	pxor %xmm2, %xmm7
	pxor %xmm1, %xmm7
	pslld $9, %xmm2
	psrld $9, %xmm1
	pxor %xmm2, %xmm7
	pxor %xmm1, %xmm7
	pslld $11, %xmm2
	pxor %xmm2, %xmm7
	paddd %xmm6, %xmm7

	addq $16, %rax
	cmpq $16*64, %rax
	jne sha256_transform_4way_main_loop

	/* Add the previous state to the working variables and store back */
	movdqu 0(%rdi), %xmm2
	movdqu 16(%rdi), %xmm6
	movdqu 32(%rdi), %xmm11
	movdqu 48(%rdi), %xmm1
	paddd %xmm2, %xmm7
	paddd %xmm6, %xmm5
	paddd %xmm11, %xmm4
	paddd %xmm1, %xmm3
	movdqu 64(%rdi), %xmm2
	movdqu 80(%rdi), %xmm6
	movdqu 96(%rdi), %xmm11
	movdqu 112(%rdi), %xmm1
	paddd %xmm2, %xmm0
	paddd %xmm6, %xmm8
	paddd %xmm11, %xmm9
	paddd %xmm1, %xmm10

	movdqu %xmm7, 0(%rdi)
	movdqu %xmm5, 16(%rdi)
	movdqu %xmm4, 32(%rdi)
	movdqu %xmm3, 48(%rdi)
	movdqu %xmm0, 64(%rdi)
	movdqu %xmm8, 80(%rdi)
	movdqu %xmm9, 96(%rdi)
	movdqu %xmm10, 112(%rdi)

	addq $1032, %rsp
#if defined(WIN64)
	popq %rsi
	movdqa 0(%rsp), %xmm6
	movdqa 16(%rsp), %xmm7
	movdqa 32(%rsp), %xmm8
	movdqa 48(%rsp), %xmm9
	movdqa 64(%rsp), %xmm10
	movdqa 80(%rsp), %xmm11
	addq $96, %rsp
	popq %rdi
#endif
	ret
|
||||
|
||||
|
||||
.text
|
||||
.p2align 6
|
||||
|
|
229
scrypt.c
229
scrypt.c
|
@ -1,4 +1,4 @@
|
|||
/*-
|
||||
/*
|
||||
* Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2012 pooler
|
||||
* All rights reserved.
|
||||
*
|
||||
|
@ -34,157 +34,6 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
 * Reverse the byte order of a 32-bit value.
 *
 * The operand is converted to uint32_t before shifting so that narrow or
 * signed arguments are never left-shifted as a signed int.
 * NOTE: x is evaluated four times; do not pass expressions with side
 * effects.
 */
#define byteswap(x) \
	((((uint32_t)(x) << 24) & 0xff000000u) | (((uint32_t)(x) << 8) & 0x00ff0000u) \
	| (((uint32_t)(x) >> 8) & 0x0000ff00u) | (((uint32_t)(x) >> 24) & 0x000000ffu))

/*
 * Store the byte-swapped form of src[0..len-1] into dest[0..len-1].
 * A len of zero or less writes nothing.
 */
static inline void byteswap_vec(uint32_t *dest, const uint32_t *src, int len)
{
	int i;

	for (i = 0; i < len; i++)
		dest[i] = byteswap(src[i]);
}
|
||||
|
||||
|
||||
/*
 * Write the eight SHA-256 initialization constants to state[0..7].
 */
static inline void SHA256_InitState(uint32_t *state)
{
	/* Magic initialization constants */
	static const uint32_t initial_hash[8] = {
		0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A,
		0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
	};
	int w;

	for (w = 0; w < 8; w++)
		state[w] = initial_hash[w];
}
|
||||
|
||||
/* Elementary functions used by SHA256 (arguments fully parenthesized) */
#define Ch(x, y, z)	(((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)	(((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)	((x) >> (n))
#define ROTR(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)		(ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)		(ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)		(ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)		(ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.  Uses the caller's t0 and t1 temporaries.
 * Wrapped in do/while(0) so it behaves as a single statement.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
	do { \
		t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
		t1 = S0(a) + Maj(a, b, c); \
		(d) += t0; \
		(h) = t0 + t1; \
	} while (0)

/*
 * Adjusted round function for rotating state: instead of moving the eight
 * working variables after each round, the indices into S rotate with i.
 */
#define RNDr(S, W, i, k) \
	RND(S[(64 - (i)) % 8], S[(65 - (i)) % 8], \
	    S[(66 - (i)) % 8], S[(67 - (i)) % 8], \
	    S[(68 - (i)) % 8], S[(69 - (i)) % 8], \
	    S[(70 - (i)) % 8], S[(71 - (i)) % 8], \
	    (W)[i] + (k))

/*
 * SHA256 block compression function.  The 256-bit state is transformed via
 * the 512-bit input block to produce a new state.
 *
 * state: 8 words, updated in place.
 * block: 16 input words.
 * swap:  non-zero to reverse the byte order of each block word before use;
 *        zero to use the words as given.
 */
static void SHA256_Transform(uint32_t *state, const uint32_t *block, int swap)
{
	uint32_t W[64];
	uint32_t S[8];
	uint32_t t0, t1;
	int i;

	/* 1. Prepare message schedule W. */
	if (swap) {
		for (i = 0; i < 16; i++)
			W[i] = (block[i] << 24)
			     | ((block[i] << 8) & 0x00ff0000u)
			     | ((block[i] >> 8) & 0x0000ff00u)
			     | (block[i] >> 24);
	} else {
		memcpy(W, block, 64);
	}
	for (i = 16; i < 64; i += 2) {
		W[i]     = s1(W[i - 2]) + W[i - 7] + s0(W[i - 15]) + W[i - 16];
		W[i + 1] = s1(W[i - 1]) + W[i - 6] + s0(W[i - 14]) + W[i - 15];
	}

	/* 2. Initialize working variables. */
	memcpy(S, state, 32);

	/* 3. Mix.  Kept unrolled so every S index is a compile-time constant. */
	RNDr(S, W,  0, 0x428a2f98);
	RNDr(S, W,  1, 0x71374491);
	RNDr(S, W,  2, 0xb5c0fbcf);
	RNDr(S, W,  3, 0xe9b5dba5);
	RNDr(S, W,  4, 0x3956c25b);
	RNDr(S, W,  5, 0x59f111f1);
	RNDr(S, W,  6, 0x923f82a4);
	RNDr(S, W,  7, 0xab1c5ed5);
	RNDr(S, W,  8, 0xd807aa98);
	RNDr(S, W,  9, 0x12835b01);
	RNDr(S, W, 10, 0x243185be);
	RNDr(S, W, 11, 0x550c7dc3);
	RNDr(S, W, 12, 0x72be5d74);
	RNDr(S, W, 13, 0x80deb1fe);
	RNDr(S, W, 14, 0x9bdc06a7);
	RNDr(S, W, 15, 0xc19bf174);
	RNDr(S, W, 16, 0xe49b69c1);
	RNDr(S, W, 17, 0xefbe4786);
	RNDr(S, W, 18, 0x0fc19dc6);
	RNDr(S, W, 19, 0x240ca1cc);
	RNDr(S, W, 20, 0x2de92c6f);
	RNDr(S, W, 21, 0x4a7484aa);
	RNDr(S, W, 22, 0x5cb0a9dc);
	RNDr(S, W, 23, 0x76f988da);
	RNDr(S, W, 24, 0x983e5152);
	RNDr(S, W, 25, 0xa831c66d);
	RNDr(S, W, 26, 0xb00327c8);
	RNDr(S, W, 27, 0xbf597fc7);
	RNDr(S, W, 28, 0xc6e00bf3);
	RNDr(S, W, 29, 0xd5a79147);
	RNDr(S, W, 30, 0x06ca6351);
	RNDr(S, W, 31, 0x14292967);
	RNDr(S, W, 32, 0x27b70a85);
	RNDr(S, W, 33, 0x2e1b2138);
	RNDr(S, W, 34, 0x4d2c6dfc);
	RNDr(S, W, 35, 0x53380d13);
	RNDr(S, W, 36, 0x650a7354);
	RNDr(S, W, 37, 0x766a0abb);
	RNDr(S, W, 38, 0x81c2c92e);
	RNDr(S, W, 39, 0x92722c85);
	RNDr(S, W, 40, 0xa2bfe8a1);
	RNDr(S, W, 41, 0xa81a664b);
	RNDr(S, W, 42, 0xc24b8b70);
	RNDr(S, W, 43, 0xc76c51a3);
	RNDr(S, W, 44, 0xd192e819);
	RNDr(S, W, 45, 0xd6990624);
	RNDr(S, W, 46, 0xf40e3585);
	RNDr(S, W, 47, 0x106aa070);
	RNDr(S, W, 48, 0x19a4c116);
	RNDr(S, W, 49, 0x1e376c08);
	RNDr(S, W, 50, 0x2748774c);
	RNDr(S, W, 51, 0x34b0bcb5);
	RNDr(S, W, 52, 0x391c0cb3);
	RNDr(S, W, 53, 0x4ed8aa4a);
	RNDr(S, W, 54, 0x5b9cca4f);
	RNDr(S, W, 55, 0x682e6ff3);
	RNDr(S, W, 56, 0x748f82ee);
	RNDr(S, W, 57, 0x78a5636f);
	RNDr(S, W, 58, 0x84c87814);
	RNDr(S, W, 59, 0x8cc70208);
	RNDr(S, W, 60, 0x90befffa);
	RNDr(S, W, 61, 0xa4506ceb);
	RNDr(S, W, 62, 0xbef9a3f7);
	RNDr(S, W, 63, 0xc67178f2);

	/* 4. Mix local working variables into global state */
	for (i = 0; i < 8; i++)
		state[i] += S[i];
}
|
||||
|
||||
/*
 * Four-lane SHA-256 routines, declared on x86-64 only; SHA256_4WAY marks
 * their availability.
 * NOTE(review): these appear to be the assembly routines of the same
 * names in scrypt-x64.S - confirm.
 */
#if defined(__x86_64__)
#define SHA256_4WAY
void SHA256_Transform_4way(uint32_t *state, const uint32_t *block, int swap);
void SHA256_InitState_4way(uint32_t *state);
#endif
|
||||
|
||||
|
||||
/*
 * Twelve padding words copied after the last four key words to complete
 * the second 64-byte block that HMAC_SHA256_80_init hashes with swap
 * enabled.  After the byte swap the first word is 0x80000000 and the last
 * is 0x00000280 (640, the bit length of an 80-byte message).
 */
static const uint32_t keypad[12] = {
	0x00000080, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80020000
};
|
||||
|
@ -208,22 +57,22 @@ static inline void HMAC_SHA256_80_init(const uint32_t *key,
|
|||
/* tstate is assumed to contain the midstate of key */
|
||||
memcpy(pad, key + 16, 16);
|
||||
memcpy(pad + 4, keypad, 48);
|
||||
SHA256_Transform(tstate, pad, 1);
|
||||
sha256_transform(tstate, pad, 1);
|
||||
memcpy(ihash, tstate, 32);
|
||||
|
||||
SHA256_InitState(ostate);
|
||||
sha256_init(ostate);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for (; i < 16; i++)
|
||||
pad[i] = 0x5c5c5c5c;
|
||||
SHA256_Transform(ostate, pad, 0);
|
||||
sha256_transform(ostate, pad, 0);
|
||||
|
||||
SHA256_InitState(tstate);
|
||||
sha256_init(tstate);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x36363636;
|
||||
for (; i < 16; i++)
|
||||
pad[i] = 0x36363636;
|
||||
SHA256_Transform(tstate, pad, 0);
|
||||
sha256_transform(tstate, pad, 0);
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
|
||||
|
@ -231,23 +80,25 @@ static inline void PBKDF2_SHA256_80_128(const uint32_t *tstate,
|
|||
{
|
||||
uint32_t istate[8], ostate2[8];
|
||||
uint32_t ibuf[16], obuf[16];
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
memcpy(istate, tstate, 32);
|
||||
SHA256_Transform(istate, salt, 1);
|
||||
sha256_transform(istate, salt, 1);
|
||||
|
||||
byteswap_vec(ibuf, salt + 16, 4);
|
||||
for (i = 0; i < 4; i++)
|
||||
ibuf[i] = swab32(salt[16 + i]);
|
||||
memcpy(ibuf + 5, innerpad, 44);
|
||||
memcpy(obuf + 8, outerpad, 32);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memcpy(obuf, istate, 32);
|
||||
ibuf[4] = i + 1;
|
||||
SHA256_Transform(obuf, ibuf, 0);
|
||||
sha256_transform(obuf, ibuf, 0);
|
||||
|
||||
memcpy(ostate2, ostate, 32);
|
||||
SHA256_Transform(ostate2, obuf, 0);
|
||||
byteswap_vec(output + 8 * i, ostate2, 8);
|
||||
sha256_transform(ostate2, obuf, 0);
|
||||
for (j = 0; j < 8; j++)
|
||||
output[8 * i + j] = swab32(ostate2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -255,15 +106,17 @@ static inline void PBKDF2_SHA256_128_32(uint32_t *tstate, uint32_t *ostate,
|
|||
const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[16];
|
||||
int i;
|
||||
|
||||
SHA256_Transform(tstate, salt, 1);
|
||||
SHA256_Transform(tstate, salt + 16, 1);
|
||||
SHA256_Transform(tstate, finalblk, 0);
|
||||
sha256_transform(tstate, salt, 1);
|
||||
sha256_transform(tstate, salt + 16, 1);
|
||||
sha256_transform(tstate, finalblk, 0);
|
||||
memcpy(buf, tstate, 32);
|
||||
memcpy(buf + 8, outerpad, 32);
|
||||
|
||||
SHA256_Transform(ostate, buf, 0);
|
||||
byteswap_vec(output, ostate, 8);
|
||||
sha256_transform(ostate, buf, 0);
|
||||
for (i = 0; i < 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -335,22 +188,22 @@ static inline void HMAC_SHA256_80_init_4way(const uint32_t *key,
|
|||
/* tstate is assumed to contain the midstate of key */
|
||||
memcpy(pad, key + 4 * 16, 4 * 16);
|
||||
memcpy(pad + 4 * 4, keypad_4way, 4 * 48);
|
||||
SHA256_Transform_4way(tstate, pad, 1);
|
||||
sha256_transform_4way(tstate, pad, 1);
|
||||
memcpy(ihash, tstate, 4 * 32);
|
||||
|
||||
SHA256_InitState_4way(ostate);
|
||||
sha256_init_4way(ostate);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for (; i < 4 * 16; i++)
|
||||
pad[i] = 0x5c5c5c5c;
|
||||
SHA256_Transform_4way(ostate, pad, 0);
|
||||
sha256_transform_4way(ostate, pad, 0);
|
||||
|
||||
SHA256_InitState_4way(tstate);
|
||||
sha256_init_4way(tstate);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x36363636;
|
||||
for (; i < 4 * 16; i++)
|
||||
pad[i] = 0x36363636;
|
||||
SHA256_Transform_4way(tstate, pad, 0);
|
||||
sha256_transform_4way(tstate, pad, 0);
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
|
||||
|
@ -358,12 +211,13 @@ static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
|
|||
{
|
||||
uint32_t istate[4 * 8], ostate2[4 * 8];
|
||||
uint32_t ibuf[4 * 16], obuf[4 * 16];
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
memcpy(istate, tstate, 4 * 32);
|
||||
SHA256_Transform_4way(istate, salt, 1);
|
||||
sha256_transform_4way(istate, salt, 1);
|
||||
|
||||
byteswap_vec(ibuf, salt + 4 * 16, 4 * 4);
|
||||
for (i = 0; i < 4 * 4; i++)
|
||||
ibuf[i] = swab32(salt[4 * 16 + i]);
|
||||
memcpy(ibuf + 4 * 5, innerpad_4way, 4 * 44);
|
||||
memcpy(obuf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
|
@ -373,11 +227,12 @@ static inline void PBKDF2_SHA256_80_128_4way(const uint32_t *tstate,
|
|||
ibuf[4 * 4 + 1] = i + 1;
|
||||
ibuf[4 * 4 + 2] = i + 1;
|
||||
ibuf[4 * 4 + 3] = i + 1;
|
||||
SHA256_Transform_4way(obuf, ibuf, 0);
|
||||
sha256_transform_4way(obuf, ibuf, 0);
|
||||
|
||||
memcpy(ostate2, ostate, 4 * 32);
|
||||
SHA256_Transform_4way(ostate2, obuf, 0);
|
||||
byteswap_vec(output + 4 * 8 * i, ostate2, 4 * 8);
|
||||
sha256_transform_4way(ostate2, obuf, 0);
|
||||
for (j = 0; j < 4 * 8; j++)
|
||||
output[4 * 8 * i + j] = swab32(ostate2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -385,15 +240,17 @@ static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
|
|||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[4 * 16];
|
||||
int i;
|
||||
|
||||
SHA256_Transform_4way(tstate, salt, 1);
|
||||
SHA256_Transform_4way(tstate, salt + 4 * 16, 1);
|
||||
SHA256_Transform_4way(tstate, finalblk_4way, 0);
|
||||
sha256_transform_4way(tstate, salt, 1);
|
||||
sha256_transform_4way(tstate, salt + 4 * 16, 1);
|
||||
sha256_transform_4way(tstate, finalblk_4way, 0);
|
||||
memcpy(buf, tstate, 4 * 32);
|
||||
memcpy(buf + 4 * 8, outerpad_4way, 4 * 32);
|
||||
|
||||
SHA256_Transform_4way(ostate, buf, 0);
|
||||
byteswap_vec(output, ostate, 4 * 8);
|
||||
sha256_transform_4way(ostate, buf, 0);
|
||||
for (i = 0; i < 4 * 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
#endif /* SHA256_4WAY */
|
||||
|
@ -616,8 +473,8 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
|||
for (i = 0; i < throughput; i++)
|
||||
memcpy(data + i * 20, pdata, 80);
|
||||
|
||||
SHA256_InitState(midstate);
|
||||
SHA256_Transform(midstate, data, 1);
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, data, 1);
|
||||
|
||||
do {
|
||||
for (i = 0; i < throughput; i++)
|
||||
|
|
408
sha2-x64.S
Normal file
408
sha2-x64.S
Normal file
|
@ -0,0 +1,408 @@
|
|||
/*
|
||||
* Copyright 2012 pooler@litecoinpool.org
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
|
||||
#if defined(__linux__) && defined(__ELF__)
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
.data
|
||||
.p2align 6
|
||||
sha256_4h:
|
||||
.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
|
||||
.long 0xbb67ae85, 0xbb67ae85, 0xbb67ae85, 0xbb67ae85
|
||||
.long 0x3c6ef372, 0x3c6ef372, 0x3c6ef372, 0x3c6ef372
|
||||
.long 0xa54ff53a, 0xa54ff53a, 0xa54ff53a, 0xa54ff53a
|
||||
.long 0x510e527f, 0x510e527f, 0x510e527f, 0x510e527f
|
||||
.long 0x9b05688c, 0x9b05688c, 0x9b05688c, 0x9b05688c
|
||||
.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
|
||||
.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
|
||||
|
||||
.data
|
||||
.p2align 6
|
||||
sha256_4k:
|
||||
.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
|
||||
.long 0x71374491, 0x71374491, 0x71374491, 0x71374491
|
||||
.long 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf, 0xb5c0fbcf
|
||||
.long 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5, 0xe9b5dba5
|
||||
.long 0x3956c25b, 0x3956c25b, 0x3956c25b, 0x3956c25b
|
||||
.long 0x59f111f1, 0x59f111f1, 0x59f111f1, 0x59f111f1
|
||||
.long 0x923f82a4, 0x923f82a4, 0x923f82a4, 0x923f82a4
|
||||
.long 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5, 0xab1c5ed5
|
||||
.long 0xd807aa98, 0xd807aa98, 0xd807aa98, 0xd807aa98
|
||||
.long 0x12835b01, 0x12835b01, 0x12835b01, 0x12835b01
|
||||
.long 0x243185be, 0x243185be, 0x243185be, 0x243185be
|
||||
.long 0x550c7dc3, 0x550c7dc3, 0x550c7dc3, 0x550c7dc3
|
||||
.long 0x72be5d74, 0x72be5d74, 0x72be5d74, 0x72be5d74
|
||||
.long 0x80deb1fe, 0x80deb1fe, 0x80deb1fe, 0x80deb1fe
|
||||
.long 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7, 0x9bdc06a7
|
||||
.long 0xc19bf174, 0xc19bf174, 0xc19bf174, 0xc19bf174
|
||||
.long 0xe49b69c1, 0xe49b69c1, 0xe49b69c1, 0xe49b69c1
|
||||
.long 0xefbe4786, 0xefbe4786, 0xefbe4786, 0xefbe4786
|
||||
.long 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6, 0x0fc19dc6
|
||||
.long 0x240ca1cc, 0x240ca1cc, 0x240ca1cc, 0x240ca1cc
|
||||
.long 0x2de92c6f, 0x2de92c6f, 0x2de92c6f, 0x2de92c6f
|
||||
.long 0x4a7484aa, 0x4a7484aa, 0x4a7484aa, 0x4a7484aa
|
||||
.long 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc, 0x5cb0a9dc
|
||||
.long 0x76f988da, 0x76f988da, 0x76f988da, 0x76f988da
|
||||
.long 0x983e5152, 0x983e5152, 0x983e5152, 0x983e5152
|
||||
.long 0xa831c66d, 0xa831c66d, 0xa831c66d, 0xa831c66d
|
||||
.long 0xb00327c8, 0xb00327c8, 0xb00327c8, 0xb00327c8
|
||||
.long 0xbf597fc7, 0xbf597fc7, 0xbf597fc7, 0xbf597fc7
|
||||
.long 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3, 0xc6e00bf3
|
||||
.long 0xd5a79147, 0xd5a79147, 0xd5a79147, 0xd5a79147
|
||||
.long 0x06ca6351, 0x06ca6351, 0x06ca6351, 0x06ca6351
|
||||
.long 0x14292967, 0x14292967, 0x14292967, 0x14292967
|
||||
.long 0x27b70a85, 0x27b70a85, 0x27b70a85, 0x27b70a85
|
||||
.long 0x2e1b2138, 0x2e1b2138, 0x2e1b2138, 0x2e1b2138
|
||||
.long 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc, 0x4d2c6dfc
|
||||
.long 0x53380d13, 0x53380d13, 0x53380d13, 0x53380d13
|
||||
.long 0x650a7354, 0x650a7354, 0x650a7354, 0x650a7354
|
||||
.long 0x766a0abb, 0x766a0abb, 0x766a0abb, 0x766a0abb
|
||||
.long 0x81c2c92e, 0x81c2c92e, 0x81c2c92e, 0x81c2c92e
|
||||
.long 0x92722c85, 0x92722c85, 0x92722c85, 0x92722c85
|
||||
.long 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1, 0xa2bfe8a1
|
||||
.long 0xa81a664b, 0xa81a664b, 0xa81a664b, 0xa81a664b
|
||||
.long 0xc24b8b70, 0xc24b8b70, 0xc24b8b70, 0xc24b8b70
|
||||
.long 0xc76c51a3, 0xc76c51a3, 0xc76c51a3, 0xc76c51a3
|
||||
.long 0xd192e819, 0xd192e819, 0xd192e819, 0xd192e819
|
||||
.long 0xd6990624, 0xd6990624, 0xd6990624, 0xd6990624
|
||||
.long 0xf40e3585, 0xf40e3585, 0xf40e3585, 0xf40e3585
|
||||
.long 0x106aa070, 0x106aa070, 0x106aa070, 0x106aa070
|
||||
.long 0x19a4c116, 0x19a4c116, 0x19a4c116, 0x19a4c116
|
||||
.long 0x1e376c08, 0x1e376c08, 0x1e376c08, 0x1e376c08
|
||||
.long 0x2748774c, 0x2748774c, 0x2748774c, 0x2748774c
|
||||
.long 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5, 0x34b0bcb5
|
||||
.long 0x391c0cb3, 0x391c0cb3, 0x391c0cb3, 0x391c0cb3
|
||||
.long 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a, 0x4ed8aa4a
|
||||
.long 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f, 0x5b9cca4f
|
||||
.long 0x682e6ff3, 0x682e6ff3, 0x682e6ff3, 0x682e6ff3
|
||||
.long 0x748f82ee, 0x748f82ee, 0x748f82ee, 0x748f82ee
|
||||
.long 0x78a5636f, 0x78a5636f, 0x78a5636f, 0x78a5636f
|
||||
.long 0x84c87814, 0x84c87814, 0x84c87814, 0x84c87814
|
||||
.long 0x8cc70208, 0x8cc70208, 0x8cc70208, 0x8cc70208
|
||||
.long 0x90befffa, 0x90befffa, 0x90befffa, 0x90befffa
|
||||
.long 0xa4506ceb, 0xa4506ceb, 0xa4506ceb, 0xa4506ceb
|
||||
.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
|
||||
.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
|
||||
|
||||
/*
 * void sha256_init_4way(uint32_t *state)
 *
 * Copies the 128-byte sha256_4h table (the eight initial hash words, each
 * replicated across four 32-bit lanes) to the buffer addressed by the
 * first argument.  The destination is written with unaligned stores, so
 * it needs no particular alignment.
 *
 * On WIN64 the first argument arrives in %rcx and %rdi must be preserved
 * for the caller, hence the push/move/pop around the body.
 */
	.text
	.p2align 6
	.globl sha256_init_4way
	.globl _sha256_init_4way
sha256_init_4way:
_sha256_init_4way:
#if defined(WIN64)
	pushq %rdi
	movq %rcx, %rdi
#endif
	/* state words 0..3 */
	movdqa sha256_4h+0(%rip), %xmm0
	movdqa sha256_4h+16(%rip), %xmm1
	movdqa sha256_4h+32(%rip), %xmm2
	movdqa sha256_4h+48(%rip), %xmm3
	movdqu %xmm0, 0(%rdi)
	movdqu %xmm1, 16(%rdi)
	movdqu %xmm2, 32(%rdi)
	movdqu %xmm3, 48(%rdi)
	/* state words 4..7 */
	movdqa sha256_4h+64(%rip), %xmm0
	movdqa sha256_4h+80(%rip), %xmm1
	movdqa sha256_4h+96(%rip), %xmm2
	movdqa sha256_4h+112(%rip), %xmm3
	movdqu %xmm0, 64(%rdi)
	movdqu %xmm1, 80(%rdi)
	movdqu %xmm2, 96(%rdi)
	movdqu %xmm3, 112(%rdi)
#if defined(WIN64)
	popq %rdi
#endif
	ret
|
||||
|
||||
/*
 * p2bswap_rsi_rsp i
 *
 * Loads the two 16-byte vectors at indices i and i+1 from the buffer at
 * %rsi (unaligned loads), reverses the byte order inside every 32-bit
 * lane, and stores the results at the same indices relative to %rsp
 * (aligned stores, so %rsp must be 16-byte aligned).
 * The two vectors are processed in interleaved fashion.
 * Uses %xmm0-%xmm3 as scratch.
 */
.macro p2bswap_rsi_rsp i
|
||||
movdqu \i*16(%rsi), %xmm0
|
||||
movdqu (\i+1)*16(%rsi), %xmm2
|
||||
/* shuffle control 0xb1 swaps the two 16-bit halves of each 32-bit lane */
pshuflw $0xb1, %xmm0, %xmm0
|
||||
pshuflw $0xb1, %xmm2, %xmm2
|
||||
pshufhw $0xb1, %xmm0, %xmm0
|
||||
pshufhw $0xb1, %xmm2, %xmm2
|
||||
/* swap the two bytes of every 16-bit word: (w >> 8) ^ (w << 8) */
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm2, %xmm3
|
||||
psrlw $8, %xmm1
|
||||
psrlw $8, %xmm3
|
||||
psllw $8, %xmm0
|
||||
psllw $8, %xmm2
|
||||
pxor %xmm1, %xmm0
|
||||
pxor %xmm3, %xmm2
|
||||
movdqa %xmm0, \i*16(%rsp)
|
||||
movdqa %xmm2, (\i+1)*16(%rsp)
|
||||
.endm
|
||||
|
||||
/*
 * sha256_transform_4way
 *
 * Vectorised SHA-256 block transform working on 16-byte vectors of four
 * 32-bit lanes (all arithmetic below is lane-wise: paddd/psrld/pslld).
 *
 * Arguments (moved from %rcx, %rdx, %r8 when WIN64 is defined):
 *   %rdi  state: eight 16-byte vectors, read and rewritten (unaligned access)
 *   %rsi  block: sixteen 16-byte vectors, read only (unaligned access)
 *   %rdx  swap flag: non-zero => byte-swap every 32-bit word while loading
 *
 * A 1032-byte stack frame is reserved; its first 1024 bytes hold the
 * 64-entry message schedule (one 16-byte vector per entry).
 * Round constants are read from sha256_4k, which is indexed in 16-byte steps.
 * With WIN64 defined, %rdi, %rsi and %xmm6-%xmm11 are saved and restored.
 *
 * NOTE(review): presumably each lane carries an independent hash, i.e. this
 * is four interleaved instances of sha256_transform() from sha2.c - confirm
 * against the callers.
 */
.text
|
||||
.p2align 6
|
||||
.globl sha256_transform_4way
|
||||
.globl _sha256_transform_4way
|
||||
sha256_transform_4way:
|
||||
_sha256_transform_4way:
|
||||
#if defined(WIN64)
|
||||
pushq %rdi
|
||||
/* save the vector registers this routine overwrites beyond %xmm0-%xmm5 */
subq $96, %rsp
|
||||
movdqa %xmm6, 0(%rsp)
|
||||
movdqa %xmm7, 16(%rsp)
|
||||
movdqa %xmm8, 32(%rsp)
|
||||
movdqa %xmm9, 48(%rsp)
|
||||
movdqa %xmm10, 64(%rsp)
|
||||
movdqa %xmm11, 80(%rsp)
|
||||
pushq %rsi
|
||||
/* remap the three arguments into the registers used by the body */
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
movq %r8, %rdx
|
||||
#endif
|
||||
/* 64 * 16 bytes of schedule plus 8 bytes; presumably the extra 8 keep %rsp
   16-byte aligned for the movdqa accesses below - TODO confirm */
subq $1032, %rsp
|
||||
|
||||
/* swap flag zero => plain copy of the block, otherwise byte-swapping copy */
testq %rdx, %rdx
|
||||
jz sha256_transform_4way_block_copy
|
||||
|
||||
/* each invocation converts two vectors: 8 invocations cover entries 0..15 */
p2bswap_rsi_rsp 0
|
||||
p2bswap_rsi_rsp 2
|
||||
p2bswap_rsi_rsp 4
|
||||
p2bswap_rsi_rsp 6
|
||||
p2bswap_rsi_rsp 8
|
||||
p2bswap_rsi_rsp 10
|
||||
p2bswap_rsi_rsp 12
|
||||
p2bswap_rsi_rsp 14
|
||||
jmp sha256_transform_4way_extend
|
||||
|
||||
.p2align 6
|
||||
/* copy the 16 block vectors unchanged into schedule entries 0..15 */
sha256_transform_4way_block_copy:
|
||||
movdqu 0*16(%rsi), %xmm0
|
||||
movdqu 1*16(%rsi), %xmm1
|
||||
movdqu 2*16(%rsi), %xmm2
|
||||
movdqu 3*16(%rsi), %xmm3
|
||||
movdqu 4*16(%rsi), %xmm4
|
||||
movdqu 5*16(%rsi), %xmm5
|
||||
movdqu 6*16(%rsi), %xmm6
|
||||
movdqu 7*16(%rsi), %xmm7
|
||||
movdqa %xmm0, 0*16(%rsp)
|
||||
movdqa %xmm1, 1*16(%rsp)
|
||||
movdqa %xmm2, 2*16(%rsp)
|
||||
movdqa %xmm3, 3*16(%rsp)
|
||||
movdqa %xmm4, 4*16(%rsp)
|
||||
movdqa %xmm5, 5*16(%rsp)
|
||||
movdqa %xmm6, 6*16(%rsp)
|
||||
movdqa %xmm7, 7*16(%rsp)
|
||||
movdqu 8*16(%rsi), %xmm0
|
||||
movdqu 9*16(%rsi), %xmm1
|
||||
movdqu 10*16(%rsi), %xmm2
|
||||
movdqu 11*16(%rsi), %xmm3
|
||||
movdqu 12*16(%rsi), %xmm4
|
||||
movdqu 13*16(%rsi), %xmm5
|
||||
movdqu 14*16(%rsi), %xmm6
|
||||
movdqu 15*16(%rsi), %xmm7
|
||||
movdqa %xmm0, 8*16(%rsp)
|
||||
movdqa %xmm1, 9*16(%rsp)
|
||||
movdqa %xmm2, 10*16(%rsp)
|
||||
movdqa %xmm3, 11*16(%rsp)
|
||||
movdqa %xmm4, 12*16(%rsp)
|
||||
movdqa %xmm5, 13*16(%rsp)
|
||||
movdqa %xmm6, 14*16(%rsp)
|
||||
movdqa %xmm7, 15*16(%rsp)
|
||||
|
||||
/*
 * Schedule extension: fill entries 16..63, two entries (i and i+1) per
 * iteration.  %rcx points at entry i, %rax at the end of the schedule.
 */
sha256_transform_4way_extend:
|
||||
leaq 256(%rsp), %rcx
|
||||
leaq 48*16(%rcx), %rax
|
||||
sha256_transform_4way_extend_loop:
|
||||
/* with x = entry i-15 (xmm0) / i-14 (xmm4), build
   (x>>3) ^ (x>>7) ^ (x<<25) ^ (x>>18) ^ (x<<14),
   i.e. rotr(x,7) ^ rotr(x,18) ^ (x>>3) */
movdqa -15*16(%rcx), %xmm0
|
||||
movdqa -14*16(%rcx), %xmm4
|
||||
movdqa %xmm0, %xmm2
|
||||
movdqa %xmm4, %xmm6
|
||||
psrld $3, %xmm0
|
||||
psrld $3, %xmm4
|
||||
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm4, %xmm5
|
||||
pslld $14, %xmm2
|
||||
pslld $14, %xmm6
|
||||
psrld $4, %xmm1
|
||||
psrld $4, %xmm5
|
||||
pxor %xmm1, %xmm0
|
||||
pxor %xmm5, %xmm4
|
||||
psrld $11, %xmm1
|
||||
psrld $11, %xmm5
|
||||
pxor %xmm2, %xmm0
|
||||
pxor %xmm6, %xmm4
|
||||
pslld $11, %xmm2
|
||||
pslld $11, %xmm6
|
||||
pxor %xmm1, %xmm0
|
||||
pxor %xmm5, %xmm4
|
||||
pxor %xmm2, %xmm0
|
||||
pxor %xmm6, %xmm4
|
||||
|
||||
/* fetch entries i-2 (xmm3) and i-1 (xmm7); add entries i-16 / i-15 */
movdqa -2*16(%rcx), %xmm3
|
||||
movdqa -1*16(%rcx), %xmm7
|
||||
paddd -16*16(%rcx), %xmm0
|
||||
paddd -15*16(%rcx), %xmm4
|
||||
|
||||
/* with y = entry i-2 / i-1, build
   (y>>10) ^ (y>>17) ^ (y<<15) ^ (y>>19) ^ (y<<13),
   i.e. rotr(y,17) ^ rotr(y,19) ^ (y>>10) */
movdqa %xmm3, %xmm2
|
||||
movdqa %xmm7, %xmm6
|
||||
psrld $10, %xmm3
|
||||
psrld $10, %xmm7
|
||||
movdqa %xmm3, %xmm1
|
||||
movdqa %xmm7, %xmm5
|
||||
|
||||
/* add entry i-7 to the first result (interleaved with the shifts) */
paddd -7*16(%rcx), %xmm0
|
||||
|
||||
pslld $13, %xmm2
|
||||
pslld $13, %xmm6
|
||||
psrld $7, %xmm1
|
||||
psrld $7, %xmm5
|
||||
|
||||
/* add entry i-6 to the second result */
paddd -6*16(%rcx), %xmm4
|
||||
|
||||
pxor %xmm1, %xmm3
|
||||
pxor %xmm5, %xmm7
|
||||
psrld $2, %xmm1
|
||||
psrld $2, %xmm5
|
||||
pxor %xmm2, %xmm3
|
||||
pxor %xmm6, %xmm7
|
||||
pslld $2, %xmm2
|
||||
pslld $2, %xmm6
|
||||
pxor %xmm1, %xmm3
|
||||
pxor %xmm5, %xmm7
|
||||
pxor %xmm2, %xmm3
|
||||
pxor %xmm6, %xmm7
|
||||
|
||||
/* combine both parts and store entries i and i+1 */
paddd %xmm3, %xmm0
|
||||
paddd %xmm7, %xmm4
|
||||
movdqa %xmm0, (%rcx)
|
||||
movdqa %xmm4, 16(%rcx)
|
||||
addq $2*16, %rcx
|
||||
cmpq %rcx, %rax
|
||||
jne sha256_transform_4way_extend_loop
|
||||
|
||||
/* load the eight state vectors; in the terms of RND() in sha2.c:
   xmm7=a xmm5=b xmm4=c xmm3=d xmm0=e xmm8=f xmm9=g xmm10=h */
movdqu 0(%rdi), %xmm7
|
||||
movdqu 16(%rdi), %xmm5
|
||||
movdqu 32(%rdi), %xmm4
|
||||
movdqu 48(%rdi), %xmm3
|
||||
movdqu 64(%rdi), %xmm0
|
||||
movdqu 80(%rdi), %xmm8
|
||||
movdqu 96(%rdi), %xmm9
|
||||
movdqu 112(%rdi), %xmm10
|
||||
|
||||
/* %rcx = round-constant table, %rax = byte offset of the current round */
leaq sha256_4k(%rip), %rcx
|
||||
xorq %rax, %rax
|
||||
sha256_transform_4way_main_loop:
|
||||
/* xmm6 accumulates: schedule entry + round constant + h */
movdqa (%rsp, %rax), %xmm6
|
||||
paddd (%rcx, %rax), %xmm6
|
||||
paddd %xmm10, %xmm6
|
||||
|
||||
/* xmm1 = ~e & g */
movdqa %xmm0, %xmm1
|
||||
movdqa %xmm9, %xmm2
|
||||
pandn %xmm2, %xmm1
|
||||
|
||||
/* rotate registers for the next round: h <- g, g <- f */
movdqa %xmm2, %xmm10
|
||||
movdqa %xmm8, %xmm2
|
||||
movdqa %xmm2, %xmm9
|
||||
|
||||
/* xmm1 = (e & f) ^ (~e & g); then f <- e */
pand %xmm0, %xmm2
|
||||
pxor %xmm2, %xmm1
|
||||
movdqa %xmm0, %xmm8
|
||||
|
||||
paddd %xmm1, %xmm6
|
||||
|
||||
/* xmm0 = (e>>6)^(e<<26) ^ (e>>11)^(e<<21) ^ (e>>25)^(e<<7),
   i.e. rotr(e,6) ^ rotr(e,11) ^ rotr(e,25) */
movdqa %xmm0, %xmm1
|
||||
psrld $6, %xmm0
|
||||
movdqa %xmm0, %xmm2
|
||||
pslld $7, %xmm1
|
||||
psrld $5, %xmm2
|
||||
pxor %xmm1, %xmm0
|
||||
pxor %xmm2, %xmm0
|
||||
pslld $14, %xmm1
|
||||
psrld $14, %xmm2
|
||||
pxor %xmm1, %xmm0
|
||||
pxor %xmm2, %xmm0
|
||||
pslld $5, %xmm1
|
||||
pxor %xmm1, %xmm0
|
||||
paddd %xmm0, %xmm6
|
||||
|
||||
/* new e = d + accumulated sum */
movdqa %xmm3, %xmm0
|
||||
paddd %xmm6, %xmm0
|
||||
|
||||
/* xmm1 = (a & b) ^ (a & c) ^ (b & c); meanwhile d <- c, c <- b, b <- a */
movdqa %xmm5, %xmm1
|
||||
movdqa %xmm4, %xmm3
|
||||
movdqa %xmm4, %xmm2
|
||||
pand %xmm5, %xmm2
|
||||
pand %xmm7, %xmm4
|
||||
pand %xmm7, %xmm1
|
||||
pxor %xmm4, %xmm1
|
||||
movdqa %xmm5, %xmm4
|
||||
movdqa %xmm7, %xmm5
|
||||
pxor %xmm2, %xmm1
|
||||
paddd %xmm1, %xmm6
|
||||
|
||||
/* xmm7 = (a>>2)^(a<<30) ^ (a>>13)^(a<<19) ^ (a>>22)^(a<<10),
   i.e. rotr(a,2) ^ rotr(a,13) ^ rotr(a,22) */
movdqa %xmm7, %xmm2
|
||||
psrld $2, %xmm7
|
||||
movdqa %xmm7, %xmm1
|
||||
pslld $10, %xmm2
|
||||
psrld $11, %xmm1
|
||||
pxor %xmm2, %xmm7
|
||||
pxor %xmm1, %xmm7
|
||||
pslld $9, %xmm2
|
||||
psrld $9, %xmm1
|
||||
pxor %xmm2, %xmm7
|
||||
pxor %xmm1, %xmm7
|
||||
pslld $11, %xmm2
|
||||
pxor %xmm2, %xmm7
|
||||
/* new a = that value + accumulated sum */
paddd %xmm6, %xmm7
|
||||
|
||||
/* advance one 16-byte entry; 64 rounds in total */
addq $16, %rax
|
||||
cmpq $16*64, %rax
|
||||
jne sha256_transform_4way_main_loop
|
||||
|
||||
/* add the previous state to the working registers ... */
movdqu 0(%rdi), %xmm2
|
||||
movdqu 16(%rdi), %xmm6
|
||||
movdqu 32(%rdi), %xmm11
|
||||
movdqu 48(%rdi), %xmm1
|
||||
paddd %xmm2, %xmm7
|
||||
paddd %xmm6, %xmm5
|
||||
paddd %xmm11, %xmm4
|
||||
paddd %xmm1, %xmm3
|
||||
movdqu 64(%rdi), %xmm2
|
||||
movdqu 80(%rdi), %xmm6
|
||||
movdqu 96(%rdi), %xmm11
|
||||
movdqu 112(%rdi), %xmm1
|
||||
paddd %xmm2, %xmm0
|
||||
paddd %xmm6, %xmm8
|
||||
paddd %xmm11, %xmm9
|
||||
paddd %xmm1, %xmm10
|
||||
|
||||
/* ... and write the updated state back */
movdqu %xmm7, 0(%rdi)
|
||||
movdqu %xmm5, 16(%rdi)
|
||||
movdqu %xmm4, 32(%rdi)
|
||||
movdqu %xmm3, 48(%rdi)
|
||||
movdqu %xmm0, 64(%rdi)
|
||||
movdqu %xmm8, 80(%rdi)
|
||||
movdqu %xmm9, 96(%rdi)
|
||||
movdqu %xmm10, 112(%rdi)
|
||||
|
||||
/* release the schedule frame */
addq $1032, %rsp
|
||||
#if defined(WIN64)
|
||||
/* undo the prologue in reverse order */
popq %rsi
|
||||
movdqa 0(%rsp), %xmm6
|
||||
movdqa 16(%rsp), %xmm7
|
||||
movdqa 32(%rsp), %xmm8
|
||||
movdqa 48(%rsp), %xmm9
|
||||
movdqa 64(%rsp), %xmm10
|
||||
movdqa 80(%rsp), %xmm11
|
||||
addq $96, %rsp
|
||||
popq %rdi
|
||||
#endif
|
||||
ret
|
||||
|
||||
#endif
|
148
sha2.c
Normal file
148
sha2.c
Normal file
|
@ -0,0 +1,148 @@
|
|||
/*
|
||||
* Copyright 2011 ArtForz, 2011-2012 pooler
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version. See COPYING for more details.
|
||||
*/
|
||||
|
||||
#include "cpuminer-config.h"
|
||||
#include "miner.h"
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/*
 * Fill the eight-word state with the SHA-256 starting values.
 * `state` must point to at least eight writable 32-bit words.
 */
void sha256_init(uint32_t *state)
{
	/* Initial hash words, in state order */
	static const uint32_t initial_words[8] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};
	int w;

	for (w = 0; w < 8; w++)
		state[w] = initial_words[w];
}
|
||||
|
||||
/*
 * Elementary functions used by SHA256.
 *
 * All operands are expected to be 32-bit unsigned values.  Every macro
 * argument is parenthesized so that compound expressions (e.g. `a | b`)
 * can be passed safely.  Arguments may be evaluated more than once, so
 * they must be free of side effects.
 *
 *   Ch(x, y, z)   bitwise choice: bits of y where x is set, else bits of z
 *   Maj(x, y, z)  bitwise majority of x, y and z
 *   SHR(x, n)     logical right shift by n
 *   ROTR(x, n)    rotate right by n; n must be in 1..31
 *   S0, S1        rotation mixes applied to the working variables
 *   s0, s1        rotation/shift mixes applied to the message schedule
 */
#define Ch(x, y, z)     (((x) & ((y) ^ (z))) ^ (z))
#define Maj(x, y, z)    (((x) & ((y) | (z))) | ((y) & (z)))
#define SHR(x, n)       ((x) >> (n))
#define ROTR(x, n)      (((x) >> (n)) | ((x) << (32 - (n))))
#define S0(x)           (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
#define S1(x)           (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
#define s0(x)           (ROTR(x, 7) ^ ROTR(x, 18) ^ SHR(x, 3))
#define s1(x)           (ROTR(x, 17) ^ ROTR(x, 19) ^ SHR(x, 10))

/*
 * SHA256 round function.
 *
 * Updates d and h in place from the eight working variables and the value
 * k (schedule word plus round constant).  Requires two uint32_t scratch
 * variables named t0 and t1 in the enclosing scope.  Wrapped in
 * do { } while (0) so that it behaves as a single statement, e.g. under
 * an unbraced `if`.
 */
#define RND(a, b, c, d, e, f, g, h, k) \
	do { \
		t0 = (h) + S1(e) + Ch(e, f, g) + (k); \
		t1 = S0(a) + Maj(a, b, c); \
		(d) += t0; \
		(h) = t0 + t1; \
	} while (0)

/*
 * Adjusted round function for rotating state.
 *
 * Instead of shifting the eight working variables after every round, the
 * index of each variable inside S is rotated according to the round
 * number i (0..63).  W is the message schedule, k the round constant.
 */
#define RNDr(S, W, i, k) \
	RND((S)[(64 - (i)) % 8], (S)[(65 - (i)) % 8], \
	    (S)[(66 - (i)) % 8], (S)[(67 - (i)) % 8], \
	    (S)[(68 - (i)) % 8], (S)[(69 - (i)) % 8], \
	    (S)[(70 - (i)) % 8], (S)[(71 - (i)) % 8], \
	    (W)[(i)] + (k))
|
||||
|
||||
/*
 * SHA256 compression step: folds one 64-byte input block into the
 * eight-word state.
 *
 * state  eight 32-bit words, updated in place
 * block  sixteen 32-bit input words
 * swap   when non-zero each input word is passed through swab32()
 *        before use; when zero the words are used as stored
 */
void sha256_transform(uint32_t *state, const uint32_t *block, int swap)
{
	uint32_t sched[64];	/* message schedule */
	uint32_t work[8];	/* working copy of the state */
	uint32_t t0, t1;	/* scratch used by the RND macro */
	int i;

	/* Load the sixteen input words, converting them if requested. */
	if (!swap) {
		memcpy(sched, block, 16 * sizeof(sched[0]));
	} else {
		for (i = 0; i < 16; i++)
			sched[i] = swab32(block[i]);
	}

	/* Derive the remaining forty-eight schedule words. */
	for (i = 16; i < 64; i++)
		sched[i] = s1(sched[i - 2]) + sched[i - 7]
		         + s0(sched[i - 15]) + sched[i - 16];

	/* Start from the current state. */
	for (i = 0; i < 8; i++)
		work[i] = state[i];

	/* Sixty-four rounds, one per schedule word and round constant. */
	RNDr(work, sched,  0, 0x428a2f98);
	RNDr(work, sched,  1, 0x71374491);
	RNDr(work, sched,  2, 0xb5c0fbcf);
	RNDr(work, sched,  3, 0xe9b5dba5);
	RNDr(work, sched,  4, 0x3956c25b);
	RNDr(work, sched,  5, 0x59f111f1);
	RNDr(work, sched,  6, 0x923f82a4);
	RNDr(work, sched,  7, 0xab1c5ed5);
	RNDr(work, sched,  8, 0xd807aa98);
	RNDr(work, sched,  9, 0x12835b01);
	RNDr(work, sched, 10, 0x243185be);
	RNDr(work, sched, 11, 0x550c7dc3);
	RNDr(work, sched, 12, 0x72be5d74);
	RNDr(work, sched, 13, 0x80deb1fe);
	RNDr(work, sched, 14, 0x9bdc06a7);
	RNDr(work, sched, 15, 0xc19bf174);
	RNDr(work, sched, 16, 0xe49b69c1);
	RNDr(work, sched, 17, 0xefbe4786);
	RNDr(work, sched, 18, 0x0fc19dc6);
	RNDr(work, sched, 19, 0x240ca1cc);
	RNDr(work, sched, 20, 0x2de92c6f);
	RNDr(work, sched, 21, 0x4a7484aa);
	RNDr(work, sched, 22, 0x5cb0a9dc);
	RNDr(work, sched, 23, 0x76f988da);
	RNDr(work, sched, 24, 0x983e5152);
	RNDr(work, sched, 25, 0xa831c66d);
	RNDr(work, sched, 26, 0xb00327c8);
	RNDr(work, sched, 27, 0xbf597fc7);
	RNDr(work, sched, 28, 0xc6e00bf3);
	RNDr(work, sched, 29, 0xd5a79147);
	RNDr(work, sched, 30, 0x06ca6351);
	RNDr(work, sched, 31, 0x14292967);
	RNDr(work, sched, 32, 0x27b70a85);
	RNDr(work, sched, 33, 0x2e1b2138);
	RNDr(work, sched, 34, 0x4d2c6dfc);
	RNDr(work, sched, 35, 0x53380d13);
	RNDr(work, sched, 36, 0x650a7354);
	RNDr(work, sched, 37, 0x766a0abb);
	RNDr(work, sched, 38, 0x81c2c92e);
	RNDr(work, sched, 39, 0x92722c85);
	RNDr(work, sched, 40, 0xa2bfe8a1);
	RNDr(work, sched, 41, 0xa81a664b);
	RNDr(work, sched, 42, 0xc24b8b70);
	RNDr(work, sched, 43, 0xc76c51a3);
	RNDr(work, sched, 44, 0xd192e819);
	RNDr(work, sched, 45, 0xd6990624);
	RNDr(work, sched, 46, 0xf40e3585);
	RNDr(work, sched, 47, 0x106aa070);
	RNDr(work, sched, 48, 0x19a4c116);
	RNDr(work, sched, 49, 0x1e376c08);
	RNDr(work, sched, 50, 0x2748774c);
	RNDr(work, sched, 51, 0x34b0bcb5);
	RNDr(work, sched, 52, 0x391c0cb3);
	RNDr(work, sched, 53, 0x4ed8aa4a);
	RNDr(work, sched, 54, 0x5b9cca4f);
	RNDr(work, sched, 55, 0x682e6ff3);
	RNDr(work, sched, 56, 0x748f82ee);
	RNDr(work, sched, 57, 0x78a5636f);
	RNDr(work, sched, 58, 0x84c87814);
	RNDr(work, sched, 59, 0x8cc70208);
	RNDr(work, sched, 60, 0x90befffa);
	RNDr(work, sched, 61, 0xa4506ceb);
	RNDr(work, sched, 62, 0xbef9a3f7);
	RNDr(work, sched, 63, 0xc67178f2);

	/* Add the mixed working copy back into the caller's state. */
	for (i = 0; i < 8; i++)
		state[i] += work[i];
}
|
Loading…
Reference in a new issue