Add AVX2-enabled functions for x86-64

parent 44d4815b01
commit e878267239

8 changed files with 1995 additions and 78 deletions

README (4 changes)
@@ -42,7 +42,7 @@ Architecture-specific notes:
 To use NEON instructions, add "-mfpu=neon" to CFLAGS.
 x86: The miner checks for SSE2 instructions support at runtime,
 and uses them if they are available.
-x86-64: The miner can take advantage of AVX and XOP instructions,
+x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions,
 but only if both the CPU and the operating system support them.
 * Linux supports AVX starting from kernel version 2.6.30.
 * FreeBSD supports AVX starting with 9.1-RELEASE.
@@ -50,7 +50,7 @@ Architecture-specific notes:
 * Windows supports AVX starting from Windows 7 SP1 and
 Windows Server 2008 R2 SP1.
 The configure script outputs a warning if the assembler
-cannot compile AVX or XOP instructions. In that case, the miner
+doesn't support some instruction sets. In that case, the miner
 can still be built, but unavailable optimizations are left off.

 Usage instructions: Run "minerd --help" to see options.
configure.ac

@@ -77,6 +77,14 @@ then
   AC_MSG_RESULT(no)
   AC_MSG_WARN([The assembler does not support the XOP instruction set.])
 )
+AC_MSG_CHECKING(whether we can compile AVX2 code)
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM(,[asm ("vpaddd %ymm0, %ymm1, %ymm2");])],
+  AC_DEFINE(USE_AVX2, 1, [Define to 1 if AVX2 assembly is available.])
+  AC_MSG_RESULT(yes)
+,
+  AC_MSG_RESULT(no)
+  AC_MSG_WARN([The assembler does not support the AVX2 instruction set.])
+)
 ,
   AC_MSG_RESULT(no)
   AC_MSG_WARN([The assembler does not support the AVX instruction set.])
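For reference, the probe that AC_COMPILE_IFELSE feeds to the compiler above boils down to a single AVX2 instruction inside an asm statement; if the assembler accepts it, USE_AVX2 gets defined. Roughly (an illustrative sketch, not part of the build):

/* Compile-time probe: does the assembler accept an AVX2 instruction? */
int main(void)
{
	asm ("vpaddd %ymm0, %ymm1, %ymm2");
	return 0;
}

Note that this only tests the toolchain; whether the CPU and OS actually support AVX2 is still decided at runtime (see the cpuid/xgetbv check added to scrypt_best_throughput below).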
cpu-miner.c

@@ -668,7 +668,7 @@ static void *miner_thread(void *userdata)
 	int thr_id = mythr->id;
 	struct work work;
 	uint32_t max_nonce;
-	uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x10;
+	uint32_t end_nonce = 0xffffffffU / opt_n_threads * (thr_id + 1) - 0x20;
 	unsigned char *scratchbuf = NULL;
 	char s[16];
 	int i;
miner.h (7 changes)

@@ -141,6 +141,13 @@ void sha256_init_4way(uint32_t *state);
 void sha256_transform_4way(uint32_t *state, const uint32_t *block, int swap);
 #endif

+#if defined(__x86_64__) && defined(USE_AVX2)
+#define HAVE_SHA256_8WAY 1
+int sha256_use_8way();
+void sha256_init_8way(uint32_t *state);
+void sha256_transform_8way(uint32_t *state, const uint32_t *block, int swap);
+#endif
+
 extern int scanhash_sha256d(int thr_id, uint32_t *pdata,
 	const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done);

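The new 8-way interface mirrors the existing 4-way one but processes eight message blocks per call, with all arrays lane-interleaved: word i of lane k sits at index i * 8 + k (this is the layout the callers in scrypt.c and sha2.c below construct). A minimal usage sketch, assuming only the header above; sha256_8way_block_example is a hypothetical name:

#include <stdint.h>
#include "miner.h"

#ifdef HAVE_SHA256_8WAY
/* Hash one 64-byte block for 8 independent messages at once. */
static void sha256_8way_block_example(uint32_t state[8 * 8],
	const uint32_t block[8 * 16])
{
	sha256_init_8way(state);                /* 8 interleaved copies of the SHA-256 IV */
	sha256_transform_8way(state, block, 0); /* last argument selects optional byte-swapping */
}
#endif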
scrypt-x64.S (639 changes)

@@ -1,5 +1,5 @@
 /*
- * Copyright 2011-2012 pooler@litecoinpool.org
+ * Copyright 2011-2013 pooler@litecoinpool.org
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,30 @@
 scrypt_best_throughput:
 _scrypt_best_throughput:
 	pushq	%rbx
+#if defined(USE_AVX2)
+	/* Check for AVX and OSXSAVE support */
+	movl	$1, %eax
+	cpuid
+	andl	$0x18000000, %ecx
+	cmpl	$0x18000000, %ecx
+	jne scrypt_best_throughput_no_avx2
+	/* Check for AVX2 support */
+	movl	$7, %eax
+	xorl	%ecx, %ecx
+	cpuid
+	andl	$0x00000020, %ebx
+	cmpl	$0x00000020, %ebx
+	jne scrypt_best_throughput_no_avx2
+	/* Check for XMM and YMM state support */
+	xorl	%ecx, %ecx
+	xgetbv
+	andl	$0x00000006, %eax
+	cmpl	$0x00000006, %eax
+	jne scrypt_best_throughput_no_avx2
+	movl	$6, %eax
+	jmp scrypt_best_throughput_exit
+scrypt_best_throughput_no_avx2:
+#endif
 	/* Check for AuthenticAMD */
 	xorq	%rax, %rax
 	cpuid
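The block above selects the 6-way scrypt throughput only when three conditions hold: CPUID.1:ECX reports both OSXSAVE (bit 27) and AVX (bit 28), CPUID.(EAX=7,ECX=0):EBX reports AVX2 (bit 5), and XGETBV shows that the OS enables XMM and YMM state (XCR0 bits 1 and 2). A C rendering of the same test, as a sketch assuming GCC-style <cpuid.h> helpers and inline asm (the miner itself performs this check in assembly, as above):

#include <cpuid.h>

static int cpu_has_usable_avx2(void)
{
	unsigned int eax, ebx, ecx, edx, xcr0;

	/* CPUID.1:ECX - OSXSAVE (bit 27) and AVX (bit 28) must both be set */
	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return 0;
	if ((ecx & 0x18000000) != 0x18000000)
		return 0;

	/* CPUID.(EAX=7,ECX=0):EBX - AVX2 is bit 5 */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	if (!(ebx & 0x00000020))
		return 0;

	/* XGETBV(0) - the OS must save XMM (bit 1) and YMM (bit 2) state */
	asm volatile ("xgetbv" : "=a" (xcr0), "=d" (edx) : "c" (0));
	return (xcr0 & 0x00000006) == 0x00000006;
}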
@@ -2239,4 +2263,617 @@ scrypt_core_3way_xmm_loop2:
|
|||
scrypt_core_3way_cleanup
|
||||
ret
|
||||
|
||||
|
||||
#if defined(USE_AVX2)
|
||||
|
||||
.macro salsa8_core_6way_avx2_doubleround
|
||||
vpaddd %ymm0, %ymm1, %ymm4
|
||||
vpaddd %ymm8, %ymm9, %ymm6
|
||||
vpaddd %ymm12, %ymm13, %ymm7
|
||||
vpslld $7, %ymm4, %ymm5
|
||||
vpsrld $25, %ymm4, %ymm4
|
||||
vpxor %ymm5, %ymm3, %ymm3
|
||||
vpxor %ymm4, %ymm3, %ymm3
|
||||
vpslld $7, %ymm6, %ymm5
|
||||
vpsrld $25, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm11, %ymm11
|
||||
vpxor %ymm6, %ymm11, %ymm11
|
||||
vpslld $7, %ymm7, %ymm5
|
||||
vpsrld $25, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm15, %ymm15
|
||||
vpxor %ymm7, %ymm15, %ymm15
|
||||
|
||||
vpaddd %ymm3, %ymm0, %ymm4
|
||||
vpaddd %ymm11, %ymm8, %ymm6
|
||||
vpaddd %ymm15, %ymm12, %ymm7
|
||||
vpslld $9, %ymm4, %ymm5
|
||||
vpsrld $23, %ymm4, %ymm4
|
||||
vpxor %ymm5, %ymm2, %ymm2
|
||||
vpxor %ymm4, %ymm2, %ymm2
|
||||
vpslld $9, %ymm6, %ymm5
|
||||
vpsrld $23, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm10, %ymm10
|
||||
vpxor %ymm6, %ymm10, %ymm10
|
||||
vpslld $9, %ymm7, %ymm5
|
||||
vpsrld $23, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm14, %ymm14
|
||||
vpxor %ymm7, %ymm14, %ymm14
|
||||
|
||||
vpaddd %ymm2, %ymm3, %ymm4
|
||||
vpaddd %ymm10, %ymm11, %ymm6
|
||||
vpaddd %ymm14, %ymm15, %ymm7
|
||||
vpslld $13, %ymm4, %ymm5
|
||||
vpsrld $19, %ymm4, %ymm4
|
||||
vpshufd $0x93, %ymm3, %ymm3
|
||||
vpshufd $0x93, %ymm11, %ymm11
|
||||
vpshufd $0x93, %ymm15, %ymm15
|
||||
vpxor %ymm5, %ymm1, %ymm1
|
||||
vpxor %ymm4, %ymm1, %ymm1
|
||||
vpslld $13, %ymm6, %ymm5
|
||||
vpsrld $19, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm9, %ymm9
|
||||
vpxor %ymm6, %ymm9, %ymm9
|
||||
vpslld $13, %ymm7, %ymm5
|
||||
vpsrld $19, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm13, %ymm13
|
||||
vpxor %ymm7, %ymm13, %ymm13
|
||||
|
||||
vpaddd %ymm1, %ymm2, %ymm4
|
||||
vpaddd %ymm9, %ymm10, %ymm6
|
||||
vpaddd %ymm13, %ymm14, %ymm7
|
||||
vpslld $18, %ymm4, %ymm5
|
||||
vpsrld $14, %ymm4, %ymm4
|
||||
vpshufd $0x4e, %ymm2, %ymm2
|
||||
vpshufd $0x4e, %ymm10, %ymm10
|
||||
vpshufd $0x4e, %ymm14, %ymm14
|
||||
vpxor %ymm5, %ymm0, %ymm0
|
||||
vpxor %ymm4, %ymm0, %ymm0
|
||||
vpslld $18, %ymm6, %ymm5
|
||||
vpsrld $14, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm8, %ymm8
|
||||
vpxor %ymm6, %ymm8, %ymm8
|
||||
vpslld $18, %ymm7, %ymm5
|
||||
vpsrld $14, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm12, %ymm12
|
||||
vpxor %ymm7, %ymm12, %ymm12
|
||||
|
||||
vpaddd %ymm0, %ymm3, %ymm4
|
||||
vpaddd %ymm8, %ymm11, %ymm6
|
||||
vpaddd %ymm12, %ymm15, %ymm7
|
||||
vpslld $7, %ymm4, %ymm5
|
||||
vpsrld $25, %ymm4, %ymm4
|
||||
vpshufd $0x39, %ymm1, %ymm1
|
||||
vpxor %ymm5, %ymm1, %ymm1
|
||||
vpxor %ymm4, %ymm1, %ymm1
|
||||
vpslld $7, %ymm6, %ymm5
|
||||
vpsrld $25, %ymm6, %ymm6
|
||||
vpshufd $0x39, %ymm9, %ymm9
|
||||
vpxor %ymm5, %ymm9, %ymm9
|
||||
vpxor %ymm6, %ymm9, %ymm9
|
||||
vpslld $7, %ymm7, %ymm5
|
||||
vpsrld $25, %ymm7, %ymm7
|
||||
vpshufd $0x39, %ymm13, %ymm13
|
||||
vpxor %ymm5, %ymm13, %ymm13
|
||||
vpxor %ymm7, %ymm13, %ymm13
|
||||
|
||||
vpaddd %ymm1, %ymm0, %ymm4
|
||||
vpaddd %ymm9, %ymm8, %ymm6
|
||||
vpaddd %ymm13, %ymm12, %ymm7
|
||||
vpslld $9, %ymm4, %ymm5
|
||||
vpsrld $23, %ymm4, %ymm4
|
||||
vpxor %ymm5, %ymm2, %ymm2
|
||||
vpxor %ymm4, %ymm2, %ymm2
|
||||
vpslld $9, %ymm6, %ymm5
|
||||
vpsrld $23, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm10, %ymm10
|
||||
vpxor %ymm6, %ymm10, %ymm10
|
||||
vpslld $9, %ymm7, %ymm5
|
||||
vpsrld $23, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm14, %ymm14
|
||||
vpxor %ymm7, %ymm14, %ymm14
|
||||
|
||||
vpaddd %ymm2, %ymm1, %ymm4
|
||||
vpaddd %ymm10, %ymm9, %ymm6
|
||||
vpaddd %ymm14, %ymm13, %ymm7
|
||||
vpslld $13, %ymm4, %ymm5
|
||||
vpsrld $19, %ymm4, %ymm4
|
||||
vpshufd $0x93, %ymm1, %ymm1
|
||||
vpshufd $0x93, %ymm9, %ymm9
|
||||
vpshufd $0x93, %ymm13, %ymm13
|
||||
vpxor %ymm5, %ymm3, %ymm3
|
||||
vpxor %ymm4, %ymm3, %ymm3
|
||||
vpslld $13, %ymm6, %ymm5
|
||||
vpsrld $19, %ymm6, %ymm6
|
||||
vpxor %ymm5, %ymm11, %ymm11
|
||||
vpxor %ymm6, %ymm11, %ymm11
|
||||
vpslld $13, %ymm7, %ymm5
|
||||
vpsrld $19, %ymm7, %ymm7
|
||||
vpxor %ymm5, %ymm15, %ymm15
|
||||
vpxor %ymm7, %ymm15, %ymm15
|
||||
|
||||
vpaddd %ymm3, %ymm2, %ymm4
|
||||
vpaddd %ymm11, %ymm10, %ymm6
|
||||
vpaddd %ymm15, %ymm14, %ymm7
|
||||
vpslld $18, %ymm4, %ymm5
|
||||
vpsrld $14, %ymm4, %ymm4
|
||||
vpshufd $0x4e, %ymm2, %ymm2
|
||||
vpshufd $0x4e, %ymm10, %ymm10
|
||||
vpxor %ymm5, %ymm0, %ymm0
|
||||
vpxor %ymm4, %ymm0, %ymm0
|
||||
vpslld $18, %ymm6, %ymm5
|
||||
vpsrld $14, %ymm6, %ymm6
|
||||
vpshufd $0x4e, %ymm14, %ymm14
|
||||
vpshufd $0x39, %ymm11, %ymm11
|
||||
vpxor %ymm5, %ymm8, %ymm8
|
||||
vpxor %ymm6, %ymm8, %ymm8
|
||||
vpslld $18, %ymm7, %ymm5
|
||||
vpsrld $14, %ymm7, %ymm7
|
||||
vpshufd $0x39, %ymm3, %ymm3
|
||||
vpshufd $0x39, %ymm15, %ymm15
|
||||
vpxor %ymm5, %ymm12, %ymm12
|
||||
vpxor %ymm7, %ymm12, %ymm12
|
||||
.endm
|
||||
|
||||
.macro salsa8_core_6way_avx2
|
||||
salsa8_core_6way_avx2_doubleround
|
||||
salsa8_core_6way_avx2_doubleround
|
||||
salsa8_core_6way_avx2_doubleround
|
||||
salsa8_core_6way_avx2_doubleround
|
||||
.endm
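In the macros above, each doubleround is applied to three pairs of Salsa20 states at once: registers ymm0-ymm3, ymm8-ymm11 and ymm12-ymm15 each hold one four-word row for two of the six lanes (one lane per 128-bit half), and the vpshufd rotations keep the state in a shuffled order so only shifts and xors are needed. For comparison, the textbook scalar form of the Salsa20/8 core that every lane computes (standard reference code, not taken from this diff):

#include <stdint.h>

#define R(a, b) (((a) << (b)) | ((a) >> (32 - (b))))

/* Salsa20/8: four doublerounds (column round + row round) over 16 words,
 * then add the input back in. */
static void salsa20_8(uint32_t B[16])
{
	uint32_t x[16];
	int i;

	for (i = 0; i < 16; i++)
		x[i] = B[i];
	for (i = 0; i < 8; i += 2) {
		/* column round */
		x[ 4] ^= R(x[ 0] + x[12],  7);  x[ 8] ^= R(x[ 4] + x[ 0],  9);
		x[12] ^= R(x[ 8] + x[ 4], 13);  x[ 0] ^= R(x[12] + x[ 8], 18);
		x[ 9] ^= R(x[ 5] + x[ 1],  7);  x[13] ^= R(x[ 9] + x[ 5],  9);
		x[ 1] ^= R(x[13] + x[ 9], 13);  x[ 5] ^= R(x[ 1] + x[13], 18);
		x[14] ^= R(x[10] + x[ 6],  7);  x[ 2] ^= R(x[14] + x[10],  9);
		x[ 6] ^= R(x[ 2] + x[14], 13);  x[10] ^= R(x[ 6] + x[ 2], 18);
		x[ 3] ^= R(x[15] + x[11],  7);  x[ 7] ^= R(x[ 3] + x[15],  9);
		x[11] ^= R(x[ 7] + x[ 3], 13);  x[15] ^= R(x[11] + x[ 7], 18);
		/* row round */
		x[ 1] ^= R(x[ 0] + x[ 3],  7);  x[ 2] ^= R(x[ 1] + x[ 0],  9);
		x[ 3] ^= R(x[ 2] + x[ 1], 13);  x[ 0] ^= R(x[ 3] + x[ 2], 18);
		x[ 6] ^= R(x[ 5] + x[ 4],  7);  x[ 7] ^= R(x[ 6] + x[ 5],  9);
		x[ 4] ^= R(x[ 7] + x[ 6], 13);  x[ 5] ^= R(x[ 4] + x[ 7], 18);
		x[11] ^= R(x[10] + x[ 9],  7);  x[ 8] ^= R(x[11] + x[10],  9);
		x[ 9] ^= R(x[ 8] + x[11], 13);  x[10] ^= R(x[ 9] + x[ 8], 18);
		x[12] ^= R(x[15] + x[14],  7);  x[13] ^= R(x[12] + x[15],  9);
		x[14] ^= R(x[13] + x[12], 13);  x[15] ^= R(x[14] + x[13], 18);
	}
	for (i = 0; i < 16; i++)
		B[i] += x[i];
}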
|
||||
|
||||
.text
|
||||
.p2align 6
|
||||
.globl scrypt_core_6way
|
||||
.globl _scrypt_core_6way
|
||||
scrypt_core_6way:
|
||||
_scrypt_core_6way:
|
||||
pushq %rbx
|
||||
pushq %rbp
|
||||
#if defined(WIN64)
|
||||
subq $176, %rsp
|
||||
vmovdqa %xmm6, 8(%rsp)
|
||||
vmovdqa %xmm7, 24(%rsp)
|
||||
vmovdqa %xmm8, 40(%rsp)
|
||||
vmovdqa %xmm9, 56(%rsp)
|
||||
vmovdqa %xmm10, 72(%rsp)
|
||||
vmovdqa %xmm11, 88(%rsp)
|
||||
vmovdqa %xmm12, 104(%rsp)
|
||||
vmovdqa %xmm13, 120(%rsp)
|
||||
vmovdqa %xmm14, 136(%rsp)
|
||||
vmovdqa %xmm15, 152(%rsp)
|
||||
pushq %rdi
|
||||
pushq %rsi
|
||||
movq %rcx, %rdi
|
||||
movq %rdx, %rsi
|
||||
#endif
|
||||
movq %rsp, %rdx
|
||||
subq $768, %rsp
|
||||
andq $-128, %rsp
|
||||
|
||||
.macro scrypt_core_6way_cleanup
|
||||
movq %rdx, %rsp
|
||||
#if defined(WIN64)
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
vmovdqa 8(%rsp), %xmm6
|
||||
vmovdqa 24(%rsp), %xmm7
|
||||
vmovdqa 40(%rsp), %xmm8
|
||||
vmovdqa 56(%rsp), %xmm9
|
||||
vmovdqa 72(%rsp), %xmm10
|
||||
vmovdqa 88(%rsp), %xmm11
|
||||
vmovdqa 104(%rsp), %xmm12
|
||||
vmovdqa 120(%rsp), %xmm13
|
||||
vmovdqa 136(%rsp), %xmm14
|
||||
vmovdqa 152(%rsp), %xmm15
|
||||
addq $176, %rsp
|
||||
#endif
|
||||
popq %rbp
|
||||
popq %rbx
|
||||
.endm
|
||||
|
||||
.macro scrypt_shuffle_pack2 src, so, dest, do
|
||||
vmovdqa \so+0*16(\src), %xmm0
|
||||
vmovdqa \so+1*16(\src), %xmm1
|
||||
vmovdqa \so+2*16(\src), %xmm2
|
||||
vmovdqa \so+3*16(\src), %xmm3
|
||||
vinserti128 $1, \so+128+0*16(\src), %ymm0, %ymm0
|
||||
vinserti128 $1, \so+128+1*16(\src), %ymm1, %ymm1
|
||||
vinserti128 $1, \so+128+2*16(\src), %ymm2, %ymm2
|
||||
vinserti128 $1, \so+128+3*16(\src), %ymm3, %ymm3
|
||||
vpblendd $0x33, %ymm0, %ymm2, %ymm4
|
||||
vpblendd $0xcc, %ymm1, %ymm3, %ymm5
|
||||
vpblendd $0x33, %ymm2, %ymm0, %ymm6
|
||||
vpblendd $0xcc, %ymm3, %ymm1, %ymm7
|
||||
vpblendd $0x55, %ymm7, %ymm6, %ymm3
|
||||
vpblendd $0x55, %ymm6, %ymm5, %ymm2
|
||||
vpblendd $0x55, %ymm5, %ymm4, %ymm1
|
||||
vpblendd $0x55, %ymm4, %ymm7, %ymm0
|
||||
vmovdqa %ymm0, \do+0*32(\dest)
|
||||
vmovdqa %ymm1, \do+1*32(\dest)
|
||||
vmovdqa %ymm2, \do+2*32(\dest)
|
||||
vmovdqa %ymm3, \do+3*32(\dest)
|
||||
.endm
|
||||
|
||||
.macro scrypt_shuffle_unpack2 src, so, dest, do
|
||||
vmovdqa \so+0*32(\src), %ymm0
|
||||
vmovdqa \so+1*32(\src), %ymm1
|
||||
vmovdqa \so+2*32(\src), %ymm2
|
||||
vmovdqa \so+3*32(\src), %ymm3
|
||||
vpblendd $0x33, %ymm0, %ymm2, %ymm4
|
||||
vpblendd $0xcc, %ymm1, %ymm3, %ymm5
|
||||
vpblendd $0x33, %ymm2, %ymm0, %ymm6
|
||||
vpblendd $0xcc, %ymm3, %ymm1, %ymm7
|
||||
vpblendd $0x55, %ymm7, %ymm6, %ymm3
|
||||
vpblendd $0x55, %ymm6, %ymm5, %ymm2
|
||||
vpblendd $0x55, %ymm5, %ymm4, %ymm1
|
||||
vpblendd $0x55, %ymm4, %ymm7, %ymm0
|
||||
vmovdqa %xmm0, \do+0*16(\dest)
|
||||
vmovdqa %xmm1, \do+1*16(\dest)
|
||||
vmovdqa %xmm2, \do+2*16(\dest)
|
||||
vmovdqa %xmm3, \do+3*16(\dest)
|
||||
vextracti128 $1, %ymm0, \do+128+0*16(\dest)
|
||||
vextracti128 $1, %ymm1, \do+128+1*16(\dest)
|
||||
vextracti128 $1, %ymm2, \do+128+2*16(\dest)
|
||||
vextracti128 $1, %ymm3, \do+128+3*16(\dest)
|
||||
.endm
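scrypt_shuffle_pack2 and scrypt_shuffle_unpack2 marshal data between the linear per-block layout used by the C code and the two-lane layout the 6-way kernel works on: each 256-bit row pairs a 16-byte chunk of one 128-byte block (low half) with the corresponding chunk of the block 128 bytes further on (high half), while the vpblendd sequence additionally permutes words within each half for the salsa rounds. A hedged C sketch of just the lane pairing, with that word permutation left out (pack2_lanes is a hypothetical helper, not part of the miner):

#include <stdint.h>
#include <string.h>

static void pack2_lanes(const uint32_t src[2 * 32], uint32_t dst[2 * 32])
{
	int r;

	for (r = 0; r < 8; r++) {
		memcpy(&dst[8 * r + 0], &src[ 0 + 4 * r], 16); /* low 128 bits: block 0 */
		memcpy(&dst[8 * r + 4], &src[32 + 4 * r], 16); /* high 128 bits: block 1 */
	}
}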
|
||||
|
||||
scrypt_core_6way_avx2:
|
||||
scrypt_shuffle_pack2 %rdi, 0*256+0, %rsp, 0*128
|
||||
scrypt_shuffle_pack2 %rdi, 0*256+64, %rsp, 1*128
|
||||
scrypt_shuffle_pack2 %rdi, 1*256+0, %rsp, 2*128
|
||||
scrypt_shuffle_pack2 %rdi, 1*256+64, %rsp, 3*128
|
||||
scrypt_shuffle_pack2 %rdi, 2*256+0, %rsp, 4*128
|
||||
scrypt_shuffle_pack2 %rdi, 2*256+64, %rsp, 5*128
|
||||
|
||||
vmovdqa 0*256+4*32(%rsp), %ymm0
|
||||
vmovdqa 0*256+5*32(%rsp), %ymm1
|
||||
vmovdqa 0*256+6*32(%rsp), %ymm2
|
||||
vmovdqa 0*256+7*32(%rsp), %ymm3
|
||||
vmovdqa 1*256+4*32(%rsp), %ymm8
|
||||
vmovdqa 1*256+5*32(%rsp), %ymm9
|
||||
vmovdqa 1*256+6*32(%rsp), %ymm10
|
||||
vmovdqa 1*256+7*32(%rsp), %ymm11
|
||||
vmovdqa 2*256+4*32(%rsp), %ymm12
|
||||
vmovdqa 2*256+5*32(%rsp), %ymm13
|
||||
vmovdqa 2*256+6*32(%rsp), %ymm14
|
||||
vmovdqa 2*256+7*32(%rsp), %ymm15
|
||||
|
||||
movq %rsi, %rbx
|
||||
leaq 6*131072(%rsi), %rax
|
||||
scrypt_core_6way_avx2_loop1:
|
||||
vmovdqa %ymm0, 0*256+4*32(%rbx)
|
||||
vmovdqa %ymm1, 0*256+5*32(%rbx)
|
||||
vmovdqa %ymm2, 0*256+6*32(%rbx)
|
||||
vmovdqa %ymm3, 0*256+7*32(%rbx)
|
||||
vpxor 0*256+0*32(%rsp), %ymm0, %ymm0
|
||||
vpxor 0*256+1*32(%rsp), %ymm1, %ymm1
|
||||
vpxor 0*256+2*32(%rsp), %ymm2, %ymm2
|
||||
vpxor 0*256+3*32(%rsp), %ymm3, %ymm3
|
||||
vmovdqa %ymm8, 1*256+4*32(%rbx)
|
||||
vmovdqa %ymm9, 1*256+5*32(%rbx)
|
||||
vmovdqa %ymm10, 1*256+6*32(%rbx)
|
||||
vmovdqa %ymm11, 1*256+7*32(%rbx)
|
||||
vpxor 1*256+0*32(%rsp), %ymm8, %ymm8
|
||||
vpxor 1*256+1*32(%rsp), %ymm9, %ymm9
|
||||
vpxor 1*256+2*32(%rsp), %ymm10, %ymm10
|
||||
vpxor 1*256+3*32(%rsp), %ymm11, %ymm11
|
||||
vmovdqa %ymm12, 2*256+4*32(%rbx)
|
||||
vmovdqa %ymm13, 2*256+5*32(%rbx)
|
||||
vmovdqa %ymm14, 2*256+6*32(%rbx)
|
||||
vmovdqa %ymm15, 2*256+7*32(%rbx)
|
||||
vpxor 2*256+0*32(%rsp), %ymm12, %ymm12
|
||||
vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
|
||||
vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
|
||||
vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+0*32(%rbx)
|
||||
vmovdqa %ymm1, 0*256+1*32(%rbx)
|
||||
vmovdqa %ymm2, 0*256+2*32(%rbx)
|
||||
vmovdqa %ymm3, 0*256+3*32(%rbx)
|
||||
vmovdqa %ymm8, 1*256+0*32(%rbx)
|
||||
vmovdqa %ymm9, 1*256+1*32(%rbx)
|
||||
vmovdqa %ymm10, 1*256+2*32(%rbx)
|
||||
vmovdqa %ymm11, 1*256+3*32(%rbx)
|
||||
vmovdqa %ymm12, 2*256+0*32(%rbx)
|
||||
vmovdqa %ymm13, 2*256+1*32(%rbx)
|
||||
vmovdqa %ymm14, 2*256+2*32(%rbx)
|
||||
vmovdqa %ymm15, 2*256+3*32(%rbx)
|
||||
|
||||
salsa8_core_6way_avx2
|
||||
vpaddd 0*256+0*32(%rbx), %ymm0, %ymm0
|
||||
vpaddd 0*256+1*32(%rbx), %ymm1, %ymm1
|
||||
vpaddd 0*256+2*32(%rbx), %ymm2, %ymm2
|
||||
vpaddd 0*256+3*32(%rbx), %ymm3, %ymm3
|
||||
vpaddd 1*256+0*32(%rbx), %ymm8, %ymm8
|
||||
vpaddd 1*256+1*32(%rbx), %ymm9, %ymm9
|
||||
vpaddd 1*256+2*32(%rbx), %ymm10, %ymm10
|
||||
vpaddd 1*256+3*32(%rbx), %ymm11, %ymm11
|
||||
vpaddd 2*256+0*32(%rbx), %ymm12, %ymm12
|
||||
vpaddd 2*256+1*32(%rbx), %ymm13, %ymm13
|
||||
vpaddd 2*256+2*32(%rbx), %ymm14, %ymm14
|
||||
vpaddd 2*256+3*32(%rbx), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+0*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+1*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+2*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+3*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+0*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+1*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+2*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+3*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+0*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+1*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+2*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+3*32(%rsp)
|
||||
|
||||
vpxor 0*256+4*32(%rbx), %ymm0, %ymm0
|
||||
vpxor 0*256+5*32(%rbx), %ymm1, %ymm1
|
||||
vpxor 0*256+6*32(%rbx), %ymm2, %ymm2
|
||||
vpxor 0*256+7*32(%rbx), %ymm3, %ymm3
|
||||
vpxor 1*256+4*32(%rbx), %ymm8, %ymm8
|
||||
vpxor 1*256+5*32(%rbx), %ymm9, %ymm9
|
||||
vpxor 1*256+6*32(%rbx), %ymm10, %ymm10
|
||||
vpxor 1*256+7*32(%rbx), %ymm11, %ymm11
|
||||
vpxor 2*256+4*32(%rbx), %ymm12, %ymm12
|
||||
vpxor 2*256+5*32(%rbx), %ymm13, %ymm13
|
||||
vpxor 2*256+6*32(%rbx), %ymm14, %ymm14
|
||||
vpxor 2*256+7*32(%rbx), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+4*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+5*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+6*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+7*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+4*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+5*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+6*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+7*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+4*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+5*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+6*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+7*32(%rsp)
|
||||
salsa8_core_6way_avx2
|
||||
vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0
|
||||
vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1
|
||||
vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2
|
||||
vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3
|
||||
vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8
|
||||
vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9
|
||||
vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10
|
||||
vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11
|
||||
vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12
|
||||
vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13
|
||||
vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14
|
||||
vpaddd 2*256+7*32(%rsp), %ymm15, %ymm15
|
||||
|
||||
addq $6*128, %rbx
|
||||
cmpq %rax, %rbx
|
||||
jne scrypt_core_6way_avx2_loop1
|
||||
|
||||
vmovdqa %ymm0, 0*256+4*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+5*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+6*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+7*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+4*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+5*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+6*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+7*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+4*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+5*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+6*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+7*32(%rsp)
|
||||
|
||||
movq $1024, %rcx
|
||||
scrypt_core_6way_avx2_loop2:
|
||||
vmovd %xmm0, %ebp
|
||||
vmovd %xmm8, %ebx
|
||||
vmovd %xmm12, %eax
|
||||
vextracti128 $1, %ymm0, %xmm4
|
||||
vextracti128 $1, %ymm8, %xmm5
|
||||
vextracti128 $1, %ymm12, %xmm6
|
||||
vmovd %xmm4, %r8d
|
||||
vmovd %xmm5, %r9d
|
||||
vmovd %xmm6, %r10d
|
||||
vpxor 0*256+0*32(%rsp), %ymm0, %ymm0
|
||||
vpxor 0*256+1*32(%rsp), %ymm1, %ymm1
|
||||
vpxor 0*256+2*32(%rsp), %ymm2, %ymm2
|
||||
vpxor 0*256+3*32(%rsp), %ymm3, %ymm3
|
||||
vpxor 1*256+0*32(%rsp), %ymm8, %ymm8
|
||||
vpxor 1*256+1*32(%rsp), %ymm9, %ymm9
|
||||
vpxor 1*256+2*32(%rsp), %ymm10, %ymm10
|
||||
vpxor 1*256+3*32(%rsp), %ymm11, %ymm11
|
||||
vpxor 2*256+0*32(%rsp), %ymm12, %ymm12
|
||||
vpxor 2*256+1*32(%rsp), %ymm13, %ymm13
|
||||
vpxor 2*256+2*32(%rsp), %ymm14, %ymm14
|
||||
vpxor 2*256+3*32(%rsp), %ymm15, %ymm15
|
||||
andl $1023, %ebp
|
||||
leaq 0(%rbp, %rbp, 2), %rbp
|
||||
shll $8, %ebp
|
||||
andl $1023, %ebx
|
||||
leaq 1(%rbx, %rbx, 2), %rbx
|
||||
shll $8, %ebx
|
||||
andl $1023, %eax
|
||||
leaq 2(%rax, %rax, 2), %rax
|
||||
shll $8, %eax
|
||||
andl $1023, %r8d
|
||||
leaq 0(%r8, %r8, 2), %r8
|
||||
shll $8, %r8d
|
||||
andl $1023, %r9d
|
||||
leaq 1(%r9, %r9, 2), %r9
|
||||
shll $8, %r9d
|
||||
andl $1023, %r10d
|
||||
leaq 2(%r10, %r10, 2), %r10
|
||||
shll $8, %r10d
|
||||
vmovdqa 0*32(%rsi, %rbp), %xmm4
|
||||
vinserti128 $1, 0*32+16(%rsi, %r8), %ymm4, %ymm4
|
||||
vmovdqa 1*32(%rsi, %rbp), %xmm5
|
||||
vinserti128 $1, 1*32+16(%rsi, %r8), %ymm5, %ymm5
|
||||
vmovdqa 2*32(%rsi, %rbp), %xmm6
|
||||
vinserti128 $1, 2*32+16(%rsi, %r8), %ymm6, %ymm6
|
||||
vmovdqa 3*32(%rsi, %rbp), %xmm7
|
||||
vinserti128 $1, 3*32+16(%rsi, %r8), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm0, %ymm0
|
||||
vpxor %ymm5, %ymm1, %ymm1
|
||||
vpxor %ymm6, %ymm2, %ymm2
|
||||
vpxor %ymm7, %ymm3, %ymm3
|
||||
vmovdqa 0*32(%rsi, %rbx), %xmm4
|
||||
vinserti128 $1, 0*32+16(%rsi, %r9), %ymm4, %ymm4
|
||||
vmovdqa 1*32(%rsi, %rbx), %xmm5
|
||||
vinserti128 $1, 1*32+16(%rsi, %r9), %ymm5, %ymm5
|
||||
vmovdqa 2*32(%rsi, %rbx), %xmm6
|
||||
vinserti128 $1, 2*32+16(%rsi, %r9), %ymm6, %ymm6
|
||||
vmovdqa 3*32(%rsi, %rbx), %xmm7
|
||||
vinserti128 $1, 3*32+16(%rsi, %r9), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm8, %ymm8
|
||||
vpxor %ymm5, %ymm9, %ymm9
|
||||
vpxor %ymm6, %ymm10, %ymm10
|
||||
vpxor %ymm7, %ymm11, %ymm11
|
||||
vmovdqa 0*32(%rsi, %rax), %xmm4
|
||||
vinserti128 $1, 0*32+16(%rsi, %r10), %ymm4, %ymm4
|
||||
vmovdqa 1*32(%rsi, %rax), %xmm5
|
||||
vinserti128 $1, 1*32+16(%rsi, %r10), %ymm5, %ymm5
|
||||
vmovdqa 2*32(%rsi, %rax), %xmm6
|
||||
vinserti128 $1, 2*32+16(%rsi, %r10), %ymm6, %ymm6
|
||||
vmovdqa 3*32(%rsi, %rax), %xmm7
|
||||
vinserti128 $1, 3*32+16(%rsi, %r10), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm12, %ymm12
|
||||
vpxor %ymm5, %ymm13, %ymm13
|
||||
vpxor %ymm6, %ymm14, %ymm14
|
||||
vpxor %ymm7, %ymm15, %ymm15
|
||||
|
||||
vmovdqa %ymm0, 0*256+0*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+1*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+2*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+3*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+0*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+1*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+2*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+3*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+0*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+1*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+2*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+3*32(%rsp)
|
||||
salsa8_core_6way_avx2
|
||||
vpaddd 0*256+0*32(%rsp), %ymm0, %ymm0
|
||||
vpaddd 0*256+1*32(%rsp), %ymm1, %ymm1
|
||||
vpaddd 0*256+2*32(%rsp), %ymm2, %ymm2
|
||||
vpaddd 0*256+3*32(%rsp), %ymm3, %ymm3
|
||||
vpaddd 1*256+0*32(%rsp), %ymm8, %ymm8
|
||||
vpaddd 1*256+1*32(%rsp), %ymm9, %ymm9
|
||||
vpaddd 1*256+2*32(%rsp), %ymm10, %ymm10
|
||||
vpaddd 1*256+3*32(%rsp), %ymm11, %ymm11
|
||||
vpaddd 2*256+0*32(%rsp), %ymm12, %ymm12
|
||||
vpaddd 2*256+1*32(%rsp), %ymm13, %ymm13
|
||||
vpaddd 2*256+2*32(%rsp), %ymm14, %ymm14
|
||||
vpaddd 2*256+3*32(%rsp), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+0*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+1*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+2*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+3*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+0*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+1*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+2*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+3*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+0*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+1*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+2*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+3*32(%rsp)
|
||||
|
||||
vmovdqa 4*32(%rsi, %rbp), %xmm4
|
||||
vinserti128 $1, 4*32+16(%rsi, %r8), %ymm4, %ymm4
|
||||
vmovdqa 5*32(%rsi, %rbp), %xmm5
|
||||
vinserti128 $1, 5*32+16(%rsi, %r8), %ymm5, %ymm5
|
||||
vmovdqa 6*32(%rsi, %rbp), %xmm6
|
||||
vinserti128 $1, 6*32+16(%rsi, %r8), %ymm6, %ymm6
|
||||
vmovdqa 7*32(%rsi, %rbp), %xmm7
|
||||
vinserti128 $1, 7*32+16(%rsi, %r8), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm0, %ymm0
|
||||
vpxor %ymm5, %ymm1, %ymm1
|
||||
vpxor %ymm6, %ymm2, %ymm2
|
||||
vpxor %ymm7, %ymm3, %ymm3
|
||||
vmovdqa 4*32(%rsi, %rbx), %xmm4
|
||||
vinserti128 $1, 4*32+16(%rsi, %r9), %ymm4, %ymm4
|
||||
vmovdqa 5*32(%rsi, %rbx), %xmm5
|
||||
vinserti128 $1, 5*32+16(%rsi, %r9), %ymm5, %ymm5
|
||||
vmovdqa 6*32(%rsi, %rbx), %xmm6
|
||||
vinserti128 $1, 6*32+16(%rsi, %r9), %ymm6, %ymm6
|
||||
vmovdqa 7*32(%rsi, %rbx), %xmm7
|
||||
vinserti128 $1, 7*32+16(%rsi, %r9), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm8, %ymm8
|
||||
vpxor %ymm5, %ymm9, %ymm9
|
||||
vpxor %ymm6, %ymm10, %ymm10
|
||||
vpxor %ymm7, %ymm11, %ymm11
|
||||
vmovdqa 4*32(%rsi, %rax), %xmm4
|
||||
vinserti128 $1, 4*32+16(%rsi, %r10), %ymm4, %ymm4
|
||||
vmovdqa 5*32(%rsi, %rax), %xmm5
|
||||
vinserti128 $1, 5*32+16(%rsi, %r10), %ymm5, %ymm5
|
||||
vmovdqa 6*32(%rsi, %rax), %xmm6
|
||||
vinserti128 $1, 6*32+16(%rsi, %r10), %ymm6, %ymm6
|
||||
vmovdqa 7*32(%rsi, %rax), %xmm7
|
||||
vinserti128 $1, 7*32+16(%rsi, %r10), %ymm7, %ymm7
|
||||
vpxor %ymm4, %ymm12, %ymm12
|
||||
vpxor %ymm5, %ymm13, %ymm13
|
||||
vpxor %ymm6, %ymm14, %ymm14
|
||||
vpxor %ymm7, %ymm15, %ymm15
|
||||
vpxor 0*256+4*32(%rsp), %ymm0, %ymm0
|
||||
vpxor 0*256+5*32(%rsp), %ymm1, %ymm1
|
||||
vpxor 0*256+6*32(%rsp), %ymm2, %ymm2
|
||||
vpxor 0*256+7*32(%rsp), %ymm3, %ymm3
|
||||
vpxor 1*256+4*32(%rsp), %ymm8, %ymm8
|
||||
vpxor 1*256+5*32(%rsp), %ymm9, %ymm9
|
||||
vpxor 1*256+6*32(%rsp), %ymm10, %ymm10
|
||||
vpxor 1*256+7*32(%rsp), %ymm11, %ymm11
|
||||
vpxor 2*256+4*32(%rsp), %ymm12, %ymm12
|
||||
vpxor 2*256+5*32(%rsp), %ymm13, %ymm13
|
||||
vpxor 2*256+6*32(%rsp), %ymm14, %ymm14
|
||||
vpxor 2*256+7*32(%rsp), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+4*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+5*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+6*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+7*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+4*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+5*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+6*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+7*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+4*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+5*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+6*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+7*32(%rsp)
|
||||
salsa8_core_6way_avx2
|
||||
vpaddd 0*256+4*32(%rsp), %ymm0, %ymm0
|
||||
vpaddd 0*256+5*32(%rsp), %ymm1, %ymm1
|
||||
vpaddd 0*256+6*32(%rsp), %ymm2, %ymm2
|
||||
vpaddd 0*256+7*32(%rsp), %ymm3, %ymm3
|
||||
vpaddd 1*256+4*32(%rsp), %ymm8, %ymm8
|
||||
vpaddd 1*256+5*32(%rsp), %ymm9, %ymm9
|
||||
vpaddd 1*256+6*32(%rsp), %ymm10, %ymm10
|
||||
vpaddd 1*256+7*32(%rsp), %ymm11, %ymm11
|
||||
vpaddd 2*256+4*32(%rsp), %ymm12, %ymm12
|
||||
vpaddd 2*256+5*32(%rsp), %ymm13, %ymm13
|
||||
vpaddd 2*256+6*32(%rsp), %ymm14, %ymm14
|
||||
vpaddd 2*256+7*32(%rsp), %ymm15, %ymm15
|
||||
vmovdqa %ymm0, 0*256+4*32(%rsp)
|
||||
vmovdqa %ymm1, 0*256+5*32(%rsp)
|
||||
vmovdqa %ymm2, 0*256+6*32(%rsp)
|
||||
vmovdqa %ymm3, 0*256+7*32(%rsp)
|
||||
vmovdqa %ymm8, 1*256+4*32(%rsp)
|
||||
vmovdqa %ymm9, 1*256+5*32(%rsp)
|
||||
vmovdqa %ymm10, 1*256+6*32(%rsp)
|
||||
vmovdqa %ymm11, 1*256+7*32(%rsp)
|
||||
vmovdqa %ymm12, 2*256+4*32(%rsp)
|
||||
vmovdqa %ymm13, 2*256+5*32(%rsp)
|
||||
vmovdqa %ymm14, 2*256+6*32(%rsp)
|
||||
vmovdqa %ymm15, 2*256+7*32(%rsp)
|
||||
|
||||
subq $1, %rcx
|
||||
ja scrypt_core_6way_avx2_loop2
|
||||
|
||||
scrypt_shuffle_unpack2 %rsp, 0*128, %rdi, 0*256+0
|
||||
scrypt_shuffle_unpack2 %rsp, 1*128, %rdi, 0*256+64
|
||||
scrypt_shuffle_unpack2 %rsp, 2*128, %rdi, 1*256+0
|
||||
scrypt_shuffle_unpack2 %rsp, 3*128, %rdi, 1*256+64
|
||||
scrypt_shuffle_unpack2 %rsp, 4*128, %rdi, 2*256+0
|
||||
scrypt_shuffle_unpack2 %rsp, 5*128, %rdi, 2*256+64
|
||||
|
||||
scrypt_core_6way_cleanup
|
||||
ret
|
||||
|
||||
#endif /* USE_AVX2 */
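Taken together, scrypt_core_6way runs the standard scrypt (N = 1024, r = 1) memory-hard loop for six independent 128-byte blocks at once, which is why it is handed a scratchpad of 6 * 131072 bytes (see the leaq 6*131072(%rsi) above). What each lane computes, sketched after the generic C scrypt_core in scrypt.c and its xor_salsa8 helper (neither is shown in this diff, so treat the declaration as an assumption):

#include <stdint.h>
#include <string.h>

void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]); /* scalar helper in scrypt.c */

static void scrypt_core_reference(uint32_t X[32], uint32_t V[1024 * 32])
{
	uint32_t i, j, k;

	for (i = 0; i < 1024; i++) {
		memcpy(&V[i * 32], X, 128);   /* V[i] = X */
		xor_salsa8(&X[0], &X[16]);    /* X = BlockMix(X) */
		xor_salsa8(&X[16], &X[0]);
	}
	for (i = 0; i < 1024; i++) {
		j = 32 * (X[16] & 1023);      /* j = Integerify(X) mod N */
		for (k = 0; k < 32; k++)
			X[k] ^= V[j + k];         /* X ^= V[j] */
		xor_salsa8(&X[0], &X[16]);
		xor_salsa8(&X[16], &X[0]);
	}
}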
|
||||
|
||||
#endif

scrypt.c (293 changes)
@@ -1,5 +1,5 @@
 /*
- * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2012 pooler
+ * Copyright 2009 Colin Percival, 2011 ArtForz, 2011-2013 pooler
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -256,6 +256,128 @@ static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
|
|||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
|
||||
static const uint32_t finalblk_8way[8 * 16] __attribute__((aligned(32))) = {
|
||||
0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001, 0x00000001,
|
||||
0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620, 0x00000620
|
||||
};
|
||||
|
||||
static inline void HMAC_SHA256_80_init_8way(const uint32_t *key,
|
||||
uint32_t *tstate, uint32_t *ostate)
|
||||
{
|
||||
uint32_t ihash[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t pad[8 * 16] __attribute__((aligned(32)));
|
||||
int i;
|
||||
|
||||
/* tstate is assumed to contain the midstate of key */
|
||||
memcpy(pad, key + 8 * 16, 8 * 16);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[8 * 4 + i] = 0x80000000;
|
||||
memset(pad + 8 * 5, 0x00, 8 * 40);
|
||||
for (i = 0; i < 8; i++)
|
||||
pad[8 * 15 + i] = 0x00000280;
|
||||
sha256_transform_8way(tstate, pad, 0);
|
||||
memcpy(ihash, tstate, 8 * 32);
|
||||
|
||||
sha256_init_8way(ostate);
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x5c5c5c5c;
|
||||
for (; i < 8 * 16; i++)
|
||||
pad[i] = 0x5c5c5c5c;
|
||||
sha256_transform_8way(ostate, pad, 0);
|
||||
|
||||
sha256_init_8way(tstate);
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
pad[i] = ihash[i] ^ 0x36363636;
|
||||
for (; i < 8 * 16; i++)
|
||||
pad[i] = 0x36363636;
|
||||
sha256_transform_8way(tstate, pad, 0);
|
||||
}
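HMAC_SHA256_80_init_8way prepares the inner (tstate) and outer (ostate) HMAC-SHA256 states for eight 80-byte block headers at once; the lane-interleaved indices (8 * i + k) aside, it is a direct widening of the scalar setup. De-vectorized into one lane, as a sketch assuming the scalar sha256_init/sha256_transform declared in miner.h:

#include <stdint.h>
#include <string.h>
#include "miner.h"

static void HMAC_SHA256_80_init_scalar(const uint32_t *key,
	uint32_t tstate[8], uint32_t ostate[8])
{
	uint32_t ihash[8], pad[16];
	int i;

	/* tstate already holds the midstate of the first 64 key bytes; finish
	 * hashing the 80-byte key: last 16 bytes + padding + length (0x280 = 640 bits) */
	memcpy(pad, key + 16, 16);
	pad[4] = 0x80000000;
	memset(pad + 5, 0x00, 40);
	pad[15] = 0x00000280;
	sha256_transform(tstate, pad, 0);
	memcpy(ihash, tstate, 32);

	/* outer state absorbs H(key) ^ opad (0x5c); inner state absorbs H(key) ^ ipad (0x36) */
	sha256_init(ostate);
	for (i = 0; i < 8; i++)
		pad[i] = ihash[i] ^ 0x5c5c5c5c;
	for (; i < 16; i++)
		pad[i] = 0x5c5c5c5c;
	sha256_transform(ostate, pad, 0);

	sha256_init(tstate);
	for (i = 0; i < 8; i++)
		pad[i] = ihash[i] ^ 0x36363636;
	for (; i < 16; i++)
		pad[i] = 0x36363636;
	sha256_transform(tstate, pad, 0);
}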
|
||||
|
||||
static inline void PBKDF2_SHA256_80_128_8way(const uint32_t *tstate,
|
||||
const uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t istate[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t ostate2[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t ibuf[8 * 16] __attribute__((aligned(32)));
|
||||
uint32_t obuf[8 * 16] __attribute__((aligned(32)));
|
||||
int i, j;
|
||||
|
||||
memcpy(istate, tstate, 8 * 32);
|
||||
sha256_transform_8way(istate, salt, 0);
|
||||
|
||||
memcpy(ibuf, salt + 8 * 16, 8 * 16);
|
||||
for (i = 0; i < 8; i++)
|
||||
ibuf[8 * 5 + i] = 0x80000000;
|
||||
memset(ibuf + 8 * 6, 0x00, 8 * 36);
|
||||
for (i = 0; i < 8; i++)
|
||||
ibuf[8 * 15 + i] = 0x000004a0;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
obuf[8 * 8 + i] = 0x80000000;
|
||||
memset(obuf + 8 * 9, 0x00, 8 * 24);
|
||||
for (i = 0; i < 8; i++)
|
||||
obuf[8 * 15 + i] = 0x00000300;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memcpy(obuf, istate, 8 * 32);
|
||||
ibuf[8 * 4 + 0] = i + 1;
|
||||
ibuf[8 * 4 + 1] = i + 1;
|
||||
ibuf[8 * 4 + 2] = i + 1;
|
||||
ibuf[8 * 4 + 3] = i + 1;
|
||||
ibuf[8 * 4 + 4] = i + 1;
|
||||
ibuf[8 * 4 + 5] = i + 1;
|
||||
ibuf[8 * 4 + 6] = i + 1;
|
||||
ibuf[8 * 4 + 7] = i + 1;
|
||||
sha256_transform_8way(obuf, ibuf, 0);
|
||||
|
||||
memcpy(ostate2, ostate, 8 * 32);
|
||||
sha256_transform_8way(ostate2, obuf, 0);
|
||||
for (j = 0; j < 8 * 8; j++)
|
||||
output[8 * 8 * i + j] = swab32(ostate2[j]);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void PBKDF2_SHA256_128_32_8way(uint32_t *tstate,
|
||||
uint32_t *ostate, const uint32_t *salt, uint32_t *output)
|
||||
{
|
||||
uint32_t buf[8 * 16] __attribute__((aligned(32)));
|
||||
int i;
|
||||
|
||||
sha256_transform_8way(tstate, salt, 1);
|
||||
sha256_transform_8way(tstate, salt + 8 * 16, 1);
|
||||
sha256_transform_8way(tstate, finalblk_8way, 0);
|
||||
|
||||
memcpy(buf, tstate, 8 * 32);
|
||||
for (i = 0; i < 8; i++)
|
||||
buf[8 * 8 + i] = 0x80000000;
|
||||
memset(buf + 8 * 9, 0x00, 8 * 24);
|
||||
for (i = 0; i < 8; i++)
|
||||
buf[8 * 15 + i] = 0x00000300;
|
||||
sha256_transform_8way(ostate, buf, 0);
|
||||
|
||||
for (i = 0; i < 8 * 8; i++)
|
||||
output[i] = swab32(ostate[i]);
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
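PBKDF2_SHA256_128_32_8way is the final PBKDF2 step of scrypt: it feeds the 128-byte mixed block back through HMAC-SHA256 and emits the 32-byte result, for eight lanes at once. One lane of it, de-vectorized (a sketch assuming the scalar sha256_transform and swab32 from miner.h; the per-lane final block is taken from finalblk_8way above: block counter 1, padding bit, and the 0x620-bit message length):

#include <stdint.h>
#include <string.h>
#include "miner.h"

static const uint32_t finalblk[16] = {
	0x00000001, 0x80000000, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x00000620
};

static void PBKDF2_SHA256_128_32_scalar(uint32_t tstate[8], uint32_t ostate[8],
	const uint32_t salt[32], uint32_t output[8])
{
	uint32_t buf[16];
	int i;

	/* inner hash: two 64-byte salt blocks (byte-swapped), then the final padded block */
	sha256_transform(tstate, salt, 1);
	sha256_transform(tstate, salt + 16, 1);
	sha256_transform(tstate, finalblk, 0);

	/* outer hash: opad state over the 32-byte inner digest (0x300 = 768 bits total) */
	memcpy(buf, tstate, 32);
	buf[8] = 0x80000000;
	memset(buf + 9, 0x00, 24);
	buf[15] = 0x00000300;
	sha256_transform(ostate, buf, 0);

	for (i = 0; i < 8; i++)
		output[i] = swab32(ostate[i]);
}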
|
||||
|
||||
|
||||
#if defined(__x86_64__)
|
||||
|
||||
#define SCRYPT_MAX_WAYS 12
|
||||
|
@@ -263,6 +385,12 @@ static inline void PBKDF2_SHA256_128_32_4way(uint32_t *tstate,
 int scrypt_best_throughput();
 void scrypt_core(uint32_t *X, uint32_t *V);
 void scrypt_core_3way(uint32_t *X, uint32_t *V);
+#if defined(USE_AVX2)
+#undef SCRYPT_MAX_WAYS
+#define SCRYPT_MAX_WAYS 24
+#define HAVE_SCRYPT_6WAY 1
+void scrypt_core_6way(uint32_t *X, uint32_t *V);
+#endif

 #elif defined(__i386__)

@@ -410,47 +538,32 @@ static void scrypt_1024_1_1_256_4way(const uint32_t *input,
|
|||
uint32_t W[4 * 32] __attribute__((aligned(128)));
|
||||
uint32_t X[4 * 32] __attribute__((aligned(128)));
|
||||
uint32_t *V;
|
||||
int i;
|
||||
int i, k;
|
||||
|
||||
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||
|
||||
for (i = 0; i < 20; i++) {
|
||||
W[4 * i + 0] = input[0 * 20 + i];
|
||||
W[4 * i + 1] = input[1 * 20 + i];
|
||||
W[4 * i + 2] = input[2 * 20 + i];
|
||||
W[4 * i + 3] = input[3 * 20 + i];
|
||||
}
|
||||
for (i = 0; i < 8; i++) {
|
||||
tstate[4 * i + 0] = midstate[i];
|
||||
tstate[4 * i + 1] = midstate[i];
|
||||
tstate[4 * i + 2] = midstate[i];
|
||||
tstate[4 * i + 3] = midstate[i];
|
||||
}
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[4 * i + k] = input[k * 20 + i];
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
tstate[4 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_4way(W, tstate, ostate);
|
||||
PBKDF2_SHA256_80_128_4way(tstate, ostate, W, W);
|
||||
for (i = 0; i < 32; i++) {
|
||||
X[0 * 32 + i] = W[4 * i + 0];
|
||||
X[1 * 32 + i] = W[4 * i + 1];
|
||||
X[2 * 32 + i] = W[4 * i + 2];
|
||||
X[3 * 32 + i] = W[4 * i + 3];
|
||||
}
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
X[k * 32 + i] = W[4 * i + k];
|
||||
scrypt_core(X + 0 * 32, V);
|
||||
scrypt_core(X + 1 * 32, V);
|
||||
scrypt_core(X + 2 * 32, V);
|
||||
scrypt_core(X + 3 * 32, V);
|
||||
for (i = 0; i < 32; i++) {
|
||||
W[4 * i + 0] = X[0 * 32 + i];
|
||||
W[4 * i + 1] = X[1 * 32 + i];
|
||||
W[4 * i + 2] = X[2 * 32 + i];
|
||||
W[4 * i + 3] = X[3 * 32 + i];
|
||||
}
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[4 * i + k] = X[k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_4way(tstate, ostate, W, W);
|
||||
for (i = 0; i < 8; i++) {
|
||||
output[0 * 8 + i] = W[4 * i + 0];
|
||||
output[1 * 8 + i] = W[4 * i + 1];
|
||||
output[2 * 8 + i] = W[4 * i + 2];
|
||||
output[3 * 8 + i] = W[4 * i + 3];
|
||||
}
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
output[k * 8 + i] = W[4 * i + k];
|
||||
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
|
@@ -491,68 +604,97 @@ static void scrypt_1024_1_1_256_12way(const uint32_t *input,
|
|||
uint32_t W[12 * 32] __attribute__((aligned(128)));
|
||||
uint32_t X[12 * 32] __attribute__((aligned(128)));
|
||||
uint32_t *V;
|
||||
int i, j;
|
||||
int i, j, k;
|
||||
|
||||
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (i = 0; i < 20; i++) {
|
||||
W[128 * j + 4 * i + 0] = input[80 * j + 0 * 20 + i];
|
||||
W[128 * j + 4 * i + 1] = input[80 * j + 1 * 20 + i];
|
||||
W[128 * j + 4 * i + 2] = input[80 * j + 2 * 20 + i];
|
||||
W[128 * j + 4 * i + 3] = input[80 * j + 3 * 20 + i];
|
||||
}
|
||||
}
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (i = 0; i < 8; i++) {
|
||||
tstate[32 * j + 4 * i + 0] = midstate[i];
|
||||
tstate[32 * j + 4 * i + 1] = midstate[i];
|
||||
tstate[32 * j + 4 * i + 2] = midstate[i];
|
||||
tstate[32 * j + 4 * i + 3] = midstate[i];
|
||||
}
|
||||
}
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[128 * j + 4 * i + k] = input[80 * j + k * 20 + i];
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
tstate[32 * j + 4 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_4way(W + 0, tstate + 0, ostate + 0);
|
||||
HMAC_SHA256_80_init_4way(W + 128, tstate + 32, ostate + 32);
|
||||
HMAC_SHA256_80_init_4way(W + 256, tstate + 64, ostate + 64);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||
PBKDF2_SHA256_80_128_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (i = 0; i < 32; i++) {
|
||||
X[128 * j + 0 * 32 + i] = W[128 * j + 4 * i + 0];
|
||||
X[128 * j + 1 * 32 + i] = W[128 * j + 4 * i + 1];
|
||||
X[128 * j + 2 * 32 + i] = W[128 * j + 4 * i + 2];
|
||||
X[128 * j + 3 * 32 + i] = W[128 * j + 4 * i + 3];
|
||||
}
|
||||
}
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
X[128 * j + k * 32 + i] = W[128 * j + 4 * i + k];
|
||||
scrypt_core_3way(X + 0 * 96, V);
|
||||
scrypt_core_3way(X + 1 * 96, V);
|
||||
scrypt_core_3way(X + 2 * 96, V);
|
||||
scrypt_core_3way(X + 3 * 96, V);
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (i = 0; i < 32; i++) {
|
||||
W[128 * j + 4 * i + 0] = X[128 * j + 0 * 32 + i];
|
||||
W[128 * j + 4 * i + 1] = X[128 * j + 1 * 32 + i];
|
||||
W[128 * j + 4 * i + 2] = X[128 * j + 2 * 32 + i];
|
||||
W[128 * j + 4 * i + 3] = X[128 * j + 3 * 32 + i];
|
||||
}
|
||||
}
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
W[128 * j + 4 * i + k] = X[128 * j + k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 32, ostate + 32, W + 128, W + 128);
|
||||
PBKDF2_SHA256_128_32_4way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
for (j = 0; j < 3; j++) {
|
||||
for (i = 0; i < 8; i++) {
|
||||
output[32 * j + 0 * 8 + i] = W[128 * j + 4 * i + 0];
|
||||
output[32 * j + 1 * 8 + i] = W[128 * j + 4 * i + 1];
|
||||
output[32 * j + 2 * 8 + i] = W[128 * j + 4 * i + 2];
|
||||
output[32 * j + 3 * 8 + i] = W[128 * j + 4 * i + 3];
|
||||
}
|
||||
}
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 4; k++)
|
||||
output[32 * j + k * 8 + i] = W[128 * j + 4 * i + k];
|
||||
}
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#endif /* HAVE_SCRYPT_3WAY */
|
||||
|
||||
#ifdef HAVE_SCRYPT_6WAY
|
||||
static void scrypt_1024_1_1_256_24way(const uint32_t *input,
|
||||
uint32_t *output, uint32_t *midstate, unsigned char *scratchpad)
|
||||
{
|
||||
uint32_t tstate[24 * 8] __attribute__((aligned(128)));
|
||||
uint32_t ostate[24 * 8] __attribute__((aligned(128)));
|
||||
uint32_t W[24 * 32] __attribute__((aligned(128)));
|
||||
uint32_t X[24 * 32] __attribute__((aligned(128)));
|
||||
uint32_t *V;
|
||||
int i, j, k;
|
||||
|
||||
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 20; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
W[8 * 32 * j + 8 * i + k] = input[8 * 20 * j + k * 20 + i];
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
tstate[8 * 8 * j + 8 * i + k] = midstate[i];
|
||||
HMAC_SHA256_80_init_8way(W + 0, tstate + 0, ostate + 0);
|
||||
HMAC_SHA256_80_init_8way(W + 256, tstate + 64, ostate + 64);
|
||||
HMAC_SHA256_80_init_8way(W + 512, tstate + 128, ostate + 128);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
PBKDF2_SHA256_80_128_8way(tstate + 128, ostate + 128, W + 512, W + 512);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
X[8 * 32 * j + k * 32 + i] = W[8 * 32 * j + 8 * i + k];
|
||||
scrypt_core_6way(X + 0 * 32, V);
|
||||
scrypt_core_6way(X + 6 * 32, V);
|
||||
scrypt_core_6way(X + 12 * 32, V);
|
||||
scrypt_core_6way(X + 18 * 32, V);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 32; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
W[8 * 32 * j + 8 * i + k] = X[8 * 32 * j + k * 32 + i];
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 0, ostate + 0, W + 0, W + 0);
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 64, ostate + 64, W + 256, W + 256);
|
||||
PBKDF2_SHA256_128_32_8way(tstate + 128, ostate + 128, W + 512, W + 512);
|
||||
for (j = 0; j < 3; j++)
|
||||
for (i = 0; i < 8; i++)
|
||||
for (k = 0; k < 8; k++)
|
||||
output[8 * 8 * j + k * 8 + i] = W[8 * 32 * j + 8 * i + k];
|
||||
}
|
||||
#endif /* HAVE_SCRYPT_6WAY */
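scrypt_1024_1_1_256_24way processes 24 block headers per call: three groups of eight lanes share the 8-way SHA-256 code, and the scrypt core runs as four 6-way calls. The index expressions such as W[8 * 32 * j + 8 * i + k] switch between per-lane data (group j, lane k, word i) and the lane-interleaved layout; the same conversion isolated into helpers (hypothetical names, not part of the miner):

#include <stdint.h>

/* word i of lane k lives at index 8 * i + k */
static void interleave_8way(const uint32_t src[8][32], uint32_t dst[8 * 32])
{
	int i, k;

	for (i = 0; i < 32; i++)
		for (k = 0; k < 8; k++)
			dst[8 * i + k] = src[k][i];
}

static void deinterleave_8way(const uint32_t src[8 * 32], uint32_t dst[8][32])
{
	int i, k;

	for (i = 0; i < 32; i++)
		for (k = 0; k < 8; k++)
			dst[k][i] = src[8 * i + k];
}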
|
||||
|
||||
int scanhash_scrypt(int thr_id, uint32_t *pdata,
|
||||
unsigned char *scratchbuf, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
|
@@ -589,6 +731,11 @@ int scanhash_scrypt(int thr_id, uint32_t *pdata,
 		scrypt_1024_1_1_256_12way(data, hash, midstate, scratchbuf);
 	else
 #endif
+#if defined(HAVE_SCRYPT_6WAY)
+	if (throughput == 24)
+		scrypt_1024_1_1_256_24way(data, hash, midstate, scratchbuf);
+	else
+#endif
 #if defined(HAVE_SCRYPT_3WAY)
 	if (throughput == 3)
 		scrypt_1024_1_1_256_3way(data, hash, midstate, scratchbuf);

sha2-x64.S (1056 changes)
(file diff not shown: too large)

sha2.c (64 changes)
@@ -522,6 +522,65 @@ static inline int scanhash_sha256d_4way(int thr_id, uint32_t *pdata,
|
|||
|
||||
#endif /* HAVE_SHA256_4WAY */
|
||||
|
||||
#ifdef HAVE_SHA256_8WAY
|
||||
|
||||
void sha256d_ms_8way(uint32_t *hash, uint32_t *data,
|
||||
const uint32_t *midstate, const uint32_t *prehash);
|
||||
|
||||
static inline int scanhash_sha256d_8way(int thr_id, uint32_t *pdata,
|
||||
const uint32_t *ptarget, uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
uint32_t data[8 * 64] __attribute__((aligned(128)));
|
||||
uint32_t hash[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t midstate[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t prehash[8 * 8] __attribute__((aligned(32)));
|
||||
uint32_t n = pdata[19] - 1;
|
||||
const uint32_t first_nonce = pdata[19];
|
||||
const uint32_t Htarg = ptarget[7];
|
||||
int i, j;
|
||||
|
||||
memcpy(data, pdata + 16, 64);
|
||||
sha256d_preextend(data);
|
||||
for (i = 31; i >= 0; i--)
|
||||
for (j = 0; j < 8; j++)
|
||||
data[i * 8 + j] = data[i];
|
||||
|
||||
sha256_init(midstate);
|
||||
sha256_transform(midstate, pdata, 0);
|
||||
memcpy(prehash, midstate, 32);
|
||||
sha256d_prehash(prehash, pdata + 16);
|
||||
for (i = 7; i >= 0; i--) {
|
||||
for (j = 0; j < 8; j++) {
|
||||
midstate[i * 8 + j] = midstate[i];
|
||||
prehash[i * 8 + j] = prehash[i];
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
for (i = 0; i < 8; i++)
|
||||
data[8 * 3 + i] = ++n;
|
||||
|
||||
sha256d_ms_8way(hash, data, midstate, prehash);
|
||||
|
||||
for (i = 0; i < 8; i++) {
|
||||
if (swab32(hash[8 * 7 + i]) <= Htarg) {
|
||||
pdata[19] = data[8 * 3 + i];
|
||||
sha256d_80_swap(hash, pdata);
|
||||
if (fulltest(hash, ptarget)) {
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (n < max_nonce && !work_restart[thr_id].restart);
|
||||
|
||||
*hashes_done = n - first_nonce + 1;
|
||||
pdata[19] = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif /* HAVE_SHA256_8WAY */
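Two details of scanhash_sha256d_8way above are worth noting. The broadcast loop runs i downward so that it can widen data in place: for i >= 1 the destination slots i * 8 .. i * 8 + 7 all lie above index i, so the words data[0..i-1] that still have to be read are never overwritten early. And each pass of the do/while loop tries eight consecutive nonces, written into data[8 * 3 + i], i.e. lane i of word 3 of the second header block. The in-place broadcast on its own (hypothetical helper name):

#include <stdint.h>

static void broadcast_8way_in_place(uint32_t data[8 * 32])
{
	int i, j;

	for (i = 31; i >= 0; i--)       /* descending: sources below i stay intact */
		for (j = 0; j < 8; j++)
			data[i * 8 + j] = data[i];
}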
|
||||
|
||||
int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
|
||||
uint32_t max_nonce, unsigned long *hashes_done)
|
||||
{
|
||||
|
@@ -533,6 +592,11 @@ int scanhash_sha256d(int thr_id, uint32_t *pdata, const uint32_t *ptarget,
 	const uint32_t first_nonce = pdata[19];
 	const uint32_t Htarg = ptarget[7];

+#ifdef HAVE_SHA256_8WAY
+	if (sha256_use_8way())
+		return scanhash_sha256d_8way(thr_id, pdata, ptarget,
+			max_nonce, hashes_done);
+#endif
 #ifdef HAVE_SHA256_4WAY
 	if (sha256_use_4way())
 		return scanhash_sha256d_4way(thr_id, pdata, ptarget,