Add optimized PowerPC code
This commit is contained in:
parent
4611186cb8
commit
9373a5c433
9 changed files with 3081 additions and 3 deletions
|
@ -28,6 +28,9 @@ endif
|
||||||
if ARCH_ARM
|
if ARCH_ARM
|
||||||
minerd_SOURCES += sha2-arm.S scrypt-arm.S
|
minerd_SOURCES += sha2-arm.S scrypt-arm.S
|
||||||
endif
|
endif
|
||||||
|
if ARCH_PPC
|
||||||
|
minerd_SOURCES += sha2-ppc.S scrypt-ppc.S
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
minerd_LDFLAGS = $(PTHREAD_FLAGS)
|
minerd_LDFLAGS = $(PTHREAD_FLAGS)
|
||||||
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@
|
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@
|
||||||
|
|
2
README
2
README
|
@ -40,6 +40,8 @@ Architecture-specific notes:
|
||||||
but the decision whether to use them is made at compile time,
|
but the decision whether to use them is made at compile time,
|
||||||
based on compiler-defined macros.
|
based on compiler-defined macros.
|
||||||
To use NEON instructions, add "-mfpu=neon" to CFLAGS.
|
To use NEON instructions, add "-mfpu=neon" to CFLAGS.
|
||||||
|
PowerPC: No runtime CPU detection.
|
||||||
|
To use AltiVec instructions, add "-maltivec" to CFLAGS.
|
||||||
x86: The miner checks for SSE2 instructions support at runtime,
|
x86: The miner checks for SSE2 instructions support at runtime,
|
||||||
and uses them if they are available.
|
and uses them if they are available.
|
||||||
x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions,
|
x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions,
|
||||||
|
|
|
@ -48,6 +48,9 @@ case $target in
|
||||||
arm*-*-*)
|
arm*-*-*)
|
||||||
have_arm=true
|
have_arm=true
|
||||||
;;
|
;;
|
||||||
|
powerpc*-*-*)
|
||||||
|
have_ppc=true
|
||||||
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
PTHREAD_FLAGS="-pthread"
|
PTHREAD_FLAGS="-pthread"
|
||||||
|
@ -108,6 +111,7 @@ AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
|
||||||
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
|
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
|
||||||
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
|
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
|
||||||
AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue])
|
AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue])
|
||||||
|
AM_CONDITIONAL([ARCH_PPC], [test x$have_ppc = xtrue])
|
||||||
|
|
||||||
if test x$request_jansson = xtrue
|
if test x$request_jansson = xtrue
|
||||||
then
|
then
|
||||||
|
|
|
@ -1456,6 +1456,12 @@ static void show_version_and_exit(void)
|
||||||
#if defined(__ARM_NEON__)
|
#if defined(__ARM_NEON__)
|
||||||
" NEON"
|
" NEON"
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
|
||||||
|
" PowerPC"
|
||||||
|
#if defined(__ALTIVEC__)
|
||||||
|
" AltiVec"
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
"\n");
|
"\n");
|
||||||
|
|
||||||
|
|
2
miner.h
2
miner.h
|
@ -137,7 +137,7 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
|
||||||
void sha256d(unsigned char *hash, const unsigned char *data, int len);
|
void sha256d(unsigned char *hash, const unsigned char *data, int len);
|
||||||
|
|
||||||
#ifdef USE_ASM
|
#ifdef USE_ASM
|
||||||
#if defined(__ARM_NEON__) || defined(__i386__) || defined(__x86_64__)
|
#if defined(__ARM_NEON__) || defined(__ALTIVEC__) || defined(__i386__) || defined(__x86_64__)
|
||||||
#define HAVE_SHA256_4WAY 1
|
#define HAVE_SHA256_4WAY 1
|
||||||
int sha256_use_4way();
|
int sha256_use_4way();
|
||||||
void sha256_init_4way(uint32_t *state);
|
void sha256_init_4way(uint32_t *state);
|
||||||
|
|
1136
scrypt-ppc.S
Normal file
1136
scrypt-ppc.S
Normal file
File diff suppressed because it is too large
Load diff
8
scrypt.c
8
scrypt.c
|
@ -409,6 +409,12 @@ void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||||
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#elif defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
|
||||||
|
|
||||||
|
#define SCRYPT_MAX_WAYS 4
|
||||||
|
#define scrypt_best_throughput() 1
|
||||||
|
void scrypt_core(uint32_t *X, uint32_t *V, int N);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
|
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
|
||||||
|
@ -513,7 +519,7 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
|
||||||
uint32_t *midstate, unsigned char *scratchpad, int N)
|
uint32_t *midstate, unsigned char *scratchpad, int N)
|
||||||
{
|
{
|
||||||
uint32_t tstate[8], ostate[8];
|
uint32_t tstate[8], ostate[8];
|
||||||
uint32_t X[32];
|
uint32_t X[32] __attribute__((aligned(128)));
|
||||||
uint32_t *V;
|
uint32_t *V;
|
||||||
|
|
||||||
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));
|
||||||
|
|
1919
sha2-ppc.S
Normal file
1919
sha2-ppc.S
Normal file
File diff suppressed because it is too large
Load diff
4
sha2.c
4
sha2.c
|
@ -14,7 +14,9 @@
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
|
|
||||||
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
|
#if defined(USE_ASM) && \
|
||||||
|
((defined(__arm__) && defined(__APCS_32__)) || \
|
||||||
|
(defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)))
|
||||||
#define EXTERN_SHA256
|
#define EXTERN_SHA256
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue