Add optimized PowerPC code

This commit is contained in:
pooler 2015-02-24 17:06:20 +01:00
parent 4611186cb8
commit 9373a5c433
9 changed files with 3081 additions and 3 deletions

View file

@ -28,6 +28,9 @@ endif
if ARCH_ARM if ARCH_ARM
minerd_SOURCES += sha2-arm.S scrypt-arm.S minerd_SOURCES += sha2-arm.S scrypt-arm.S
endif endif
if ARCH_PPC
minerd_SOURCES += sha2-ppc.S scrypt-ppc.S
endif
endif endif
minerd_LDFLAGS = $(PTHREAD_FLAGS) minerd_LDFLAGS = $(PTHREAD_FLAGS)
minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@

2
README
View file

@ -40,6 +40,8 @@ Architecture-specific notes:
but the decision whether to use them is made at compile time, but the decision whether to use them is made at compile time,
based on compiler-defined macros. based on compiler-defined macros.
To use NEON instructions, add "-mfpu=neon" to CFLAGS. To use NEON instructions, add "-mfpu=neon" to CFLAGS.
PowerPC: No runtime CPU detection; AltiVec use is decided at compile time.
To use AltiVec instructions, add "-maltivec" to CFLAGS.
x86: The miner checks for SSE2 instructions support at runtime, x86: The miner checks for SSE2 instructions support at runtime,
and uses them if they are available. and uses them if they are available.
x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions, x86-64: The miner can take advantage of AVX, AVX2 and XOP instructions,

View file

@ -48,6 +48,9 @@ case $target in
arm*-*-*) arm*-*-*)
have_arm=true have_arm=true
;; ;;
powerpc*-*-*)
have_ppc=true
;;
esac esac
PTHREAD_FLAGS="-pthread" PTHREAD_FLAGS="-pthread"
@ -108,6 +111,7 @@ AM_CONDITIONAL([USE_ASM], [test x$enable_assembly != xno])
AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue]) AM_CONDITIONAL([ARCH_x86], [test x$have_x86 = xtrue])
AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue]) AM_CONDITIONAL([ARCH_x86_64], [test x$have_x86_64 = xtrue])
AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue]) AM_CONDITIONAL([ARCH_ARM], [test x$have_arm = xtrue])
AM_CONDITIONAL([ARCH_PPC], [test x$have_ppc = xtrue])
if test x$request_jansson = xtrue if test x$request_jansson = xtrue
then then

View file

@ -1456,6 +1456,12 @@ static void show_version_and_exit(void)
#if defined(__ARM_NEON__) #if defined(__ARM_NEON__)
" NEON" " NEON"
#endif #endif
#endif
#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
" PowerPC"
#if defined(__ALTIVEC__)
" AltiVec"
#endif
#endif #endif
"\n"); "\n");

View file

@ -137,7 +137,7 @@ void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
void sha256d(unsigned char *hash, const unsigned char *data, int len); void sha256d(unsigned char *hash, const unsigned char *data, int len);
#ifdef USE_ASM #ifdef USE_ASM
#if defined(__ARM_NEON__) || defined(__i386__) || defined(__x86_64__) #if defined(__ARM_NEON__) || defined(__ALTIVEC__) || defined(__i386__) || defined(__x86_64__)
#define HAVE_SHA256_4WAY 1 #define HAVE_SHA256_4WAY 1
int sha256_use_4way(); int sha256_use_4way();
void sha256_init_4way(uint32_t *state); void sha256_init_4way(uint32_t *state);

1136
scrypt-ppc.S Normal file

File diff suppressed because it is too large Load diff

View file

@ -409,6 +409,12 @@ void scrypt_core(uint32_t *X, uint32_t *V, int N);
void scrypt_core_3way(uint32_t *X, uint32_t *V, int N); void scrypt_core_3way(uint32_t *X, uint32_t *V, int N);
#endif #endif
#elif defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))
#define SCRYPT_MAX_WAYS 4
#define scrypt_best_throughput() 1
void scrypt_core(uint32_t *X, uint32_t *V, int N);
#else #else
static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16]) static inline void xor_salsa8(uint32_t B[16], const uint32_t Bx[16])
@ -513,7 +519,7 @@ static void scrypt_1024_1_1_256(const uint32_t *input, uint32_t *output,
uint32_t *midstate, unsigned char *scratchpad, int N) uint32_t *midstate, unsigned char *scratchpad, int N)
{ {
uint32_t tstate[8], ostate[8]; uint32_t tstate[8], ostate[8];
uint32_t X[32]; uint32_t X[32] __attribute__((aligned(128)));
uint32_t *V; uint32_t *V;
V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63)); V = (uint32_t *)(((uintptr_t)(scratchpad) + 63) & ~ (uintptr_t)(63));

1919
sha2-ppc.S Normal file

File diff suppressed because it is too large Load diff

4
sha2.c
View file

@ -14,7 +14,9 @@
#include <string.h> #include <string.h>
#include <inttypes.h> #include <inttypes.h>
#if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__) #if defined(USE_ASM) && \
((defined(__arm__) && defined(__APCS_32__)) || \
(defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)))
#define EXTERN_SHA256 #define EXTERN_SHA256
#endif #endif