/*
 * Copyright 2014-2015 pooler@litecoinpool.org
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version. See COPYING for more details.
 */

#include "cpuminer-config.h"

#if defined(USE_ASM) && (defined(__powerpc__) || defined(__ppc__) || defined(__PPC__))

#ifndef __APPLE__

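/*
 * The GNU assembler on ELF targets expects literal register numbers, so map
 * the symbolic rN/vN names to them; Apple's assembler accepts the symbolic
 * names directly.
 */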
#define r0 0
#define r1 1
#define r2 2
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7
#define r8 8
#define r9 9
#define r10 10
#define r11 11
#define r12 12
#define r13 13
#define r14 14
#define r15 15
#define r16 16
#define r17 17
#define r18 18
#define r19 19
#define r20 20
#define r21 21
#define r22 22
#define r23 23
#define r24 24
#define r25 25
#define r26 26
#define r27 27
#define r28 28
#define r29 29
#define r30 30
#define r31 31

#ifdef __ALTIVEC__
#define v0 0
#define v1 1
#define v2 2
#define v3 3
#define v4 4
#define v5 5
#define v6 6
#define v7 7
#define v8 8
#define v9 9
#define v10 10
#define v11 11
#define v12 12
#define v13 13
#define v14 14
#define v15 15
#define v16 16
#define v17 17
#define v18 18
#define v19 19
#define v20 20
#define v21 21
#define v22 22
#define v23 23
#define v24 24
#define v25 25
#define v26 26
#define v27 27
#define v28 28
#define v29 29
#define v30 30
#define v31 31
#endif

#endif

#ifdef __ALTIVEC__

#ifdef __APPLE__
	.machine ppc7400
#endif

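/*
 * One Salsa20 double round (a column step followed by a row step), four
 * quarter-rounds at a time.  The state is held in v0-v3 in the diagonal
 * order set up below, v16-v19 hold the splatted rotate counts 7, 9, 13
 * and 18, and the vsldoi instructions realign the diagonals between
 * steps.  v4 is the only temporary.
 */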
.macro salsa8_core_doubleround
	vadduwm v4, v0, v1
	vrlw v4, v4, v16
	vxor v3, v3, v4

	vadduwm v4, v3, v0
	vrlw v4, v4, v17
	vxor v2, v2, v4

	vadduwm v4, v2, v3
	vrlw v4, v4, v18
	vsldoi v3, v3, v3, 12
	vxor v1, v1, v4

	vadduwm v4, v1, v2
	vrlw v4, v4, v19
	vsldoi v1, v1, v1, 4
	vxor v0, v0, v4

	vadduwm v4, v0, v3
	vrlw v4, v4, v16
	vsldoi v2, v2, v2, 8
	vxor v1, v1, v4

	vadduwm v4, v1, v0
	vrlw v4, v4, v17
	vxor v2, v2, v4

	vadduwm v4, v2, v1
	vrlw v4, v4, v18
	vsldoi v1, v1, v1, 12
	vxor v3, v3, v4

	vadduwm v4, v3, v2
	vrlw v4, v4, v19
	vsldoi v3, v3, v3, 4
	vxor v0, v0, v4
	vsldoi v2, v2, v2, 8
.endm

.macro salsa8_core
	salsa8_core_doubleround
	salsa8_core_doubleround
	salsa8_core_doubleround
	salsa8_core_doubleround
.endm

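/*
 * scrypt_core(uint32_t X[32], uint32_t V[32 * N], int N)
 * AltiVec variant: r3 = X, r4 = V (the scratchpad), r5 = N.  The second
 * loop masks the entry index with N - 1, so N is assumed to be a power
 * of two.
 */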
	.text
	.align 2
	.globl scrypt_core
	.globl _scrypt_core
#ifdef __ELF__
	.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
	stwu r1, -4*4(r1)
	mflr r0
	stw r0, 5*4(r1)
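	/* Save VRSAVE (SPR 256) and mark v0-v19 as in use. */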
	mfspr r0, 256
	stw r0, 2*4(r1)
	oris r0, r0, 0xffff
	ori r0, r0, 0xf000
	mtspr 256, r0

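	/* Load the eight 16-byte rows of X; r6-r12 hold quadword offsets. */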
	li r6, 1*16
	li r7, 2*16
	li r8, 3*16
	li r9, 4*16
	li r10, 5*16
	li r11, 6*16
	li r12, 7*16

	lvx v8, 0, r3
	lvx v9, r3, r6
	lvx v10, r3, r7
	lvx v11, r3, r8
	lvx v12, r3, r9
	lvx v13, r3, r10
	lvx v14, r3, r11
	lvx v15, r3, r12

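/*
 * Build constant select masks in-register and shuffle each half of X
 * from row order into the diagonal order that salsa8_core expects.
 */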
	vxor v0, v0, v0
	vnor v1, v0, v0
	vsldoi v2, v0, v1, 4
	vsldoi v3, v2, v0, 8
	vor v3, v3, v2
	vsldoi v1, v0, v1, 8

	vor v4, v8, v8
	vsel v8, v8, v9, v3
	vsel v9, v9, v10, v3
	vsel v10, v10, v11, v3
	vsel v11, v11, v4, v3
	vor v4, v8, v8
	vor v5, v9, v9
	vsel v8, v8, v10, v1
	vsel v9, v11, v9, v1
	vsel v10, v10, v4, v1
	vsel v11, v5, v11, v1

	vor v4, v12, v12
	vsel v12, v12, v13, v3
	vsel v13, v13, v14, v3
	vsel v14, v14, v15, v3
	vsel v15, v15, v4, v3
	vor v4, v12, v12
	vor v5, v13, v13
	vsel v12, v12, v14, v1
	vsel v13, v15, v13, v1
	vsel v14, v14, v4, v1
	vsel v15, v5, v15, v1

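	/*
	 * Splat the Salsa20 rotate counts into v16-v19.  vspltisw takes a
	 * signed 5-bit immediate (-16..15), so 18 is formed as 9 + 9.
	 */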
	vspltisw v16, 7
	vspltisw v17, 9
	vspltisw v18, 13
	vadduwm v19, v17, v17

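/*
 * First loop of scrypt's SMix.  A sketch of the equivalent C, in the
 * xor_salsa8 formulation of the portable scrypt_core:
 *
 *   for (i = 0; i < N; i++) {
 *       memcpy(&V[32 * i], X, 128);
 *       xor_salsa8(&X[0], &X[16]);
 *       xor_salsa8(&X[16], &X[0]);
 *   }
 *
 * One twist: the first half of each scratchpad entry is written as
 * X[0..15] ^ X[16..31] rather than X[0..15], which lets the second loop
 * fold that XOR into its own loads.
 */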
	mtctr r5
scrypt_core_loop1:
	vxor v8, v8, v12
	stvx v8, 0, r4
	vxor v9, v9, v13
	stvx v9, r4, r6
	vxor v10, v10, v14
	stvx v10, r4, r7
	vxor v11, v11, v15
	stvx v11, r4, r8
	vor v0, v8, v8
	stvx v12, r4, r9
	vor v1, v9, v9
	stvx v13, r4, r10
	vor v2, v10, v10
	stvx v14, r4, r11
	vor v3, v11, v11
	stvx v15, r4, r12

	salsa8_core

	vadduwm v8, v8, v0
	vadduwm v9, v9, v1
	vadduwm v10, v10, v2
	vadduwm v11, v11, v3

	vxor v12, v12, v8
	vxor v13, v13, v9
	vxor v14, v14, v10
	vxor v15, v15, v11
	vor v0, v12, v12
	vor v1, v13, v13
	vor v2, v14, v14
	vor v3, v15, v15

	salsa8_core

	vadduwm v12, v12, v0
	vadduwm v13, v13, v1
	vadduwm v14, v14, v2
	vadduwm v15, v15, v3

	addi r4, r4, 32*4
	bdnz scrypt_core_loop1

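/*
 * Second loop: rewind r4 to the start of V (N * 128 bytes back), turn N
 * into the N - 1 index mask, then N times fold the scratchpad entry
 * V[X[16] & (N - 1)] into X with the same two Salsa20/8 applications.
 * The quadword holding X[16] is spilled to *r3 so the integer unit can
 * pick up the index with a plain lwz.
 */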
	stvx v12, 0, r3
	slwi r6, r5, 7
	subf r4, r6, r4
	mtctr r5
	addi r5, r5, -1
	addi r7, r4, 1*16
	addi r8, r4, 2*16
	addi r9, r4, 3*16
scrypt_core_loop2:
	lwz r6, 0(r3)
	and r6, r6, r5
	slwi r6, r6, 7
	lvx v0, r4, r6
	vxor v8, v8, v12
	lvx v1, r7, r6
	vxor v9, v9, v13
	lvx v2, r8, r6
	vxor v10, v10, v14
	lvx v3, r9, r6
	vxor v11, v11, v15
	vxor v0, v0, v8
	vxor v1, v1, v9
	vxor v2, v2, v10
	vxor v3, v3, v11
	addi r6, r6, 64
	vor v8, v0, v0
	vor v9, v1, v1
	lvx v5, r4, r6
	vor v10, v2, v2
	lvx v6, r7, r6
	vor v11, v3, v3
	lvx v7, r8, r6

	salsa8_core

	vadduwm v8, v8, v0
	lvx v0, r9, r6
	vadduwm v9, v9, v1
	vadduwm v10, v10, v2
	vadduwm v11, v11, v3

	vxor v12, v12, v5
	vxor v13, v13, v6
	vxor v14, v14, v7
	vxor v15, v15, v0
	vxor v12, v12, v8
	vxor v13, v13, v9
	vxor v14, v14, v10
	vxor v15, v15, v11
	vor v0, v12, v12
	vor v1, v13, v13
	vor v2, v14, v14
	vor v3, v15, v15

	salsa8_core

	vadduwm v12, v12, v0
	stvx v12, 0, r3
	vadduwm v13, v13, v1
	vadduwm v14, v14, v2
	vadduwm v15, v15, v3

	bdnz scrypt_core_loop2

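/*
 * Undo the diagonal shuffle.  The permutation appears to be its own
 * inverse: this is the same mask construction and vsel network as
 * above, and it returns both halves of X to row order for the final
 * stores.
 */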
	vxor v0, v0, v0
	vnor v1, v0, v0
	vsldoi v2, v0, v1, 4
	vsldoi v3, v2, v0, 8
	vor v3, v3, v2
	vsldoi v1, v0, v1, 8

	vor v4, v8, v8
	vsel v8, v8, v9, v3
	vsel v9, v9, v10, v3
	vsel v10, v10, v11, v3
	vsel v11, v11, v4, v3
	vor v4, v8, v8
	vor v5, v9, v9
	vsel v8, v8, v10, v1
	vsel v9, v11, v9, v1
	vsel v10, v10, v4, v1
	vsel v11, v5, v11, v1

	vor v4, v12, v12
	vsel v12, v12, v13, v3
	vsel v13, v13, v14, v3
	vsel v14, v14, v15, v3
	vsel v15, v15, v4, v3
	vor v4, v12, v12
	vor v5, v13, v13
	vsel v12, v12, v14, v1
	vsel v13, v15, v13, v1
	vsel v14, v14, v4, v1
	vsel v15, v5, v15, v1

	li r6, 1*16
	li r7, 2*16
	li r8, 3*16
	li r9, 4*16

	stvx v8, 0, r3
	stvx v9, r3, r6
	stvx v10, r3, r7
	stvx v11, r3, r8
	stvx v12, r3, r9
	stvx v13, r3, r10
	stvx v14, r3, r11
	stvx v15, r3, r12

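	/* Restore VRSAVE and the link register, release the frame, return. */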
	lwz r0, 2*4(r1)
	mtspr 256, r0
	lwz r0, 5*4(r1)
	mtlr r0
	addi r1, r1, 4*4
	blr

#else /* __ALTIVEC__ */

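/*
 * Scalar fallback.  One Salsa20 quarter-round step is, in C (sketch;
 * ROTL32 stands for a 32-bit left rotate):
 *
 *   b ^= ROTL32(a + d, 7);
 *   c ^= ROTL32(b + a, 9);
 *   d ^= ROTL32(c + b, 13);
 *   a ^= ROTL32(d + c, 18);
 *
 * The macro below performs the four quarter-rounds of each column or
 * row step in parallel: the sixteen state words stay in r16-r31,
 * r0/r5/r6/r7 serve as temporaries, and the independent dependency
 * chains are interleaved to keep multiple integer units busy.
 */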
.macro salsa8_core_doubleround
	add r0, r16, r28
	add r5, r21, r17
	add r6, r26, r22
	add r7, r31, r27
	rotlwi r0, r0, 7
	rotlwi r5, r5, 7
	rotlwi r6, r6, 7
	rotlwi r7, r7, 7
	xor r20, r20, r0
	xor r25, r25, r5
	xor r30, r30, r6
	xor r19, r19, r7

	add r0, r20, r16
	add r5, r25, r21
	add r6, r30, r26
	add r7, r19, r31
	rotlwi r0, r0, 9
	rotlwi r5, r5, 9
	rotlwi r6, r6, 9
	rotlwi r7, r7, 9
	xor r24, r24, r0
	xor r29, r29, r5
	xor r18, r18, r6
	xor r23, r23, r7

	add r0, r24, r20
	add r5, r29, r25
	add r6, r18, r30
	add r7, r23, r19
	rotlwi r0, r0, 13
	rotlwi r5, r5, 13
	rotlwi r6, r6, 13
	rotlwi r7, r7, 13
	xor r28, r28, r0
	xor r17, r17, r5
	xor r22, r22, r6
	xor r27, r27, r7

	add r0, r28, r24
	add r5, r17, r29
	add r6, r22, r18
	add r7, r27, r23
	rotlwi r0, r0, 18
	rotlwi r5, r5, 18
	rotlwi r6, r6, 18
	rotlwi r7, r7, 18
	xor r16, r16, r0
	xor r21, r21, r5
	xor r26, r26, r6
	xor r31, r31, r7

	add r0, r16, r19
	add r5, r21, r20
	add r6, r26, r25
	add r7, r31, r30
	rotlwi r0, r0, 7
	rotlwi r5, r5, 7
	rotlwi r6, r6, 7
	rotlwi r7, r7, 7
	xor r17, r17, r0
	xor r22, r22, r5
	xor r27, r27, r6
	xor r28, r28, r7

	add r0, r17, r16
	add r5, r22, r21
	add r6, r27, r26
	add r7, r28, r31
	rotlwi r0, r0, 9
	rotlwi r5, r5, 9
	rotlwi r6, r6, 9
	rotlwi r7, r7, 9
	xor r18, r18, r0
	xor r23, r23, r5
	xor r24, r24, r6
	xor r29, r29, r7

	add r0, r18, r17
	add r5, r23, r22
	add r6, r24, r27
	add r7, r29, r28
	rotlwi r0, r0, 13
	rotlwi r5, r5, 13
	rotlwi r6, r6, 13
	rotlwi r7, r7, 13
	xor r19, r19, r0
	xor r20, r20, r5
	xor r25, r25, r6
	xor r30, r30, r7

	add r0, r19, r18
	add r5, r20, r23
	add r6, r25, r24
	add r7, r30, r29
	rotlwi r0, r0, 18
	rotlwi r5, r5, 18
	rotlwi r6, r6, 18
	rotlwi r7, r7, 18
	xor r16, r16, r0
	xor r21, r21, r5
	xor r26, r26, r6
	xor r31, r31, r7
.endm

.macro salsa8_core
	salsa8_core_doubleround
	salsa8_core_doubleround
	salsa8_core_doubleround
	salsa8_core_doubleround
.endm

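/*
 * scrypt_core(uint32_t X[32], uint32_t V[32 * N], int N), scalar
 * variant.  The prologue saves r13-r31, N (at 2*4(r1)) and X (at
 * 22*4(r1)) in a 48-word frame; the link register goes to the caller's
 * LR save word at 49*4(r1).
 */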
	.text
	.align 2
	.globl scrypt_core
	.globl _scrypt_core
#ifdef __ELF__
	.type scrypt_core, %function
#endif
scrypt_core:
_scrypt_core:
	stwu r1, -48*4(r1)
	mflr r0
	stw r0, 49*4(r1)
	stw r5, 2*4(r1)
	stw r13, 3*4(r1)
	stw r14, 4*4(r1)
	stw r15, 5*4(r1)
	stw r16, 6*4(r1)
	stw r17, 7*4(r1)
	stw r18, 8*4(r1)
	stw r19, 9*4(r1)
	stw r20, 10*4(r1)
	stw r21, 11*4(r1)
	stw r22, 12*4(r1)
	stw r23, 13*4(r1)
	stw r24, 14*4(r1)
	stw r25, 15*4(r1)
	stw r26, 16*4(r1)
	stw r27, 17*4(r1)
	stw r28, 18*4(r1)
	stw r29, 19*4(r1)
	stw r30, 20*4(r1)
	stw r31, 21*4(r1)
	stw r3, 22*4(r1)

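	/*
	 * Copy X[0..23] into the frame at 24*4(r1)..47*4(r1); X[24..31]
	 * stays live in r8-r15 for the whole function.
	 */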
	lwz r16, 0*4(r3)
	lwz r17, 1*4(r3)
	lwz r18, 2*4(r3)
	lwz r19, 3*4(r3)
	lwz r20, 4*4(r3)
	lwz r21, 5*4(r3)
	lwz r22, 6*4(r3)
	lwz r23, 7*4(r3)
	stw r16, 24*4(r1)
	stw r17, 25*4(r1)
	stw r18, 26*4(r1)
	stw r19, 27*4(r1)
	stw r20, 28*4(r1)
	stw r21, 29*4(r1)
	stw r22, 30*4(r1)
	stw r23, 31*4(r1)
	lwz r24, 8*4(r3)
	lwz r25, 9*4(r3)
	lwz r26, 10*4(r3)
	lwz r27, 11*4(r3)
	lwz r28, 12*4(r3)
	lwz r29, 13*4(r3)
	lwz r30, 14*4(r3)
	lwz r31, 15*4(r3)
	stw r24, 32*4(r1)
	stw r25, 33*4(r1)
	stw r26, 34*4(r1)
	stw r27, 35*4(r1)
	stw r28, 36*4(r1)
	stw r29, 37*4(r1)
	stw r30, 38*4(r1)
	stw r31, 39*4(r1)
	lwz r16, 16*4(r3)
	lwz r17, 17*4(r3)
	lwz r18, 18*4(r3)
	lwz r19, 19*4(r3)
	lwz r20, 20*4(r3)
	lwz r21, 21*4(r3)
	lwz r22, 22*4(r3)
	lwz r23, 23*4(r3)
	stw r16, 40*4(r1)
	stw r17, 41*4(r1)
	stw r18, 42*4(r1)
	stw r19, 43*4(r1)
	stw r20, 44*4(r1)
	stw r21, 45*4(r1)
	stw r22, 46*4(r1)
	stw r23, 47*4(r1)
	lwz r8, 24*4(r3)
	lwz r9, 25*4(r3)
	lwz r10, 26*4(r3)
	lwz r11, 27*4(r3)
	lwz r12, 28*4(r3)
	lwz r13, 29*4(r3)
	lwz r14, 30*4(r3)
	lwz r15, 31*4(r3)

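/*
 * Scalar first loop: same structure as the AltiVec version, including
 * the pre-XORed first half of each scratchpad entry.  Each iteration
 * reloads X[0..23] from the frame; X[24..31] rides along in r8-r15.
 */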
	mtctr r5
scrypt_core_loop1:
	lwz r16, 24*4(r1)
	lwz r17, 25*4(r1)
	lwz r18, 26*4(r1)
	lwz r19, 27*4(r1)
	lwz r20, 28*4(r1)
	lwz r21, 29*4(r1)
	lwz r22, 30*4(r1)
	lwz r23, 31*4(r1)
	lwz r24, 32*4(r1)
	lwz r25, 33*4(r1)
	lwz r26, 34*4(r1)
	lwz r27, 35*4(r1)
	lwz r28, 36*4(r1)
	lwz r29, 37*4(r1)
	lwz r30, 38*4(r1)
	lwz r31, 39*4(r1)

	lwz r0, 40*4(r1)
	lwz r5, 41*4(r1)
	lwz r6, 42*4(r1)
	lwz r7, 43*4(r1)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	stw r16, 0*4(r4)
	stw r17, 1*4(r4)
	stw r18, 2*4(r4)
	stw r19, 3*4(r4)
	stw r0, 16*4(r4)
	stw r5, 17*4(r4)
	stw r6, 18*4(r4)
	stw r7, 19*4(r4)

	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	stw r0, 20*4(r4)
	stw r5, 21*4(r4)
	stw r6, 22*4(r4)
	stw r7, 23*4(r4)
	stw r20, 4*4(r4)
	stw r21, 5*4(r4)
	stw r22, 6*4(r4)
	stw r23, 7*4(r4)

	xor r24, r24, r8
	xor r25, r25, r9
	xor r26, r26, r10
	xor r27, r27, r11
	xor r28, r28, r12
	xor r29, r29, r13
	xor r30, r30, r14
	xor r31, r31, r15
	stw r24, 8*4(r4)
	stw r25, 9*4(r4)
	stw r26, 10*4(r4)
	stw r27, 11*4(r4)
	stw r28, 12*4(r4)
	stw r29, 13*4(r4)
	stw r30, 14*4(r4)
	stw r31, 15*4(r4)
	stw r8, 24*4(r4)
	stw r9, 25*4(r4)
	stw r10, 26*4(r4)
	stw r11, 27*4(r4)
	stw r12, 28*4(r4)
	stw r13, 29*4(r4)
	stw r14, 30*4(r4)
	stw r15, 31*4(r4)

	salsa8_core

	lwz r0, 0*4(r4)
	lwz r5, 1*4(r4)
	lwz r6, 2*4(r4)
	lwz r7, 3*4(r4)
	add r16, r16, r0
	add r17, r17, r5
	add r18, r18, r6
	add r19, r19, r7
	lwz r0, 4*4(r4)
	lwz r5, 5*4(r4)
	lwz r6, 6*4(r4)
	lwz r7, 7*4(r4)
	add r20, r20, r0
	add r21, r21, r5
	add r22, r22, r6
	add r23, r23, r7
	lwz r0, 8*4(r4)
	lwz r5, 9*4(r4)
	lwz r6, 10*4(r4)
	lwz r7, 11*4(r4)
	add r24, r24, r0
	add r25, r25, r5
	add r26, r26, r6
	add r27, r27, r7
	lwz r0, 12*4(r4)
	lwz r5, 13*4(r4)
	lwz r6, 14*4(r4)
	lwz r7, 15*4(r4)
	add r28, r28, r0
	add r29, r29, r5
	add r30, r30, r6
	add r31, r31, r7

	stw r16, 24*4(r1)
	stw r17, 25*4(r1)
	stw r18, 26*4(r1)
	stw r19, 27*4(r1)
	stw r20, 28*4(r1)
	stw r21, 29*4(r1)
	stw r22, 30*4(r1)
	stw r23, 31*4(r1)
	stw r24, 32*4(r1)
	stw r25, 33*4(r1)
	stw r26, 34*4(r1)
	stw r27, 35*4(r1)
	stw r28, 36*4(r1)
	stw r29, 37*4(r1)
	stw r30, 38*4(r1)
	stw r31, 39*4(r1)

	lwz r0, 40*4(r1)
	lwz r5, 41*4(r1)
	lwz r6, 42*4(r1)
	lwz r7, 43*4(r1)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	xor r24, r24, r8
	xor r25, r25, r9
	xor r26, r26, r10
	xor r27, r27, r11
	xor r28, r28, r12
	xor r29, r29, r13
	xor r30, r30, r14
	xor r31, r31, r15
	stw r16, 40*4(r1)
	stw r17, 41*4(r1)
	stw r18, 42*4(r1)
	stw r19, 43*4(r1)
	mr r8, r24
	mr r9, r25
	mr r10, r26
	mr r11, r27
	stw r20, 44*4(r1)
	stw r21, 45*4(r1)
	stw r22, 46*4(r1)
	stw r23, 47*4(r1)
	mr r12, r28
	mr r13, r29
	mr r14, r30
	mr r15, r31

	salsa8_core

	lwz r0, 40*4(r1)
	lwz r5, 41*4(r1)
	lwz r6, 42*4(r1)
	lwz r7, 43*4(r1)
	add r16, r16, r0
	add r17, r17, r5
	add r18, r18, r6
	add r19, r19, r7
	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	add r20, r20, r0
	add r21, r21, r5
	add r22, r22, r6
	add r23, r23, r7
	add r8, r8, r24
	add r9, r9, r25
	add r10, r10, r26
	add r11, r11, r27
	stw r16, 40*4(r1)
	stw r17, 41*4(r1)
	stw r18, 42*4(r1)
	stw r19, 43*4(r1)
	add r12, r12, r28
	add r13, r13, r29
	add r14, r14, r30
	add r15, r15, r31
	stw r20, 44*4(r1)
	stw r21, 45*4(r1)
	stw r22, 46*4(r1)
	stw r23, 47*4(r1)

	addi r4, r4, 32*4
	bdnz scrypt_core_loop1

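/*
 * Scalar second loop.  r4 is rewound by N * 128 bytes to the start of
 * V, and N - 1 is stored back at 2*4(r1) as the index mask; r16 holds
 * X[16], from which the entry index is derived each iteration.
 */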
	lwz r5, 2*4(r1)
	slwi r3, r5, 7
	subf r4, r3, r4
	mtctr r5
	addi r5, r5, -1
	stw r5, 2*4(r1)
scrypt_core_loop2:
	and r3, r16, r5
	slwi r3, r3, 7
	add r3, r3, r4
	mr r0, r16
	mr r5, r17
	mr r6, r18
	mr r7, r19
	lwz r16, 24*4(r1)
	lwz r17, 25*4(r1)
	lwz r18, 26*4(r1)
	lwz r19, 27*4(r1)
	lwz r20, 28*4(r1)
	lwz r21, 29*4(r1)
	lwz r22, 30*4(r1)
	lwz r23, 31*4(r1)
	lwz r24, 32*4(r1)
	lwz r25, 33*4(r1)
	lwz r26, 34*4(r1)
	lwz r27, 35*4(r1)
	lwz r28, 36*4(r1)
	lwz r29, 37*4(r1)
	lwz r30, 38*4(r1)
	lwz r31, 39*4(r1)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	xor r24, r24, r8
	xor r25, r25, r9
	xor r26, r26, r10
	xor r27, r27, r11
	xor r28, r28, r12
	xor r29, r29, r13
	xor r30, r30, r14
	xor r31, r31, r15

	lwz r0, 0*4(r3)
	lwz r5, 1*4(r3)
	lwz r6, 2*4(r3)
	lwz r7, 3*4(r3)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	lwz r0, 4*4(r3)
	lwz r5, 5*4(r3)
	lwz r6, 6*4(r3)
	lwz r7, 7*4(r3)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	lwz r0, 8*4(r3)
	lwz r5, 9*4(r3)
	lwz r6, 10*4(r3)
	lwz r7, 11*4(r3)
	xor r24, r24, r0
	xor r25, r25, r5
	xor r26, r26, r6
	xor r27, r27, r7
	lwz r0, 12*4(r3)
	lwz r5, 13*4(r3)
	lwz r6, 14*4(r3)
	lwz r7, 15*4(r3)
	xor r28, r28, r0
	xor r29, r29, r5
	xor r30, r30, r6
	xor r31, r31, r7

	stw r16, 24*4(r1)
	stw r17, 25*4(r1)
	stw r18, 26*4(r1)
	stw r19, 27*4(r1)
	stw r20, 28*4(r1)
	stw r21, 29*4(r1)
	stw r22, 30*4(r1)
	stw r23, 31*4(r1)
	stw r24, 32*4(r1)
	stw r25, 33*4(r1)
	stw r26, 34*4(r1)
	stw r27, 35*4(r1)
	stw r28, 36*4(r1)
	stw r29, 37*4(r1)
	stw r30, 38*4(r1)
	stw r31, 39*4(r1)

	salsa8_core

	lwz r0, 24*4(r1)
	lwz r5, 25*4(r1)
	lwz r6, 26*4(r1)
	lwz r7, 27*4(r1)
	add r16, r16, r0
	add r17, r17, r5
	add r18, r18, r6
	add r19, r19, r7
	lwz r0, 28*4(r1)
	lwz r5, 29*4(r1)
	lwz r6, 30*4(r1)
	lwz r7, 31*4(r1)
	add r20, r20, r0
	add r21, r21, r5
	add r22, r22, r6
	add r23, r23, r7
	lwz r0, 32*4(r1)
	lwz r5, 33*4(r1)
	lwz r6, 34*4(r1)
	lwz r7, 35*4(r1)
	add r24, r24, r0
	add r25, r25, r5
	add r26, r26, r6
	add r27, r27, r7
	lwz r0, 36*4(r1)
	lwz r5, 37*4(r1)
	lwz r6, 38*4(r1)
	lwz r7, 39*4(r1)
	add r28, r28, r0
	add r29, r29, r5
	add r30, r30, r6
	add r31, r31, r7

	stw r16, 24*4(r1)
	stw r17, 25*4(r1)
	stw r18, 26*4(r1)
	stw r19, 27*4(r1)
	stw r20, 28*4(r1)
	stw r21, 29*4(r1)
	stw r22, 30*4(r1)
	stw r23, 31*4(r1)
	stw r24, 32*4(r1)
	stw r25, 33*4(r1)
	stw r26, 34*4(r1)
	stw r27, 35*4(r1)
	stw r28, 36*4(r1)
	stw r29, 37*4(r1)
	stw r30, 38*4(r1)
	stw r31, 39*4(r1)

	lwz r0, 16*4(r3)
	lwz r5, 17*4(r3)
	lwz r6, 18*4(r3)
	lwz r7, 19*4(r3)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	lwz r0, 20*4(r3)
	lwz r5, 21*4(r3)
	lwz r6, 22*4(r3)
	lwz r7, 23*4(r3)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	lwz r0, 24*4(r3)
	lwz r5, 25*4(r3)
	lwz r6, 26*4(r3)
	lwz r7, 27*4(r3)
	xor r24, r24, r0
	xor r25, r25, r5
	xor r26, r26, r6
	xor r27, r27, r7
	lwz r0, 28*4(r3)
	lwz r5, 29*4(r3)
	lwz r6, 30*4(r3)
	lwz r7, 31*4(r3)
	xor r28, r28, r0
	xor r29, r29, r5
	xor r30, r30, r6
	xor r31, r31, r7

	lwz r0, 40*4(r1)
	lwz r5, 41*4(r1)
	lwz r6, 42*4(r1)
	lwz r7, 43*4(r1)
	xor r16, r16, r0
	xor r17, r17, r5
	xor r18, r18, r6
	xor r19, r19, r7
	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	xor r20, r20, r0
	xor r21, r21, r5
	xor r22, r22, r6
	xor r23, r23, r7
	xor r24, r24, r8
	xor r25, r25, r9
	xor r26, r26, r10
	xor r27, r27, r11
	xor r28, r28, r12
	xor r29, r29, r13
	xor r30, r30, r14
	xor r31, r31, r15
	stw r16, 40*4(r1)
	stw r17, 41*4(r1)
	stw r18, 42*4(r1)
	stw r19, 43*4(r1)
	mr r8, r24
	mr r9, r25
	mr r10, r26
	mr r11, r27
	stw r20, 44*4(r1)
	stw r21, 45*4(r1)
	stw r22, 46*4(r1)
	stw r23, 47*4(r1)
	mr r12, r28
	mr r13, r29
	mr r14, r30
	mr r15, r31

	salsa8_core

	lwz r0, 40*4(r1)
	lwz r5, 41*4(r1)
	lwz r6, 42*4(r1)
	lwz r7, 43*4(r1)
	add r16, r16, r0
	add r17, r17, r5
	add r18, r18, r6
	add r19, r19, r7
	lwz r0, 44*4(r1)
	lwz r5, 45*4(r1)
	lwz r6, 46*4(r1)
	lwz r7, 47*4(r1)
	add r20, r20, r0
	add r21, r21, r5
	add r22, r22, r6
	add r23, r23, r7
	lwz r5, 2*4(r1)
	add r8, r8, r24
	add r9, r9, r25
	add r10, r10, r26
	add r11, r11, r27
	add r12, r12, r28
	add r13, r13, r29
	add r14, r14, r30
	add r15, r15, r31
	stw r16, 40*4(r1)
	stw r17, 41*4(r1)
	stw r18, 42*4(r1)
	stw r19, 43*4(r1)
	stw r20, 44*4(r1)
	stw r21, 45*4(r1)
	stw r22, 46*4(r1)
	stw r23, 47*4(r1)
	bdnz scrypt_core_loop2

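/*
 * Write the final state back to X (r8-r15 still hold X[24..31]),
 * restore the nonvolatile registers and return.
 */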
	lwz r3, 22*4(r1)

	lwz r16, 24*4(r1)
	lwz r17, 25*4(r1)
	lwz r18, 26*4(r1)
	lwz r19, 27*4(r1)
	lwz r20, 28*4(r1)
	lwz r21, 29*4(r1)
	lwz r22, 30*4(r1)
	lwz r23, 31*4(r1)
	stw r16, 0*4(r3)
	stw r17, 1*4(r3)
	stw r18, 2*4(r3)
	stw r19, 3*4(r3)
	stw r20, 4*4(r3)
	stw r21, 5*4(r3)
	stw r22, 6*4(r3)
	stw r23, 7*4(r3)
	lwz r24, 32*4(r1)
	lwz r25, 33*4(r1)
	lwz r26, 34*4(r1)
	lwz r27, 35*4(r1)
	lwz r28, 36*4(r1)
	lwz r29, 37*4(r1)
	lwz r30, 38*4(r1)
	lwz r31, 39*4(r1)
	stw r24, 8*4(r3)
	stw r25, 9*4(r3)
	stw r26, 10*4(r3)
	stw r27, 11*4(r3)
	stw r28, 12*4(r3)
	stw r29, 13*4(r3)
	stw r30, 14*4(r3)
	stw r31, 15*4(r3)
	lwz r16, 40*4(r1)
	lwz r17, 41*4(r1)
	lwz r18, 42*4(r1)
	lwz r19, 43*4(r1)
	lwz r20, 44*4(r1)
	lwz r21, 45*4(r1)
	lwz r22, 46*4(r1)
	lwz r23, 47*4(r1)
	stw r16, 16*4(r3)
	stw r17, 17*4(r3)
	stw r18, 18*4(r3)
	stw r19, 19*4(r3)
	stw r20, 20*4(r3)
	stw r21, 21*4(r3)
	stw r22, 22*4(r3)
	stw r23, 23*4(r3)
	stw r8, 24*4(r3)
	stw r9, 25*4(r3)
	stw r10, 26*4(r3)
	stw r11, 27*4(r3)
	stw r12, 28*4(r3)
	stw r13, 29*4(r3)
	stw r14, 30*4(r3)
	stw r15, 31*4(r3)

	lwz r13, 3*4(r1)
	lwz r14, 4*4(r1)
	lwz r15, 5*4(r1)
	lwz r16, 6*4(r1)
	lwz r17, 7*4(r1)
	lwz r18, 8*4(r1)
	lwz r19, 9*4(r1)
	lwz r20, 10*4(r1)
	lwz r21, 11*4(r1)
	lwz r22, 12*4(r1)
	lwz r23, 13*4(r1)
	lwz r24, 14*4(r1)
	lwz r25, 15*4(r1)
	lwz r26, 16*4(r1)
	lwz r27, 17*4(r1)
	lwz r28, 18*4(r1)
	lwz r29, 19*4(r1)
	lwz r30, 20*4(r1)
	lwz r31, 21*4(r1)
	lwz r0, 49*4(r1)
	mtlr r0
	addi r1, r1, 48*4
	blr

#endif /* __ALTIVEC__ */

#endif