/* Listing metadata from the code viewer: ArmAsm, 392 lines, 7 KiB. */
/*
 * Copyright 2012 pooler@litecoinpool.org
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.  See COPYING for more details.
 */
|
|
|
|
#include "cpuminer-config.h"
|
|
|
|
#if defined(__arm__) && defined(__APCS_32__)
|
|
|
|
/*
 * salsa8_core_doubleround_body
 *
 * Middle part of one Salsa20 double round (column round followed by row
 * round) over the 16-word state x0..x15 stored at [sp] .. [sp, #15*4].
 * The first step of the column round (rotate-left by 7) must already have
 * been applied by the caller, and the last step of the row round
 * (rotate-left by 18) is left to the caller; salsa8_core supplies both.
 *
 * Rotations are encoded as right rotations of the addend:
 * ror #25 / #23 / #19 / #14 == rotate-left by 7 / 9 / 13 / 18.
 *
 * On entry:
 *   r0-r7 = x0..x7
 *   r9    = x9   (not yet written back to [sp, #9*4])
 *   r10   = x14  (not yet written back to [sp, #14*4])
 *   r11   = x10,  r12 = x15
 *   stack slots 8, 10, 11, 12, 13 and 15 hold the current x8, x10, x11,
 *   x12, x13 and x15.
 * On exit:
 *   r0-r7 = x0..x7  (x0 and x5 still lack the final row-round step)
 *   r8    = x8,  lr = x13  (both also written back)
 *   r9    = x9,  r10 = x14 (NOT yet written back)
 *   stack slots 10, 11, 12 and 15 are written back (x10 and x15 still lack
 *   the final row-round step)
 *   r11, r12 hold temporary sums.
 * No instruction here sets the condition flags; sp is unchanged.
 */
.macro salsa8_core_doubleround_body
	/* Column round, rotate-left 9: x2, x7, x8, x13.  Also spill x9, x14. */
	ldr	r8, [sp, #8*4]
	add	r11, r11, r10
	ldr	lr, [sp, #13*4]
	add	r12, r12, r3
	eor	r2, r2, r11, ror #23
	add	r11, r4, r0
	eor	r7, r7, r12, ror #23
	add	r12, r9, r5
	str	r9, [sp, #9*4]
	eor	r8, r8, r11, ror #23
	str	r10, [sp, #14*4]
	eor	lr, lr, r12, ror #23

	/* Column round, rotate-left 13: x1, x6, x11, x12.  Spill x8, x13. */
	ldr	r11, [sp, #11*4]
	add	r9, lr, r9
	ldr	r12, [sp, #12*4]
	add	r10, r2, r10
	eor	r1, r1, r9, ror #19
	add	r9, r7, r3
	eor	r6, r6, r10, ror #19
	add	r10, r8, r4
	str	r8, [sp, #8*4]
	eor	r11, r11, r9, ror #19
	str	lr, [sp, #13*4]
	eor	r12, r12, r10, ror #19

	/*
	 * Column round, rotate-left 18: x0, x5, x10 (r9), x15 (r10).
	 * x9 and x14 are reloaded into r8 / lr for the row round.
	 */
	ldr	r9, [sp, #10*4]
	add	r8, r12, r8
	ldr	r10, [sp, #15*4]
	add	lr, r1, lr
	eor	r0, r0, r8, ror #14
	add	r8, r6, r2
	eor	r5, r5, lr, ror #14
	add	lr, r11, r7
	eor	r9, r9, r8, ror #14
	ldr	r8, [sp, #9*4]
	eor	r10, r10, lr, ror #14
	ldr	lr, [sp, #14*4]


	/* Row round, rotate-left 7: x11, x12, x1, x6.  Spill x10, x15. */
	add	r8, r9, r8
	str	r9, [sp, #10*4]
	add	lr, r10, lr
	str	r10, [sp, #15*4]
	eor	r11, r11, r8, ror #25
	add	r8, r0, r3
	eor	r12, r12, lr, ror #25
	add	lr, r5, r4
	eor	r1, r1, r8, ror #25
	ldr	r8, [sp, #8*4]
	eor	r6, r6, lr, ror #25

	/* Row round, rotate-left 9: x8, x13, x2, x7.  Spill x11, x12. */
	add	r9, r11, r9
	ldr	lr, [sp, #13*4]
	add	r10, r12, r10
	eor	r8, r8, r9, ror #23
	add	r9, r1, r0
	eor	lr, lr, r10, ror #23
	add	r10, r6, r5
	str	r11, [sp, #11*4]
	eor	r2, r2, r9, ror #23
	str	r12, [sp, #12*4]
	eor	r7, r7, r10, ror #23

	/* Row round, rotate-left 13: x9, x14, x3, x4.  Spill x8, x13. */
	ldr	r9, [sp, #9*4]
	add	r11, r8, r11
	ldr	r10, [sp, #14*4]
	add	r12, lr, r12
	eor	r9, r9, r11, ror #19
	add	r11, r2, r1
	eor	r10, r10, r12, ror #19
	add	r12, r7, r6
	str	r8, [sp, #8*4]
	eor	r3, r3, r11, ror #19
	str	lr, [sp, #13*4]
	eor	r4, r4, r12, ror #19
.endm
|
|
|
|
/*
 * salsa8_core
 *
 * Applies eight Salsa20 rounds (four double rounds) in place to the
 * 16-word state stored at [sp] .. [sp, #15*4].  The original input is NOT
 * added back to the result here; the code in scrypt_core performs that
 * addition afterwards.
 *
 * x0..x7 live in r0-r7 for the whole macro; x8..x15 are loaded from and
 * spilled to the stack as needed.  Each double round is split in three:
 * the rotate-left-7 step of the column round is issued here, the bulk is
 * salsa8_core_doubleround_body, and the rotate-left-18 step of the row
 * round is issued here again, interleaved with the start of the next
 * double round.
 *
 * Clobbers r0-r12 and lr.  Does not change sp or the condition flags.
 */
.macro salsa8_core
	ldmia	sp, {r0-r7}

	ldr	r12, [sp, #15*4]
	ldr	r8, [sp, #11*4]
	ldr	lr, [sp, #12*4]

	/* Double round 1, column round, rotate-left 7: x3, x4, x9, x14. */
	ldr	r9, [sp, #9*4]
	add	r8, r8, r12
	ldr	r11, [sp, #10*4]
	add	lr, lr, r0
	eor	r3, r3, r8, ror #25
	add	r8, r5, r1
	ldr	r10, [sp, #14*4]
	eor	r4, r4, lr, ror #25
	add	lr, r11, r6
	eor	r9, r9, r8, ror #25
	eor	r10, r10, lr, ror #25

	salsa8_core_doubleround_body

	/* Finish double round 1, row round, rotate-left 18: x10, x15, x0, x5. */
	ldr	r11, [sp, #10*4]
	add	r8, r9, r8
	ldr	r12, [sp, #15*4]
	add	lr, r10, lr
	eor	r11, r11, r8, ror #14
	str	r9, [sp, #9*4]
	eor	r12, r12, lr, ror #14
	add	r8, r3, r2
	add	lr, r4, r7
	str	r10, [sp, #14*4]
	eor	r0, r0, r8, ror #14
	ldr	r8, [sp, #11*4]
	eor	r5, r5, lr, ror #14
	ldr	lr, [sp, #12*4]

	/* Double round 2, column round, rotate-left 7: x3, x4, x9, x14. */
	add	r8, r8, r12
	str	r11, [sp, #10*4]
	add	lr, lr, r0
	str	r12, [sp, #15*4]
	eor	r3, r3, r8, ror #25
	add	r8, r5, r1
	eor	r4, r4, lr, ror #25
	add	lr, r11, r6
	eor	r9, r9, r8, ror #25
	eor	r10, r10, lr, ror #25

	salsa8_core_doubleround_body

	/* Finish double round 2, row round, rotate-left 18. */
	ldr	r11, [sp, #10*4]
	add	r8, r9, r8
	ldr	r12, [sp, #15*4]
	add	lr, r10, lr
	eor	r11, r11, r8, ror #14
	str	r9, [sp, #9*4]
	eor	r12, r12, lr, ror #14
	add	r8, r3, r2
	add	lr, r4, r7
	str	r10, [sp, #14*4]
	eor	r0, r0, r8, ror #14
	ldr	r8, [sp, #11*4]
	eor	r5, r5, lr, ror #14
	ldr	lr, [sp, #12*4]

	/* Double round 3, column round, rotate-left 7. */
	add	r8, r8, r12
	str	r11, [sp, #10*4]
	add	lr, lr, r0
	str	r12, [sp, #15*4]
	eor	r3, r3, r8, ror #25
	add	r8, r5, r1
	eor	r4, r4, lr, ror #25
	add	lr, r11, r6
	eor	r9, r9, r8, ror #25
	eor	r10, r10, lr, ror #25

	salsa8_core_doubleround_body

	/* Finish double round 3, row round, rotate-left 18. */
	ldr	r11, [sp, #10*4]
	add	r8, r9, r8
	ldr	r12, [sp, #15*4]
	add	lr, r10, lr
	eor	r11, r11, r8, ror #14
	str	r9, [sp, #9*4]
	eor	r12, r12, lr, ror #14
	add	r8, r3, r2
	add	lr, r4, r7
	str	r10, [sp, #14*4]
	eor	r0, r0, r8, ror #14
	ldr	r8, [sp, #11*4]
	eor	r5, r5, lr, ror #14
	ldr	lr, [sp, #12*4]

	/* Double round 4, column round, rotate-left 7. */
	add	r8, r8, r12
	str	r11, [sp, #10*4]
	add	lr, lr, r0
	str	r12, [sp, #15*4]
	eor	r3, r3, r8, ror #25
	add	r8, r5, r1
	eor	r4, r4, lr, ror #25
	add	lr, r11, r6
	eor	r9, r9, r8, ror #25
	eor	r10, r10, lr, ror #25

	salsa8_core_doubleround_body

	/*
	 * Finish double round 4 (row round, rotate-left 18) and write back
	 * the words still held only in registers: x9, x14, x10, x15.
	 */
	ldr	r11, [sp, #10*4]
	add	r8, r9, r8
	ldr	r12, [sp, #15*4]
	add	lr, r10, lr
	str	r9, [sp, #9*4]
	eor	r11, r11, r8, ror #14
	eor	r12, r12, lr, ror #14
	add	r8, r3, r2
	str	r10, [sp, #14*4]
	add	lr, r4, r7
	str	r11, [sp, #10*4]
	eor	r0, r0, r8, ror #14
	str	r12, [sp, #15*4]
	eor	r5, r5, lr, ror #14

	/* Write x0..x7 back; x8..x15 are already in their stack slots. */
	stmia	sp, {r0-r7}
.endm
|
|
|
|
|
|
/*
 * scrypt_core_macro1a_x4
 *
 * Handles four words per invocation:
 *   [r1]  = [r0]            (copy of the words at r0)
 *   [r3]  = [lr]            (copy of the words at lr)
 *   [r0]  = [r0] ^ [lr]
 *   [r12] = [r0] ^ [lr]     (same result, second copy)
 * r0, r1, r3, r12 and lr each advance by 16 bytes.  Clobbers r4-r11.
 *
 * As used in scrypt_core: r0 / lr point at the low / high 16 words of the
 * 32-word block, r1 / r3 at the low / high half of the current array
 * entry, and r12 at the on-stack working state.
 */
.macro scrypt_core_macro1a_x4
	ldmia	r0, {r4-r7}
	ldmia	lr!, {r8-r11}
	stmia	r1!, {r4-r7}
	stmia	r3!, {r8-r11}
	eor	r4, r4, r8
	eor	r5, r5, r9
	eor	r6, r6, r10
	eor	r7, r7, r11
	stmia	r0!, {r4-r7}
	stmia	r12!, {r4-r7}
.endm
|
|
|
|
/*
 * scrypt_core_macro1b_x4
 *
 * Handles four words per invocation:
 *   [r2]  = [r2] ^ [r3]
 *   [r0]  = [r0] ^ (new [r2]) ^ [r1]
 *   [r12] = same value as stored to [r0]
 * r0, r1, r2, r3 and r12 each advance by 16 bytes.  Clobbers r4-r11.
 *
 * As used in scrypt_core: r0 / r2 point at the low / high 16 words of the
 * 32-word block, r1 / r3 at the low / high half of the selected array
 * entry, and r12 at the on-stack working state.
 */
.macro scrypt_core_macro1b_x4
	ldmia	r3!, {r8-r11}
	ldmia	r2, {r4-r7}
	eor	r8, r8, r4
	eor	r9, r9, r5
	eor	r10, r10, r6
	eor	r11, r11, r7
	ldmia	r0, {r4-r7}
	stmia	r2!, {r8-r11}		/* high half ^= entry high half */
	eor	r4, r4, r8
	eor	r5, r5, r9
	eor	r6, r6, r10
	eor	r7, r7, r11
	ldmia	r1!, {r8-r11}
	eor	r4, r4, r8
	eor	r5, r5, r9
	eor	r6, r6, r10
	eor	r7, r7, r11
	stmia	r0!, {r4-r7}
	stmia	r12!, {r4-r7}
.endm
|
|
|
|
/*
 * scrypt_core_macro2_x4
 *
 * Handles four words per invocation:
 *   [r0]  = [r0] + [r12]
 *   [r2]  = [r2] ^ (new [r0])
 *   [r12] = same value as stored to [r2]
 * r0, r2 and r12 each advance by 16 bytes.  Clobbers r4-r11.
 *
 * As used in scrypt_core, right after salsa8_core: r12 points at the
 * on-stack salsa8_core output, r0 / r2 at the low / high 16 words of the
 * 32-word block.  The addition is the feed-forward that salsa8_core
 * itself leaves out; the XOR result becomes the next salsa8_core input.
 */
.macro scrypt_core_macro2_x4
	ldmia	r12, {r4-r7}
	ldmia	r0, {r8-r11}
	add	r4, r4, r8
	add	r5, r5, r9
	add	r6, r6, r10
	add	r7, r7, r11
	stmia	r0!, {r4-r7}
	ldmia	r2, {r8-r11}
	eor	r4, r4, r8
	eor	r5, r5, r9
	eor	r6, r6, r10
	eor	r7, r7, r11
	stmia	r2!, {r4-r7}
	stmia	r12!, {r4-r7}
.endm
|
|
|
|
/*
 * scrypt_core_macro3_x4
 *
 * Adds four words at [r1] into the four words at [r0]:
 *   [r0] = [r0] + [r1]
 * r0 and r1 each advance by 16 bytes.  Clobbers r4-r11 only, so r2, r3,
 * r12 and lr survive.
 */
.macro scrypt_core_macro3_x4
	ldmia	r1!, {r4-r7}
	ldmia	r0, {r8-r11}
	add	r4, r4, r8
	add	r5, r5, r9
	add	r6, r6, r10
	add	r7, r7, r11
	stmia	r0!, {r4-r7}
.endm
|
|
|
|
/*
 * scrypt_core_macro3_x6
 *
 * Six-word variant of scrypt_core_macro3_x4:
 *   [r0] = [r0] + [r1]
 * r0 and r1 each advance by 24 bytes.  Clobbers r2-r12 and lr.
 */
.macro scrypt_core_macro3_x6
	ldmia	r1!, {r2-r7}
	ldmia	r0, {r8-r12, lr}
	add	r2, r2, r8
	add	r3, r3, r9
	add	r4, r4, r10
	add	r5, r5, r11
	add	r6, r6, r12
	add	r7, r7, lr
	stmia	r0!, {r2-r7}
.endm
|
|
|
|
|
|
/*
 * scrypt_core
 *
 * In:
 *   r0 = pointer to a 32-word (128-byte) block, updated in place
 *   r1 = pointer to a scratch array of 1024 entries of 32 words each
 *        (1024*32*4 bytes)
 * NOTE(review): presumably declared in C as
 *   void scrypt_core(uint32_t *X, uint32_t *V);
 * the prototype is not visible in this file; confirm against the caller.
 *
 * r4-r11 and lr are saved on entry and restored on return.  The routine
 * makes no calls.
 *
 * Stack frame (20 words):
 *   [sp] .. [sp, #15*4]  working state for salsa8_core
 *   [sp, #16*4]          saved r0 (block pointer)
 *   [sp, #17*4]          loop 1: current array entry + 16 words
 *                        loop 2: base of the array
 *   [sp, #18*4]          loop 1: end of the array
 *                        loop 2: remaining iteration count
 *   [sp, #19*4]          unused
 *
 * Loop 1 runs once per array entry: it copies the block into the entry
 * and then mixes the block.  Loop 2 runs 1024 times: it picks the entry
 * indexed by the low 10 bits of word 16 of the block, XORs it into the
 * block and mixes the block.  "Mix" here is: low half ^= high half,
 * salsa8_core with feed-forward, high half ^= low half, salsa8_core with
 * feed-forward.  This looks like scrypt's ROMix with N = 1024 and r = 1.
 */
	.text
	.code 32
	.align 2
	.globl scrypt_core
	.globl _scrypt_core
scrypt_core:
_scrypt_core:
	stmfd	sp!, {r4-r11, lr}
	sub	sp, sp, #20*4

	str	r0, [sp, #16*4]
	add	r12, r1, #1024*32*4		/* r12 = end of the array */
	str	r12, [sp, #18*4]
scrypt_core_loop1:
	/* Entry = block; low half ^= high half, also copied to the stack. */
	add	lr, r0, #16*4
	add	r3, r1, #16*4
	mov	r12, sp
	scrypt_core_macro1a_x4
	scrypt_core_macro1a_x4
	scrypt_core_macro1a_x4
	scrypt_core_macro1a_x4
	str	r1, [sp, #17*4]			/* r1 = entry + 16 words here */

	salsa8_core

	/* Low half += salsa output; high half ^= low half (also to stack). */
	ldr	r0, [sp, #16*4]
	mov	r12, sp
	add	r2, r0, #16*4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4

	salsa8_core

	/* High half += salsa output, as 6 + 6 + 4 words. */
	ldr	r0, [sp, #16*4]
	mov	r1, sp
	add	r0, r0, #16*4
	scrypt_core_macro3_x6
	scrypt_core_macro3_x6
	/* Loaded here because macro3_x6 clobbers r3 and r12; macro3_x4 does not. */
	ldr	r3, [sp, #17*4]
	ldr	r12, [sp, #18*4]
	scrypt_core_macro3_x4

	add	r1, r3, #16*4			/* r1 = next array entry */
	sub	r0, r0, #32*4			/* r0 = start of the block again */
	cmp	r1, r12
	bne	scrypt_core_loop1

	sub	r1, r1, #1024*32*4		/* r1 = base of the array */
	str	r1, [sp, #17*4]
	mov	r12, #1024			/* loop 2 iteration count */
scrypt_core_loop2:
	str	r12, [sp, #18*4]

	/*
	 * r1 += (block[16] & 1023) * 128: shifting left by 22 keeps only the
	 * low 10 bits, shifting right by 15 leaves them multiplied by the
	 * entry size in bytes.
	 */
	ldr	r4, [r0, #16*4]
	mov	r4, r4, lsl #32-10
	add	r1, r1, r4, lsr #32-10-7

	/* Block ^= selected entry; low half ^= high half (also to stack). */
	add	r2, r0, #16*4
	add	r3, r1, #16*4
	mov	r12, sp
	scrypt_core_macro1b_x4
	scrypt_core_macro1b_x4
	scrypt_core_macro1b_x4
	scrypt_core_macro1b_x4

	salsa8_core

	/* Low half += salsa output; high half ^= low half (also to stack). */
	ldr	r0, [sp, #16*4]
	mov	r12, sp
	add	r2, r0, #16*4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4
	scrypt_core_macro2_x4

	salsa8_core

	/* High half += salsa output, as 6 + 6 + 4 words. */
	ldr	r0, [sp, #16*4]
	mov	r1, sp
	add	r0, r0, #16*4
	scrypt_core_macro3_x6
	scrypt_core_macro3_x6
	scrypt_core_macro3_x4

	ldr	r12, [sp, #18*4]
	sub	r0, r0, #32*4			/* r0 = start of the block again */
	ldr	r1, [sp, #17*4]			/* r1 = base of the array */
	subs	r12, r12, #1
	bne	scrypt_core_loop2

	add	sp, sp, #20*4
#ifdef __thumb__
	/* Return through bx so that a Thumb caller is resumed in Thumb state. */
	ldmfd	sp!, {r4-r11, lr}
	bx	lr
#else
	ldmfd	sp!, {r4-r11, pc}
#endif
|
|
|
|
#endif
|