/*
 * Copyright 2012 pooler@litecoinpool.org
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2 of the License, or (at your option)
 * any later version.  See COPYING for more details.
 */

/*
 * SHA-256 primitives for 32-bit ARM.
 *
 * Syntax: GNU as, run through the C preprocessor.
 * Target: ARM state (.code 32); only assembled when __arm__ and __APCS_32__
 *         are both defined.
 *
 * Exported symbols (each also under a leading-underscore alias):
 *   sha256_transform
 *   sha256d_ms
 *
 * Scratch registers used by every macro in this file: r3, r12, lr.
 * None of the macros sets the condition flags; sha256d_ms relies on that.
 */

#include "cpuminer-config.h"

#if defined(__arm__) && defined(__APCS_32__)

/*
 * sha256_k
 * Emits the 64 SHA-256 round constants as a 4-byte aligned table of words.
 * The table is expanded inline at the invocation point, so the caller must
 * branch around it.
 */
.macro sha256_k
	.align 2
	.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
	.long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
	.long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
	.long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
	.long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
	.long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
	.long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
	.long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
	.long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
	.long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
	.long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
	.long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
	.long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
	.long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
	.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
	.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.endm

/*
 * sha256_extend_doubleround_core i, rw, ra, rb, ry, rz
 * Computes two consecutive message-schedule words, W[i+16] and W[i+17].
 *
 * In:   rw  = base address of the W array
 *       r11 = W[i]        lr = W[i+1]
 *       ra  = W[i+9]      rb = W[i+10]
 *       ry  = W[i+14]     rz = W[i+15]
 * Out:  ra  = W[i+16]  (also stored to rw[i+16])
 *       rb  = W[i+17]  (NOT stored here; the wrapper macros store it)
 *       r11 = W[i+2]   (loaded from memory, ready for the next step)
 * Clobbers: r12, lr.
 *
 * The two halves are interleaved by hand, so the instruction order matters.
 */
.macro sha256_extend_doubleround_core i, rw, ra, rb, ry, rz
	mov	r12, \ry, ror #17
	add	r11, r11, \ra
	eor	r12, r12, \ry, ror #19
	mov	\ra, lr, ror #7
	eor	r12, r12, \ry, lsr #10		/* r12 = ror17 ^ ror19 ^ lsr10 of W[i+14] */
	eor	\ra, \ra, lr, ror #18
	add	r12, r12, r11
	ldr	r11, [\rw, #(\i+2)*4]
	eor	\ra, \ra, lr, lsr #3		/* ra = ror7 ^ ror18 ^ lsr3 of W[i+1] */
	add	\ra, \ra, r12

	mov	r12, \rz, ror #17
	str	\ra, [\rw, #(\i+16)*4]
	add	lr, lr, \rb
	eor	r12, r12, \rz, ror #19
	mov	\rb, r11, ror #7
	eor	r12, r12, \rz, lsr #10
	eor	\rb, \rb, r11, ror #18
	add	lr, lr, r12
	eor	\rb, \rb, r11, lsr #3
	add	\rb, \rb, lr
.endm

/*
 * sha256_extend_doubleround_head: first step of a chain.
 * Loads lr = W[i+1] itself, runs the core, then preloads lr = W[i+3] for the
 * following step.
 */
.macro sha256_extend_doubleround_head i, rw, ra, rb, ry, rz
	ldr	lr, [\rw, #(\i+1)*4]
	sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
	ldr	lr, [\rw, #(\i+3)*4]
.endm

/*
 * sha256_extend_doubleround_body: middle step of a chain.
 * First stores rz to rw[i+15] (the second word produced by the previous
 * step), runs the core, then preloads lr = W[i+3] for the following step.
 */
.macro sha256_extend_doubleround_body i, rw, ra, rb, ry, rz
	str	\rz, [\rw, #(\i+15)*4]
	sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
	ldr	lr, [\rw, #(\i+3)*4]
.endm

/*
 * sha256_extend_doubleround_foot: last step of a chain.
 * Like the body, but instead of preloading for a next step it stores the
 * second result word rb to rw[i+17].
 */
.macro sha256_extend_doubleround_foot i, rw, ra, rb, ry, rz
	str	\rz, [\rw, #(\i+15)*4]
	sha256_extend_doubleround_core \i, \rw, \ra, \rb, \ry, \rz
	str	\rb, [\rw, #(\i+17)*4]
.endm

/*
 * sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh
 * One SHA-256 compression round.
 *
 * In:   ka       = label of the round-constant table; read with a
 *                  PC-relative ldr, so the table has to be within range of
 *                  every expansion of this macro
 *       rw       = base address of the W array
 *       ra .. rh = working variables a .. h
 * Out:  rh = new e   (h + Ch(e,f,g) + W[i] + K[i] + Sigma1(e) + d)
 *       rd = new a   (h + Ch + W[i] + K[i] + Sigma1(e) + Maj(a,b,c) + Sigma0(a))
 *       all other working registers are unchanged; the caller renames them
 *       for the next round instead of moving values.
 * Clobbers: r3, r12, lr.
 */
.macro sha256_main_round i, ka, rw, ra, rb, rc, rd, re, rf, rg, rh
	ldr	r12, [\rw, #(\i)*4]
	and	r3, \rf, \re
	bic	lr, \rg, \re
	orr	lr, lr, r3			/* lr = (f & e) | (g & ~e) */
	ldr	r3, \ka + (\i)*4
	add	\rh, \rh, lr
	eor	lr, \re, \re, ror #5
	add	\rh, \rh, r12
	eor	lr, lr, \re, ror #19
	add	\rh, \rh, r3
	eor	r3, \ra, \rb
	add	\rh, \rh, lr, ror #6		/* ror 6 of (e ^ e>>>5 ^ e>>>19) */

	and	r3, r3, \rc
	eor	r12, \ra, \ra, ror #11
	and	lr, \ra, \rb
	eor	r12, r12, \ra, ror #20
	eor	lr, lr, r3			/* lr = (a & b) ^ ((a ^ b) & c) */
	add	r3, \rh, lr
	add	\rh, \rh, \rd
	add	\rd, r3, r12, ror #2		/* ror 2 of (a ^ a>>>11 ^ a>>>20) */
.endm

/*
 * sha256_main_quadround i, ka, rw
 * Four consecutive rounds i .. i+3 with a .. h held in r4 .. r11.  The
 * register arguments rotate from round to round so that after four rounds
 * the variables are back in their starting registers.
 */
.macro sha256_main_quadround i, ka, rw
	sha256_main_round \i+0, \ka, \rw, r4, r5, r6, r7, r8, r9, r10, r11
	sha256_main_round \i+1, \ka, \rw, r7, r4, r5, r6, r11, r8, r9, r10
	sha256_main_round \i+2, \ka, \rw, r6, r7, r4, r5, r10, r11, r8, r9
	sha256_main_round \i+3, \ka, \rw, r5, r6, r7, r4, r9, r10, r11, r8
.endm


/*
 * sha256_transform
 *
 * Presumed C prototype (not visible in this file, confirm against the
 * caller):
 *   void sha256_transform(uint32_t *state, const uint32_t *block, int swap);
 *
 * In:   r0 = address of eight state words; read before the rounds and
 *            overwritten with (old state + round result) at the end
 *       r1 = address of sixteen input words
 *       r2 = when nonzero, every input word is byte-reversed first
 * Stack: 64 words of scratch for the message schedule, plus the saved
 *        r4-r11 and lr.
 * Clobbers: r1, r2, r3, r12, flags.  r4-r11 are saved and restored.
 */
	.text
	.code 32
	.align 2
	.globl sha256_transform
	.globl _sha256_transform
#ifdef __ELF__
	/*
	 * Mark the entry points as functions.  ELF linkers only perform
	 * ARM/Thumb interworking for function-typed symbols, and the epilogue
	 * below explicitly supports Thumb callers.
	 */
	.type sha256_transform, %function
	.type _sha256_transform, %function
#endif
sha256_transform:
_sha256_transform:
	stmfd	sp!, {r4-r11, lr}
	cmp	r2, #0
	sub	sp, sp, #64*4
	bne	sha256_transform_swap

	/* No swap requested: copy the sixteen input words to W[0..15]. */
	ldmia	r1!, {r4-r11}
	stmia	sp, {r4-r11}
	add	r3, sp, #8*4
	ldmia	r1, {r4-r11}
	stmia	r3, {r4-r11}
	b	sha256_transform_extend

/*
 * bswap rd, rn
 * rd = rn with its four bytes reversed.  rd may be the same register as rn.
 * Clobbers r12.
 */
.macro bswap rd, rn
	eor	r12, \rn, \rn, ror #16
	bic	r12, r12, #0x00ff0000
	mov	\rd, \rn, ror #8
	eor	\rd, \rd, r12, lsr #8
.endm

sha256_transform_swap:
	/* Swap requested: byte-reverse each input word while copying. */
	ldmia	r1!, {r4-r11}
	bswap	r4, r4
	bswap	r5, r5
	bswap	r6, r6
	bswap	r7, r7
	bswap	r8, r8
	bswap	r9, r9
	bswap	r10, r10
	bswap	r11, r11
	stmia	sp, {r4-r11}
	add	r3, sp, #8*4
	ldmia	r1, {r4-r11}
	bswap	r4, r4
	bswap	r5, r5
	bswap	r6, r6
	bswap	r7, r7
	bswap	r8, r8
	bswap	r9, r9
	bswap	r10, r10
	bswap	r11, r11
	stmia	r3, {r4-r11}

sha256_transform_extend:
	/* r11 = W[0], r4 .. r10 = W[9] .. W[15]; then fill W[16..63]. */
	add	r12, sp, #9*4
	ldr	r11, [sp, #0*4]
	ldmia	r12, {r4-r10}
	sha256_extend_doubleround_head  0, sp, r4, r5, r9, r10
	sha256_extend_doubleround_body  2, sp, r6, r7, r4, r5
	sha256_extend_doubleround_body  4, sp, r8, r9, r6, r7
	sha256_extend_doubleround_body  6, sp, r10, r4, r8, r9
	sha256_extend_doubleround_body  8, sp, r5, r6, r10, r4
	sha256_extend_doubleround_body 10, sp, r7, r8, r5, r6
	sha256_extend_doubleround_body 12, sp, r9, r10, r7, r8
	sha256_extend_doubleround_body 14, sp, r4, r5, r9, r10
	sha256_extend_doubleround_body 16, sp, r6, r7, r4, r5
	sha256_extend_doubleround_body 18, sp, r8, r9, r6, r7
	sha256_extend_doubleround_body 20, sp, r10, r4, r8, r9
	sha256_extend_doubleround_body 22, sp, r5, r6, r10, r4
	sha256_extend_doubleround_body 24, sp, r7, r8, r5, r6
	sha256_extend_doubleround_body 26, sp, r9, r10, r7, r8
	sha256_extend_doubleround_body 28, sp, r4, r5, r9, r10
	sha256_extend_doubleround_body 30, sp, r6, r7, r4, r5
	sha256_extend_doubleround_body 32, sp, r8, r9, r6, r7
	sha256_extend_doubleround_body 34, sp, r10, r4, r8, r9
	sha256_extend_doubleround_body 36, sp, r5, r6, r10, r4
	sha256_extend_doubleround_body 38, sp, r7, r8, r5, r6
	sha256_extend_doubleround_body 40, sp, r9, r10, r7, r8
	sha256_extend_doubleround_body 42, sp, r4, r5, r9, r10
	sha256_extend_doubleround_body 44, sp, r6, r7, r4, r5
	sha256_extend_doubleround_foot 46, sp, r8, r9, r6, r7

	/* a .. h = state[0..7], then the 64 rounds. */
	ldmia	r0, {r4-r11}
	sha256_main_quadround  0, sha256_transform_k, sp
	sha256_main_quadround  4, sha256_transform_k, sp
	sha256_main_quadround  8, sha256_transform_k, sp
	sha256_main_quadround 12, sha256_transform_k, sp
	sha256_main_quadround 16, sha256_transform_k, sp
	sha256_main_quadround 20, sha256_transform_k, sp
	sha256_main_quadround 24, sha256_transform_k, sp
	sha256_main_quadround 28, sha256_transform_k, sp
	/*
	 * The constant table sits in the middle of the round code and is
	 * jumped over; presumably this keeps it within PC-relative ldr range
	 * of both the first and the last round.
	 */
	b	sha256_transform_k_over
sha256_transform_k:
	sha256_k
sha256_transform_k_over:
	sha256_main_quadround 32, sha256_transform_k, sp
	sha256_main_quadround 36, sha256_transform_k, sp
	sha256_main_quadround 40, sha256_transform_k, sp
	sha256_main_quadround 44, sha256_transform_k, sp
	sha256_main_quadround 48, sha256_transform_k, sp
	sha256_main_quadround 52, sha256_transform_k, sp
	sha256_main_quadround 56, sha256_transform_k, sp
	sha256_main_quadround 60, sha256_transform_k, sp

	/* state[0..7] += a .. h */
	ldmia	r0, {r1, r2, r3, r12}
	add	r4, r4, r1
	add	r5, r5, r2
	add	r6, r6, r3
	add	r7, r7, r12
	stmia	r0!, {r4-r7}
	ldmia	r0, {r1, r2, r3, r12}
	add	r8, r8, r1
	add	r9, r9, r2
	add	r10, r10, r3
	add	r11, r11, r12
	stmia	r0, {r8-r11}

	add	sp, sp, #64*4
#ifdef __thumb__
	/* Return with bx so a Thumb caller is resumed in Thumb state. */
	ldmfd	sp!, {r4-r11, lr}
	bx	lr
#else
	ldmfd	sp!, {r4-r11, pc}
#endif


/*
 * sha256d_ms
 *
 * Presumed C prototype (not visible in this file, confirm against the
 * caller):
 *   void sha256d_ms(uint32_t *hash, uint32_t *W,
 *                   const uint32_t *midstate, const uint32_t *prehash);
 *
 * In:   r0 = output array; ONLY word 7 is written
 *       r1 = 64-word schedule buffer.  Words 18..63 are written during the
 *            call; the original contents of words 18, 19, 20, 22, 23, 24,
 *            30 and 31 are saved on entry and put back before the second
 *            pass.
 *       r2 = eight words added to the working variables after the first
 *            pass
 *       r3 = eight words holding the working variables as they stand after
 *            round 2 of the first pass (rounds 0..2 are skipped), read in
 *            the order shown at the load site below
 * Stack: 64 words of scratch plus the saved r4-r11 and lr.
 * Clobbers: r1, r2, r3, r12, flags.  r4-r11 are saved and restored.
 *
 * The body runs the same extend/round code twice.  The Z flag selects the
 * pass: it is set by "cmp r0, r0" on entry and cleared by "movs r1, sp"
 * before the second pass, and no instruction in between alters it.  Each
 * "bne" below is therefore taken only during the second pass.
 */
	.text
	.code 32
	.align 2
	.globl sha256d_ms
	.globl _sha256d_ms
#ifdef __ELF__
	/* Function type is required for ARM/Thumb interworking on ELF. */
	.type sha256d_ms, %function
	.type _sha256d_ms, %function
#endif
sha256d_ms:
_sha256d_ms:
	stmfd	sp!, {r4-r11, lr}
	sub	sp, sp, #64*4

	cmp	r0, r0				/* Z = 1: first pass */

	/*
	 * First pass, schedule words 18..31 of the r1 buffer.  The original
	 * words 18, 19, 20, 22, 23, 24, 30, 31 are parked in stack slots
	 * 18..25 and the updated values are written back to r1.
	 */
	ldr	lr, [r1, #3*4]
	ldr	r6, [r1, #18*4]
	ldr	r7, [r1, #19*4]

	mov	r12, lr, ror #7
	str	r6, [sp, #18*4]
	eor	r12, r12, lr, ror #18
	str	r7, [sp, #19*4]
	eor	r12, r12, lr, lsr #3
	ldr	r8, [r1, #20*4]
	add	r6, r6, r12
	ldr	r10, [r1, #22*4]
	add	r7, r7, lr
	str	r6, [r1, #18*4]

	mov	r12, r6, ror #17
	str	r7, [r1, #19*4]
	eor	r12, r12, r6, ror #19
	str	r8, [sp, #20*4]
	eor	r12, r12, r6, lsr #10
	ldr	r4, [r1, #23*4]
	add	r8, r8, r12
	ldr	r5, [r1, #24*4]

	mov	r9, r7, ror #17
	str	r8, [r1, #20*4]
	eor	r9, r9, r7, ror #19
	str	r10, [sp, #21*4]
	eor	r9, r9, r7, lsr #10
	str	r4, [sp, #22*4]

	mov	r12, r8, ror #17
	str	r9, [r1, #21*4]
	eor	r12, r12, r8, ror #19
	str	r5, [sp, #23*4]
	eor	r12, r12, r8, lsr #10
	mov	lr, r9, ror #17
	add	r10, r10, r12
	ldr	r11, [r1, #30*4]

	eor	lr, lr, r9, ror #19
	str	r10, [r1, #22*4]
	eor	lr, lr, r9, lsr #10
	str	r11, [sp, #24*4]
	add	r4, r4, lr

	mov	r12, r10, ror #17
	str	r4, [r1, #23*4]
	eor	r12, r12, r10, ror #19
	mov	lr, r4, ror #17
	eor	r12, r12, r10, lsr #10
	eor	lr, lr, r4, ror #19
	add	r5, r5, r12
	eor	lr, lr, r4, lsr #10
	str	r5, [r1, #24*4]
	add	r6, r6, lr

	mov	r12, r5, ror #17
	str	r6, [r1, #25*4]
	eor	r12, r12, r5, ror #19
	mov	lr, r6, ror #17
	eor	r12, r12, r5, lsr #10
	eor	lr, lr, r6, ror #19
	add	r7, r7, r12
	eor	lr, lr, r6, lsr #10
	str	r7, [r1, #26*4]
	add	r8, r8, lr

	mov	r12, r7, ror #17
	str	r8, [r1, #27*4]
	eor	r12, r12, r7, ror #19
	mov	lr, r8, ror #17
	eor	r12, r12, r7, lsr #10
	eor	lr, lr, r8, ror #19
	add	r9, r9, r12
	eor	lr, lr, r8, lsr #10
	str	r9, [r1, #28*4]
	add	r10, r10, lr

	ldr	lr, [r1, #31*4]
	mov	r12, r9, ror #17
	str	r10, [r1, #29*4]
	eor	r12, r12, r9, ror #19
	str	lr, [sp, #25*4]
	eor	r12, r12, r9, lsr #10
	add	r11, r11, r12
	add	r5, r5, lr
	mov	r12, r10, ror #17
	add	r4, r4, r11

	ldr	r11, [r1, #16*4]
	eor	r12, r12, r10, ror #19
	str	r4, [r1, #30*4]
	eor	r12, r12, r10, lsr #10
	add	r5, r5, r12			/* r5 = word 31, stored by the next step */
	ldr	lr, [r1, #17*4]

	/* Shared by both passes: schedule words 32..63 of the r1 buffer. */
sha256d_ms_extend_loop2:
	sha256_extend_doubleround_body 16, r1, r6, r7, r4, r5
	sha256_extend_doubleround_body 18, r1, r8, r9, r6, r7
	sha256_extend_doubleround_body 20, r1, r10, r4, r8, r9
	sha256_extend_doubleround_body 22, r1, r5, r6, r10, r4
	sha256_extend_doubleround_body 24, r1, r7, r8, r5, r6
	sha256_extend_doubleround_body 26, r1, r9, r10, r7, r8
	sha256_extend_doubleround_body 28, r1, r4, r5, r9, r10
	sha256_extend_doubleround_body 30, r1, r6, r7, r4, r5
	sha256_extend_doubleround_body 32, r1, r8, r9, r6, r7
	sha256_extend_doubleround_body 34, r1, r10, r4, r8, r9
	sha256_extend_doubleround_body 36, r1, r5, r6, r10, r4
	sha256_extend_doubleround_body 38, r1, r7, r8, r5, r6
	sha256_extend_doubleround_body 40, r1, r9, r10, r7, r8
	sha256_extend_doubleround_body 42, r1, r4, r5, r9, r10
	bne	sha256d_ms_extend_coda2		/* second pass stops at word 60 */
	sha256_extend_doubleround_body 44, r1, r6, r7, r4, r5
	sha256_extend_doubleround_foot 46, r1, r8, r9, r6, r7

	/*
	 * First pass: load the working variables from r3 straight into the
	 * registers round 3 expects (a=r5 b=r6 c=r7 d=r4 e=r9 f=r10 g=r11
	 * h=r8) and enter the round code at round 3.
	 */
	ldr	r4,  [r3, #0*4]
	ldr	r9,  [r3, #1*4]
	ldr	r10, [r3, #2*4]
	ldr	r11, [r3, #3*4]
	ldr	r8,  [r3, #4*4]
	ldr	r5,  [r3, #5*4]
	ldr	r6,  [r3, #6*4]
	ldr	r7,  [r3, #7*4]
	b	sha256d_ms_main_loop1

	/* Second pass enters here and runs rounds 0..2 as well. */
sha256d_ms_main_loop2:
	sha256_main_round  0, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
	sha256_main_round  1, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
	sha256_main_round  2, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
sha256d_ms_main_loop1:
	sha256_main_round  3, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
	sha256_main_quadround  4, sha256d_ms_k, r1
	sha256_main_quadround  8, sha256d_ms_k, r1
	sha256_main_quadround 12, sha256d_ms_k, r1
	sha256_main_quadround 16, sha256d_ms_k, r1
	sha256_main_quadround 20, sha256d_ms_k, r1
	sha256_main_quadround 24, sha256d_ms_k, r1
	sha256_main_quadround 28, sha256d_ms_k, r1
	b	sha256d_ms_k_over		/* skip the inline constant table */
sha256d_ms_k:
	sha256_k
sha256d_ms_k_over:
	sha256_main_quadround 32, sha256d_ms_k, r1
	sha256_main_quadround 36, sha256d_ms_k, r1
	sha256_main_quadround 40, sha256d_ms_k, r1
	sha256_main_quadround 44, sha256d_ms_k, r1
	sha256_main_quadround 48, sha256d_ms_k, r1
	sha256_main_quadround 52, sha256d_ms_k, r1
	sha256_main_round 56, sha256d_ms_k, r1, r4, r5, r6, r7, r8, r9, r10, r11
	bne	sha256d_ms_finish		/* second pass: shortened tail */
	sha256_main_round 57, sha256d_ms_k, r1, r7, r4, r5, r6, r11, r8, r9, r10
	sha256_main_round 58, sha256d_ms_k, r1, r6, r7, r4, r5, r10, r11, r8, r9
	sha256_main_round 59, sha256d_ms_k, r1, r5, r6, r7, r4, r9, r10, r11, r8
	sha256_main_quadround 60, sha256d_ms_k, r1

	/*
	 * End of first pass: add the eight words at r2 to a .. h and store the
	 * sums in stack slots 0..7, which become the first eight input words
	 * of the second pass.
	 */
	ldmia	r2!, {r3, r12, lr}
	add	r4, r4, r3
	add	r5, r5, r12
	add	r6, r6, lr
	stmia	sp, {r4-r6}
	ldmia	r2, {r3, r4, r5, r6, r12}
	add	lr, sp, #3*4
	add	r7, r7, r3
	add	r8, r8, r4
	add	r9, r9, r5
	add	r10, r10, r6
	add	r11, r11, r12
	add	r12, sp, #18*4
	stmia	lr!, {r7-r11}			/* lr now points at slot 8 */
	/* Put the words saved on entry back into the caller buffer. */
	ldmia	r12, {r4-r11}
	str	r4,  [r1, #18*4]
	str	r5,  [r1, #19*4]
	str	r6,  [r1, #20*4]
	str	r7,  [r1, #22*4]
	str	r8,  [r1, #23*4]
	str	r9,  [r1, #24*4]
	str	r10, [r1, #30*4]
	str	r11, [r1, #31*4]

	/*
	 * Slots 8..15: 0x80000000, six zero words, 0x00000100.  This matches
	 * SHA-256 padding for a 256-bit message.
	 */
	mov	r3, #0x80000000
	mov	r4, #0
	mov	r5, #0
	mov	r6, #0
	mov	r7, #0
	mov	r8, #0
	mov	r9, #0
	mov	r10, #0x00000100
	stmia	lr, {r3-r10}

	/*
	 * Second pass works on the stack buffer.  movs clears Z (sp is never
	 * zero here), which flips every bne above and below to "taken".
	 * Words 16..30 are computed by hand-specialised code; the immediates
	 * presumably fold in the terms that come from the constant slots
	 * 8..15 (not re-derived in this review).
	 */
	ldr	lr, [sp, #1*4]
	movs	r1, sp
	ldr	r4, [sp, #0*4]

	ldr	r11, [sp, #2*4]
	mov	r12, lr, ror #7
	eor	r12, r12, lr, ror #18
	add	r5, lr, #0x00a00000
	eor	r12, r12, lr, lsr #3
	mov	lr, r11, ror #7
	add	r4, r4, r12
	eor	lr, lr, r11, ror #18
	str	r4, [sp, #16*4]
	eor	lr, lr, r11, lsr #3
	mov	r12, r4, ror #17
	add	r5, r5, lr
	ldr	lr, [sp, #3*4]

	str	r5, [sp, #17*4]
	eor	r12, r12, r4, ror #19
	mov	r6, lr, ror #7
	eor	r12, r12, r4, lsr #10
	eor	r6, r6, lr, ror #18
	add	r11, r11, r12
	eor	r6, r6, lr, lsr #3
	mov	r12, r5, ror #17
	add	r6, r6, r11
	ldr	r11, [sp, #4*4]

	str	r6, [sp, #18*4]
	eor	r12, r12, r5, ror #19
	mov	r7, r11, ror #7
	eor	r12, r12, r5, lsr #10
	eor	r7, r7, r11, ror #18
	add	lr, lr, r12
	eor	r7, r7, r11, lsr #3
	mov	r12, r6, ror #17
	add	r7, r7, lr
	ldr	lr, [sp, #5*4]

	str	r7, [sp, #19*4]
	eor	r12, r12, r6, ror #19
	mov	r8, lr, ror #7
	eor	r12, r12, r6, lsr #10
	eor	r8, r8, lr, ror #18
	add	r11, r11, r12
	eor	r8, r8, lr, lsr #3
	mov	r12, r7, ror #17
	add	r8, r8, r11
	ldr	r11, [sp, #6*4]

	str	r8, [sp, #20*4]
	eor	r12, r12, r7, ror #19
	mov	r9, r11, ror #7
	eor	r12, r12, r7, lsr #10
	eor	r9, r9, r11, ror #18
	add	lr, lr, r12
	eor	r9, r9, r11, lsr #3
	mov	r12, r8, ror #17
	add	r9, r9, lr
	ldr	lr, [sp, #7*4]

	str	r9, [sp, #21*4]
	eor	r12, r12, r8, ror #19
	mov	r10, lr, ror #7
	eor	r12, r12, r8, lsr #10
	eor	r10, r10, lr, ror #18
	add	r11, r11, r12
	eor	r10, r10, lr, lsr #3
	mov	r12, r9, ror #17
	add	r11, r11, #0x00000100
	add	lr, lr, r4
	add	r10, r10, r11

	eor	r12, r12, r9, ror #19
	str	r10, [sp, #22*4]
	add	lr, lr, #0x11000000
	eor	r12, r12, r9, lsr #10
	add	lr, lr, r12
	mov	r12, r10, ror #17
	add	r4, lr, #0x00002000
	eor	r12, r12, r10, ror #19
	str	r4, [sp, #23*4]
	add	r5, r5, #0x80000000
	eor	r12, r12, r10, lsr #10
	add	r5, r5, r12

	mov	r12, r4, ror #17
	str	r5, [sp, #24*4]
	eor	r12, r12, r4, ror #19
	mov	r11, r5, ror #17
	eor	r12, r12, r4, lsr #10
	eor	r11, r11, r5, ror #19
	add	r6, r6, r12
	eor	r11, r11, r5, lsr #10
	str	r6, [sp, #25*4]
	add	r7, r7, r11

	mov	r12, r6, ror #17
	str	r7, [sp, #26*4]
	eor	r12, r12, r6, ror #19
	mov	r11, r7, ror #17
	eor	r12, r12, r6, lsr #10
	eor	r11, r11, r7, ror #19
	add	r8, r8, r12
	eor	r11, r11, r7, lsr #10
	str	r8, [sp, #27*4]
	add	r9, r9, r11

	mov	lr, r8, ror #17
	mov	r12, r9, ror #17
	str	r9, [sp, #28*4]
	add	r4, r4, #0x00400000
	eor	lr, lr, r8, ror #19
	eor	r12, r12, r9, ror #19
	eor	lr, lr, r8, lsr #10
	eor	r12, r12, r9, lsr #10
	add	r4, r4, #0x00000022
	add	r10, r10, lr
	add	r4, r4, r12
	ldr	r11, [sp, #16*4]

	add	r5, r5, #0x00000100
	str	r4, [sp, #30*4]
	mov	lr, r11, ror #7
	str	r10, [sp, #29*4]
	mov	r12, r10, ror #17
	eor	lr, lr, r11, ror #18
	eor	r12, r12, r10, ror #19
	eor	lr, lr, r11, lsr #3
	eor	r12, r12, r10, lsr #10
	add	r5, r5, lr
	ldr	lr, [r1, #17*4]
	add	r5, r5, r12			/* r5 = word 31, stored by the next step */

	b	sha256d_ms_extend_loop2

	/*
	 * Second pass only: store word 59, compute and store word 60, then
	 * stop extending (the shortened tail reads nothing past word 60).
	 * Load the eight initial hash words into a .. h and leave r2 pointing
	 * at that table for the finish code.
	 */
sha256d_ms_extend_coda2:
	str	r5, [r1, #(44+15)*4]
	mov	r12, r4, ror #17
	add	r11, r11, r6
	mov	r6, lr, ror #7
	eor	r12, r12, r4, ror #19
	eor	r6, r6, lr, ror #18
	eor	r12, r12, r4, lsr #10
	eor	r6, r6, lr, lsr #3
	add	r12, r12, r11
	add	r6, r6, r12
	str	r6, [r1, #(44+16)*4]

	adr	r2, sha256d_ms_h
	ldmia	r2, {r4-r11}
	b	sha256d_ms_main_loop2

	/* SHA-256 initial hash value, eight words. */
sha256d_ms_h:
	.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
	.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19

/*
 * sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh
 * Reduced round: computes only the new e value,
 *   rh = rh + rd + Ch(re, rf, rg) + W[i] + K[i] + Sigma1(re)
 * and does not produce the new a value.
 * Clobbers: r3, r12, lr.
 */
.macro sha256_main_round_red i, ka, rw, rd, re, rf, rg, rh
	ldr	r12, [\rw, #(\i)*4]
	and	r3, \rf, \re
	bic	lr, \rg, \re
	add	\rh, \rh, \rd
	orr	lr, lr, r3
	ldr	r3, \ka + (\i)*4
	add	\rh, \rh, lr
	eor	lr, \re, \re, ror #5
	add	\rh, \rh, r12
	eor	lr, lr, \re, ror #19
	add	\rh, \rh, r3
	add	\rh, \rh, lr, ror #6
.endm

	/*
	 * Second pass tail: rounds 57..60 in reduced form.  The e value after
	 * round 60 is the h value after round 63, so adding word 7 of the
	 * initial hash (r2 still points at sha256d_ms_h) gives output word 7,
	 * the only word this function writes.
	 */
sha256d_ms_finish:
	sha256_main_round_red 57, sha256d_ms_k, r1, r6, r11, r8, r9, r10
	sha256_main_round_red 58, sha256d_ms_k, r1, r5, r10, r11, r8, r9
	sha256_main_round_red 59, sha256d_ms_k, r1, r4, r9, r10, r11, r8
	ldr	r5, [r2, #7*4]
	sha256_main_round_red 60, sha256d_ms_k, r1, r7, r8, r9, r10, r11

	add	r11, r11, r5
	str	r11, [r0, #7*4]

	add	sp, sp, #64*4
#ifdef __thumb__
	/* Return with bx so a Thumb caller is resumed in Thumb state. */
	ldmfd	sp!, {r4-r11, lr}
	bx	lr
#else
	ldmfd	sp!, {r4-r11, pc}
#endif

#endif