Fix PowerPC assembly on AIX

Author: pooler
Date: 2017-06-21 21:24:48 +02:00
Parent: 46e919f93c
Commit: a9ccc86a82
2 changed files with 100 additions and 8 deletions
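On AIX the assembler and linker target XCOFF rather than ELF or Mach-O, which invalidates three assumptions in this PowerPC assembly: plain `.text`/`.data` section directives (XCOFF places code and read-only data in control sections such as `.csect .text[PR]` and `.csect .text[RO]`), absolute `lis`/`addi` addressing of globals (the AIX ABI reaches globals through the Table of Contents, anchored in r2), and undotted function labels (on AIX the plain name denotes a function descriptor, while the code entry point is the dot-prefixed symbol). The commit handles all three under `#ifdef _AIX`. Below is a schematic sketch of the addressing change only, using a hypothetical symbol `my_table`; `HI`/`LO` are the helper macros these files already define for the non-AIX targets:

#ifdef _AIX
	.toc
T.my_table:
	.tc my_table[TC], my_table    # pointer-sized TOC slot holding &my_table
	.csect .text[PR]
	ld r4, T.my_table(r2)         # r2 = TOC base; one load yields the address
#else
	.text
	lis r4, HI(my_table)          # high 16 bits of the absolute address
	addi r4, r4, LO(my_table)     # plus the low 16 bits
#endif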

scrypt-ppc.S

@@ -145,15 +145,21 @@
 	salsa8_core_doubleround
 .endm
 
+#ifdef _AIX
+	.csect .text[PR]
+#else
 	.text
+#endif
 	.align 2
 	.globl scrypt_core
 	.globl _scrypt_core
+	.globl .scrypt_core
 #ifdef __ELF__
 	.type scrypt_core, %function
 #endif
 scrypt_core:
 _scrypt_core:
+.scrypt_core:
 	stdu r1, -4*4(r1)
 	mfspr r0, 256
 	std r0, 2*4(r1)
@@ -488,15 +494,21 @@ scrypt_core_loop2:
 	salsa8_core_doubleround
 .endm
 
+#ifdef _AIX
+	.csect .text[PR]
+#else
 	.text
+#endif
 	.align 2
 	.globl scrypt_core
 	.globl _scrypt_core
+	.globl .scrypt_core
 #ifdef __ELF__
 	.type scrypt_core, %function
 #endif
 scrypt_core:
 _scrypt_core:
+.scrypt_core:
 	stdu r1, -68*4(r1)
 	stw r5, 2*4(r1)
 	std r13, 4*4(r1)

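The added `.globl .scrypt_core`/`.scrypt_core:` pair follows the AIX linkage convention: direct calls are compiled as branches to the dot symbol, while the undotted name is reserved for a function descriptor (entry address plus TOC pointer) that is only needed when the function's address is taken. The commit simply puts both labels on the code, which is sufficient for direct calls from C. For reference, a hand-written 64-bit XCOFF descriptor would look roughly like this sketch (not part of the patch):

	.globl scrypt_core
	.globl .scrypt_core
	.csect scrypt_core[DS]             # descriptor csect, named by the plain symbol
scrypt_core:
	.llong .scrypt_core, TOC[tc0], 0   # entry address, TOC anchor, environment
	.csect .text[PR]
.scrypt_core:                          # the actual code entry point
	...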
sha2-ppc.S

@@ -100,13 +100,16 @@
 #endif
 
+#ifdef _AIX
+	.csect .text[RO]
+#else
 	.data
+#endif
 	.align 2
 sha256_h:
 	.long 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a
 	.long 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
 
-	.data
 	.align 2
 sha256_k:
 	.long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -126,6 +129,14 @@ sha256_k:
 	.long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
 	.long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
 
+#ifdef _AIX
+	.toc
+T.sha256_h:
+	.tc sha256_h[TC], sha256_h
+T.sha256_k:
+	.tc sha256_k[TC], sha256_k
+#endif
+
 .macro sha256_extend_doubleround i, rw, wo, ra, rb, ry, rz
 	lwz r14, \wo+(\i+1)*4(\rw)
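Here `.toc` switches to the TOC csect, and each `.tc` directive defines one pointer-sized entry containing the address of the named symbol, labeled `T.sha256_h` and `T.sha256_k`. Since r2 holds the TOC base at run time, the position-dependent two-instruction sequence of the ELF path collapses to a single load wherever a table address is needed:

	ld r16, T.sha256_k(r2)    # r16 = &sha256_k, read from the TOC slot above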
@@ -200,15 +211,21 @@
 .endm
 
+#ifdef _AIX
+	.csect .text[PR]
+#else
 	.text
+#endif
 	.align 2
 	.globl sha256_transform
 	.globl _sha256_transform
+	.globl .sha256_transform
 #ifdef __ELF__
 	.type sha256_transform, %function
 #endif
 sha256_transform:
 _sha256_transform:
+.sha256_transform:
 	stdu r1, -76*4(r1)
 	cmpwi 0, r5, 0
 	std r13, 2*4(r1)
@@ -328,8 +345,12 @@ sha256_transform_extend:
 	lwz r9, 5*4(r3)
 	lwz r10, 6*4(r3)
 	lwz r11, 7*4(r3)
+#ifdef _AIX
+	ld r16, T.sha256_k(r2)
+#else
 	lis r16, HI(sha256_k)
 	addi r16, r16, LO(sha256_k)
+#endif
 	sha256_main_quadround 0, r16, r1, 8*4
 	sha256_main_quadround 4, r16, r1, 8*4
 	sha256_main_quadround 8, r16, r1, 8*4
@@ -380,15 +401,16 @@ sha256_transform_extend:
 	blr
 
-	.text
 	.align 2
 	.globl sha256d_ms
 	.globl _sha256d_ms
+	.globl .sha256d_ms
 #ifdef __ELF__
 	.type sha256d_ms, %function
 #endif
 sha256d_ms:
 _sha256d_ms:
+.sha256d_ms:
 	stdu r1, -80*4(r1)
 	std r13, 2*4(r1)
 	std r14, 4*4(r1)
@@ -549,8 +571,12 @@ _sha256d_ms:
 	lwz r5, 5*4(r16)
 	lwz r6, 6*4(r16)
 	lwz r7, 7*4(r16)
+#ifdef _AIX
+	ld r16, T.sha256_k(r2)
+#else
 	lis r16, HI(sha256_k)
 	addi r16, r16, LO(sha256_k)
+#endif
 
 	sha256_main_round 3, r16, r17, 0, r5, r6, r7, r4, r9, r10, r11, r8
 	sha256_main_quadround 4, r16, r17, 0
@@ -818,8 +844,12 @@ _sha256d_ms:
 	sha256_extend_doubleround 40, r1, 8*4, r9, r10, r7, r8
 	sha256_extend_doubleround 42, r1, 8*4, r4, r5, r9, r10
 
+#ifdef _AIX
+	ld r18, T.sha256_h(r2)
+#else
 	lis r18, HI(sha256_h)
 	addi r18, r18, LO(sha256_h)
+#endif
 
 	lwz r14, 8*4+(44+1)*4(r1)
 	rotrwi r12, r4, 17
@@ -907,7 +937,11 @@ _sha256d_ms:
 	.machine ppc7400
 #endif
 
+#ifdef _AIX
+	.csect .text[RO]
+#else
 	.data
+#endif
 	.align 4
 sha256_4h:
 	.long 0x6a09e667, 0x6a09e667, 0x6a09e667, 0x6a09e667
@@ -919,7 +953,6 @@ sha256_4h:
 	.long 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab, 0x1f83d9ab
 	.long 0x5be0cd19, 0x5be0cd19, 0x5be0cd19, 0x5be0cd19
 
-	.data
 	.align 4
 sha256_4k:
 	.long 0x428a2f98, 0x428a2f98, 0x428a2f98, 0x428a2f98
@@ -987,7 +1020,6 @@ sha256_4k:
 	.long 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7, 0xbef9a3f7
 	.long 0xc67178f2, 0xc67178f2, 0xc67178f2, 0xc67178f2
 
-	.data
 	.align 4
 sha256d_4preext2:
 	.long 0x00a00000, 0x00a00000, 0x00a00000, 0x00a00000
@@ -995,11 +1027,22 @@ sha256d_4preext2:
 	.long 0x80000000, 0x80000000, 0x80000000, 0x80000000
 	.long 0x00400022, 0x00400022, 0x00400022, 0x00400022
 
-	.data
 	.align 4
 br_perm:
 	.long 0x03020100, 0x07060504, 0x0b0a0908, 0x0f0e0d0c
 
+#ifdef _AIX
+	.toc
+T.sha256_4h:
+	.tc sha256_4h[TC], sha256_4h
+T.sha256_4k:
+	.tc sha256_4k[TC], sha256_4k
+T.sha256d_4preext2:
+	.tc sha256d_4preext2[TC], sha256d_4preext2
+T.br_perm:
+	.tc br_perm[TC], br_perm
+#endif
+
 .macro sha256_4way_extend_setup
 	vspltisw v0, 10
@@ -1093,21 +1136,31 @@ br_perm:
 .endm
 
+#ifdef _AIX
+	.csect .text[PR]
+#else
 	.text
+#endif
 	.align 2
 	.globl sha256_init_4way
 	.globl _sha256_init_4way
+	.globl .sha256_init_4way
 #ifdef __ELF__
 	.type sha256_init_4way, %function
 #endif
 sha256_init_4way:
 _sha256_init_4way:
+.sha256_init_4way:
 	mfspr r0, 256
 	oris r12, r0, 0xff00
 	mtspr 256, r12
+#ifdef _AIX
+	ld r4, T.sha256_4h(r2)
+#else
 	lis r4, HI(sha256_4h)
 	addi r4, r4, LO(sha256_4h)
+#endif
 	li r5, 1*16
 	li r6, 2*16
 	li r7, 3*16
@@ -1136,15 +1189,16 @@ _sha256_init_4way:
 	blr
 
-	.text
 	.align 2
 	.globl sha256_transform_4way
 	.globl _sha256_transform_4way
+	.globl .sha256_transform_4way
 #ifdef __ELF__
 	.type sha256_transform_4way, %function
 #endif
 sha256_transform_4way:
 _sha256_transform_4way:
+.sha256_transform_4way:
 	mfspr r0, 256
 	oris r12, r0, 0xffff
 	ori r12, r12, 0xf000
@@ -1204,8 +1258,12 @@ _sha256_transform_4way:
 	b sha256_transform_4way_extend
 
 sha256_transform_4way_swap:
+#ifdef _AIX
+	ld r5, T.br_perm(r2)
+#else
 	lis r5, HI(br_perm)
 	addi r5, r5, LO(br_perm)
+#endif
 	lvx v19, 0, r5
 	lvx v11, 0, r4
@@ -1298,8 +1356,12 @@ sha256_transform_4way_extend:
 	lvx v9, r11, r7
 	lvx v10, r11, r8
 	lvx v11, r11, r9
+#ifdef _AIX
+	ld r12, T.sha256_4k(r2)
+#else
 	lis r12, HI(sha256_4k)
 	addi r12, r12, LO(sha256_4k)
+#endif
 	addi r5, r1, 4*4
 	sha256_4way_main_setup
 	sha256_4way_main_quadround 0, r12, r5
@@ -1349,15 +1411,16 @@ sha256_transform_4way_extend:
 	blr
 
-	.text
 	.align 2
 	.globl sha256d_ms_4way
 	.globl _sha256d_ms_4way
+	.globl .sha256d_ms_4way
 #ifdef __ELF__
 	.type sha256d_ms_4way, %function
 #endif
 sha256d_ms_4way:
 _sha256d_ms_4way:
+.sha256d_ms_4way:
 	mfspr r0, 256
 	oris r12, r0, 0xffff
 	ori r12, r12, 0xf000
@@ -1533,8 +1596,12 @@ _sha256d_ms_4way:
 	lvx v5, r12, r7
 	lvx v6, r12, r8
 	lvx v7, r12, r9
+#ifdef _AIX
+	ld r12, T.sha256_4k(r2)
+#else
 	lis r12, HI(sha256_4k)
 	addi r12, r12, LO(sha256_4k)
+#endif
 	sha256_4way_main_setup
 	sha256_4way_main_round 3, r12, r4, v5, v6, v7, v4, v9, v10, v11, v8
 	sha256_4way_main_quadround 4, r12, r4
@@ -1607,8 +1674,12 @@ _sha256d_ms_4way:
 	sha256_4way_extend_setup
 
+#ifdef _AIX
+	ld r12, T.sha256d_4preext2(r2)
+#else
 	lis r12, HI(sha256d_4preext2)
 	addi r12, r12, LO(sha256d_4preext2)
+#endif
 	lvx v2, 0, r12
 	vxor v9, v9, v9
@@ -1846,8 +1917,12 @@ _sha256d_ms_4way:
 	stvx v6, r4, r10
 	addi r4, r4, -44*16
 
+#ifdef _AIX
+	ld r5, T.sha256_4h(r2)
+#else
 	lis r5, HI(sha256_4h)
 	addi r5, r5, LO(sha256_4h)
+#endif
 	lvx v4, 0, r5
 	lvx v5, r5, r7
 	lvx v6, r5, r8
@@ -1857,8 +1932,12 @@ _sha256d_ms_4way:
 	lvx v9, r12, r7
 	lvx v10, r12, r8
 	lvx v11, r12, r9
+#ifdef _AIX
+	ld r12, T.sha256_4k(r2)
+#else
 	lis r12, HI(sha256_4k)
 	addi r12, r12, LO(sha256_4k)
+#endif
 	sha256_4way_main_setup
 	sha256_4way_main_quadround 0, r12, r4
 	sha256_4way_main_quadround 4, r12, r4
@@ -1910,15 +1989,16 @@ _sha256d_ms_4way:
 	blr
 
-	.text
 	.align 2
 	.globl sha256_use_4way
 	.globl _sha256_use_4way
+	.globl .sha256_use_4way
 #ifdef __ELF__
 	.type sha256_use_4way, %function
 #endif
 sha256_use_4way:
 _sha256_use_4way:
+.sha256_use_4way:
 	li r3, 1
 	blr
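Finally, the deleted standalone `.text` and `.data` directives between functions and constant tables were redundant on ELF targets (the assembler was already in those sections) but would be harmful on AIX, where they would switch assembly out of the csect chosen under `#ifdef _AIX`. Removing them lets a single conditional section directive govern each region, schematically:

#ifdef _AIX
	.csect .text[RO]    # chosen once for the whole run of constant tables
#else
	.data
#endif
	.align 4
sha256_4h:
	.long ...           # first table
	.align 4            # no repeated ".data" here: stay in the chosen section
sha256_4k:
	.long ...           # next table, same section on every target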