Rearrange order of operations in scrypt salsa20
Achieves 3.73 kH/s per core on a 3.6 GHz Phenom II when compiled with gcc 4.6.1 and CFLAGS="-march=amdfam10 -O3".
This commit is contained in:
parent
a8a1f3f8d4
commit
0941296571
1 changed file with 8 additions and 8 deletions
16
scrypt.c
16
scrypt.c
|
@@ -328,16 +328,16 @@ salsa20_8(uint32_t B[16], const uint32_t Bx[16])
|
||||||
for (i = 0; i < 8; i += 2) {
|
for (i = 0; i < 8; i += 2) {
|
||||||
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
|
#define R(a,b) (((a) << (b)) | ((a) >> (32 - (b))))
|
||||||
/* Operate on columns. */
|
/* Operate on columns. */
|
||||||
x04 ^= R(x00+x12, 7); x08 ^= R(x04+x00, 9); x12 ^= R(x08+x04,13); x00 ^= R(x12+x08,18);
|
x04 ^= R(x00+x12, 7); x09 ^= R(x05+x01, 7); x14 ^= R(x10+x06, 7); x03 ^= R(x15+x11, 7);
|
||||||
x09 ^= R(x05+x01, 7); x13 ^= R(x09+x05, 9); x01 ^= R(x13+x09,13); x05 ^= R(x01+x13,18);
|
x08 ^= R(x04+x00, 9); x13 ^= R(x09+x05, 9); x02 ^= R(x14+x10, 9); x07 ^= R(x03+x15, 9);
|
||||||
x14 ^= R(x10+x06, 7); x02 ^= R(x14+x10, 9); x06 ^= R(x02+x14,13); x10 ^= R(x06+x02,18);
|
x12 ^= R(x08+x04,13); x01 ^= R(x13+x09,13); x06 ^= R(x02+x14,13); x11 ^= R(x07+x03,13);
|
||||||
x03 ^= R(x15+x11, 7); x07 ^= R(x03+x15, 9); x11 ^= R(x07+x03,13); x15 ^= R(x11+x07,18);
|
x00 ^= R(x12+x08,18); x05 ^= R(x01+x13,18); x10 ^= R(x06+x02,18); x15 ^= R(x11+x07,18);
|
||||||
|
|
||||||
/* Operate on rows. */
|
/* Operate on rows. */
|
||||||
x01 ^= R(x00+x03, 7); x02 ^= R(x01+x00, 9); x03 ^= R(x02+x01,13); x00 ^= R(x03+x02,18);
|
x01 ^= R(x00+x03, 7); x06 ^= R(x05+x04, 7); x11 ^= R(x10+x09, 7); x12 ^= R(x15+x14, 7);
|
||||||
x06 ^= R(x05+x04, 7); x07 ^= R(x06+x05, 9); x04 ^= R(x07+x06,13); x05 ^= R(x04+x07,18);
|
x02 ^= R(x01+x00, 9); x07 ^= R(x06+x05, 9); x08 ^= R(x11+x10, 9); x13 ^= R(x12+x15, 9);
|
||||||
x11 ^= R(x10+x09, 7); x08 ^= R(x11+x10, 9); x09 ^= R(x08+x11,13); x10 ^= R(x09+x08,18);
|
x03 ^= R(x02+x01,13); x04 ^= R(x07+x06,13); x09 ^= R(x08+x11,13); x14 ^= R(x13+x12,13);
|
||||||
x12 ^= R(x15+x14, 7); x13 ^= R(x12+x15, 9); x14 ^= R(x13+x12,13); x15 ^= R(x14+x13,18);
|
x00 ^= R(x03+x02,18); x05 ^= R(x04+x07,18); x10 ^= R(x09+x08,18); x15 ^= R(x14+x13,18);
|
||||||
#undef R
|
#undef R
|
||||||
}
|
}
|
||||||
B[ 0] += x00;
|
B[ 0] += x00;
|
||||||
|
|
Loading…
Reference in a new issue