From a4bb8de94a5d7c12db1015043c67119549e7e467 Mon Sep 17 00:00:00 2001 From: lbrynaut Date: Mon, 1 Jul 2019 08:18:01 -0500 Subject: [PATCH] Add support for lbry mining algorithm. --- Makefile.am | 8 +- cpu-miner.c | 150 ++-- lbry.c | 128 +++ miner.h | 8 + sha3/md_helper.c | 348 ++++++++ sha3/sph_ripemd.h | 281 +++++++ sha3/sph_sha2.h | 378 +++++++++ sha3/sph_types.h | 1976 +++++++++++++++++++++++++++++++++++++++++++++ sph_ripemd.c | 841 +++++++++++++++++++ sph_sha2.c | 698 ++++++++++++++++ sph_sha2big.c | 255 ++++++ 11 files changed, 4966 insertions(+), 105 deletions(-) create mode 100644 lbry.c create mode 100644 sha3/md_helper.c create mode 100644 sha3/sph_ripemd.h create mode 100644 sha3/sph_sha2.h create mode 100644 sha3/sph_types.h create mode 100644 sph_ripemd.c create mode 100644 sph_sha2.c create mode 100644 sph_sha2big.c diff --git a/Makefile.am b/Makefile.am index f6d194d..e4e4123 100644 --- a/Makefile.am +++ b/Makefile.am @@ -5,6 +5,8 @@ else JANSSON_INCLUDES= endif +SHA3_INCLUDES= -I$(top_srcdir)/sha3 + EXTRA_DIST = example-cfg.json nomacro.pl SUBDIRS = compat @@ -15,7 +17,8 @@ dist_man_MANS = minerd.1 minerd_SOURCES = elist.h miner.h compat.h \ cpu-miner.c util.c \ - sha2.c scrypt.c + sha2.c scrypt.c \ + sph_sha2.c sph_sha2big.c sph_ripemd.c lbry.c if USE_ASM if ARCH_x86 minerd_SOURCES += sha2-x86.S scrypt-x86.S @@ -33,5 +36,4 @@ endif minerd_LDFLAGS = $(PTHREAD_FLAGS) minerd_LDADD = @LIBCURL@ @JANSSON_LIBS@ @PTHREAD_LIBS@ @WS2_LIBS@ minerd_CFLAGS = -fno-strict-aliasing -minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(JANSSON_INCLUDES) $(PTHREAD_FLAGS) - +minerd_CPPFLAGS = @LIBCURL_CPPFLAGS@ $(JANSSON_INCLUDES) $(SHA3_INCLUDES) $(PTHREAD_FLAGS) diff --git a/cpu-miner.c b/cpu-miner.c index ef2fc76..d91271e 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -101,13 +101,11 @@ struct workio_cmd { }; enum algos { - ALGO_SCRYPT, /* scrypt(1024,1,1) */ - ALGO_SHA256D, /* SHA-256d */ + ALGO_LBRY, /* lbry */ }; static const char *algo_names[] = { - [ALGO_SCRYPT] = "scrypt", - [ALGO_SHA256D] = "sha256d", + [ALGO_LBRY] = "lbry", }; bool opt_debug = false; @@ -117,8 +115,8 @@ bool opt_redirect = true; bool want_longpoll = true; bool have_longpoll = false; bool have_gbt = true; -bool allow_getwork = true; -bool want_stratum = true; +bool allow_getwork = false; +bool want_stratum = false; bool have_stratum = false; bool use_syslog = false; static bool opt_background = false; @@ -127,7 +125,7 @@ static int opt_retries = -1; static int opt_fail_pause = 30; int opt_timeout = 0; static int opt_scantime = 5; -static enum algos opt_algo = ALGO_SCRYPT; +static enum algos opt_algo = ALGO_LBRY; static int opt_scrypt_n = 1024; static int opt_n_threads; static int num_processors; @@ -169,6 +167,7 @@ static char const usage[] = "\ Usage: " PROGRAM_NAME " [OPTIONS]\n\ Options:\n\ -a, --algo=ALGO specify the algorithm to use\n\ + lbry (default)\n\ scrypt scrypt(1024, 1, 1) (default)\n\ scrypt:N scrypt(N, 1, 1)\n\ sha256d SHA-256d\n\ @@ -188,9 +187,7 @@ Options:\n\ --coinbase-addr=ADDR payout address for solo mining\n\ --coinbase-sig=TEXT data to insert in the coinbase when possible\n\ --no-longpoll disable long polling support\n\ - --no-getwork disable getwork support\n\ --no-gbt disable getblocktemplate support\n\ - --no-stratum disable X-Stratum support\n\ --no-redirect ignore requests to change the URL of the mining server\n\ -q, --quiet disable per-thread hashmeter output\n\ -D, --debug enable debug output\n\ @@ -232,10 +229,8 @@ static struct option const options[] = { { "debug", 0, NULL, 'D' }, { "help", 0, NULL, 'h' }, { "no-gbt", 0, NULL, 1011 }, - { "no-getwork", 0, NULL, 1010 }, { "no-longpoll", 0, NULL, 1003 }, { "no-redirect", 0, NULL, 1009 }, - { "no-stratum", 0, NULL, 1007 }, { "pass", 1, NULL, 'p' }, { "protocol-dump", 0, NULL, 'P' }, { "proxy", 1, NULL, 'x' }, @@ -256,7 +251,7 @@ static struct option const options[] = { }; struct work { - uint32_t data[32]; + uint32_t data[40]; uint32_t target[8]; int height; @@ -319,36 +314,13 @@ static bool jobj_binary(const json_t *obj, const char *key, return true; } -static bool work_decode(const json_t *val, struct work *work) -{ - int i; - - if (unlikely(!jobj_binary(val, "data", work->data, sizeof(work->data)))) { - applog(LOG_ERR, "JSON invalid data"); - goto err_out; - } - if (unlikely(!jobj_binary(val, "target", work->target, sizeof(work->target)))) { - applog(LOG_ERR, "JSON invalid target"); - goto err_out; - } - - for (i = 0; i < ARRAY_SIZE(work->data); i++) - work->data[i] = le32dec(work->data + i); - for (i = 0; i < ARRAY_SIZE(work->target); i++) - work->target[i] = le32dec(work->target + i); - - return true; - -err_out: - return false; -} - static bool gbt_work_decode(const json_t *val, struct work *work) { int i, n; uint32_t version, curtime, bits; uint32_t prevhash[8]; uint32_t target[8]; + uint32_t claimtrie[8]; int cbtx_size; unsigned char *cbtx = NULL; unsigned char *tx = NULL; @@ -401,6 +373,11 @@ static bool gbt_work_decode(const json_t *val, struct work *work) } version = json_integer_value(tmp); + if (unlikely(!jobj_binary(val, "claimtrie", claimtrie, sizeof(claimtrie)))) { + applog(LOG_ERR, "JSON invalid claimtrie"); + goto out; + } + if (unlikely(!jobj_binary(val, "previousblockhash", prevhash, sizeof(prevhash)))) { applog(LOG_ERR, "JSON invalid previousblockhash"); goto out; @@ -438,6 +415,7 @@ static bool gbt_work_decode(const json_t *val, struct work *work) /* build coinbase transaction */ tmp = json_object_get(val, "coinbasetxn"); + if (tmp) { const char *cbtx_hex = json_string_value(json_object_get(tmp, "data")); cbtx_size = cbtx_hex ? strlen(cbtx_hex) / 2 : 0; @@ -628,18 +606,20 @@ static bool gbt_work_decode(const json_t *val, struct work *work) } /* assemble block header */ + memset(work->data, 0x00, sizeof(work->data)); work->data[0] = swab32(version); for (i = 0; i < 8; i++) work->data[8 - i] = le32dec(prevhash + i); for (i = 0; i < 8; i++) work->data[9 + i] = be32dec((uint32_t *)merkle_tree[0] + i); - work->data[17] = swab32(curtime); - work->data[18] = le32dec(&bits); - memset(work->data + 19, 0x00, 52); - work->data[20] = 0x80000000; - work->data[31] = 0x00000280; + for (i = 0; i < 8; i++) + work->data[24 - i] = le32dec(claimtrie + i); + work->data[25] = swab32(curtime); + work->data[26] = le32dec(&bits); + work->data[28] = 0x80000000; + work->data[39] = 0x00000280; - if (unlikely(!jobj_binary(val, "target", target, sizeof(target)))) { + if (unlikely(!jobj_binary(val, "target", target, sizeof(target)))) { applog(LOG_ERR, "JSON invalid target"); goto out; } @@ -744,20 +724,20 @@ static bool submit_upstream_work(CURL *curl, struct work *work) for (i = 0; i < ARRAY_SIZE(work->data); i++) be32enc(work->data + i, work->data[i]); - bin2hex(data_str, (unsigned char *)work->data, 80); + bin2hex(data_str, (unsigned char *)work->data, 112); if (work->workid) { char *params; val = json_object(); json_object_set_new(val, "workid", json_string(work->workid)); params = json_dumps(val, 0); json_decref(val); - req = malloc(128 + 2*80 + strlen(work->txs) + strlen(params)); + req = malloc(160 + 2*112 + strlen(work->txs) + strlen(params)); sprintf(req, "{\"method\": \"submitblock\", \"params\": [\"%s%s\", %s], \"id\":1}\r\n", data_str, work->txs, params); free(params); } else { - req = malloc(128 + 2*80 + strlen(work->txs)); + req = malloc(160 + 2*112 + strlen(work->txs)); sprintf(req, "{\"method\": \"submitblock\", \"params\": [\"%s%s\"], \"id\":1}\r\n", data_str, work->txs); @@ -823,14 +803,14 @@ static const char *getwork_req = "{\"method\": \"getwork\", \"params\": [], \"id\":0}\r\n"; #define GBT_CAPABILITIES "[\"coinbasetxn\", \"coinbasevalue\", \"longpoll\", \"workid\"]" -#define GBT_RULES "[\"segwit\"]" static const char *gbt_req = "{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": " - GBT_CAPABILITIES ", \"rules\": " GBT_RULES "}], \"id\":0}\r\n"; + GBT_CAPABILITIES "}], \"id\":0}\r\n"; + static const char *gbt_lp_req = "{\"method\": \"getblocktemplate\", \"params\": [{\"capabilities\": " - GBT_CAPABILITIES ", \"rules\": " GBT_RULES ", \"longpollid\": \"%s\"}], \"id\":0}\r\n"; + GBT_CAPABILITIES "}], \"id\":0}\r\n"; static bool get_upstream_work(CURL *curl, struct work *work) { @@ -868,14 +848,11 @@ start: if (!val) return false; - if (have_gbt) { - rc = gbt_work_decode(json_object_get(val, "result"), work); - if (!have_gbt) { - json_decref(val); - goto start; - } - } else - rc = work_decode(json_object_get(val, "result"), work); + rc = gbt_work_decode(json_object_get(val, "result"), work); + if (!rc) { + json_decref(val); + goto start; + } if (opt_debug && rc) { timeval_subtract(&diff, &tv_end, &tv_start); @@ -1114,10 +1091,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) free(xnonce2str); } - if (opt_algo == ALGO_SCRYPT) - diff_to_target(work->target, sctx->job.diff / 65536.0); - else - diff_to_target(work->target, sctx->job.diff); + diff_to_target(work->target, sctx->job.diff); } static void *miner_thread(void *userdata) @@ -1148,15 +1122,6 @@ static void *miner_thread(void *userdata) affine_to_cpu(thr_id, thr_id % num_processors); } - if (opt_algo == ALGO_SCRYPT) { - scratchbuf = scrypt_buffer_alloc(opt_scrypt_n); - if (!scratchbuf) { - applog(LOG_ERR, "scrypt buffer allocation failed"); - pthread_mutex_lock(&applog_lock); - exit(1); - } - } - while (1) { unsigned long hashes_done; struct timeval tv_start, tv_end, diff; @@ -1175,7 +1140,7 @@ static void *miner_thread(void *userdata) pthread_mutex_lock(&g_work_lock); if (!have_stratum && (time(NULL) - g_work_time >= min_scantime || - work.data[19] >= end_nonce)) { + work.data[27] >= end_nonce)) { work_free(&g_work); if (unlikely(!get_work(mythr, &g_work))) { applog(LOG_ERR, "work retrieval failed, exiting " @@ -1190,15 +1155,16 @@ static void *miner_thread(void *userdata) continue; } } - if (memcmp(work.data, g_work.data, 76)) { + /* if (memcmp(work.data, g_work.data, 76)) { */ + if (memcmp(work.data, g_work.data, 108)) { work_free(&work); work_copy(&work, &g_work); - work.data[19] = 0xffffffffU / opt_n_threads * thr_id; + work.data[27] = 0xffffffffU / opt_n_threads * thr_id; } else - work.data[19]++; + work.data[27]++; pthread_mutex_unlock(&g_work_lock); work_restart[thr_id].restart = 0; - + /* adjust max_nonce to meet target scan time */ if (have_stratum) max64 = LP_SCANTIME; @@ -1208,11 +1174,8 @@ static void *miner_thread(void *userdata) max64 *= thr_hashrates[thr_id]; if (max64 <= 0) { switch (opt_algo) { - case ALGO_SCRYPT: - max64 = opt_scrypt_n < 16 ? 0x3ffff : 0x3fffff / opt_scrypt_n; - break; - case ALGO_SHA256D: - max64 = 0x1fffff; + case ALGO_LBRY: + max64 = 0x1fffff; break; } } @@ -1226,17 +1189,12 @@ static void *miner_thread(void *userdata) /* scan nonces for a proof-of-work hash */ switch (opt_algo) { - case ALGO_SCRYPT: - rc = scanhash_scrypt(thr_id, work.data, scratchbuf, work.target, - max_nonce, &hashes_done, opt_scrypt_n); + case ALGO_LBRY: + rc = scanhash_lbry(thr_id, work.data, work.target, + max_nonce, &hashes_done); break; - case ALGO_SHA256D: - rc = scanhash_sha256d(thr_id, work.data, work.target, - max_nonce, &hashes_done); - break; - - default: + default: /* should never happen */ goto out; } @@ -1344,16 +1302,13 @@ start: } if (likely(val)) { bool rc; - applog(LOG_INFO, "LONGPOLL pushed new work"); + //applog(LOG_INFO, "LONGPOLL pushed new work"); res = json_object_get(val, "result"); soval = json_object_get(res, "submitold"); submit_old = soval ? json_is_true(soval) : false; pthread_mutex_lock(&g_work_lock); work_free(&g_work); - if (have_gbt) - rc = gbt_work_decode(res, &g_work); - else - rc = work_decode(res, &g_work); + rc = gbt_work_decode(res, &g_work); if (rc) { time(&g_work_time); restart_threads(); @@ -1566,15 +1521,6 @@ static void parse_arg(int key, char *arg, char *pname) opt_algo = i; break; } - if (arg[v] == ':' && i == ALGO_SCRYPT) { - char *ep; - v = strtol(arg+v+1, &ep, 10); - if (*ep || v & (v-1) || v < 2) - continue; - opt_algo = i; - opt_scrypt_n = v; - break; - } } } if (i == ARRAY_SIZE(algo_names)) { diff --git a/lbry.c b/lbry.c new file mode 100644 index 0000000..aad3655 --- /dev/null +++ b/lbry.c @@ -0,0 +1,128 @@ +#include "miner.h" + +#include +#include +#include +#include + +#include "sha3/sph_sha2.h" +#include "sha3/sph_ripemd.h" + +//#define DEBUG_ALGO + +/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ +typedef struct { + sph_sha256_context sha256; + sph_sha512_context sha512; + sph_ripemd160_context ripemd; +} lbryhash_context_holder; + +/* no need to copy, because close reinit the context */ +static THREADLOCAL lbryhash_context_holder ctx; + +void init_lbry_contexts(void *dummy) +{ + sph_sha256_init(&ctx.sha256); + sph_sha512_init(&ctx.sha512); + sph_ripemd160_init(&ctx.ripemd); +} + +void lbryhash(void* output, const void* input) +{ + uint32_t hashA[16], hashB[16], hashC[16]; + + memset(hashA, 0, 16 * sizeof(uint32_t)); + memset(hashB, 0, 16 * sizeof(uint32_t)); + memset(hashC, 0, 16 * sizeof(uint32_t)); + + sph_sha256 (&ctx.sha256, input, 112); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha512 (&ctx.sha512, hashA, 32); + sph_sha512_close(&ctx.sha512, hashA); + + sph_ripemd160 (&ctx.ripemd, hashA, 32); + sph_ripemd160_close(&ctx.ripemd, hashB); + + sph_ripemd160 (&ctx.ripemd, hashA+8, 32); + sph_ripemd160_close(&ctx.ripemd, hashC); + + sph_sha256 (&ctx.sha256, hashB, 20); + sph_sha256 (&ctx.sha256, hashC, 20); + sph_sha256_close(&ctx.sha256, hashA); + + sph_sha256 (&ctx.sha256, hashA, 32); + sph_sha256_close(&ctx.sha256, hashA); + + memcpy(output, hashA, 32); +} + +int scanhash_lbry(int thr_id, uint32_t *pdata, const uint32_t *ptarget, + uint32_t max_nonce, uint64_t *hashes_done) +{ + uint32_t n = pdata[27] - 1; + const uint32_t first_nonce = pdata[27]; + const uint32_t Htarg = ptarget[7]; + + uint32_t hash64[8] __attribute__((aligned(32))); + uint32_t endiandata[32]; + + uint64_t htmax[] = { + 0, + 0xF, + 0xFF, + 0xFFF, + 0xFFFF, + 0x10000000 + }; + uint32_t masks[] = { + 0xFFFFFFFF, + 0xFFFFFFF0, + 0xFFFFFF00, + 0xFFFFF000, + 0xFFFF0000, + 0 + }; + + // we need bigendian data... + for (int kk=0; kk < 32; kk++) { + be32enc(&endiandata[kk], ((uint32_t*)pdata)[kk]); + }; +#ifdef DEBUG_ALGO + printf("[%d] Htarg=%X\n", thr_id, Htarg); +#endif + for (int m=0; m < sizeof(masks); m++) { + if (Htarg <= htmax[m]) { + uint32_t mask = masks[m]; + do { + pdata[27] = ++n; + be32enc(&endiandata[27], n); + lbryhash(hash64, &endiandata); +#ifndef DEBUG_ALGO + if ((!(hash64[7] & mask)) && fulltest(hash64, ptarget)) { + *hashes_done = n - first_nonce + 1; + return true; + } +#else + if (!(n % 0x1000) && !thr_id) printf("."); + if (!(hash64[7] & mask)) { + printf("[%d]",thr_id); + if (fulltest(hash64, ptarget)) { + *hashes_done = n - first_nonce + 1; + return true; + } + } +#endif + } while (n < max_nonce && !work_restart[thr_id].restart); + // see blake.c if else to understand the loop on htmax => mask + break; + } + } + + *hashes_done = n - first_nonce + 1; + pdata[27] = n; + return 0; +} diff --git a/miner.h b/miner.h index ba9163a..c436a8d 100644 --- a/miner.h +++ b/miner.h @@ -58,6 +58,14 @@ enum { #define likely(expr) (expr) #endif +#if defined(_MSC_VER) +#define THREADLOCAL __declspec(thread) +#elif defined(__GNUC__) +#define THREADLOCAL __thread +#else +#define THREADLOCAL +#endif + #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) #endif diff --git a/sha3/md_helper.c b/sha3/md_helper.c new file mode 100644 index 0000000..5f235f0 --- /dev/null +++ b/sha3/md_helper.c @@ -0,0 +1,348 @@ +/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * This file contains some functions which implement the external data + * handling and padding for Merkle-Damgard hash functions which follow + * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian). + * + * API: this file is meant to be included, not compiled as a stand-alone + * file. Some macros must be defined: + * RFUN name for the round function + * HASH "short name" for the hash function + * BE32 defined for big-endian, 32-bit based (e.g. SHA-1) + * LE32 defined for little-endian, 32-bit based (e.g. MD5) + * BE64 defined for big-endian, 64-bit based (e.g. SHA-512) + * LE64 defined for little-endian, 64-bit based (no example yet) + * PW01 if defined, append 0x01 instead of 0x80 (for Tiger) + * BLEN if defined, length of a message block (in bytes) + * PLW1 if defined, length is defined on one 64-bit word only (for Tiger) + * PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL) + * SVAL if defined, reference to the context state information + * + * BLEN is used when a message block is not 16 (32-bit or 64-bit) words: + * this is used for instance for Tiger, which works on 64-bit words but + * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are + * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is + * set, then only one word (64 bits) will be used to encode the input + * message length (in bits), otherwise two words will be used (as in + * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but + * not PLW1), four 64-bit words will be used to encode the message length + * (in bits). Note that regardless of those settings, only 64-bit message + * lengths are supported (in bits): messages longer than 2 Exabytes will be + * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about + * 2 millions Terabytes, which is huge). + * + * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close() + * function. This is used for Tiger2, which is identical to Tiger except + * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead + * of the 0x01 from original Tiger). + * + * The RFUN function is invoked with two arguments, the first pointing to + * aligned data (as a "const void *"), the second being state information + * from the context structure. By default, this state information is the + * "val" field from the context, and this field is assumed to be an array + * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64). + * from the context structure. The "val" field can have any type, except + * for the output encoding which assumes that it is an array of "sph_u32" + * values. By defining NO_OUTPUT, this last step is deactivated; the + * includer code is then responsible for writing out the hash result. When + * NO_OUTPUT is defined, the third parameter to the "close()" function is + * ignored. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4146) +#endif + +#undef SPH_XCAT +#define SPH_XCAT(a, b) SPH_XCAT_(a, b) +#undef SPH_XCAT_ +#define SPH_XCAT_(a, b) a ## b + +#undef SPH_BLEN +#undef SPH_WLEN +#if defined BE64 || defined LE64 +#define SPH_BLEN 128U +#define SPH_WLEN 8U +#else +#define SPH_BLEN 64U +#define SPH_WLEN 4U +#endif + +#ifdef BLEN +#undef SPH_BLEN +#define SPH_BLEN BLEN +#endif + +#undef SPH_MAXPAD +#if defined PLW1 +#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN) +#elif defined PLW4 +#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2)) +#else +#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1)) +#endif + +#undef SPH_VAL +#undef SPH_NO_OUTPUT +#ifdef SVAL +#define SPH_VAL SVAL +#define SPH_NO_OUTPUT 1 +#else +#define SPH_VAL sc->val +#endif + +#ifndef CLOSE_ONLY + +#ifdef SPH_UPTR +static void +SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len) +#else +void +SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) +#endif +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current; + + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif + while (len > 0) { + unsigned clen; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + + clen = SPH_BLEN - current; + if (clen > len) + clen = len; + memcpy(sc->buf + current, data, clen); + data = (const unsigned char *)data + clen; + current += clen; + len -= clen; + if (current == SPH_BLEN) { + RFUN(sc->buf, SPH_VAL); + current = 0; + } +#if SPH_64 + sc->count += clen; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + clen); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; +#endif + } +} + +#ifdef SPH_UPTR +void +SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len) +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current; + size_t orig_len; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + + if (len < (2 * SPH_BLEN)) { + SPH_XCAT(HASH, _short)(cc, data, len); + return; + } + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif + if (current > 0) { + unsigned t; + + t = SPH_BLEN - current; + SPH_XCAT(HASH, _short)(cc, data, t); + data = (const unsigned char *)data + t; + len -= t; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) { + SPH_XCAT(HASH, _short)(cc, data, len); + return; + } +#endif + orig_len = len; + while (len >= SPH_BLEN) { + RFUN(data, SPH_VAL); + len -= SPH_BLEN; + data = (const unsigned char *)data + SPH_BLEN; + } + if (len > 0) + memcpy(sc->buf, data, len); +#if SPH_64 + sc->count += (sph_u64)orig_len; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + orig_len); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; + /* + * This code handles the improbable situation where "size_t" is + * greater than 32 bits, and yet we do not have a 64-bit type. + */ + orig_len >>= 12; + orig_len >>= 10; + orig_len >>= 10; + sc->count_high += orig_len; +#endif +} +#endif + +#endif + +/* + * Perform padding and produce result. The context is NOT reinitialized + * by this function. + */ +static void +SPH_XCAT(HASH, _addbits_and_close)(void *cc, + unsigned ub, unsigned n, void *dst, unsigned rnum) +{ + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc; + unsigned current, u; +#if !SPH_64 + sph_u32 low, high; +#endif + + sc = cc; +#if SPH_64 + current = (unsigned)sc->count & (SPH_BLEN - 1U); +#else + current = (unsigned)sc->count_low & (SPH_BLEN - 1U); +#endif +#ifdef PW01 + sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n); +#else + { + unsigned z; + + z = 0x80 >> n; + sc->buf[current ++] = ((ub & -z) | z) & 0xFF; + } +#endif + if (current > SPH_MAXPAD) { + memset(sc->buf + current, 0, SPH_BLEN - current); + RFUN(sc->buf, SPH_VAL); + memset(sc->buf, 0, SPH_MAXPAD); + } else { + memset(sc->buf + current, 0, SPH_MAXPAD - current); + } +#if defined BE64 +#if defined PLW1 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#elif defined PLW4 + memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, + sc->count >> 61); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN, + SPH_T64(sc->count << 3) + (sph_u64)n); +#else + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61); + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, + SPH_T64(sc->count << 3) + (sph_u64)n); +#endif +#elif defined LE64 +#if defined PLW1 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#elif defined PLW1 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); + memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN); +#else + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61); +#endif +#else +#if SPH_64 +#ifdef BE32 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#else + sph_enc64le_aligned(sc->buf + SPH_MAXPAD, + SPH_T64(sc->count << 3) + (sph_u64)n); +#endif +#else + low = sc->count_low; + high = SPH_T32((sc->count_high << 3) | (low >> 29)); + low = SPH_T32(low << 3) + (sph_u32)n; +#ifdef BE32 + sph_enc32be(sc->buf + SPH_MAXPAD, high); + sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low); +#else + sph_enc32le(sc->buf + SPH_MAXPAD, low); + sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high); +#endif +#endif +#endif + RFUN(sc->buf, SPH_VAL); +#ifdef SPH_NO_OUTPUT + (void)dst; + (void)rnum; + (void)u; +#else + for (u = 0; u < rnum; u ++) { +#if defined BE64 + sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]); +#elif defined LE64 + sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]); +#elif defined BE32 + sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]); +#else + sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]); +#endif + } +#endif +} + +static void +SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum) +{ + SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum); +} diff --git a/sha3/sph_ripemd.h b/sha3/sph_ripemd.h new file mode 100644 index 0000000..8fbf3ab --- /dev/null +++ b/sha3/sph_ripemd.h @@ -0,0 +1,281 @@ +/* $Id: sph_ripemd.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * RIPEMD, RIPEMD-128 and RIPEMD-160 interface. + * + * RIPEMD was first described in: Research and Development in Advanced + * Communication Technologies in Europe, "RIPE Integrity Primitives: + * Final Report of RACE Integrity Primitives Evaluation (R1040)", RACE, + * June 1992. + * + * A new, strengthened version, dubbed RIPEMD-160, was published in: H. + * Dobbertin, A. Bosselaers, and B. Preneel, "RIPEMD-160, a strengthened + * version of RIPEMD", Fast Software Encryption - FSE'96, LNCS 1039, + * Springer (1996), pp. 71--82. + * + * This article describes both RIPEMD-160, with a 160-bit output, and a + * reduced version called RIPEMD-128, which has a 128-bit output. RIPEMD-128 + * was meant as a "drop-in" replacement for any hash function with 128-bit + * output, especially the original RIPEMD. + * + * @warning Collisions, and an efficient method to build other collisions, + * have been published for the original RIPEMD, which is thus considered as + * cryptographically broken. It is also very rarely encountered, and there + * seems to exist no free description or implementation of RIPEMD (except + * the sphlib code, of course). As of january 2007, RIPEMD-128 and RIPEMD-160 + * seem as secure as their output length allows. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_ripemd.h + * @author Thomas Pornin + */ + +#ifndef SPH_RIPEMD_H__ +#define SPH_RIPEMD_H__ + +#include +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/** + * Output size (in bits) for RIPEMD. + */ +#define SPH_SIZE_ripemd 128 + +/** + * Output size (in bits) for RIPEMD-128. + */ +#define SPH_SIZE_ripemd128 128 + +/** + * Output size (in bits) for RIPEMD-160. + */ +#define SPH_SIZE_ripemd160 160 + +/** + * This structure is a context for RIPEMD computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd_context; + +/** + * Initialize a RIPEMD context. This process performs no memory allocation. + * + * @param cc the RIPEMD context (pointer to + * a sph_ripemd_context) + */ +void sph_ripemd_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD context + * @param dst the destination buffer + */ +void sph_ripemd_close(void *cc, void *dst); + +/** + * Apply the RIPEMD compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-128 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-128 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-128 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[4]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd128_context; + +/** + * Initialize a RIPEMD-128 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-128 context (pointer to + * a sph_ripemd128_context) + */ +void sph_ripemd128_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD-128 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd128(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-128 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (16 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-128 context + * @param dst the destination buffer + */ +void sph_ripemd128_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-128 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 128-bit input and output + */ +void sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]); + +/* ===================================================================== */ + +/** + * This structure is a context for RIPEMD-160 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a RIPEMD-160 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running RIPEMD-160 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[5]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_ripemd160_context; + +/** + * Initialize a RIPEMD-160 context. This process performs no memory allocation. + * + * @param cc the RIPEMD-160 context (pointer to + * a sph_ripemd160_context) + */ +void sph_ripemd160_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RIPEMD-160 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_ripemd160(void *cc, const void *data, size_t len); + +/** + * Terminate the current RIPEMD-160 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (20 bytes). The context is automatically + * reinitialized. + * + * @param cc the RIPEMD-160 context + * @param dst the destination buffer + */ +void sph_ripemd160_close(void *cc, void *dst); + +/** + * Apply the RIPEMD-160 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the little-endian decoding). The + * val parameter contains the 5 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 160-bit input and output + */ +void sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sha3/sph_sha2.h b/sha3/sph_sha2.h new file mode 100644 index 0000000..3b7d17f --- /dev/null +++ b/sha3/sph_sha2.h @@ -0,0 +1,378 @@ +/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */ +/** + * SHA-224, SHA-256, SHA-384 and SHA-512 interface. + * + * SHA-256 has been published in FIPS 180-2, now amended with a change + * notice to include SHA-224 as well (which is a simple variation on + * SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS + * standards can be found at: + * http://csrc.nist.gov/publications/fips/ + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_sha2.h + * @author Thomas Pornin + */ + +#ifndef SPH_SHA2_H__ +#define SPH_SHA2_H__ + +#include +#include "sph_types.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/** + * Output size (in bits) for SHA-224. + */ +#define SPH_SIZE_sha224 224 + +/** + * Output size (in bits) for SHA-256. + */ +#define SPH_SIZE_sha256 256 + +/** + * This structure is a context for SHA-224 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a SHA-224 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running SHA-224 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[64]; /* first field, for alignment */ + sph_u32 val[8]; +#if SPH_64 + sph_u64 count; +#else + sph_u32 count_high, count_low; +#endif +#endif +} sph_sha224_context; + +/** + * This structure is a context for SHA-256 computations. It is identical + * to the SHA-224 context. However, a context is initialized for SHA-224 + * or SHA-256, but not both (the internal IV is not the + * same). + */ +typedef sph_sha224_context sph_sha256_context; + +/** + * Initialize a SHA-224 context. This process performs no memory allocation. + * + * @param cc the SHA-224 context (pointer to + * a sph_sha224_context) + */ +void sph_sha224_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the SHA-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha224(void *cc, const void *data, size_t len); + +/** + * Terminate the current SHA-224 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (28 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-224 context + * @param dst the destination buffer + */ +void sph_sha224_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (28 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-224 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Apply the SHA-224 compression function on the provided data. The + * msg parameter contains the 16 32-bit input blocks, + * as numerical values (hence after the big-endian decoding). The + * val parameter contains the 8 32-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 256-bit input and output + */ +void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]); + +/** + * Initialize a SHA-256 context. This process performs no memory allocation. + * + * @param cc the SHA-256 context (pointer to + * a sph_sha256_context) + */ +void sph_sha256_init(void *cc); + +#ifdef DOXYGEN_IGNORE +/** + * Process some data bytes, for SHA-256. This function is identical to + * sha_224() + * + * @param cc the SHA-224 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha256(void *cc, const void *data, size_t len); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha256 sph_sha224 +#endif + +/** + * Terminate the current SHA-256 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-256 context + * @param dst the destination buffer + */ +void sph_sha256_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (32 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-256 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef DOXYGEN_IGNORE +/** + * Apply the SHA-256 compression function on the provided data. This + * function is identical to sha224_comp(). + * + * @param msg the message block (16 values) + * @param val the function 256-bit input and output + */ +void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha256_comp sph_sha224_comp +#endif + +#if SPH_64 + +/** + * Output size (in bits) for SHA-384. + */ +#define SPH_SIZE_sha384 384 + +/** + * Output size (in bits) for SHA-512. + */ +#define SPH_SIZE_sha512 512 + +/** + * This structure is a context for SHA-384 computations: it contains the + * intermediate values and some data from the last entered block. Once + * a SHA-384 computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running SHA-384 computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char buf[128]; /* first field, for alignment */ + sph_u64 val[8]; + sph_u64 count; +#endif +} sph_sha384_context; + +/** + * Initialize a SHA-384 context. This process performs no memory allocation. + * + * @param cc the SHA-384 context (pointer to + * a sph_sha384_context) + */ +void sph_sha384_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the SHA-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha384(void *cc, const void *data, size_t len); + +/** + * Terminate the current SHA-384 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (48 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-384 context + * @param dst the destination buffer + */ +void sph_sha384_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (48 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-384 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +/** + * Apply the SHA-384 compression function on the provided data. The + * msg parameter contains the 16 64-bit input blocks, + * as numerical values (hence after the big-endian decoding). The + * val parameter contains the 8 64-bit input blocks for + * the compression function; the output is written in place in this + * array. + * + * @param msg the message block (16 values) + * @param val the function 512-bit input and output + */ +void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]); + +/** + * This structure is a context for SHA-512 computations. It is identical + * to the SHA-384 context. However, a context is initialized for SHA-384 + * or SHA-512, but not both (the internal IV is not the + * same). + */ +typedef sph_sha384_context sph_sha512_context; + +/** + * Initialize a SHA-512 context. This process performs no memory allocation. + * + * @param cc the SHA-512 context (pointer to + * a sph_sha512_context) + */ +void sph_sha512_init(void *cc); + +#ifdef DOXYGEN_IGNORE +/** + * Process some data bytes, for SHA-512. This function is identical to + * sph_sha384(). + * + * @param cc the SHA-384 context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_sha512(void *cc, const void *data, size_t len); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha512 sph_sha384 +#endif + +/** + * Terminate the current SHA-512 computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (64 bytes). The context is automatically + * reinitialized. + * + * @param cc the SHA-512 context + * @param dst the destination buffer + */ +void sph_sha512_close(void *cc, void *dst); + +/** + * Add a few additional bits (0 to 7) to the current computation, then + * terminate it and output the result in the provided buffer, which must + * be wide enough to accomodate the result (64 bytes). If bit number i + * in ub has value 2^i, then the extra bits are those + * numbered 7 downto 8-n (this is the big-endian convention at the byte + * level). The context is automatically reinitialized. + * + * @param cc the SHA-512 context + * @param ub the extra bits + * @param n the number of extra bits (0 to 7) + * @param dst the destination buffer + */ +void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst); + +#ifdef DOXYGEN_IGNORE +/** + * Apply the SHA-512 compression function. This function is identical to + * sph_sha384_comp(). + * + * @param msg the message block (16 values) + * @param val the function 512-bit input and output + */ +void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]); +#endif + +#ifndef DOXYGEN_IGNORE +#define sph_sha512_comp sph_sha384_comp +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/sha3/sph_types.h b/sha3/sph_types.h new file mode 100644 index 0000000..7295b0b --- /dev/null +++ b/sha3/sph_types.h @@ -0,0 +1,1976 @@ +/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */ +/** + * Basic type definitions. + * + * This header file defines the generic integer types that will be used + * for the implementation of hash functions; it also contains helper + * functions which encode and decode multi-byte integer values, using + * either little-endian or big-endian conventions. + * + * This file contains a compile-time test on the size of a byte + * (the unsigned char C type). If bytes are not octets, + * i.e. if they do not have a size of exactly 8 bits, then compilation + * is aborted. Architectures where bytes are not octets are relatively + * rare, even in the embedded devices market. We forbid non-octet bytes + * because there is no clear convention on how octet streams are encoded + * on such systems. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_types.h + * @author Thomas Pornin + */ + +#ifndef SPH_TYPES_H__ +#define SPH_TYPES_H__ + +#include + +/* + * All our I/O functions are defined over octet streams. We do not know + * how to handle input data if bytes are not octets. + */ +#if CHAR_BIT != 8 +#error This code requires 8-bit bytes +#endif + +/* ============= BEGIN documentation block for Doxygen ============ */ + +#ifdef DOXYGEN_IGNORE + +/** @mainpage sphlib C code documentation + * + * @section overview Overview + * + * sphlib is a library which contains implementations of + * various cryptographic hash functions. These pages have been generated + * with doxygen and + * document the API for the C implementations. + * + * The API is described in appropriate header files, which are available + * in the "Files" section. Each hash function family has its own header, + * whose name begins with "sph_" and contains the family + * name. For instance, the API for the RIPEMD hash functions is available + * in the header file sph_ripemd.h. + * + * @section principles API structure and conventions + * + * @subsection io Input/output conventions + * + * In all generality, hash functions operate over strings of bits. + * Individual bits are rarely encountered in C programming or actual + * communication protocols; most protocols converge on the ubiquitous + * "octet" which is a group of eight bits. Data is thus expressed as a + * stream of octets. The C programming language contains the notion of a + * "byte", which is a data unit managed under the type "unsigned + * char". The C standard prescribes that a byte should hold at + * least eight bits, but possibly more. Most modern architectures, even + * in the embedded world, feature eight-bit bytes, i.e. map bytes to + * octets. + * + * Nevertheless, for some of the implemented hash functions, an extra + * API has been added, which allows the input of arbitrary sequences of + * bits: when the computation is about to be closed, 1 to 7 extra bits + * can be added. The functions for which this API is implemented include + * the SHA-2 functions and all SHA-3 candidates. + * + * sphlib defines hash function which may hash octet streams, + * i.e. streams of bits where the number of bits is a multiple of eight. + * The data input functions in the sphlib API expect data + * as anonymous pointers ("const void *") with a length + * (of type "size_t") which gives the input data chunk length + * in bytes. A byte is assumed to be an octet; the sph_types.h + * header contains a compile-time test which prevents compilation on + * architectures where this property is not met. + * + * The hash function output is also converted into bytes. All currently + * implemented hash functions have an output width which is a multiple of + * eight, and this is likely to remain true for new designs. + * + * Most hash functions internally convert input data into 32-bit of 64-bit + * words, using either little-endian or big-endian conversion. The hash + * output also often consists of such words, which are encoded into output + * bytes with a similar endianness convention. Some hash functions have + * been only loosely specified on that subject; when necessary, + * sphlib has been tested against published "reference" + * implementations in order to use the same conventions. + * + * @subsection shortname Function short name + * + * Each implemented hash function has a "short name" which is used + * internally to derive the identifiers for the functions and context + * structures which the function uses. For instance, MD5 has the short + * name "md5". Short names are listed in the next section, + * for the implemented hash functions. In subsequent sections, the + * short name will be assumed to be "XXX": replace with the + * actual hash function name to get the C identifier. + * + * Note: some functions within the same family share the same core + * elements, such as update function or context structure. Correspondingly, + * some of the defined types or functions may actually be macros which + * transparently evaluate to another type or function name. + * + * @subsection context Context structure + * + * Each implemented hash fonction has its own context structure, available + * under the type name "sph_XXX_context" for the hash function + * with short name "XXX". This structure holds all needed + * state for a running hash computation. + * + * The contents of these structures are meant to be opaque, and private + * to the implementation. However, these contents are specified in the + * header files so that application code which uses sphlib + * may access the size of those structures. + * + * The caller is responsible for allocating the context structure, + * whether by dynamic allocation (malloc() or equivalent), + * static allocation (a global permanent variable), as an automatic + * variable ("on the stack"), or by any other mean which ensures proper + * structure alignment. sphlib code performs no dynamic + * allocation by itself. + * + * The context must be initialized before use, using the + * sph_XXX_init() function. This function sets the context + * state to proper initial values for hashing. + * + * Since all state data is contained within the context structure, + * sphlib is thread-safe and reentrant: several hash + * computations may be performed in parallel, provided that they do not + * operate on the same context. Moreover, a running computation can be + * cloned by copying the context (with a simple memcpy()): + * the context and its clone are then independant and may be updated + * with new data and/or closed without interfering with each other. + * Similarly, a context structure can be moved in memory at will: + * context structures contain no pointer, in particular no pointer to + * themselves. + * + * @subsection dataio Data input + * + * Hashed data is input with the sph_XXX() fonction, which + * takes as parameters a pointer to the context, a pointer to the data + * to hash, and the number of data bytes to hash. The context is updated + * with the new data. + * + * Data can be input in one or several calls, with arbitrary input lengths. + * However, it is best, performance wise, to input data by relatively big + * chunks (say a few kilobytes), because this allows sphlib to + * optimize things and avoid internal copying. + * + * When all data has been input, the context can be closed with + * sph_XXX_close(). The hash output is computed and written + * into the provided buffer. The caller must take care to provide a + * buffer of appropriate length; e.g., when using SHA-1, the output is + * a 20-byte word, therefore the output buffer must be at least 20-byte + * long. + * + * For some hash functions, the sph_XXX_addbits_and_close() + * function can be used instead of sph_XXX_close(). This + * function can take a few extra bits to be added at + * the end of the input message. This allows hashing messages with a + * bit length which is not a multiple of 8. The extra bits are provided + * as an unsigned integer value, and a bit count. The bit count must be + * between 0 and 7, inclusive. The extra bits are provided as bits 7 to + * 0 (bits of numerical value 128, 64, 32... downto 0), in that order. + * For instance, to add three bits of value 1, 1 and 0, the unsigned + * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count + * will be 3. + * + * The SPH_SIZE_XXX macro is defined for each hash function; + * it evaluates to the function output size, expressed in bits. For instance, + * SPH_SIZE_sha1 evaluates to 160. + * + * When closed, the context is automatically reinitialized and can be + * immediately used for another computation. It is not necessary to call + * sph_XXX_init() after a close. Note that + * sph_XXX_init() can still be called to "reset" a context, + * i.e. forget previously input data, and get back to the initial state. + * + * @subsection alignment Data alignment + * + * "Alignment" is a property of data, which is said to be "properly + * aligned" when its emplacement in memory is such that the data can + * be optimally read by full words. This depends on the type of access; + * basically, some hash functions will read data by 32-bit or 64-bit + * words. sphlib does not mandate such alignment for input + * data, but using aligned data can substantially improve performance. + * + * As a rule, it is best to input data by chunks whose length (in bytes) + * is a multiple of eight, and which begins at "generally aligned" + * addresses, such as the base address returned by a call to + * malloc(). + * + * @section functions Implemented functions + * + * We give here the list of implemented functions. They are grouped by + * family; to each family corresponds a specific header file. Each + * individual function has its associated "short name". Please refer to + * the documentation for that header file to get details on the hash + * function denomination and provenance. + * + * Note: the functions marked with a '(64)' in the list below are + * available only if the C compiler provides an integer type of length + * 64 bits or more. Such a type is mandatory in the latest C standard + * (ISO 9899:1999, aka "C99") and is present in several older compilers + * as well, so chances are that such a type is available. + * + * - HAVAL family: file sph_haval.h + * - HAVAL-128/3 (128-bit, 3 passes): short name: haval128_3 + * - HAVAL-128/4 (128-bit, 4 passes): short name: haval128_4 + * - HAVAL-128/5 (128-bit, 5 passes): short name: haval128_5 + * - HAVAL-160/3 (160-bit, 3 passes): short name: haval160_3 + * - HAVAL-160/4 (160-bit, 4 passes): short name: haval160_4 + * - HAVAL-160/5 (160-bit, 5 passes): short name: haval160_5 + * - HAVAL-192/3 (192-bit, 3 passes): short name: haval192_3 + * - HAVAL-192/4 (192-bit, 4 passes): short name: haval192_4 + * - HAVAL-192/5 (192-bit, 5 passes): short name: haval192_5 + * - HAVAL-224/3 (224-bit, 3 passes): short name: haval224_3 + * - HAVAL-224/4 (224-bit, 4 passes): short name: haval224_4 + * - HAVAL-224/5 (224-bit, 5 passes): short name: haval224_5 + * - HAVAL-256/3 (256-bit, 3 passes): short name: haval256_3 + * - HAVAL-256/4 (256-bit, 4 passes): short name: haval256_4 + * - HAVAL-256/5 (256-bit, 5 passes): short name: haval256_5 + * - MD2: file sph_md2.h, short name: md2 + * - MD4: file sph_md4.h, short name: md4 + * - MD5: file sph_md5.h, short name: md5 + * - PANAMA: file sph_panama.h, short name: panama + * - RadioGatun family: file sph_radiogatun.h + * - RadioGatun[32]: short name: radiogatun32 + * - RadioGatun[64]: short name: radiogatun64 (64) + * - RIPEMD family: file sph_ripemd.h + * - RIPEMD: short name: ripemd + * - RIPEMD-128: short name: ripemd128 + * - RIPEMD-160: short name: ripemd160 + * - SHA-0: file sph_sha0.h, short name: sha0 + * - SHA-1: file sph_sha1.h, short name: sha1 + * - SHA-2 family, 32-bit hashes: file sph_sha2.h + * - SHA-224: short name: sha224 + * - SHA-256: short name: sha256 + * - SHA-384: short name: sha384 (64) + * - SHA-512: short name: sha512 (64) + * - Tiger family: file sph_tiger.h + * - Tiger: short name: tiger (64) + * - Tiger2: short name: tiger2 (64) + * - WHIRLPOOL family: file sph_whirlpool.h + * - WHIRLPOOL-0: short name: whirlpool0 (64) + * - WHIRLPOOL-1: short name: whirlpool1 (64) + * - WHIRLPOOL: short name: whirlpool (64) + * + * The fourteen second-round SHA-3 candidates are also implemented; + * when applicable, the implementations follow the "final" specifications + * as published for the third round of the SHA-3 competition (BLAKE, + * Groestl, JH, Keccak and Skein have been tweaked for third round). + * + * - BLAKE family: file sph_blake.h + * - BLAKE-224: short name: blake224 + * - BLAKE-256: short name: blake256 + * - BLAKE-384: short name: blake384 + * - BLAKE-512: short name: blake512 + * - BMW (Blue Midnight Wish) family: file sph_bmw.h + * - BMW-224: short name: bmw224 + * - BMW-256: short name: bmw256 + * - BMW-384: short name: bmw384 (64) + * - BMW-512: short name: bmw512 (64) + * - CubeHash family: file sph_cubehash.h (specified as + * CubeHash16/32 in the CubeHash specification) + * - CubeHash-224: short name: cubehash224 + * - CubeHash-256: short name: cubehash256 + * - CubeHash-384: short name: cubehash384 + * - CubeHash-512: short name: cubehash512 + * - ECHO family: file sph_echo.h + * - ECHO-224: short name: echo224 + * - ECHO-256: short name: echo256 + * - ECHO-384: short name: echo384 + * - ECHO-512: short name: echo512 + * - Fugue family: file sph_fugue.h + * - Fugue-224: short name: fugue224 + * - Fugue-256: short name: fugue256 + * - Fugue-384: short name: fugue384 + * - Fugue-512: short name: fugue512 + * - Groestl family: file sph_groestl.h + * - Groestl-224: short name: groestl224 + * - Groestl-256: short name: groestl256 + * - Groestl-384: short name: groestl384 + * - Groestl-512: short name: groestl512 + * - Hamsi family: file sph_hamsi.h + * - Hamsi-224: short name: hamsi224 + * - Hamsi-256: short name: hamsi256 + * - Hamsi-384: short name: hamsi384 + * - Hamsi-512: short name: hamsi512 + * - JH family: file sph_jh.h + * - JH-224: short name: jh224 + * - JH-256: short name: jh256 + * - JH-384: short name: jh384 + * - JH-512: short name: jh512 + * - Keccak family: file sph_keccak.h + * - Keccak-224: short name: keccak224 + * - Keccak-256: short name: keccak256 + * - Keccak-384: short name: keccak384 + * - Keccak-512: short name: keccak512 + * - Luffa family: file sph_luffa.h + * - Luffa-224: short name: luffa224 + * - Luffa-256: short name: luffa256 + * - Luffa-384: short name: luffa384 + * - Luffa-512: short name: luffa512 + * - Shabal family: file sph_shabal.h + * - Shabal-192: short name: shabal192 + * - Shabal-224: short name: shabal224 + * - Shabal-256: short name: shabal256 + * - Shabal-384: short name: shabal384 + * - Shabal-512: short name: shabal512 + * - SHAvite-3 family: file sph_shavite.h + * - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"): + * short name: shabal224 + * - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"): + * short name: shabal256 + * - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"): + * short name: shabal384 + * - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"): + * short name: shabal512 + * - SIMD family: file sph_simd.h + * - SIMD-224: short name: simd224 + * - SIMD-256: short name: simd256 + * - SIMD-384: short name: simd384 + * - SIMD-512: short name: simd512 + * - Skein family: file sph_skein.h + * - Skein-224 (nominally specified as Skein-512-224): short name: + * skein224 (64) + * - Skein-256 (nominally specified as Skein-512-256): short name: + * skein256 (64) + * - Skein-384 (nominally specified as Skein-512-384): short name: + * skein384 (64) + * - Skein-512 (nominally specified as Skein-512-512): short name: + * skein512 (64) + * + * For the second-round SHA-3 candidates, the functions are as specified + * for round 2, i.e. with the "tweaks" that some candidates added + * between round 1 and round 2. Also, some of the submitted packages for + * round 2 contained errors, in the specification, reference code, or + * both. sphlib implements the corrected versions. + */ + +/** @hideinitializer + * Unsigned integer type whose length is at least 32 bits; on most + * architectures, it will have a width of exactly 32 bits. Unsigned C + * types implement arithmetics modulo a power of 2; use the + * SPH_T32() macro to ensure that the value is truncated + * to exactly 32 bits. Unless otherwise specified, all macros and + * functions which accept sph_u32 values assume that these + * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures + * where sph_u32 is larger than that. + */ +typedef __arch_dependant__ sph_u32; + +/** @hideinitializer + * Signed integer type corresponding to sph_u32; it has + * width 32 bits or more. + */ +typedef __arch_dependant__ sph_s32; + +/** @hideinitializer + * Unsigned integer type whose length is at least 64 bits; on most + * architectures which feature such a type, it will have a width of + * exactly 64 bits. C99-compliant platform will have this type; it + * is also defined when the GNU compiler (gcc) is used, and on + * platforms where unsigned long is large enough. If this + * type is not available, then some hash functions which depends on + * a 64-bit type will not be available (most notably SHA-384, SHA-512, + * Tiger and WHIRLPOOL). + */ +typedef __arch_dependant__ sph_u64; + +/** @hideinitializer + * Signed integer type corresponding to sph_u64; it has + * width 64 bits or more. + */ +typedef __arch_dependant__ sph_s64; + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u32. Depending on + * how this type is defined, a suffix such as UL may + * be appended to the argument. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C32(x) + +/** + * Truncate a 32-bit value to exactly 32 bits. On most systems, this is + * a no-op, recognized as such by the compiler. + * + * @param x the value to truncate (of type sph_u32) + */ +#define SPH_T32(x) + +/** + * Rotate a 32-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTL32(x, n) + +/** + * Rotate a 32-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 31. This macro assumes that its + * first argument fits in 32 bits (no extra bit allowed on machines where + * sph_u32 is wider); both arguments may be evaluated + * several times. + * + * @param x the value to rotate (of type sph_u32) + * @param n the rotation count (between 1 and 31, inclusive) + */ +#define SPH_ROTR32(x, n) + +/** + * This macro is defined on systems for which a 64-bit type has been + * detected, and is used for sph_u64. + */ +#define SPH_64 + +/** + * This macro is defined on systems for the "native" integer size is + * 64 bits (64-bit values fit in one register). + */ +#define SPH_64_TRUE + +/** + * This macro expands the token x into a suitable + * constant expression of type sph_u64. Depending on + * how this type is defined, a suffix such as ULL may + * be appended to the argument. This macro is defined only if a + * 64-bit type was detected and used for sph_u64. + * + * @param x the token to expand into a suitable constant expression + */ +#define SPH_C64(x) + +/** + * Truncate a 64-bit value to exactly 64 bits. On most systems, this is + * a no-op, recognized as such by the compiler. This macro is defined only + * if a 64-bit type was detected and used for sph_u64. + * + * @param x the value to truncate (of type sph_u64) + */ +#define SPH_T64(x) + +/** + * Rotate a 64-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTL64(x, n) + +/** + * Rotate a 64-bit value by a number of bits to the left. The rotate + * count must reside between 1 and 63. This macro assumes that its + * first argument fits in 64 bits (no extra bit allowed on machines where + * sph_u64 is wider); both arguments may be evaluated + * several times. This macro is defined only if a 64-bit type was detected + * and used for sph_u64. + * + * @param x the value to rotate (of type sph_u64) + * @param n the rotation count (between 1 and 63, inclusive) + */ +#define SPH_ROTR64(x, n) + +/** + * This macro evaluates to inline or an equivalent construction, + * if available on the compilation platform, or to nothing otherwise. This + * is used to declare inline functions, for which the compiler should + * endeavour to include the code directly in the caller. Inline functions + * are typically defined in header files as replacement for macros. + */ +#define SPH_INLINE + +/** + * This macro is defined if the platform has been detected as using + * little-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit). + */ +#define SPH_LITTLE_ENDIAN + +/** + * This macro is defined if the platform has been detected as using + * big-endian convention. This implies that the sph_u32 + * type (and the sph_u64 type also, if it is defined) has + * an exact width (i.e. exactly 32-bit, respectively 64-bit). + */ +#define SPH_BIG_ENDIAN + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in little-endian + * convention. This is the case for little-endian platforms, and also + * for the big-endian platforms which have special little-endian access + * opcodes (e.g. Ultrasparc). + */ +#define SPH_LITTLE_FAST + +/** + * This macro is defined if 32-bit words (and 64-bit words, if defined) + * can be read from and written to memory efficiently in big-endian + * convention. This is the case for little-endian platforms, and also + * for the little-endian platforms which have special big-endian access + * opcodes. + */ +#define SPH_BIG_FAST + +/** + * On some platforms, this macro is defined to an unsigned integer type + * into which pointer values may be cast. The resulting value can then + * be tested for being a multiple of 2, 4 or 8, indicating an aligned + * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses. + */ +#define SPH_UPTR + +/** + * When defined, this macro indicates that unaligned memory accesses + * are possible with only a minor penalty, and thus should be prefered + * over strategies which first copy data to an aligned buffer. + */ +#define SPH_UNALIGNED + +/** + * Byte-swap a 32-bit word (i.e. 0x12345678 becomes + * 0x78563412). This is an inline function which resorts + * to inline assembly on some platforms, for better performance. + * + * @param x the 32-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u32 sph_bswap32(sph_u32 x); + +/** + * Byte-swap a 64-bit word. This is an inline function which resorts + * to inline assembly on some platforms, for better performance. This + * function is defined only if a suitable 64-bit type was found for + * sph_u64 + * + * @param x the 64-bit value to byte-swap + * @return the byte-swapped value + */ +static inline sph_u64 sph_bswap64(sph_u64 x); + +/** + * Decode a 16-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16le(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16le(void *dst, unsigned val); + +/** + * Decode a 16-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline unsigned sph_dec16be(const void *src); + +/** + * Encode a 16-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc16be(void *dst, unsigned val); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32le() function. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32le_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32le() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32le_aligned(void *dst, sph_u32 val); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be(const void *src); + +/** + * Decode a 32-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec32be() function. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u32 sph_dec32be_aligned(const void *src); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be(void *dst, sph_u32 val); + +/** + * Encode a 32-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc32be() function. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc32be_aligned(void *dst, sph_u32 val); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64le() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64le_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in little-endian convention + * (least significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64le() function. This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64le_aligned(void *dst, sph_u64 val); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be(const void *src); + +/** + * Decode a 64-bit unsigned value from memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * source address is suitably aligned for a direct access, if the platform + * supports such things; it can thus be marginally faster than the generic + * sph_dec64be() function. This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param src the source address + * @return the decoded value + */ +static inline sph_u64 sph_dec64be_aligned(const void *src); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function is defined only + * if a suitable 64-bit type was detected and used for sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be(void *dst, sph_u64 val); + +/** + * Encode a 64-bit unsigned value into memory, in big-endian convention + * (most significant byte comes first). This function assumes that the + * destination address is suitably aligned for a direct access, if the + * platform supports such things; it can thus be marginally faster than + * the generic sph_enc64be() function. This function is defined + * only if a suitable 64-bit type was detected and used for + * sph_u64. + * + * @param dst the destination buffer + * @param val the value to encode + */ +static inline void sph_enc64be_aligned(void *dst, sph_u64 val); + +#endif + +/* ============== END documentation block for Doxygen ============= */ + +#ifndef DOXYGEN_IGNORE + +/* + * We want to define the types "sph_u32" and "sph_u64" which hold + * unsigned values of at least, respectively, 32 and 64 bits. These + * tests should select appropriate types for most platforms. The + * macro "SPH_64" is defined if the 64-bit is supported. + */ + +#undef SPH_64 +#undef SPH_64_TRUE + +#if defined __STDC__ && __STDC_VERSION__ >= 199901L + +/* + * On C99 implementations, we can use to get an exact 64-bit + * type, if any, or otherwise use a wider type (which must exist, for + * C99 conformance). + */ + +#include + +#ifdef UINT32_MAX +typedef uint32_t sph_u32; +typedef int32_t sph_s32; +#else +typedef uint_fast32_t sph_u32; +typedef int_fast32_t sph_s32; +#endif +#if !SPH_NO_64 +#ifdef UINT64_MAX +typedef uint64_t sph_u64; +typedef int64_t sph_s64; +#else +typedef uint_fast64_t sph_u64; +typedef int_fast64_t sph_s64; +#endif +#endif + +#define SPH_C32(x) ((sph_u32)(x)) +#if !SPH_NO_64 +#define SPH_C64(x) ((sph_u64)(x)) +#define SPH_64 1 +#endif + +#else + +/* + * On non-C99 systems, we use "unsigned int" if it is wide enough, + * "unsigned long" otherwise. This supports all "reasonable" architectures. + * We have to be cautious: pre-C99 preprocessors handle constants + * differently in '#if' expressions. Hence the shifts to test UINT_MAX. + */ + +#if ((UINT_MAX >> 11) >> 11) >= 0x3FF + +typedef unsigned int sph_u32; +typedef int sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## U)) + +#else + +typedef unsigned long sph_u32; +typedef long sph_s32; + +#define SPH_C32(x) ((sph_u32)(x ## UL)) + +#endif + +#if !SPH_NO_64 + +/* + * We want a 64-bit type. We use "unsigned long" if it is wide enough (as + * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9), + * "unsigned long long" otherwise, if available. We use ULLONG_MAX to + * test whether "unsigned long long" is available; we also know that + * gcc features this type, even if the libc header do not know it. + */ + +#if ((ULONG_MAX >> 31) >> 31) >= 3 + +typedef unsigned long sph_u64; +typedef long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## UL)) + +#define SPH_64 1 + +#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__ + +typedef unsigned long long sph_u64; +typedef long long sph_s64; + +#define SPH_C64(x) ((sph_u64)(x ## ULL)) + +#define SPH_64 1 + +#else + +/* + * No 64-bit type... + */ + +#endif + +#endif + +#endif + +/* + * If the "unsigned long" type has length 64 bits or more, then this is + * a "true" 64-bit architectures. This is also true with Visual C on + * amd64, even though the "long" type is limited to 32 bits. + */ +#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64) +#define SPH_64_TRUE 1 +#endif + +/* + * Implementation note: some processors have specific opcodes to perform + * a rotation. Recent versions of gcc recognize the expression above and + * use the relevant opcodes, when appropriate. + */ + +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#if SPH_64 + +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#endif + +#ifndef DOXYGEN_IGNORE +/* + * Define SPH_INLINE to be an "inline" qualifier, if available. We define + * some small macro-like functions which benefit greatly from being inlined. + */ +#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__ +#define SPH_INLINE inline +#elif defined _MSC_VER +#define SPH_INLINE __inline +#else +#define SPH_INLINE +#endif +#endif + +/* + * We define some macros which qualify the architecture. These macros + * may be explicit set externally (e.g. as compiler parameters). The + * code below sets those macros if they are not already defined. + * + * Most macros are boolean, thus evaluate to either zero or non-zero. + * The SPH_UPTR macro is special, in that it evaluates to a C type, + * or is not defined. + * + * SPH_UPTR if defined: unsigned type to cast pointers into + * + * SPH_UNALIGNED non-zero if unaligned accesses are efficient + * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian + * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian + * SPH_LITTLE_FAST non-zero if little-endian decoding is fast + * SPH_BIG_FAST non-zero if big-endian decoding is fast + * + * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit + * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN + * _must_ be non-zero in those situations. The 32-bit and 64-bit types + * _must_ also have an exact width. + * + * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode + * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode + * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc + * SPH_I386_GCC x86-compatible (32-bit) with gcc + * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C + * SPH_AMD64_GCC x86-compatible (64-bit) with gcc + * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C + * SPH_PPC32_GCC PowerPC, 32-bit, with gcc + * SPH_PPC64_GCC PowerPC, 64-bit, with gcc + * + * TODO: enhance automatic detection, for more architectures and compilers. + * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with + * some very fast functions (e.g. MD4) when using unaligned input data. + * The CPU-specific-with-GCC macros are useful only for inline assembly, + * normally restrained to this header file. + */ + +/* + * 32-bit x86, aka "i386 compatible". + */ +#if defined __i386__ || defined _M_IX86 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#ifdef __GNUC__ +#define SPH_DETECT_I386_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_I386_MSVC 1 +#endif + +/* + * 64-bit x86, hereafter known as "amd64". + */ +#elif defined __x86_64 || defined _M_X64 + +#define SPH_DETECT_UNALIGNED 1 +#define SPH_DETECT_LITTLE_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_AMD64_GCC 1 +#endif +#ifdef _MSC_VER +#define SPH_DETECT_AMD64_MSVC 1 +#endif + +/* + * 64-bit Sparc architecture (implies v9). + */ +#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \ + || defined __sparcv9 + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u64 +#ifdef __GNUC__ +#define SPH_DETECT_SPARCV9_GCC_64 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * 32-bit Sparc. + */ +#elif (defined __sparc__ || defined __sparc) \ + && !(defined __sparcv9 || defined __arch64__) + +#define SPH_DETECT_BIG_ENDIAN 1 +#define SPH_DETECT_UPTR sph_u32 +#if defined __GNUC__ && defined __sparc_v9__ +#define SPH_DETECT_SPARCV9_GCC_32 1 +#define SPH_DETECT_LITTLE_FAST 1 +#endif + +/* + * ARM, little-endian. + */ +#elif defined __arm__ && __ARMEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, little-endian. + */ +#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__ + +#define SPH_DETECT_LITTLE_ENDIAN 1 + +/* + * MIPS, big-endian. + */ +#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__ + +#define SPH_DETECT_BIG_ENDIAN 1 + +/* + * PowerPC. + */ +#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \ + || defined _ARCH_PPC + +/* + * Note: we do not declare cross-endian access to be "fast": even if + * using inline assembly, implementation should still assume that + * keeping the decoded word in a temporary is faster than decoding + * it again. + */ +#if defined __GNUC__ +#if SPH_64_TRUE +#define SPH_DETECT_PPC64_GCC 1 +#else +#define SPH_DETECT_PPC32_GCC 1 +#endif +#endif + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif + +/* + * Itanium, 64-bit. + */ +#elif defined __ia64 || defined __ia64__ \ + || defined __itanium__ || defined _M_IA64 + +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN +#define SPH_DETECT_BIG_ENDIAN 1 +#else +#define SPH_DETECT_LITTLE_ENDIAN 1 +#endif +#if defined __LP64__ || defined _LP64 +#define SPH_DETECT_UPTR sph_u64 +#else +#define SPH_DETECT_UPTR sph_u32 +#endif + +#endif + +#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64 +#define SPH_DETECT_SPARCV9_GCC 1 +#endif + +#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED +#define SPH_UNALIGNED SPH_DETECT_UNALIGNED +#endif +#if defined SPH_DETECT_UPTR && !defined SPH_UPTR +#define SPH_UPTR SPH_DETECT_UPTR +#endif +#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN +#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN +#endif +#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN +#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN +#endif +#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST +#endif +#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST +#define SPH_BIG_FAST SPH_DETECT_BIG_FAST +#endif +#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32 +#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32 +#endif +#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64 +#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64 +#endif +#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC +#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC +#endif +#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC +#define SPH_I386_GCC SPH_DETECT_I386_GCC +#endif +#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC +#define SPH_I386_MSVC SPH_DETECT_I386_MSVC +#endif +#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC +#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC +#endif +#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC +#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC +#endif +#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC +#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC +#endif +#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC +#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC +#endif + +#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST +#define SPH_LITTLE_FAST 1 +#endif +#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST +#define SPH_BIG_FAST 1 +#endif + +#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN) +#error SPH_UPTR defined, but endianness is not known. +#endif + +#if SPH_I386_GCC && !SPH_NO_ASM + +/* + * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * values. + */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + +#elif SPH_AMD64_GCC && !SPH_NO_ASM + +/* + * On x86 64-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit + * and 64-bit values. + */ + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x)); + return x; +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x)); + return x; +} + +#endif + +/* + * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough + * to generate proper opcodes for endianness swapping with the pure C + * implementation below. + * + +#elif SPH_I386_MSVC && !SPH_NO_ASM + +static __inline sph_u32 __declspec(naked) __fastcall +sph_bswap32(sph_u32 x) +{ + __asm { + bswap ecx + mov eax,ecx + ret + } +} + +#if SPH_64 + +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + return ((sph_u64)sph_bswap32((sph_u32)x) << 32) + | (sph_u64)sph_bswap32((sph_u32)(x >> 32)); +} + +#endif + + * + * [end of disabled code] + */ + +#else + +static SPH_INLINE sph_u32 +sph_bswap32(sph_u32 x) +{ + x = SPH_T32((x << 16) | (x >> 16)); + x = ((x & SPH_C32(0xFF00FF00)) >> 8) + | ((x & SPH_C32(0x00FF00FF)) << 8); + return x; +} + +#if SPH_64 + +/** + * Byte-swap a 64-bit value. + * + * @param x the input value + * @return the byte-swapped value + */ +static SPH_INLINE sph_u64 +sph_bswap64(sph_u64 x) +{ + x = SPH_T64((x << 32) | (x >> 32)); + x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16) + | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16); + x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8) + | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8); + return x; +} + +#endif + +#endif + +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + +/* + * On UltraSPARC systems, native ordering is big-endian, but it is + * possible to perform little-endian read accesses by specifying the + * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use + * the opcode "lda [%reg]0x88,%dst", where %reg is the register which + * contains the source address and %dst is the destination register, + * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register + * to get the address space name. The latter format is better since it + * combines an addition and the actual access in a single opcode; but + * it requires the setting (and subsequent resetting) of %asi, which is + * slow. Some operations (i.e. MD5 compression function) combine many + * successive little-endian read accesses, which may share the same + * %asi setting. The macros below contain the appropriate inline + * assembly. + */ + +#define SPH_SPARCV9_SET_ASI \ + sph_u32 sph_sparcv9_asi; \ + __asm__ __volatile__ ( \ + "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_RESET_ASI \ + __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi)); + +#define SPH_SPARCV9_DEC32LE(base, idx) ({ \ + sph_u32 sph_sparcv9_tmp; \ + __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \ + : "=r" (sph_sparcv9_tmp) : "r" (base)); \ + sph_sparcv9_tmp; \ + }) + +#endif + +static SPH_INLINE void +sph_enc16be(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = (val >> 8); + ((unsigned char *)dst)[1] = val; +} + +static SPH_INLINE unsigned +sph_dec16be(const void *src) +{ + return ((unsigned)(((const unsigned char *)src)[0]) << 8) + | (unsigned)(((const unsigned char *)src)[1]); +} + +static SPH_INLINE void +sph_enc16le(void *dst, unsigned val) +{ + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = val >> 8; +} + +static SPH_INLINE unsigned +sph_dec16le(const void *src) +{ + return (unsigned)(((const unsigned char *)src)[0]) + | ((unsigned)(((const unsigned char *)src)[1]) << 8); +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). + * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32be(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32be_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 24); + ((unsigned char *)dst)[1] = (val >> 16); + ((unsigned char *)dst)[2] = (val >> 8); + ((unsigned char *)dst)[3] = val; +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif + } else { + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); + } +#endif +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u32 *)src; +#else + return ((sph_u32)(((const unsigned char *)src)[0]) << 24) + | ((sph_u32)(((const unsigned char *)src)[1]) << 16) + | ((sph_u32)(((const unsigned char *)src)[2]) << 8) + | (sph_u32)(((const unsigned char *)src)[3]); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). + * + * @param dst the destination buffer + * @param val the 32-bit value to encode + */ +static SPH_INLINE void +sph_enc32le(void *dst, sph_u32 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; +#else + if (((SPH_UPTR)dst & 3) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap32(val); +#endif + *(sph_u32 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Encode a 32-bit value into the provided buffer (little endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (32-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc32le_aligned(void *dst, sph_u32 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u32 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u32 *)dst = sph_bswap32(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap32(*(const sph_u32 *)src); +#else + return *(const sph_u32 *)src; +#endif +#else + if (((SPH_UPTR)src & 3) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + /* + * "__volatile__" is needed here because without it, + * gcc-3.4.3 miscompiles the code and performs the + * access before the test on the address, thus triggering + * a bus error... + */ + __asm__ __volatile__ ( + "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * On PowerPC, this turns out not to be worth the effort: the inline + * assembly makes GCC optimizer uncomfortable, which tends to nullify + * the decoding gains. + * + * For most hash functions, using this inline assembly trick changes + * hashing speed by less than 5% and often _reduces_ it. The biggest + * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is + * less then 10%. The speed gain on CubeHash is probably due to the + * chronic shortage of registers that CubeHash endures; for the other + * functions, the generic code appears to be efficient enough already. + * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ( + "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return *(const sph_u32 *)src; +#endif + } else { + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); + } +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +/** + * Decode a 32-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (32-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u32 +sph_dec32le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u32 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM + sph_u32 tmp; + + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap32(*(const sph_u32 *)src); +#endif +#else + return (sph_u32)(((const unsigned char *)src)[0]) + | ((sph_u32)(((const unsigned char *)src)[1]) << 8) + | ((sph_u32)(((const unsigned char *)src)[2]) << 16) + | ((sph_u32)(((const unsigned char *)src)[3]) << 24); +#endif +} + +#if SPH_64 + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). + * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64be(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_LITTLE_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; + } +#endif +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (big endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64be_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = val; +#else + ((unsigned char *)dst)[0] = (val >> 56); + ((unsigned char *)dst)[1] = (val >> 48); + ((unsigned char *)dst)[2] = (val >> 40); + ((unsigned char *)dst)[3] = (val >> 32); + ((unsigned char *)dst)[4] = (val >> 24); + ((unsigned char *)dst)[5] = (val >> 16); + ((unsigned char *)dst)[6] = (val >> 8); + ((unsigned char *)dst)[7] = val; +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif + } else { + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); + } +#endif +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (big endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64be_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#elif SPH_BIG_ENDIAN + return *(const sph_u64 *)src; +#else + return ((sph_u64)(((const unsigned char *)src)[0]) << 56) + | ((sph_u64)(((const unsigned char *)src)[1]) << 48) + | ((sph_u64)(((const unsigned char *)src)[2]) << 40) + | ((sph_u64)(((const unsigned char *)src)[3]) << 32) + | ((sph_u64)(((const unsigned char *)src)[4]) << 24) + | ((sph_u64)(((const unsigned char *)src)[5]) << 16) + | ((sph_u64)(((const unsigned char *)src)[6]) << 8) + | (sph_u64)(((const unsigned char *)src)[7]); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian convention). + * + * @param dst the destination buffer + * @param val the 64-bit value to encode + */ +static SPH_INLINE void +sph_enc64le(void *dst, sph_u64 val) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; +#else + if (((SPH_UPTR)dst & 7) == 0) { +#if SPH_BIG_ENDIAN + val = sph_bswap64(val); +#endif + *(sph_u64 *)dst = val; + } else { + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); + } +#endif +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Encode a 64-bit value into the provided buffer (little endian convention). + * The destination buffer must be properly aligned. + * + * @param dst the destination buffer (64-bit aligned) + * @param val the value to encode + */ +static SPH_INLINE void +sph_enc64le_aligned(void *dst, sph_u64 val) +{ +#if SPH_LITTLE_ENDIAN + *(sph_u64 *)dst = val; +#elif SPH_BIG_ENDIAN + *(sph_u64 *)dst = sph_bswap64(val); +#else + ((unsigned char *)dst)[0] = val; + ((unsigned char *)dst)[1] = (val >> 8); + ((unsigned char *)dst)[2] = (val >> 16); + ((unsigned char *)dst)[3] = (val >> 24); + ((unsigned char *)dst)[4] = (val >> 32); + ((unsigned char *)dst)[5] = (val >> 40); + ((unsigned char *)dst)[6] = (val >> 48); + ((unsigned char *)dst)[7] = (val >> 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * + * @param src the source buffer + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le(const void *src) +{ +#if defined SPH_UPTR +#if SPH_UNALIGNED +#if SPH_BIG_ENDIAN + return sph_bswap64(*(const sph_u64 *)src); +#else + return *(const sph_u64 *)src; +#endif +#else + if (((SPH_UPTR)src & 7) == 0) { +#if SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned( + (const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ( + "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return *(const sph_u64 *)src; +#endif + } else { + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); + } +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +/** + * Decode a 64-bit value from the provided buffer (little endian convention). + * The source buffer must be properly aligned. + * + * @param src the source buffer (64-bit aligned) + * @return the decoded value + */ +static SPH_INLINE sph_u64 +sph_dec64le_aligned(const void *src) +{ +#if SPH_LITTLE_ENDIAN + return *(const sph_u64 *)src; +#elif SPH_BIG_ENDIAN +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src)); + return tmp; +/* + * Not worth it generally. + * +#elif SPH_PPC32_GCC && !SPH_NO_ASM + return (sph_u64)sph_dec32le_aligned(src) + | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32); +#elif SPH_PPC64_GCC && !SPH_NO_ASM + sph_u64 tmp; + + __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src)); + return tmp; + */ +#else + return sph_bswap64(*(const sph_u64 *)src); +#endif +#else + return (sph_u64)(((const unsigned char *)src)[0]) + | ((sph_u64)(((const unsigned char *)src)[1]) << 8) + | ((sph_u64)(((const unsigned char *)src)[2]) << 16) + | ((sph_u64)(((const unsigned char *)src)[3]) << 24) + | ((sph_u64)(((const unsigned char *)src)[4]) << 32) + | ((sph_u64)(((const unsigned char *)src)[5]) << 40) + | ((sph_u64)(((const unsigned char *)src)[6]) << 48) + | ((sph_u64)(((const unsigned char *)src)[7]) << 56); +#endif +} + +#endif + +#endif /* Doxygen excluded block */ + +#endif diff --git a/sph_ripemd.c b/sph_ripemd.c new file mode 100644 index 0000000..0a3f336 --- /dev/null +++ b/sph_ripemd.c @@ -0,0 +1,841 @@ +/* $Id: ripemd.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * RIPEMD-160 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_ripemd.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +/* + * Round functions for RIPEMD (original). + */ +#define F(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define G(x, y, z) (((x) & (y)) | (((x) | (y)) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + +static const sph_u32 oIV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), + SPH_C32(0x98BADCFE), SPH_C32(0x10325476) +}; + +/* + * Round functions for RIPEMD-128 and RIPEMD-160. + */ +#define F1(x, y, z) ((x) ^ (y) ^ (z)) +#define F2(x, y, z) ((((y) ^ (z)) & (x)) ^ (z)) +#define F3(x, y, z) (((x) | ~(y)) ^ (z)) +#define F4(x, y, z) ((((x) ^ (y)) & (z)) ^ (y)) +#define F5(x, y, z) ((x) ^ ((y) | ~(z))) + +static const sph_u32 IV[5] = { + SPH_C32(0x67452301), SPH_C32(0xEFCDAB89), SPH_C32(0x98BADCFE), + SPH_C32(0x10325476), SPH_C32(0xC3D2E1F0) +}; + +#define ROTL SPH_ROTL32 + +/* ===================================================================== */ +/* + * RIPEMD (original hash, deprecated). + */ + +#define FF1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) \ + + (X) + SPH_C32(0x5A827999)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH1(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x6ED9EBA1)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define FF2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + F(B, C, D) \ + + (X) + SPH_C32(0x50A28BE6)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define GG2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + G(B, C, D) + (X)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define HH2(A, B, C, D, X, s) do { \ + sph_u32 tmp = SPH_T32((A) + H(B, C, D) \ + + (X) + SPH_C32(0x5C4DD124)); \ + (A) = ROTL(tmp, (s)); \ + } while (0) + +#define RIPEMD_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + FF1(A1, B1, C1, D1, in( 0), 11); \ + FF1(D1, A1, B1, C1, in( 1), 14); \ + FF1(C1, D1, A1, B1, in( 2), 15); \ + FF1(B1, C1, D1, A1, in( 3), 12); \ + FF1(A1, B1, C1, D1, in( 4), 5); \ + FF1(D1, A1, B1, C1, in( 5), 8); \ + FF1(C1, D1, A1, B1, in( 6), 7); \ + FF1(B1, C1, D1, A1, in( 7), 9); \ + FF1(A1, B1, C1, D1, in( 8), 11); \ + FF1(D1, A1, B1, C1, in( 9), 13); \ + FF1(C1, D1, A1, B1, in(10), 14); \ + FF1(B1, C1, D1, A1, in(11), 15); \ + FF1(A1, B1, C1, D1, in(12), 6); \ + FF1(D1, A1, B1, C1, in(13), 7); \ + FF1(C1, D1, A1, B1, in(14), 9); \ + FF1(B1, C1, D1, A1, in(15), 8); \ + \ + GG1(A1, B1, C1, D1, in( 7), 7); \ + GG1(D1, A1, B1, C1, in( 4), 6); \ + GG1(C1, D1, A1, B1, in(13), 8); \ + GG1(B1, C1, D1, A1, in( 1), 13); \ + GG1(A1, B1, C1, D1, in(10), 11); \ + GG1(D1, A1, B1, C1, in( 6), 9); \ + GG1(C1, D1, A1, B1, in(15), 7); \ + GG1(B1, C1, D1, A1, in( 3), 15); \ + GG1(A1, B1, C1, D1, in(12), 7); \ + GG1(D1, A1, B1, C1, in( 0), 12); \ + GG1(C1, D1, A1, B1, in( 9), 15); \ + GG1(B1, C1, D1, A1, in( 5), 9); \ + GG1(A1, B1, C1, D1, in(14), 7); \ + GG1(D1, A1, B1, C1, in( 2), 11); \ + GG1(C1, D1, A1, B1, in(11), 13); \ + GG1(B1, C1, D1, A1, in( 8), 12); \ + \ + HH1(A1, B1, C1, D1, in( 3), 11); \ + HH1(D1, A1, B1, C1, in(10), 13); \ + HH1(C1, D1, A1, B1, in( 2), 14); \ + HH1(B1, C1, D1, A1, in( 4), 7); \ + HH1(A1, B1, C1, D1, in( 9), 14); \ + HH1(D1, A1, B1, C1, in(15), 9); \ + HH1(C1, D1, A1, B1, in( 8), 13); \ + HH1(B1, C1, D1, A1, in( 1), 15); \ + HH1(A1, B1, C1, D1, in(14), 6); \ + HH1(D1, A1, B1, C1, in( 7), 8); \ + HH1(C1, D1, A1, B1, in( 0), 13); \ + HH1(B1, C1, D1, A1, in( 6), 6); \ + HH1(A1, B1, C1, D1, in(11), 12); \ + HH1(D1, A1, B1, C1, in(13), 5); \ + HH1(C1, D1, A1, B1, in( 5), 7); \ + HH1(B1, C1, D1, A1, in(12), 5); \ + \ + FF2(A2, B2, C2, D2, in( 0), 11); \ + FF2(D2, A2, B2, C2, in( 1), 14); \ + FF2(C2, D2, A2, B2, in( 2), 15); \ + FF2(B2, C2, D2, A2, in( 3), 12); \ + FF2(A2, B2, C2, D2, in( 4), 5); \ + FF2(D2, A2, B2, C2, in( 5), 8); \ + FF2(C2, D2, A2, B2, in( 6), 7); \ + FF2(B2, C2, D2, A2, in( 7), 9); \ + FF2(A2, B2, C2, D2, in( 8), 11); \ + FF2(D2, A2, B2, C2, in( 9), 13); \ + FF2(C2, D2, A2, B2, in(10), 14); \ + FF2(B2, C2, D2, A2, in(11), 15); \ + FF2(A2, B2, C2, D2, in(12), 6); \ + FF2(D2, A2, B2, C2, in(13), 7); \ + FF2(C2, D2, A2, B2, in(14), 9); \ + FF2(B2, C2, D2, A2, in(15), 8); \ + \ + GG2(A2, B2, C2, D2, in( 7), 7); \ + GG2(D2, A2, B2, C2, in( 4), 6); \ + GG2(C2, D2, A2, B2, in(13), 8); \ + GG2(B2, C2, D2, A2, in( 1), 13); \ + GG2(A2, B2, C2, D2, in(10), 11); \ + GG2(D2, A2, B2, C2, in( 6), 9); \ + GG2(C2, D2, A2, B2, in(15), 7); \ + GG2(B2, C2, D2, A2, in( 3), 15); \ + GG2(A2, B2, C2, D2, in(12), 7); \ + GG2(D2, A2, B2, C2, in( 0), 12); \ + GG2(C2, D2, A2, B2, in( 9), 15); \ + GG2(B2, C2, D2, A2, in( 5), 9); \ + GG2(A2, B2, C2, D2, in(14), 7); \ + GG2(D2, A2, B2, C2, in( 2), 11); \ + GG2(C2, D2, A2, B2, in(11), 13); \ + GG2(B2, C2, D2, A2, in( 8), 12); \ + \ + HH2(A2, B2, C2, D2, in( 3), 11); \ + HH2(D2, A2, B2, C2, in(10), 13); \ + HH2(C2, D2, A2, B2, in( 2), 14); \ + HH2(B2, C2, D2, A2, in( 4), 7); \ + HH2(A2, B2, C2, D2, in( 9), 14); \ + HH2(D2, A2, B2, C2, in(15), 9); \ + HH2(C2, D2, A2, B2, in( 8), 13); \ + HH2(B2, C2, D2, A2, in( 1), 15); \ + HH2(A2, B2, C2, D2, in(14), 6); \ + HH2(D2, A2, B2, C2, in( 7), 8); \ + HH2(C2, D2, A2, B2, in( 0), 13); \ + HH2(B2, C2, D2, A2, in( 6), 6); \ + HH2(A2, B2, C2, D2, in(11), 12); \ + HH2(D2, A2, B2, C2, in(13), 5); \ + HH2(C2, D2, A2, B2, in( 5), 7); \ + HH2(B2, C2, D2, A2, in(12), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD. The data must be aligned for 32-bit access. + */ +static void +ripemd_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD_IN(x) X_var[x] + +#endif + RIPEMD_ROUND_BODY(RIPEMD_IN, r); +#undef RIPEMD_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd_init(void *cc) +{ + sph_ripemd_context *sc; + + sc = cc; + memcpy(sc->val, oIV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd_round +#define HASH ripemd +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd_close(void *cc, void *dst) +{ + ripemd_close(cc, dst, 4); + sph_ripemd_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD_IN(x) msg[x] + RIPEMD_ROUND_BODY(RIPEMD_IN, val); +#undef RIPEMD_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-128. + */ + +/* + * Round constants for RIPEMD-128. + */ +#define sK11 SPH_C32(0x00000000) +#define sK12 SPH_C32(0x5A827999) +#define sK13 SPH_C32(0x6ED9EBA1) +#define sK14 SPH_C32(0x8F1BBCDC) + +#define sK21 SPH_C32(0x50A28BE6) +#define sK22 SPH_C32(0x5C4DD124) +#define sK23 SPH_C32(0x6D703EF3) +#define sK24 SPH_C32(0x00000000) + +#define sRR(a, b, c, d, f, s, r, k) do { \ + a = ROTL(SPH_T32(a + f(b, c, d) + r + k), s); \ + } while (0) + +#define sROUND1(a, b, c, d, f, s, r, k) \ + sRR(a ## 1, b ## 1, c ## 1, d ## 1, f, s, r, sK1 ## k) + +#define sROUND2(a, b, c, d, f, s, r, k) \ + sRR(a ## 2, b ## 2, c ## 2, d ## 2, f, s, r, sK2 ## k) + +/* + * This macro defines the body for a RIPEMD-128 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 4 words which + * contains the input and output of the compression function. + */ + +#define RIPEMD128_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1; \ + sph_u32 A2, B2, C2, D2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + \ + sROUND1(A, B, C, D, F1, 11, in( 0), 1); \ + sROUND1(D, A, B, C, F1, 14, in( 1), 1); \ + sROUND1(C, D, A, B, F1, 15, in( 2), 1); \ + sROUND1(B, C, D, A, F1, 12, in( 3), 1); \ + sROUND1(A, B, C, D, F1, 5, in( 4), 1); \ + sROUND1(D, A, B, C, F1, 8, in( 5), 1); \ + sROUND1(C, D, A, B, F1, 7, in( 6), 1); \ + sROUND1(B, C, D, A, F1, 9, in( 7), 1); \ + sROUND1(A, B, C, D, F1, 11, in( 8), 1); \ + sROUND1(D, A, B, C, F1, 13, in( 9), 1); \ + sROUND1(C, D, A, B, F1, 14, in(10), 1); \ + sROUND1(B, C, D, A, F1, 15, in(11), 1); \ + sROUND1(A, B, C, D, F1, 6, in(12), 1); \ + sROUND1(D, A, B, C, F1, 7, in(13), 1); \ + sROUND1(C, D, A, B, F1, 9, in(14), 1); \ + sROUND1(B, C, D, A, F1, 8, in(15), 1); \ + \ + sROUND1(A, B, C, D, F2, 7, in( 7), 2); \ + sROUND1(D, A, B, C, F2, 6, in( 4), 2); \ + sROUND1(C, D, A, B, F2, 8, in(13), 2); \ + sROUND1(B, C, D, A, F2, 13, in( 1), 2); \ + sROUND1(A, B, C, D, F2, 11, in(10), 2); \ + sROUND1(D, A, B, C, F2, 9, in( 6), 2); \ + sROUND1(C, D, A, B, F2, 7, in(15), 2); \ + sROUND1(B, C, D, A, F2, 15, in( 3), 2); \ + sROUND1(A, B, C, D, F2, 7, in(12), 2); \ + sROUND1(D, A, B, C, F2, 12, in( 0), 2); \ + sROUND1(C, D, A, B, F2, 15, in( 9), 2); \ + sROUND1(B, C, D, A, F2, 9, in( 5), 2); \ + sROUND1(A, B, C, D, F2, 11, in( 2), 2); \ + sROUND1(D, A, B, C, F2, 7, in(14), 2); \ + sROUND1(C, D, A, B, F2, 13, in(11), 2); \ + sROUND1(B, C, D, A, F2, 12, in( 8), 2); \ + \ + sROUND1(A, B, C, D, F3, 11, in( 3), 3); \ + sROUND1(D, A, B, C, F3, 13, in(10), 3); \ + sROUND1(C, D, A, B, F3, 6, in(14), 3); \ + sROUND1(B, C, D, A, F3, 7, in( 4), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 9), 3); \ + sROUND1(D, A, B, C, F3, 9, in(15), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 8), 3); \ + sROUND1(B, C, D, A, F3, 15, in( 1), 3); \ + sROUND1(A, B, C, D, F3, 14, in( 2), 3); \ + sROUND1(D, A, B, C, F3, 8, in( 7), 3); \ + sROUND1(C, D, A, B, F3, 13, in( 0), 3); \ + sROUND1(B, C, D, A, F3, 6, in( 6), 3); \ + sROUND1(A, B, C, D, F3, 5, in(13), 3); \ + sROUND1(D, A, B, C, F3, 12, in(11), 3); \ + sROUND1(C, D, A, B, F3, 7, in( 5), 3); \ + sROUND1(B, C, D, A, F3, 5, in(12), 3); \ + \ + sROUND1(A, B, C, D, F4, 11, in( 1), 4); \ + sROUND1(D, A, B, C, F4, 12, in( 9), 4); \ + sROUND1(C, D, A, B, F4, 14, in(11), 4); \ + sROUND1(B, C, D, A, F4, 15, in(10), 4); \ + sROUND1(A, B, C, D, F4, 14, in( 0), 4); \ + sROUND1(D, A, B, C, F4, 15, in( 8), 4); \ + sROUND1(C, D, A, B, F4, 9, in(12), 4); \ + sROUND1(B, C, D, A, F4, 8, in( 4), 4); \ + sROUND1(A, B, C, D, F4, 9, in(13), 4); \ + sROUND1(D, A, B, C, F4, 14, in( 3), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 7), 4); \ + sROUND1(B, C, D, A, F4, 6, in(15), 4); \ + sROUND1(A, B, C, D, F4, 8, in(14), 4); \ + sROUND1(D, A, B, C, F4, 6, in( 5), 4); \ + sROUND1(C, D, A, B, F4, 5, in( 6), 4); \ + sROUND1(B, C, D, A, F4, 12, in( 2), 4); \ + \ + sROUND2(A, B, C, D, F4, 8, in( 5), 1); \ + sROUND2(D, A, B, C, F4, 9, in(14), 1); \ + sROUND2(C, D, A, B, F4, 9, in( 7), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 0), 1); \ + sROUND2(A, B, C, D, F4, 13, in( 9), 1); \ + sROUND2(D, A, B, C, F4, 15, in( 2), 1); \ + sROUND2(C, D, A, B, F4, 15, in(11), 1); \ + sROUND2(B, C, D, A, F4, 5, in( 4), 1); \ + sROUND2(A, B, C, D, F4, 7, in(13), 1); \ + sROUND2(D, A, B, C, F4, 7, in( 6), 1); \ + sROUND2(C, D, A, B, F4, 8, in(15), 1); \ + sROUND2(B, C, D, A, F4, 11, in( 8), 1); \ + sROUND2(A, B, C, D, F4, 14, in( 1), 1); \ + sROUND2(D, A, B, C, F4, 14, in(10), 1); \ + sROUND2(C, D, A, B, F4, 12, in( 3), 1); \ + sROUND2(B, C, D, A, F4, 6, in(12), 1); \ + \ + sROUND2(A, B, C, D, F3, 9, in( 6), 2); \ + sROUND2(D, A, B, C, F3, 13, in(11), 2); \ + sROUND2(C, D, A, B, F3, 15, in( 3), 2); \ + sROUND2(B, C, D, A, F3, 7, in( 7), 2); \ + sROUND2(A, B, C, D, F3, 12, in( 0), 2); \ + sROUND2(D, A, B, C, F3, 8, in(13), 2); \ + sROUND2(C, D, A, B, F3, 9, in( 5), 2); \ + sROUND2(B, C, D, A, F3, 11, in(10), 2); \ + sROUND2(A, B, C, D, F3, 7, in(14), 2); \ + sROUND2(D, A, B, C, F3, 7, in(15), 2); \ + sROUND2(C, D, A, B, F3, 12, in( 8), 2); \ + sROUND2(B, C, D, A, F3, 7, in(12), 2); \ + sROUND2(A, B, C, D, F3, 6, in( 4), 2); \ + sROUND2(D, A, B, C, F3, 15, in( 9), 2); \ + sROUND2(C, D, A, B, F3, 13, in( 1), 2); \ + sROUND2(B, C, D, A, F3, 11, in( 2), 2); \ + \ + sROUND2(A, B, C, D, F2, 9, in(15), 3); \ + sROUND2(D, A, B, C, F2, 7, in( 5), 3); \ + sROUND2(C, D, A, B, F2, 15, in( 1), 3); \ + sROUND2(B, C, D, A, F2, 11, in( 3), 3); \ + sROUND2(A, B, C, D, F2, 8, in( 7), 3); \ + sROUND2(D, A, B, C, F2, 6, in(14), 3); \ + sROUND2(C, D, A, B, F2, 6, in( 6), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 9), 3); \ + sROUND2(A, B, C, D, F2, 12, in(11), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 8), 3); \ + sROUND2(C, D, A, B, F2, 5, in(12), 3); \ + sROUND2(B, C, D, A, F2, 14, in( 2), 3); \ + sROUND2(A, B, C, D, F2, 13, in(10), 3); \ + sROUND2(D, A, B, C, F2, 13, in( 0), 3); \ + sROUND2(C, D, A, B, F2, 7, in( 4), 3); \ + sROUND2(B, C, D, A, F2, 5, in(13), 3); \ + \ + sROUND2(A, B, C, D, F1, 15, in( 8), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 6), 4); \ + sROUND2(C, D, A, B, F1, 8, in( 4), 4); \ + sROUND2(B, C, D, A, F1, 11, in( 1), 4); \ + sROUND2(A, B, C, D, F1, 14, in( 3), 4); \ + sROUND2(D, A, B, C, F1, 14, in(11), 4); \ + sROUND2(C, D, A, B, F1, 6, in(15), 4); \ + sROUND2(B, C, D, A, F1, 14, in( 0), 4); \ + sROUND2(A, B, C, D, F1, 6, in( 5), 4); \ + sROUND2(D, A, B, C, F1, 9, in(12), 4); \ + sROUND2(C, D, A, B, F1, 12, in( 2), 4); \ + sROUND2(B, C, D, A, F1, 9, in(13), 4); \ + sROUND2(A, B, C, D, F1, 12, in( 9), 4); \ + sROUND2(D, A, B, C, F1, 5, in( 7), 4); \ + sROUND2(C, D, A, B, F1, 15, in(10), 4); \ + sROUND2(B, C, D, A, F1, 8, in(14), 4); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + A2); \ + (h)[2] = SPH_T32((h)[3] + A1 + B2); \ + (h)[3] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-128. The data must be aligned for 32-bit access. + */ +static void +ripemd128_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD128_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD128_IN(x) X_var[x] + +#endif + RIPEMD128_ROUND_BODY(RIPEMD128_IN, r); +#undef RIPEMD128_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_init(void *cc) +{ + sph_ripemd128_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd128_round +#define HASH ripemd128 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd128_close(void *cc, void *dst) +{ + ripemd128_close(cc, dst, 4); + sph_ripemd128_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd128_comp(const sph_u32 msg[16], sph_u32 val[4]) +{ +#define RIPEMD128_IN(x) msg[x] + RIPEMD128_ROUND_BODY(RIPEMD128_IN, val); +#undef RIPEMD128_IN +} + +/* ===================================================================== */ +/* + * RIPEMD-160. + */ + +/* + * Round constants for RIPEMD-160. + */ +#define K11 SPH_C32(0x00000000) +#define K12 SPH_C32(0x5A827999) +#define K13 SPH_C32(0x6ED9EBA1) +#define K14 SPH_C32(0x8F1BBCDC) +#define K15 SPH_C32(0xA953FD4E) + +#define K21 SPH_C32(0x50A28BE6) +#define K22 SPH_C32(0x5C4DD124) +#define K23 SPH_C32(0x6D703EF3) +#define K24 SPH_C32(0x7A6D76E9) +#define K25 SPH_C32(0x00000000) + +#define RR(a, b, c, d, e, f, s, r, k) do { \ + a = SPH_T32(ROTL(SPH_T32(a + f(b, c, d) + r + k), s) + e); \ + c = ROTL(c, 10); \ + } while (0) + +#define ROUND1(a, b, c, d, e, f, s, r, k) \ + RR(a ## 1, b ## 1, c ## 1, d ## 1, e ## 1, f, s, r, K1 ## k) + +#define ROUND2(a, b, c, d, e, f, s, r, k) \ + RR(a ## 2, b ## 2, c ## 2, d ## 2, e ## 2, f, s, r, K2 ## k) + +/* + * This macro defines the body for a RIPEMD-160 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "h" parameter should evaluate to + * an array or pointer expression designating the array of 5 words which + * contains the input and output of the compression function. + */ + +#define RIPEMD160_ROUND_BODY(in, h) do { \ + sph_u32 A1, B1, C1, D1, E1; \ + sph_u32 A2, B2, C2, D2, E2; \ + sph_u32 tmp; \ + \ + A1 = A2 = (h)[0]; \ + B1 = B2 = (h)[1]; \ + C1 = C2 = (h)[2]; \ + D1 = D2 = (h)[3]; \ + E1 = E2 = (h)[4]; \ + \ + ROUND1(A, B, C, D, E, F1, 11, in( 0), 1); \ + ROUND1(E, A, B, C, D, F1, 14, in( 1), 1); \ + ROUND1(D, E, A, B, C, F1, 15, in( 2), 1); \ + ROUND1(C, D, E, A, B, F1, 12, in( 3), 1); \ + ROUND1(B, C, D, E, A, F1, 5, in( 4), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in( 5), 1); \ + ROUND1(E, A, B, C, D, F1, 7, in( 6), 1); \ + ROUND1(D, E, A, B, C, F1, 9, in( 7), 1); \ + ROUND1(C, D, E, A, B, F1, 11, in( 8), 1); \ + ROUND1(B, C, D, E, A, F1, 13, in( 9), 1); \ + ROUND1(A, B, C, D, E, F1, 14, in(10), 1); \ + ROUND1(E, A, B, C, D, F1, 15, in(11), 1); \ + ROUND1(D, E, A, B, C, F1, 6, in(12), 1); \ + ROUND1(C, D, E, A, B, F1, 7, in(13), 1); \ + ROUND1(B, C, D, E, A, F1, 9, in(14), 1); \ + ROUND1(A, B, C, D, E, F1, 8, in(15), 1); \ + \ + ROUND1(E, A, B, C, D, F2, 7, in( 7), 2); \ + ROUND1(D, E, A, B, C, F2, 6, in( 4), 2); \ + ROUND1(C, D, E, A, B, F2, 8, in(13), 2); \ + ROUND1(B, C, D, E, A, F2, 13, in( 1), 2); \ + ROUND1(A, B, C, D, E, F2, 11, in(10), 2); \ + ROUND1(E, A, B, C, D, F2, 9, in( 6), 2); \ + ROUND1(D, E, A, B, C, F2, 7, in(15), 2); \ + ROUND1(C, D, E, A, B, F2, 15, in( 3), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(12), 2); \ + ROUND1(A, B, C, D, E, F2, 12, in( 0), 2); \ + ROUND1(E, A, B, C, D, F2, 15, in( 9), 2); \ + ROUND1(D, E, A, B, C, F2, 9, in( 5), 2); \ + ROUND1(C, D, E, A, B, F2, 11, in( 2), 2); \ + ROUND1(B, C, D, E, A, F2, 7, in(14), 2); \ + ROUND1(A, B, C, D, E, F2, 13, in(11), 2); \ + ROUND1(E, A, B, C, D, F2, 12, in( 8), 2); \ + \ + ROUND1(D, E, A, B, C, F3, 11, in( 3), 3); \ + ROUND1(C, D, E, A, B, F3, 13, in(10), 3); \ + ROUND1(B, C, D, E, A, F3, 6, in(14), 3); \ + ROUND1(A, B, C, D, E, F3, 7, in( 4), 3); \ + ROUND1(E, A, B, C, D, F3, 14, in( 9), 3); \ + ROUND1(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND1(C, D, E, A, B, F3, 13, in( 8), 3); \ + ROUND1(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND1(A, B, C, D, E, F3, 14, in( 2), 3); \ + ROUND1(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND1(D, E, A, B, C, F3, 13, in( 0), 3); \ + ROUND1(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND1(B, C, D, E, A, F3, 5, in(13), 3); \ + ROUND1(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND1(E, A, B, C, D, F3, 7, in( 5), 3); \ + ROUND1(D, E, A, B, C, F3, 5, in(12), 3); \ + \ + ROUND1(C, D, E, A, B, F4, 11, in( 1), 4); \ + ROUND1(B, C, D, E, A, F4, 12, in( 9), 4); \ + ROUND1(A, B, C, D, E, F4, 14, in(11), 4); \ + ROUND1(E, A, B, C, D, F4, 15, in(10), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 0), 4); \ + ROUND1(C, D, E, A, B, F4, 15, in( 8), 4); \ + ROUND1(B, C, D, E, A, F4, 9, in(12), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in( 4), 4); \ + ROUND1(E, A, B, C, D, F4, 9, in(13), 4); \ + ROUND1(D, E, A, B, C, F4, 14, in( 3), 4); \ + ROUND1(C, D, E, A, B, F4, 5, in( 7), 4); \ + ROUND1(B, C, D, E, A, F4, 6, in(15), 4); \ + ROUND1(A, B, C, D, E, F4, 8, in(14), 4); \ + ROUND1(E, A, B, C, D, F4, 6, in( 5), 4); \ + ROUND1(D, E, A, B, C, F4, 5, in( 6), 4); \ + ROUND1(C, D, E, A, B, F4, 12, in( 2), 4); \ + \ + ROUND1(B, C, D, E, A, F5, 9, in( 4), 5); \ + ROUND1(A, B, C, D, E, F5, 15, in( 0), 5); \ + ROUND1(E, A, B, C, D, F5, 5, in( 5), 5); \ + ROUND1(D, E, A, B, C, F5, 11, in( 9), 5); \ + ROUND1(C, D, E, A, B, F5, 6, in( 7), 5); \ + ROUND1(B, C, D, E, A, F5, 8, in(12), 5); \ + ROUND1(A, B, C, D, E, F5, 13, in( 2), 5); \ + ROUND1(E, A, B, C, D, F5, 12, in(10), 5); \ + ROUND1(D, E, A, B, C, F5, 5, in(14), 5); \ + ROUND1(C, D, E, A, B, F5, 12, in( 1), 5); \ + ROUND1(B, C, D, E, A, F5, 13, in( 3), 5); \ + ROUND1(A, B, C, D, E, F5, 14, in( 8), 5); \ + ROUND1(E, A, B, C, D, F5, 11, in(11), 5); \ + ROUND1(D, E, A, B, C, F5, 8, in( 6), 5); \ + ROUND1(C, D, E, A, B, F5, 5, in(15), 5); \ + ROUND1(B, C, D, E, A, F5, 6, in(13), 5); \ + \ + ROUND2(A, B, C, D, E, F5, 8, in( 5), 1); \ + ROUND2(E, A, B, C, D, F5, 9, in(14), 1); \ + ROUND2(D, E, A, B, C, F5, 9, in( 7), 1); \ + ROUND2(C, D, E, A, B, F5, 11, in( 0), 1); \ + ROUND2(B, C, D, E, A, F5, 13, in( 9), 1); \ + ROUND2(A, B, C, D, E, F5, 15, in( 2), 1); \ + ROUND2(E, A, B, C, D, F5, 15, in(11), 1); \ + ROUND2(D, E, A, B, C, F5, 5, in( 4), 1); \ + ROUND2(C, D, E, A, B, F5, 7, in(13), 1); \ + ROUND2(B, C, D, E, A, F5, 7, in( 6), 1); \ + ROUND2(A, B, C, D, E, F5, 8, in(15), 1); \ + ROUND2(E, A, B, C, D, F5, 11, in( 8), 1); \ + ROUND2(D, E, A, B, C, F5, 14, in( 1), 1); \ + ROUND2(C, D, E, A, B, F5, 14, in(10), 1); \ + ROUND2(B, C, D, E, A, F5, 12, in( 3), 1); \ + ROUND2(A, B, C, D, E, F5, 6, in(12), 1); \ + \ + ROUND2(E, A, B, C, D, F4, 9, in( 6), 2); \ + ROUND2(D, E, A, B, C, F4, 13, in(11), 2); \ + ROUND2(C, D, E, A, B, F4, 15, in( 3), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in( 7), 2); \ + ROUND2(A, B, C, D, E, F4, 12, in( 0), 2); \ + ROUND2(E, A, B, C, D, F4, 8, in(13), 2); \ + ROUND2(D, E, A, B, C, F4, 9, in( 5), 2); \ + ROUND2(C, D, E, A, B, F4, 11, in(10), 2); \ + ROUND2(B, C, D, E, A, F4, 7, in(14), 2); \ + ROUND2(A, B, C, D, E, F4, 7, in(15), 2); \ + ROUND2(E, A, B, C, D, F4, 12, in( 8), 2); \ + ROUND2(D, E, A, B, C, F4, 7, in(12), 2); \ + ROUND2(C, D, E, A, B, F4, 6, in( 4), 2); \ + ROUND2(B, C, D, E, A, F4, 15, in( 9), 2); \ + ROUND2(A, B, C, D, E, F4, 13, in( 1), 2); \ + ROUND2(E, A, B, C, D, F4, 11, in( 2), 2); \ + \ + ROUND2(D, E, A, B, C, F3, 9, in(15), 3); \ + ROUND2(C, D, E, A, B, F3, 7, in( 5), 3); \ + ROUND2(B, C, D, E, A, F3, 15, in( 1), 3); \ + ROUND2(A, B, C, D, E, F3, 11, in( 3), 3); \ + ROUND2(E, A, B, C, D, F3, 8, in( 7), 3); \ + ROUND2(D, E, A, B, C, F3, 6, in(14), 3); \ + ROUND2(C, D, E, A, B, F3, 6, in( 6), 3); \ + ROUND2(B, C, D, E, A, F3, 14, in( 9), 3); \ + ROUND2(A, B, C, D, E, F3, 12, in(11), 3); \ + ROUND2(E, A, B, C, D, F3, 13, in( 8), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(12), 3); \ + ROUND2(C, D, E, A, B, F3, 14, in( 2), 3); \ + ROUND2(B, C, D, E, A, F3, 13, in(10), 3); \ + ROUND2(A, B, C, D, E, F3, 13, in( 0), 3); \ + ROUND2(E, A, B, C, D, F3, 7, in( 4), 3); \ + ROUND2(D, E, A, B, C, F3, 5, in(13), 3); \ + \ + ROUND2(C, D, E, A, B, F2, 15, in( 8), 4); \ + ROUND2(B, C, D, E, A, F2, 5, in( 6), 4); \ + ROUND2(A, B, C, D, E, F2, 8, in( 4), 4); \ + ROUND2(E, A, B, C, D, F2, 11, in( 1), 4); \ + ROUND2(D, E, A, B, C, F2, 14, in( 3), 4); \ + ROUND2(C, D, E, A, B, F2, 14, in(11), 4); \ + ROUND2(B, C, D, E, A, F2, 6, in(15), 4); \ + ROUND2(A, B, C, D, E, F2, 14, in( 0), 4); \ + ROUND2(E, A, B, C, D, F2, 6, in( 5), 4); \ + ROUND2(D, E, A, B, C, F2, 9, in(12), 4); \ + ROUND2(C, D, E, A, B, F2, 12, in( 2), 4); \ + ROUND2(B, C, D, E, A, F2, 9, in(13), 4); \ + ROUND2(A, B, C, D, E, F2, 12, in( 9), 4); \ + ROUND2(E, A, B, C, D, F2, 5, in( 7), 4); \ + ROUND2(D, E, A, B, C, F2, 15, in(10), 4); \ + ROUND2(C, D, E, A, B, F2, 8, in(14), 4); \ + \ + ROUND2(B, C, D, E, A, F1, 8, in(12), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(15), 5); \ + ROUND2(E, A, B, C, D, F1, 12, in(10), 5); \ + ROUND2(D, E, A, B, C, F1, 9, in( 4), 5); \ + ROUND2(C, D, E, A, B, F1, 12, in( 1), 5); \ + ROUND2(B, C, D, E, A, F1, 5, in( 5), 5); \ + ROUND2(A, B, C, D, E, F1, 14, in( 8), 5); \ + ROUND2(E, A, B, C, D, F1, 6, in( 7), 5); \ + ROUND2(D, E, A, B, C, F1, 8, in( 6), 5); \ + ROUND2(C, D, E, A, B, F1, 13, in( 2), 5); \ + ROUND2(B, C, D, E, A, F1, 6, in(13), 5); \ + ROUND2(A, B, C, D, E, F1, 5, in(14), 5); \ + ROUND2(E, A, B, C, D, F1, 15, in( 0), 5); \ + ROUND2(D, E, A, B, C, F1, 13, in( 3), 5); \ + ROUND2(C, D, E, A, B, F1, 11, in( 9), 5); \ + ROUND2(B, C, D, E, A, F1, 11, in(11), 5); \ + \ + tmp = SPH_T32((h)[1] + C1 + D2); \ + (h)[1] = SPH_T32((h)[2] + D1 + E2); \ + (h)[2] = SPH_T32((h)[3] + E1 + A2); \ + (h)[3] = SPH_T32((h)[4] + A1 + B2); \ + (h)[4] = SPH_T32((h)[0] + B1 + C2); \ + (h)[0] = tmp; \ + } while (0) + +/* + * One round of RIPEMD-160. The data must be aligned for 32-bit access. + */ +static void +ripemd160_round(const unsigned char *data, sph_u32 r[5]) +{ +#if SPH_LITTLE_FAST + +#define RIPEMD160_IN(x) sph_dec32le_aligned(data + (4 * (x))) + +#else + + sph_u32 X_var[16]; + int i; + + for (i = 0; i < 16; i ++) + X_var[i] = sph_dec32le_aligned(data + 4 * i); +#define RIPEMD160_IN(x) X_var[x] + +#endif + RIPEMD160_ROUND_BODY(RIPEMD160_IN, r); +#undef RIPEMD160_IN +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_init(void *cc) +{ + sph_ripemd160_context *sc; + + sc = cc; + memcpy(sc->val, IV, sizeof sc->val); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN ripemd160_round +#define HASH ripemd160 +#define LE32 1 +#include "md_helper.c" +#undef RFUN +#undef HASH +#undef LE32 + +/* see sph_ripemd.h */ +void +sph_ripemd160_close(void *cc, void *dst) +{ + ripemd160_close(cc, dst, 5); + sph_ripemd160_init(cc); +} + +/* see sph_ripemd.h */ +void +sph_ripemd160_comp(const sph_u32 msg[16], sph_u32 val[5]) +{ +#define RIPEMD160_IN(x) msg[x] + RIPEMD160_ROUND_BODY(RIPEMD160_IN, val); +#undef RIPEMD160_IN +} + +#ifdef __cplusplus +} +#endif diff --git a/sph_sha2.c b/sph_sha2.c new file mode 100644 index 0000000..a54dbf5 --- /dev/null +++ b/sph_sha2.c @@ -0,0 +1,698 @@ +/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * SHA-224 / SHA-256 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_sha2.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2 +#define SPH_SMALL_FOOTPRINT_SHA2 1 +#endif + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X))) + +#define ROTR SPH_ROTR32 + +#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22)) +#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25)) +#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3)) +#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10)) + +static const sph_u32 H224[8] = { + SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17), + SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511), + SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4) +}; + +static const sph_u32 H256[8] = { + SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372), + SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C), + SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19) +}; + +/* + * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256 + * compression function implementation. The "in" parameter should + * evaluate, when applied to a numerical input parameter from 0 to 15, + * to an expression which yields the corresponding input block. The "r" + * parameter should evaluate to an array or pointer expression + * designating the array of 8 words which contains the input and output + * of the compression function. + */ + +#if SPH_SMALL_FOOTPRINT_SHA2 + +static const sph_u32 K[64] = { + SPH_C32(0x428A2F98), SPH_C32(0x71374491), + SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5), + SPH_C32(0x3956C25B), SPH_C32(0x59F111F1), + SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5), + SPH_C32(0xD807AA98), SPH_C32(0x12835B01), + SPH_C32(0x243185BE), SPH_C32(0x550C7DC3), + SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE), + SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174), + SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786), + SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC), + SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA), + SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA), + SPH_C32(0x983E5152), SPH_C32(0xA831C66D), + SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7), + SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147), + SPH_C32(0x06CA6351), SPH_C32(0x14292967), + SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138), + SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13), + SPH_C32(0x650A7354), SPH_C32(0x766A0ABB), + SPH_C32(0x81C2C92E), SPH_C32(0x92722C85), + SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B), + SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3), + SPH_C32(0xD192E819), SPH_C32(0xD6990624), + SPH_C32(0xF40E3585), SPH_C32(0x106AA070), + SPH_C32(0x19A4C116), SPH_C32(0x1E376C08), + SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5), + SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A), + SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3), + SPH_C32(0x748F82EE), SPH_C32(0x78A5636F), + SPH_C32(0x84C87814), SPH_C32(0x8CC70208), + SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB), + SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2) +}; + +#define SHA2_MEXP1(in, pc) do { \ + W[pc] = in(pc); \ + } while (0) + +#define SHA2_MEXP2(in, pc) do { \ + W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \ + + W[((pc) - 7) & 0x0F] \ + + SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \ + } while (0) + +#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \ + sph_u32 t1, t2; \ + SHA2_MEXP ## n(in, pc); \ + t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \ + + K[pcount + (pc)] + W[(pc) & 0x0F]); \ + t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \ + d = SPH_T32(d + t1); \ + h = SPH_T32(t1 + t2); \ + } while (0) + +#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \ + SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc) +#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \ + SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc) + +#define SHA2_ROUND_BODY(in, r) do { \ + sph_u32 A, B, C, D, E, F, G, H; \ + sph_u32 W[16]; \ + unsigned pcount; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + pcount = 0; \ + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \ + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \ + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \ + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \ + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \ + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \ + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \ + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \ + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \ + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \ + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \ + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \ + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \ + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \ + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \ + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \ + for (pcount = 16; pcount < 64; pcount += 16) { \ + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \ + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \ + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \ + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \ + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \ + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \ + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \ + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \ + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \ + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \ + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \ + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \ + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \ + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \ + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \ + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \ + } \ + (r)[0] = SPH_T32((r)[0] + A); \ + (r)[1] = SPH_T32((r)[1] + B); \ + (r)[2] = SPH_T32((r)[2] + C); \ + (r)[3] = SPH_T32((r)[3] + D); \ + (r)[4] = SPH_T32((r)[4] + E); \ + (r)[5] = SPH_T32((r)[5] + F); \ + (r)[6] = SPH_T32((r)[6] + G); \ + (r)[7] = SPH_T32((r)[7] + H); \ + } while (0) + +#else + +#define SHA2_ROUND_BODY(in, r) do { \ + sph_u32 A, B, C, D, E, F, G, H, T1, T2; \ + sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \ + sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \ + \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + W00 = in(0); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x428A2F98) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = in(1); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x71374491) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = in(2); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xB5C0FBCF) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = in(3); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xE9B5DBA5) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = in(4); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x3956C25B) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = in(5); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x59F111F1) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = in(6); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x923F82A4) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = in(7); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xAB1C5ED5) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = in(8); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xD807AA98) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = in(9); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x12835B01) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = in(10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x243185BE) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = in(11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x550C7DC3) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = in(12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x72BE5D74) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = in(13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x80DEB1FE) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = in(14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x9BDC06A7) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = in(15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xC19BF174) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xE49B69C1) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0xEFBE4786) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x0FC19DC6) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x240CA1CC) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x2DE92C6F) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x4A7484AA) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x5CB0A9DC) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x76F988DA) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x983E5152) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0xA831C66D) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xB00327C8) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xBF597FC7) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0xC6E00BF3) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xD5A79147) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x06CA6351) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x14292967) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x27B70A85) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x2E1B2138) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x4D2C6DFC) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x53380D13) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x650A7354) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x766A0ABB) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x81C2C92E) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x92722C85) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0xA2BFE8A1) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0xA81A664B) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0xC24B8B70) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0xC76C51A3) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0xD192E819) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xD6990624) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0xF40E3585) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x106AA070) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x19A4C116) + W00); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x1E376C08) + W01); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x2748774C) + W02); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x34B0BCB5) + W03); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x391C0CB3) + W04); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0x4ED8AA4A) + W05); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0x5B9CCA4F) + W06); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0x682E6FF3) + W07); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \ + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \ + + SPH_C32(0x748F82EE) + W08); \ + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \ + D = SPH_T32(D + T1); \ + H = SPH_T32(T1 + T2); \ + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \ + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \ + + SPH_C32(0x78A5636F) + W09); \ + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \ + C = SPH_T32(C + T1); \ + G = SPH_T32(T1 + T2); \ + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \ + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \ + + SPH_C32(0x84C87814) + W10); \ + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \ + B = SPH_T32(B + T1); \ + F = SPH_T32(T1 + T2); \ + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \ + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \ + + SPH_C32(0x8CC70208) + W11); \ + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \ + A = SPH_T32(A + T1); \ + E = SPH_T32(T1 + T2); \ + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \ + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \ + + SPH_C32(0x90BEFFFA) + W12); \ + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \ + H = SPH_T32(H + T1); \ + D = SPH_T32(T1 + T2); \ + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \ + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \ + + SPH_C32(0xA4506CEB) + W13); \ + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \ + G = SPH_T32(G + T1); \ + C = SPH_T32(T1 + T2); \ + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \ + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \ + + SPH_C32(0xBEF9A3F7) + W14); \ + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \ + F = SPH_T32(F + T1); \ + B = SPH_T32(T1 + T2); \ + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \ + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \ + + SPH_C32(0xC67178F2) + W15); \ + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \ + E = SPH_T32(E + T1); \ + A = SPH_T32(T1 + T2); \ + (r)[0] = SPH_T32((r)[0] + A); \ + (r)[1] = SPH_T32((r)[1] + B); \ + (r)[2] = SPH_T32((r)[2] + C); \ + (r)[3] = SPH_T32((r)[3] + D); \ + (r)[4] = SPH_T32((r)[4] + E); \ + (r)[5] = SPH_T32((r)[5] + F); \ + (r)[6] = SPH_T32((r)[6] + G); \ + (r)[7] = SPH_T32((r)[7] + H); \ + } while (0) + +#endif + +/* + * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access. + */ +static void +sha2_round(const unsigned char *data, sph_u32 r[8]) +{ +#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x))) + SHA2_ROUND_BODY(SHA2_IN, r); +#undef SHA2_IN +} + +/* see sph_sha2.h */ +void +sph_sha224_init(void *cc) +{ + sph_sha224_context *sc; + + sc = cc; + memcpy(sc->val, H224, sizeof H224); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +/* see sph_sha2.h */ +void +sph_sha256_init(void *cc) +{ + sph_sha256_context *sc; + + sc = cc; + memcpy(sc->val, H256, sizeof H256); +#if SPH_64 + sc->count = 0; +#else + sc->count_high = sc->count_low = 0; +#endif +} + +#define RFUN sha2_round +#define HASH sha224 +#define BE32 1 +#include "md_helper.c" + +/* see sph_sha2.h */ +void +sph_sha224_close(void *cc, void *dst) +{ + sha224_close(cc, dst, 7); + sph_sha224_init(cc); +} + +/* see sph_sha2.h */ +void +sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha224_addbits_and_close(cc, ub, n, dst, 7); + sph_sha224_init(cc); +} + +/* see sph_sha2.h */ +void +sph_sha256_close(void *cc, void *dst) +{ + sha224_close(cc, dst, 8); + sph_sha256_init(cc); +} + +/* see sph_sha2.h */ +void +sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha224_addbits_and_close(cc, ub, n, dst, 8); + sph_sha256_init(cc); +} + +/* see sph_sha2.h */ +void +sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]) +{ +#define SHA2_IN(x) msg[x] + SHA2_ROUND_BODY(SHA2_IN, val); +#undef SHA2_IN +} + +#ifdef __cplusplus +} +#endif diff --git a/sph_sha2big.c b/sph_sha2big.c new file mode 100644 index 0000000..2d8466d --- /dev/null +++ b/sph_sha2big.c @@ -0,0 +1,255 @@ +/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * SHA-384 / SHA-512 implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_sha2.h" + +#ifdef __cplusplus +extern "C"{ +#endif + +#if SPH_64 + +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define MAJ(X, Y, Z) (((X) & (Y)) | (((X) | (Y)) & (Z))) + +#define ROTR64 SPH_ROTR64 + +#define BSG5_0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) +#define BSG5_1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) +#define SSG5_0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7)) +#define SSG5_1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6)) + +static const sph_u64 K512[80] = { + SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD), + SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC), + SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019), + SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118), + SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE), + SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2), + SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1), + SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694), + SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3), + SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65), + SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483), + SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5), + SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210), + SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4), + SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725), + SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70), + SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926), + SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF), + SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8), + SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B), + SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001), + SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30), + SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910), + SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8), + SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53), + SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8), + SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB), + SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3), + SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60), + SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC), + SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9), + SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B), + SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207), + SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178), + SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6), + SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B), + SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493), + SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C), + SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A), + SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817) +}; + +static const sph_u64 H384[8] = { + SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507), + SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939), + SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511), + SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4) +}; + +static const sph_u64 H512[8] = { + SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B), + SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1), + SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F), + SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179) +}; + +/* + * This macro defines the body for a SHA-384 / SHA-512 compression function + * implementation. The "in" parameter should evaluate, when applied to a + * numerical input parameter from 0 to 15, to an expression which yields + * the corresponding input block. The "r" parameter should evaluate to + * an array or pointer expression designating the array of 8 words which + * contains the input and output of the compression function. + * + * SHA-512 is hard for the compiler. If the loop is completely unrolled, + * then the code will be quite huge (possibly more than 100 kB), and the + * performance will be degraded due to cache misses on the code. We + * unroll only eight steps, which avoids all needless copies when + * 64-bit registers are swapped. + */ + +#define SHA3_STEP(A, B, C, D, E, F, G, H, i) do { \ + sph_u64 T1, T2; \ + T1 = SPH_T64(H + BSG5_1(E) + CH(E, F, G) + K512[i] + W[i]); \ + T2 = SPH_T64(BSG5_0(A) + MAJ(A, B, C)); \ + D = SPH_T64(D + T1); \ + H = SPH_T64(T1 + T2); \ + } while (0) + +#define SHA3_ROUND_BODY(in, r) do { \ + int i; \ + sph_u64 A, B, C, D, E, F, G, H; \ + sph_u64 W[80]; \ + \ + for (i = 0; i < 16; i ++) \ + W[i] = in(i); \ + for (i = 16; i < 80; i ++) \ + W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] \ + + SSG5_0(W[i - 15]) + W[i - 16]); \ + A = (r)[0]; \ + B = (r)[1]; \ + C = (r)[2]; \ + D = (r)[3]; \ + E = (r)[4]; \ + F = (r)[5]; \ + G = (r)[6]; \ + H = (r)[7]; \ + for (i = 0; i < 80; i += 8) { \ + SHA3_STEP(A, B, C, D, E, F, G, H, i + 0); \ + SHA3_STEP(H, A, B, C, D, E, F, G, i + 1); \ + SHA3_STEP(G, H, A, B, C, D, E, F, i + 2); \ + SHA3_STEP(F, G, H, A, B, C, D, E, i + 3); \ + SHA3_STEP(E, F, G, H, A, B, C, D, i + 4); \ + SHA3_STEP(D, E, F, G, H, A, B, C, i + 5); \ + SHA3_STEP(C, D, E, F, G, H, A, B, i + 6); \ + SHA3_STEP(B, C, D, E, F, G, H, A, i + 7); \ + } \ + (r)[0] = SPH_T64((r)[0] + A); \ + (r)[1] = SPH_T64((r)[1] + B); \ + (r)[2] = SPH_T64((r)[2] + C); \ + (r)[3] = SPH_T64((r)[3] + D); \ + (r)[4] = SPH_T64((r)[4] + E); \ + (r)[5] = SPH_T64((r)[5] + F); \ + (r)[6] = SPH_T64((r)[6] + G); \ + (r)[7] = SPH_T64((r)[7] + H); \ + } while (0) + +/* + * One round of SHA-384 / SHA-512. The data must be aligned for 64-bit access. + */ +static void +sha3_round(const unsigned char *data, sph_u64 r[8]) +{ +#define SHA3_IN(x) sph_dec64be_aligned(data + (8 * (x))) + SHA3_ROUND_BODY(SHA3_IN, r); +#undef SHA3_IN +} + +/* see sph_sha3.h */ +void +sph_sha384_init(void *cc) +{ + sph_sha384_context *sc; + + sc = cc; + memcpy(sc->val, H384, sizeof H384); + sc->count = 0; +} + +/* see sph_sha3.h */ +void +sph_sha512_init(void *cc) +{ + sph_sha512_context *sc; + + sc = cc; + memcpy(sc->val, H512, sizeof H512); + sc->count = 0; +} + +#define RFUN sha3_round +#define HASH sha384 +#define BE64 1 +#include "md_helper.c" + +/* see sph_sha3.h */ +void +sph_sha384_close(void *cc, void *dst) +{ + sha384_close(cc, dst, 6); + sph_sha384_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha384_addbits_and_close(cc, ub, n, dst, 6); + sph_sha384_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha512_close(void *cc, void *dst) +{ + sha384_close(cc, dst, 8); + sph_sha512_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst) +{ + sha384_addbits_and_close(cc, ub, n, dst, 8); + sph_sha512_init(cc); +} + +/* see sph_sha3.h */ +void +sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]) +{ +#define SHA3_IN(x) msg[x] + SHA3_ROUND_BODY(SHA3_IN, val); +#undef SHA3_IN +} + +#endif + +#ifdef __cplusplus +} +#endif