misc changes

git-svn-id: https://bitcoin.svn.sourceforge.net/svnroot/bitcoin/trunk@131 1a98c847-1fd6-4fd8-948a-caf3550aa51b
This commit is contained in:
s_nakamoto 2010-08-15 21:05:16 +00:00
parent 01cd2fdaf3
commit 4bd188c438
8 changed files with 538 additions and 52 deletions

View file

@ -373,6 +373,8 @@ bool AppInit2(int argc, char* argv[])
wxMessageBox(_("Invalid amount for -paytxfee=<amount>"), "Bitcoin");
return false;
}
if (nTransactionFee > 1 * COIN)
wxMessageBox(_("Warning: -paytxfee is set very high. This is the transaction fee you will pay if you send a transaction."), "Bitcoin");
}
//

View file

@ -538,7 +538,7 @@ bool CTransaction::AcceptTransaction(CTxDB& txdb, bool fCheckInputs, bool* pfMis
// Check against previous transactions
map<uint256, CTxIndex> mapUnused;
int64 nFees = 0;
if (fCheckInputs && !ConnectInputs(txdb, mapUnused, CDiskTxPos(1,1,1), 0, nFees, false, false))
if (fCheckInputs && !ConnectInputs(txdb, mapUnused, CDiskTxPos(1,1,1), pindexBest, nFees, false, false))
{
if (pfMissingInputs)
*pfMissingInputs = true;
@ -744,7 +744,7 @@ void ResendWalletTransactions()
if (GetTime() < nNextTime)
return;
bool fFirst = (nNextTime == 0);
nNextTime = GetTime() + GetRand(120 * 60);
nNextTime = GetTime() + GetRand(30 * 60);
if (fFirst)
return;
@ -760,7 +760,7 @@ void ResendWalletTransactions()
CWalletTx& wtx = item.second;
// Don't rebroadcast until it's had plenty of time that
// it should have gotten in already by now.
if (nTimeBestReceived - wtx.nTimeReceived > 60 * 60)
if (nTimeBestReceived - (int64)wtx.nTimeReceived > 5 * 60)
mapSorted.insert(make_pair(wtx.nTimeReceived, &wtx));
}
foreach(PAIRTYPE(const unsigned int, CWalletTx*)& item, mapSorted)
@ -931,7 +931,8 @@ bool CTransaction::DisconnectInputs(CTxDB& txdb)
}
bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx, int nHeight, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee)
bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx,
CBlockIndex* pindexBlock, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee)
{
// Take over previous transactions' spent pointers
if (!IsCoinBase())
@ -983,9 +984,9 @@ bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPoo
// If prev is coinbase, check that it's matured
if (txPrev.IsCoinBase())
for (CBlockIndex* pindex = pindexBest; pindex && nBestHeight - pindex->nHeight < COINBASE_MATURITY-1; pindex = pindex->pprev)
for (CBlockIndex* pindex = pindexBlock; pindex && pindexBlock->nHeight - pindex->nHeight < COINBASE_MATURITY; pindex = pindex->pprev)
if (pindex->nBlockPos == txindex.pos.nBlockPos && pindex->nFile == txindex.pos.nFile)
return error("ConnectInputs() : tried to spend coinbase at depth %d", nBestHeight - pindex->nHeight);
return error("ConnectInputs() : tried to spend coinbase at depth %d", pindexBlock->nHeight - pindex->nHeight);
// Verify signature
if (!VerifySignature(txPrev, *this, i))
@ -1019,7 +1020,7 @@ bool CTransaction::ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPoo
if (fBlock)
{
// Add transaction to disk index
if (!txdb.AddTxIndex(*this, posThisTx, nHeight))
if (!txdb.AddTxIndex(*this, posThisTx, pindexBlock->nHeight))
return error("ConnectInputs() : AddTxPos failed");
}
else if (fMiner)
@ -1108,7 +1109,7 @@ bool CBlock::ConnectBlock(CTxDB& txdb, CBlockIndex* pindex)
CDiskTxPos posThisTx(pindex->nFile, pindex->nBlockPos, nTxPos);
nTxPos += ::GetSerializeSize(tx, SER_DISK);
if (!tx.ConnectInputs(txdb, mapUnused, posThisTx, pindex->nHeight, nFees, true, false))
if (!tx.ConnectInputs(txdb, mapUnused, posThisTx, pindex, nFees, true, false))
return false;
}
@ -1379,14 +1380,12 @@ bool CBlock::AcceptBlock()
return error("AcceptBlock() : incorrect proof of work");
// Check that the block chain matches the known block chain up to a checkpoint
if (pindexPrev->nHeight+1 == 11111 && hash != uint256("0x0000000069e244f73d78e8fd29ba2fd2ed618bd6fa2ee92559f542fdb26e7c1d"))
return error("AcceptBlock() : rejected by checkpoint lockin at 11111");
if (pindexPrev->nHeight+1 == 33333 && hash != uint256("0x000000002dd5588a74784eaa7ab0507a18ad16a236e7b1ce69f00d7ddfb5d0a6"))
return error("AcceptBlock() : rejected by checkpoint lockin at 33333");
if (pindexPrev->nHeight+1 == 68555 && hash != uint256("0x00000000001e1b4903550a0b96e9a9405c8a95f387162e4944e8d9fbe501cd6a"))
return error("AcceptBlock() : rejected by checkpoint lockin at 68555");
if (pindexPrev->nHeight+1 == 70567 && hash != uint256("0x00000000006a49b14bcf27462068f1264c961f11fa2e0eddd2be0791e1d4124a"))
return error("AcceptBlock() : rejected by checkpoint lockin at 70567");
if ((pindexPrev->nHeight+1 == 11111 && hash != uint256("0x0000000069e244f73d78e8fd29ba2fd2ed618bd6fa2ee92559f542fdb26e7c1d")) ||
(pindexPrev->nHeight+1 == 33333 && hash != uint256("0x000000002dd5588a74784eaa7ab0507a18ad16a236e7b1ce69f00d7ddfb5d0a6")) ||
(pindexPrev->nHeight+1 == 68555 && hash != uint256("0x00000000001e1b4903550a0b96e9a9405c8a95f387162e4944e8d9fbe501cd6a")) ||
(pindexPrev->nHeight+1 == 70567 && hash != uint256("0x00000000006a49b14bcf27462068f1264c961f11fa2e0eddd2be0791e1d4124a")) ||
(pindexPrev->nHeight+1 == 74000 && hash != uint256("0x0000000000573993a3c9e41ce34471c079dcf5f52a0e824a81e7f953b8661a20")))
return error("AcceptBlock() : rejected by checkpoint lockin");
// Write block to history file
if (!CheckDiskSpace(::GetSerializeSize(*this, SER_DISK)))
@ -2577,6 +2576,8 @@ inline void SHA256Transform(void* pstate, void* pinput, const void* pinit)
CryptoPP::SHA256::Transform((CryptoPP::word32*)pstate, (CryptoPP::word32*)pinput);
}
static const int NPAR = 32;
extern void Double_BlockSHA256(const void* pin, void* pout, const void* pinit, unsigned int hash[8][NPAR], const void* init2);
@ -2658,7 +2659,7 @@ void BitcoinMiner()
int64 nMinFee = tx.GetMinFee(nBlockSize);
map<uint256, CTxIndex> mapTestPoolTmp(mapTestPool);
if (!tx.ConnectInputs(txdb, mapTestPoolTmp, CDiskTxPos(1,1,1), 0, nFees, false, true, nMinFee))
if (!tx.ConnectInputs(txdb, mapTestPoolTmp, CDiskTxPos(1,1,1), pindexPrev, nFees, false, true, nMinFee))
continue;
swap(mapTestPool, mapTestPoolTmp);
@ -2719,14 +2720,40 @@ void BitcoinMiner()
//
// Search
//
bool f4WaySSE2 = mapArgs.count("-4way");
int64 nStart = GetTime();
uint256 hashTarget = CBigNum().SetCompact(pblock->nBits).getuint256();
uint256 hashbuf[2];
uint256& hash = *alignup<16>(hashbuf);
loop
{
SHA256Transform(&tmp.hash1, (char*)&tmp.block + 64, &midstate);
SHA256Transform(&hash, &tmp.hash1, pSHA256InitState);
#ifdef FOURWAYSSE2
if (f4WaySSE2)
{
// tcatm's 4-way SSE2 SHA-256
tmp.block.nNonce += NPAR;
unsigned int thashbuf[9][NPAR];
unsigned int (&thash)[9][NPAR] = *alignup<16>(&thashbuf);
Double_BlockSHA256((char*)&tmp.block + 64, &tmp.hash1, &midstate, thash, pSHA256InitState);
((unsigned short*)&hash)[14] = 0xffff;
for (int j = 0; j < NPAR; j++)
{
if (thash[7][j] == 0)
{
for (int i = 0; i < sizeof(hash)/4; i++)
((unsigned int*)&hash)[i] = thash[i][j];
pblock->nNonce = ByteReverse(tmp.block.nNonce + j);
}
}
}
else
#endif
{
// Crypto++ SHA-256
tmp.block.nNonce++;
SHA256Transform(&tmp.hash1, (char*)&tmp.block + 64, &midstate);
SHA256Transform(&hash, &tmp.hash1, pSHA256InitState);
}
if (((unsigned short*)&hash)[14] == 0)
{
@ -2736,7 +2763,10 @@ void BitcoinMiner()
if (hash <= hashTarget)
{
pblock->nNonce = ByteReverse(tmp.block.nNonce);
#ifdef FOURWAYSSE2
if (!f4WaySSE2)
#endif
pblock->nNonce = ByteReverse(tmp.block.nNonce);
assert(hash == pblock->GetHash());
//// debug print
@ -2775,7 +2805,7 @@ void BitcoinMiner()
// Update nTime every few seconds
const unsigned int nMask = 0xffff;
const int nHashesPerCycle = (nMask+1);
if ((++tmp.block.nNonce & nMask) == 0)
if ((tmp.block.nNonce & nMask) == 0)
{
// Meter hashes/sec
static int nCycleCounter;

3
main.h
View file

@ -613,7 +613,8 @@ public:
bool DisconnectInputs(CTxDB& txdb);
bool ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx, int nHeight, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee=0);
bool ConnectInputs(CTxDB& txdb, map<uint256, CTxIndex>& mapTestPool, CDiskTxPos posThisTx,
CBlockIndex* pindexBlock, int64& nFees, bool fBlock, bool fMiner, int64 nMinFee=0);
bool ClientConnectInputs();
bool AcceptTransaction(CTxDB& txdb, bool fCheckInputs=true, bool* pfMissingInputs=NULL);

View file

@ -16,12 +16,30 @@ static const CBigNum bnZero(0);
static const CBigNum bnOne(1);
static const CBigNum bnFalse(0);
static const CBigNum bnTrue(1);
static const size_t nMaxNumSize = 258;
static const size_t nMaxNumSize = 4;
// Convert a script stack element into a CBigNum.
// Throws runtime_error when the element is longer than nMaxNumSize bytes.
CBigNum CastToBigNum(const valtype& vch)
{
    if (nMaxNumSize < vch.size())
        throw runtime_error("CastToBigNum() : overflow");

    // Round-trip through getvch() so the result carries no extra leading zeros
    CBigNum bnRaw(vch);
    return CBigNum(bnRaw.getvch());
}
bool CastToBool(const valtype& vch)
{
return (CBigNum(vch) != bnZero);
for (int i = 0; i < vch.size(); i++)
{
if (vch[i] != 0)
{
// Can be negative zero
if (i == vch.size()-1 && vch[i] == 0x80)
return false;
return true;
}
}
return false;
}
void MakeSameSize(valtype& vch1, valtype& vch2)
@ -68,11 +86,28 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
valtype vchPushValue;
if (!script.GetOp(pc, opcode, vchPushValue))
return false;
if (vchPushValue.size() > 5000)
if (vchPushValue.size() > 520)
return false;
if (opcode > OP_16 && nOpCount++ > 200)
return false;
if (opcode == OP_CAT ||
opcode == OP_SUBSTR ||
opcode == OP_LEFT ||
opcode == OP_RIGHT ||
opcode == OP_INVERT ||
opcode == OP_AND ||
opcode == OP_OR ||
opcode == OP_XOR ||
opcode == OP_2MUL ||
opcode == OP_2DIV ||
opcode == OP_MUL ||
opcode == OP_DIV ||
opcode == OP_MOD ||
opcode == OP_LSHIFT ||
opcode == OP_RSHIFT)
return false;
if (fExec && opcode <= OP_PUSHDATA4)
stack.push_back(vchPushValue);
else if (fExec || (OP_IF <= opcode && opcode <= OP_ENDIF))
@ -332,7 +367,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (xn ... x2 x1 x0 n - ... x2 x1 x0 xn)
if (stack.size() < 2)
return false;
int n = CBigNum(stacktop(-1)).getint();
int n = CastToBigNum(stacktop(-1)).getint();
stack.pop_back();
if (n < 0 || n >= stack.size())
return false;
@ -387,7 +422,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
valtype& vch2 = stacktop(-1);
vch1.insert(vch1.end(), vch2.begin(), vch2.end());
stack.pop_back();
if (stacktop(-1).size() > 5000)
if (stacktop(-1).size() > 520)
return false;
}
break;
@ -398,8 +433,8 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < 3)
return false;
valtype& vch = stacktop(-3);
int nBegin = CBigNum(stacktop(-2)).getint();
int nEnd = nBegin + CBigNum(stacktop(-1)).getint();
int nBegin = CastToBigNum(stacktop(-2)).getint();
int nEnd = nBegin + CastToBigNum(stacktop(-1)).getint();
if (nBegin < 0 || nEnd < nBegin)
return false;
if (nBegin > vch.size())
@ -420,7 +455,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < 2)
return false;
valtype& vch = stacktop(-2);
int nSize = CBigNum(stacktop(-1)).getint();
int nSize = CastToBigNum(stacktop(-1)).getint();
if (nSize < 0)
return false;
if (nSize > vch.size())
@ -531,9 +566,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (in -- out)
if (stack.size() < 1)
return false;
if (stacktop(-1).size() > nMaxNumSize)
return false;
CBigNum bn(stacktop(-1));
CBigNum bn = CastToBigNum(stacktop(-1));
switch (opcode)
{
case OP_1ADD: bn += bnOne; break;
@ -572,11 +605,8 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (x1 x2 -- out)
if (stack.size() < 2)
return false;
if (stacktop(-2).size() > nMaxNumSize ||
stacktop(-1).size() > nMaxNumSize)
return false;
CBigNum bn1(stacktop(-2));
CBigNum bn2(stacktop(-1));
CBigNum bn1 = CastToBigNum(stacktop(-2));
CBigNum bn2 = CastToBigNum(stacktop(-1));
CBigNum bn;
switch (opcode)
{
@ -646,13 +676,9 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
// (x min max -- out)
if (stack.size() < 3)
return false;
if (stacktop(-3).size() > nMaxNumSize ||
stacktop(-2).size() > nMaxNumSize ||
stacktop(-1).size() > nMaxNumSize)
return false;
CBigNum bn1(stacktop(-3));
CBigNum bn2(stacktop(-2));
CBigNum bn3(stacktop(-1));
CBigNum bn1 = CastToBigNum(stacktop(-3));
CBigNum bn2 = CastToBigNum(stacktop(-2));
CBigNum bn3 = CastToBigNum(stacktop(-1));
bool fValue = (bn2 <= bn1 && bn1 < bn3);
stack.pop_back();
stack.pop_back();
@ -748,7 +774,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < i)
return false;
int nKeysCount = CBigNum(stacktop(-i)).getint();
int nKeysCount = CastToBigNum(stacktop(-i)).getint();
if (nKeysCount < 0)
return false;
int ikey = ++i;
@ -756,7 +782,7 @@ bool EvalScript(vector<vector<unsigned char> >& stack, const CScript& script, co
if (stack.size() < i)
return false;
int nSigsCount = CBigNum(stacktop(-i)).getint();
int nSigsCount = CastToBigNum(stacktop(-i)).getint();
if (nSigsCount < 0 || nSigsCount > nKeysCount)
return false;
int isig = ++i;

View file

@ -19,8 +19,8 @@ class CScript;
class CDataStream;
class CAutoFile;
static const int VERSION = 308;
static const char* pszSubVer = ".4";
static const int VERSION = 309;
static const char* pszSubVer = ".0";

View file

@ -7,7 +7,7 @@ RequestExecutionLevel highest
# General Symbol Definitions
!define REGKEY "SOFTWARE\$(^Name)"
!define VERSION 0.3.8
!define VERSION 0.3.9
!define COMPANY "Bitcoin project"
!define URL http://www.bitcoin.org/
@ -42,12 +42,12 @@ Var StartMenuGroup
!insertmacro MUI_LANGUAGE English
# Installer attributes
OutFile bitcoin-0.3.8-win32-setup.exe
OutFile bitcoin-0.3.9-win32-setup.exe
InstallDir $PROGRAMFILES\Bitcoin
CRCCheck on
XPStyle on
ShowInstDetails show
VIProductVersion 0.3.8.0
VIProductVersion 0.3.9.0
VIAddVersionKey ProductName Bitcoin
VIAddVersionKey ProductVersion "${VERSION}"
VIAddVersionKey CompanyName "${COMPANY}"

419
sha256.cpp Normal file
View file

@ -0,0 +1,419 @@
// Copyright (c) 2010 Satoshi Nakamoto
// Distributed under the MIT/X11 software license, see the accompanying
// file license.txt or http://www.opensource.org/licenses/mit-license.php.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#define NPAR 32
/*
 * The 64 per-round additive constants of SHA-256. SHA256ROUND reads
 * sha256_consts[i] for round i and broadcasts it to every vector lane.
 * The trailing comments give the index of the first entry on that row.
 */
static const unsigned int sha256_consts[] = {
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, /* 0 */
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, /* 8 */
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, /* 16 */
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, /* 24 */
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, /* 32 */
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, /* 40 */
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, /* 48 */
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, /* 56 */
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
};
/*
 * SHA-256 "choose" on a 128-bit vector: for every bit position the result
 * takes the bit from c where b is 1 and from d where b is 0.
 *
 * Uses SSE2 intrinsics instead of applying &, ^ and ~ to __m128i directly;
 * those operators on vector types are a GCC/Clang extension and do not
 * compile on other toolchains.
 */
static inline __m128i Ch(const __m128i b, const __m128i c, const __m128i d) {
    /* _mm_andnot_si128(b, d) computes (~b) & d */
    return _mm_xor_si128(_mm_and_si128(b, c), _mm_andnot_si128(b, d));
}
/*
 * SHA-256 "majority" on a 128-bit vector: each result bit is 1 when at
 * least two of the corresponding bits of b, c and d are 1.
 *
 * Uses SSE2 intrinsics instead of applying & and ^ to __m128i directly;
 * those operators on vector types are a GCC/Clang extension and do not
 * compile on other toolchains.
 */
static inline __m128i Maj(const __m128i b, const __m128i c, const __m128i d) {
    const __m128i bc = _mm_and_si128(b, c);
    const __m128i bd = _mm_and_si128(b, d);
    const __m128i cd = _mm_and_si128(c, d);
    return _mm_xor_si128(_mm_xor_si128(bc, bd), cd);
}
/*
 * Rotate each 32-bit lane of x right by n bits.
 * Built from a logical right shift by n OR-ed with a left shift by 32 - n.
 *
 * The OR is done with _mm_or_si128 rather than the | operator, which is only
 * available on __m128i as a GCC/Clang vector extension.
 */
static inline __m128i ROTR(__m128i x, const int n) {
    return _mm_or_si128(_mm_srli_epi32(x, n), _mm_slli_epi32(x, 32 - n));
}
/* Logical right shift of each 32-bit lane of x by n bits. */
static inline __m128i SHR(__m128i x, const int n) {
    const __m128i shifted = _mm_srli_epi32(x, n);
    return shifted;
}
/* SHA256 Functions */
/*
 * Per-lane mixing functions, each the XOR of three rotated/shifted copies
 * of x. The BIGSIGMA pair is applied to the working registers a and e in
 * SHA256ROUND; the SIGMA pair is applied to message words when the
 * schedule is extended (w[t] = SIGMA1(w[t-2]) + w[t-7] + SIGMA0(w[t-15]) + w[t-16]).
 * x is evaluated three times, so pass only side-effect-free expressions.
 */
#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
/*
 * Return 32-bit lane i (0..3, lane 0 = lowest 32 bits) of x.
 * i is not range-checked; callers must pass 0..3.
 *
 * The vector is copied out with an unaligned store instead of being read
 * back through a union, since reading a union member other than the one
 * last written is undefined behaviour in C++.
 */
static inline unsigned int store32(const __m128i x, int i) {
    unsigned int lanes[4];
    _mm_storeu_si128((__m128i *)lanes, x);
    return lanes[i];
}
/*
 * Unpack the four 32-bit lanes of x into separate outputs, highest lane
 * first: *x0 receives lane 3, *x1 lane 2, *x2 lane 1 and *x3 lane 0
 * (the same argument order as _mm_set_epi32).
 *
 * The vector is copied out with an unaligned store instead of being read
 * back through a union, since reading a union member other than the one
 * last written is undefined behaviour in C++.
 */
static inline void store_epi32(const __m128i x, unsigned int *x0, unsigned int *x1, unsigned int *x2, unsigned int *x3) {
    unsigned int lanes[4];
    _mm_storeu_si128((__m128i *)lanes, x);
    *x0 = lanes[3];
    *x1 = lanes[2];
    *x2 = lanes[1];
    *x3 = lanes[0];
}
/* Lane-wise 32-bit sum of four (add4) or five (add5) vectors, built from
 * chained _mm_add_epi32 calls. */
#define add4(x0, x1, x2, x3) _mm_add_epi32(_mm_add_epi32(_mm_add_epi32(x0, x1), x2), x3)
#define add5(x0, x1, x2, x3, x4) _mm_add_epi32(add4(x0, x1, x2, x3), x4)
/*
 * One SHA-256 round applied to every vector lane.
 *
 *   a..h  working registers (__m128i lvalues); only d and h are modified.
 *         Callers rotate the argument order from round to round rather
 *         than shuffling values between registers.
 *   i     round number, used to index sha256_consts.
 *   w     message-schedule word for this round.
 *
 * Requires a __m128i variable named T1 in the calling scope as scratch.
 * The body is wrapped in do { } while (0) so the expansion is a single
 * statement and stays correct under an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
    do { \
        T1 = add5(h, BIGSIGMA1_256(e), Ch(e, f, g), _mm_set1_epi32(sha256_consts[(i)]), w); \
        d = _mm_add_epi32(d, T1); \
        h = _mm_add_epi32(T1, _mm_add_epi32(BIGSIGMA0_256(a), Maj(a, b, c))); \
    } while (0)
/*
 * Debug helper: print msg followed by the four 32-bit lanes of x in hex,
 * lowest lane first.
 *
 * msg is taken as const char* so string literals can be passed (a literal
 * does not convert to plain char* in standard C++). The lanes are copied
 * out with an unaligned store rather than read back through a union.
 */
static inline void dumpreg(__m128i x, const char *msg) {
    unsigned int lanes[4];
    _mm_storeu_si128((__m128i *)lanes, x);
    printf("%s %08x %08x %08x %08x\n", msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
/*
 * Debug helper: print lane i of w0 and of the working registers a..h,
 * prefixed with the enclosing function's name. Only usable where those
 * variables are in scope. Flip the #if to 0 to compile the calls away;
 * the disabled variant takes the same single parameter so existing
 * dumpstate(i) call sites still expand cleanly.
 */
#if 1
#define dumpstate(i) printf("%s: %08x %08x %08x %08x %08x %08x %08x %08x %08x\n", \
__func__, store32(w0, i), store32(a, i), store32(b, i), store32(c, i), store32(d, i), store32(e, i), store32(f, i), store32(g, i), store32(h, i));
#else
#define dumpstate(i)
#endif
/*
 * Run two chained SHA-256 compressions for NPAR consecutive nonce values,
 * four values at a time in the 32-bit lanes of 128-bit vectors.
 *
 * pin   - 16 32-bit input words for the first compression (In[0..15]).
 *         In[3] is the base nonce word; lane j of the batch uses In[3] + j.
 * pad   - 16-word buffer of which only words 8..15 are read; they form the
 *         upper half of the second compression's input block. The lower
 *         half is the output of the first compression. Never written here.
 * pre   - 8-word starting state for the first compression; also added to
 *         its result afterwards.
 * thash - output. thash[0..7][j] receive the eight state words of the
 *         second compression for nonce index j (0..NPAR-1); thash[8][j]
 *         receives the nonce word that was hashed for that index. Rows are
 *         written through a __m128i* dereference, so thash is assumed to
 *         be 16-byte aligned.
 * init  - 8-word starting state for the second compression; also added to
 *         its result before storing.
 */
void Double_BlockSHA256(const void* pin, void* pad, const void *pre, unsigned int thash[9][NPAR], const void *init)
{
unsigned int* In = (unsigned int*)pin;
unsigned int* Pad = (unsigned int*)pad;
unsigned int* hPre = (unsigned int*)pre;
unsigned int* hInit = (unsigned int*)init;
/* NOTE(review): i and j are declared but never used in this function */
unsigned int i, j, k;
/* vectors used in calculation */
__m128i w0, w1, w2, w3, w4, w5, w6, w7;
__m128i w8, w9, w10, w11, w12, w13, w14, w15;
__m128i T1;
__m128i a, b, c, d, e, f, g, h;
__m128i nonce;
/* nonce offset for vector */
/* (_mm_set_epi32 takes the highest lane first, so lane 0 gets +0 and lane 3 gets +3) */
__m128i offset = _mm_set_epi32(0x00000003, 0x00000002, 0x00000001, 0x00000000);
/* each iteration hashes the four nonces In[3]+k .. In[3]+k+3 */
for(k = 0; k<NPAR; k+=4) {
/* broadcast the input words to all four lanes; only w3 differs per lane */
w0 = _mm_set1_epi32(In[0]);
w1 = _mm_set1_epi32(In[1]);
w2 = _mm_set1_epi32(In[2]);
//w3 = _mm_set1_epi32(In[3]); nonce will be later hacked into the hash
w4 = _mm_set1_epi32(In[4]);
w5 = _mm_set1_epi32(In[5]);
w6 = _mm_set1_epi32(In[6]);
w7 = _mm_set1_epi32(In[7]);
w8 = _mm_set1_epi32(In[8]);
w9 = _mm_set1_epi32(In[9]);
w10 = _mm_set1_epi32(In[10]);
w11 = _mm_set1_epi32(In[11]);
w12 = _mm_set1_epi32(In[12]);
w13 = _mm_set1_epi32(In[13]);
w14 = _mm_set1_epi32(In[14]);
w15 = _mm_set1_epi32(In[15]);
/* per-lane nonce word: base In[3] + lane offset (0..3) + batch offset k; used as w3 */
nonce = _mm_set1_epi32(In[3]);
nonce = _mm_add_epi32(nonce, offset);
nonce = _mm_add_epi32(nonce, _mm_set1_epi32(k));
w3 = nonce;
/* first compression: start from the caller-supplied state hPre */
a = _mm_set1_epi32(hPre[0]);
b = _mm_set1_epi32(hPre[1]);
c = _mm_set1_epi32(hPre[2]);
d = _mm_set1_epi32(hPre[3]);
e = _mm_set1_epi32(hPre[4]);
f = _mm_set1_epi32(hPre[5]);
g = _mm_set1_epi32(hPre[6]);
h = _mm_set1_epi32(hPre[7]);
/* rounds 0-15 consume the input words directly; the register arguments
   rotate by one position each round instead of moving values */
SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
/* rounds 16-63: each schedule word is recomputed in place in the
   16-entry window w0..w15 immediately before the round that uses it */
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
/* add the starting state hPre back onto the round output; the sums go
   straight into w0..w7 as the first eight input words of the second
   compression. Uses T1 as scratch. The macro stays defined after this function. */
#define store_load(x, i, dest) \
T1 = _mm_set1_epi32((hPre)[i]); \
dest = _mm_add_epi32(T1, x);
store_load(a, 0, w0);
store_load(b, 1, w1);
store_load(c, 2, w2);
store_load(d, 3, w3);
store_load(e, 4, w4);
store_load(f, 5, w5);
store_load(g, 6, w6);
store_load(h, 7, w7);
/* the remaining eight input words come from the caller's pad buffer */
w8 = _mm_set1_epi32(Pad[8]);
w9 = _mm_set1_epi32(Pad[9]);
w10 = _mm_set1_epi32(Pad[10]);
w11 = _mm_set1_epi32(Pad[11]);
w12 = _mm_set1_epi32(Pad[12]);
w13 = _mm_set1_epi32(Pad[13]);
w14 = _mm_set1_epi32(Pad[14]);
w15 = _mm_set1_epi32(Pad[15]);
/* second compression: start from the caller-supplied state hInit */
a = _mm_set1_epi32(hInit[0]);
b = _mm_set1_epi32(hInit[1]);
c = _mm_set1_epi32(hInit[2]);
d = _mm_set1_epi32(hInit[3]);
e = _mm_set1_epi32(hInit[4]);
f = _mm_set1_epi32(hInit[5]);
g = _mm_set1_epi32(hInit[6]);
h = _mm_set1_epi32(hInit[7]);
/* rounds 0-15 of the second compression */
SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
/* rounds 16-63 of the second compression, schedule extended in place as before */
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
w0 = add4(SIGMA1_256(w14), w9, SIGMA0_256(w1), w0);
SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
w1 = add4(SIGMA1_256(w15), w10, SIGMA0_256(w2), w1);
SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
w2 = add4(SIGMA1_256(w0), w11, SIGMA0_256(w3), w2);
SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
w3 = add4(SIGMA1_256(w1), w12, SIGMA0_256(w4), w3);
SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
w4 = add4(SIGMA1_256(w2), w13, SIGMA0_256(w5), w4);
SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
w5 = add4(SIGMA1_256(w3), w14, SIGMA0_256(w6), w5);
SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
w6 = add4(SIGMA1_256(w4), w15, SIGMA0_256(w7), w6);
SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
w7 = add4(SIGMA1_256(w5), w0, SIGMA0_256(w8), w7);
SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
w8 = add4(SIGMA1_256(w6), w1, SIGMA0_256(w9), w8);
SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
w9 = add4(SIGMA1_256(w7), w2, SIGMA0_256(w10), w9);
SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
w10 = add4(SIGMA1_256(w8), w3, SIGMA0_256(w11), w10);
SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
w11 = add4(SIGMA1_256(w9), w4, SIGMA0_256(w12), w11);
SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
w12 = add4(SIGMA1_256(w10), w5, SIGMA0_256(w13), w12);
SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
w13 = add4(SIGMA1_256(w11), w6, SIGMA0_256(w14), w13);
SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
w14 = add4(SIGMA1_256(w12), w7, SIGMA0_256(w15), w14);
SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
w15 = add4(SIGMA1_256(w13), w8, SIGMA0_256(w0), w15);
SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
/* store results directly in thash */
/* adds hInit[i] to register x and writes the four lanes to
   thash[i][k..k+3]; w0 is reused as scratch. The store goes through a
   __m128i* dereference, which assumes a 16-byte-aligned destination. */
#define store_2(x,i) \
w0 = _mm_set1_epi32(hInit[i]); \
*(__m128i *)&(thash)[i][0+k] = _mm_add_epi32(w0, x);
store_2(a, 0);
store_2(b, 1);
store_2(c, 2);
store_2(d, 3);
store_2(e, 4);
store_2(f, 5);
store_2(g, 6);
store_2(h, 7);
/* row 8 records the nonce word each lane hashed */
*(__m128i *)&(thash)[8][0+k] = nonce;
}
}

View file

@ -19,6 +19,14 @@ bool fCommandLine = false;
// Workaround for "multiple definition of `_tls_used'"
// http://svn.boost.org/trac/boost/ticket/4258
extern "C" void tss_cleanup_implemented() { }
// Init openssl library multithreading support
static boost::interprocess::interprocess_mutex** ppmutexOpenSSL;
void locking_callback(int mode, int i, const char* file, int line)