Merge pull request #2914 from gmaxwell/bloom_faster

Performance optimization for bloom filters.
This commit is contained in:
Gavin Andresen 2013-08-20 15:52:49 -07:00
commit 5250fdcc6c
4 changed files with 33 additions and 7 deletions

View file

@ -23,6 +23,8 @@ vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM
// The ideal number of hash functions is filter size * ln(2) / number of elements // The ideal number of hash functions is filter size * ln(2) / number of elements
// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits // Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas // See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
isFull(false),
isEmpty(false),
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)), nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
nTweak(nTweakIn), nTweak(nTweakIn),
nFlags(nFlagsIn) nFlags(nFlagsIn)
@ -37,7 +39,7 @@ inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<
void CBloomFilter::insert(const vector<unsigned char>& vKey) void CBloomFilter::insert(const vector<unsigned char>& vKey)
{ {
if (vData.size() == 1 && vData[0] == 0xff) if (isFull)
return; return;
for (unsigned int i = 0; i < nHashFuncs; i++) for (unsigned int i = 0; i < nHashFuncs; i++)
{ {
@ -45,6 +47,7 @@ void CBloomFilter::insert(const vector<unsigned char>& vKey)
// Sets bit nIndex of vData // Sets bit nIndex of vData
vData[nIndex >> 3] |= bit_mask[7 & nIndex]; vData[nIndex >> 3] |= bit_mask[7 & nIndex];
} }
isEmpty = false;
} }
void CBloomFilter::insert(const COutPoint& outpoint) void CBloomFilter::insert(const COutPoint& outpoint)
@ -63,8 +66,10 @@ void CBloomFilter::insert(const uint256& hash)
bool CBloomFilter::contains(const vector<unsigned char>& vKey) const bool CBloomFilter::contains(const vector<unsigned char>& vKey) const
{ {
if (vData.size() == 1 && vData[0] == 0xff) if (isFull)
return true; return true;
if (isEmpty)
return false;
for (unsigned int i = 0; i < nHashFuncs; i++) for (unsigned int i = 0; i < nHashFuncs; i++)
{ {
unsigned int nIndex = Hash(i, vKey); unsigned int nIndex = Hash(i, vKey);
@ -99,6 +104,10 @@ bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx, const uint256& ha
bool fFound = false; bool fFound = false;
// Match if the filter contains the hash of tx // Match if the filter contains the hash of tx
// for finding tx when they appear in a block // for finding tx when they appear in a block
if (isFull)
return true;
if (isEmpty)
return false;
if (contains(hash)) if (contains(hash))
fFound = true; fFound = true;
@ -158,3 +167,16 @@ bool CBloomFilter::IsRelevantAndUpdate(const CTransaction& tx, const uint256& ha
return false; return false;
} }
void CBloomFilter::UpdateEmptyFull()
{
bool full = true;
bool empty = true;
for (unsigned int i = 0; i < vData.size(); i++)
{
full &= vData[i] == 0xff;
empty &= vData[i] == 0;
}
isFull = full;
isEmpty = empty;
}

View file

@ -42,6 +42,8 @@ class CBloomFilter
{ {
private: private:
std::vector<unsigned char> vData; std::vector<unsigned char> vData;
bool isFull;
bool isEmpty;
unsigned int nHashFuncs; unsigned int nHashFuncs;
unsigned int nTweak; unsigned int nTweak;
unsigned char nFlags; unsigned char nFlags;
@ -57,9 +59,7 @@ public:
// It should generally always be a random value (and is largely only exposed for unit testing) // It should generally always be a random value (and is largely only exposed for unit testing)
// nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK) // nFlags should be one of the BLOOM_UPDATE_* enums (not _MASK)
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak, unsigned char nFlagsIn); CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak, unsigned char nFlagsIn);
// Using a filter initialized with this results in undefined behavior CBloomFilter() : isFull(true) {}
// Should only be used for deserialization
CBloomFilter() {}
IMPLEMENT_SERIALIZE IMPLEMENT_SERIALIZE
( (
@ -83,6 +83,9 @@ public:
// Also adds any outputs which match the filter to the filter (to match their spending txes) // Also adds any outputs which match the filter to the filter (to match their spending txes)
bool IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash); bool IsRelevantAndUpdate(const CTransaction& tx, const uint256& hash);
// Checks for empty and full filters to avoid wasting cpu
void UpdateEmptyFull();
}; };
#endif /* BITCOIN_BLOOM_H */ #endif /* BITCOIN_BLOOM_H */

View file

@ -3893,6 +3893,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
LOCK(pfrom->cs_filter); LOCK(pfrom->cs_filter);
delete pfrom->pfilter; delete pfrom->pfilter;
pfrom->pfilter = new CBloomFilter(filter); pfrom->pfilter = new CBloomFilter(filter);
filter.UpdateEmptyFull();
} }
pfrom->fRelayTxes = true; pfrom->fRelayTxes = true;
} }
@ -3922,7 +3923,7 @@ bool static ProcessMessage(CNode* pfrom, string strCommand, CDataStream& vRecv)
{ {
LOCK(pfrom->cs_filter); LOCK(pfrom->cs_filter);
delete pfrom->pfilter; delete pfrom->pfilter;
pfrom->pfilter = NULL; pfrom->pfilter = new CBloomFilter();
pfrom->fRelayTxes = true; pfrom->fRelayTxes = true;
} }

View file

@ -267,7 +267,7 @@ public:
nMisbehavior = 0; nMisbehavior = 0;
fRelayTxes = false; fRelayTxes = false;
setInventoryKnown.max_size(SendBufferSize() / 1000); setInventoryKnown.max_size(SendBufferSize() / 1000);
pfilter = NULL; pfilter = new CBloomFilter();
// Be shy and don't send version until we hear // Be shy and don't send version until we hear
if (hSocket != INVALID_SOCKET && !fInbound) if (hSocket != INVALID_SOCKET && !fInbound)