Add an nTweak value to bloom filters to tweak the hash seed.
This commit is contained in:
parent
4c8fc1a588
commit
b1f99bed6f
3 changed files with 57 additions and 24 deletions
|
@ -15,7 +15,7 @@ using namespace std;
|
||||||
|
|
||||||
static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
|
static const unsigned char bit_mask[8] = {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80};
|
||||||
|
|
||||||
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate) :
|
CBloomFilter::CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweakIn) :
|
||||||
// The ideal size for a bloom filter with a given number of elements and false positive rate is:
|
// The ideal size for a bloom filter with a given number of elements and false positive rate is:
|
||||||
// - nElements * log(fp rate) / ln(2)^2
|
// - nElements * log(fp rate) / ln(2)^2
|
||||||
// We ignore filter parameters which will create a bloom filter larger than the protocol limits
|
// We ignore filter parameters which will create a bloom filter larger than the protocol limits
|
||||||
|
@ -23,14 +23,15 @@ vData(min((unsigned int)(-1 / LN2SQUARED * nElements * log(nFPRate)), MAX_BLOOM
|
||||||
// The ideal number of hash functions is filter size * ln(2) / number of elements
|
// The ideal number of hash functions is filter size * ln(2) / number of elements
|
||||||
// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
|
// Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
|
||||||
// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
|
// See http://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
|
||||||
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS))
|
nHashFuncs(min((unsigned int)(vData.size() * 8 / nElements * LN2), MAX_HASH_FUNCS)),
|
||||||
|
nTweak(nTweakIn)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
|
inline unsigned int CBloomFilter::Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const
|
||||||
{
|
{
|
||||||
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
|
// 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
|
||||||
return MurmurHash3(nHashNum * 0xFBA4C795, vDataToHash) % (vData.size() * 8);
|
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash) % (vData.size() * 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CBloomFilter::insert(const vector<unsigned char>& vKey)
|
void CBloomFilter::insert(const vector<unsigned char>& vKey)
|
||||||
|
|
|
@ -33,6 +33,7 @@ class CBloomFilter
|
||||||
private:
|
private:
|
||||||
std::vector<unsigned char> vData;
|
std::vector<unsigned char> vData;
|
||||||
unsigned int nHashFuncs;
|
unsigned int nHashFuncs;
|
||||||
|
unsigned int nTweak;
|
||||||
|
|
||||||
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
|
unsigned int Hash(unsigned int nHashNum, const std::vector<unsigned char>& vDataToHash) const;
|
||||||
|
|
||||||
|
@ -41,7 +42,9 @@ public:
|
||||||
// Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
|
// Note that if the given parameters will result in a filter outside the bounds of the protocol limits,
|
||||||
// the filter created will be as close to the given parameters as possible within the protocol limits.
|
// the filter created will be as close to the given parameters as possible within the protocol limits.
|
||||||
// This will apply if nFPRate is very low or nElements is unreasonably high.
|
// This will apply if nFPRate is very low or nElements is unreasonably high.
|
||||||
CBloomFilter(unsigned int nElements, double nFPRate);
|
// nTweak is a constant which is added to the seed value passed to the hash function
|
||||||
|
// It should normally be a random value (it is exposed as a parameter mainly for unit testing)
|
||||||
|
CBloomFilter(unsigned int nElements, double nFPRate, unsigned int nTweak);
|
||||||
// Using a filter initialized with this results in undefined behavior
|
// Using a filter initialized with this results in undefined behavior
|
||||||
// Should only be used for deserialization
|
// Should only be used for deserialization
|
||||||
CBloomFilter() {}
|
CBloomFilter() {}
|
||||||
|
@ -50,6 +53,7 @@ public:
|
||||||
(
|
(
|
||||||
READWRITE(vData);
|
READWRITE(vData);
|
||||||
READWRITE(nHashFuncs);
|
READWRITE(nHashFuncs);
|
||||||
|
READWRITE(nTweak);
|
||||||
)
|
)
|
||||||
|
|
||||||
void insert(const std::vector<unsigned char>& vKey);
|
void insert(const std::vector<unsigned char>& vKey);
|
||||||
|
|
File diff suppressed because one or more lines are too long
Loading…
Reference in a new issue