replace modulus with FastMod
Replaces the slow modulo operation with a much faster 32-bit multiplication and shift. This works because the hash should be uniformly distributed between 0 and 2^32-1. This speeds up the benchmark by a factor of about 1.3:
RollingBloom, 5, 1500000, 3.73733, 4.97569e-07, 4.99002e-07, 4.98372e-07 # before
RollingBloom, 5, 1500000, 2.86842, 3.81630e-07, 3.83730e-07, 3.82473e-07 # FastMod
Be aware that this changes the position of the bits that are toggled, so this should probably not be used for CBloomFilter, which is serialized.
This commit is contained in:
parent
66cc47be98
commit
9aac9f90d5
1 changed files with 11 additions and 2 deletions
|
@ -245,6 +245,14 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
|
||||||
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
|
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// A fast replacement for x % n: maps x into the range [0, n) by computing
// floor(x * n / 2^32) with one 64-bit multiplication and a shift instead of a
// division. The value returned is generally NOT equal to x % n; it is only an
// equally good bucket index when x is a uniformly distributed 32-bit value,
// which should be the case for a good hash.
// n is expected to fit in 32 bits. Where size_t is wider than that, a larger n
// can wrap the 64-bit product, and the result is truncated to 32 bits.
// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static inline uint32_t FastMod(uint32_t x, size_t n) {
    const uint64_t product = static_cast<uint64_t>(x) * static_cast<uint64_t>(n);
    // The upper 32 bits of the product are floor(x * n / 2^32).
    return static_cast<uint32_t>(product >> 32);
}
|
||||||
|
|
||||||
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
|
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
|
||||||
{
|
{
|
||||||
if (nEntriesThisGeneration == nEntriesPerGeneration) {
|
if (nEntriesThisGeneration == nEntriesPerGeneration) {
|
||||||
|
@ -268,7 +276,8 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
|
||||||
for (int n = 0; n < nHashFuncs; n++) {
|
for (int n = 0; n < nHashFuncs; n++) {
|
||||||
uint32_t h = RollingBloomHash(n, nTweak, vKey);
|
uint32_t h = RollingBloomHash(n, nTweak, vKey);
|
||||||
int bit = h & 0x3F;
|
int bit = h & 0x3F;
|
||||||
uint32_t pos = (h >> 6) % data.size();
|
/* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
|
||||||
|
uint32_t pos = FastMod(h, data.size());
|
||||||
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
|
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
|
||||||
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
|
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
|
||||||
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
|
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
|
||||||
|
@ -286,7 +295,7 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
|
||||||
for (int n = 0; n < nHashFuncs; n++) {
|
for (int n = 0; n < nHashFuncs; n++) {
|
||||||
uint32_t h = RollingBloomHash(n, nTweak, vKey);
|
uint32_t h = RollingBloomHash(n, nTweak, vKey);
|
||||||
int bit = h & 0x3F;
|
int bit = h & 0x3F;
|
||||||
uint32_t pos = (h >> 6) % data.size();
|
uint32_t pos = FastMod(h, data.size());
|
||||||
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
|
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
|
||||||
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
|
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in a new issue