replace modulus with FastMod

Replaces the slow modulo operation with a much faster 32bit multiplication & shift. This works
because the hash should be uniformly distributed between 0 and 2^32-1. This speeds up the benchmark
by a factor of about 1.3:

RollingBloom, 5, 1500000, 3.73733, 4.97569e-07, 4.99002e-07, 4.98372e-07 # before
RollingBloom, 5, 1500000, 2.86842, 3.81630e-07, 3.83730e-07, 3.82473e-07 # FastMod

Be aware that this changes the position of the bits that are toggled, so this should probably
not be used for CBloomFilter which is serialized.
This commit is contained in:
Martin Ankerl 2018-05-06 13:55:33 +02:00
parent 66cc47be98
commit 9aac9f90d5

View file

@ -245,6 +245,14 @@ static inline uint32_t RollingBloomHash(unsigned int nHashNum, uint32_t nTweak,
return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash); return MurmurHash3(nHashNum * 0xFBA4C795 + nTweak, vDataToHash);
} }
// A replacement for x % n. This assumes that x and n are 32bit integers, and x is a uniformly random distributed 32bit value
// which should be the case for a good hash.
// See https://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
static inline uint32_t FastMod(uint32_t x, size_t n) {
return ((uint64_t)x * (uint64_t)n) >> 32;
}
void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey) void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
{ {
if (nEntriesThisGeneration == nEntriesPerGeneration) { if (nEntriesThisGeneration == nEntriesPerGeneration) {
@ -268,7 +276,8 @@ void CRollingBloomFilter::insert(const std::vector<unsigned char>& vKey)
for (int n = 0; n < nHashFuncs; n++) { for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey); uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F; int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size(); /* FastMod works with the upper bits of h, so it is safe to ignore that the lower bits of h are already used for bit. */
uint32_t pos = FastMod(h, data.size());
/* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */ /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit; data[pos & ~1] = (data[pos & ~1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration & 1)) << bit;
data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit; data[pos | 1] = (data[pos | 1] & ~(((uint64_t)1) << bit)) | ((uint64_t)(nGeneration >> 1)) << bit;
@ -286,7 +295,7 @@ bool CRollingBloomFilter::contains(const std::vector<unsigned char>& vKey) const
for (int n = 0; n < nHashFuncs; n++) { for (int n = 0; n < nHashFuncs; n++) {
uint32_t h = RollingBloomHash(n, nTweak, vKey); uint32_t h = RollingBloomHash(n, nTweak, vKey);
int bit = h & 0x3F; int bit = h & 0x3F;
uint32_t pos = (h >> 6) % data.size(); uint32_t pos = FastMod(h, data.size());
/* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */ /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) { if (!(((data[pos & ~1] | data[pos | 1]) >> bit) & 1)) {
return false; return false;