Merge #14172: Refactor and add tests for BlockFilter construction

e4ed8ce2c8 blockfilter: Remove default clause in switch statement. (Jim Posen)
c30620983d blockfilter: Additional constructors for BlockFilter. (Jim Posen)
20b812993a blockfilter: Refactor GCS params into struct. (Jim Posen)

Pull request description:

  These commits have been split out of #14121 because they are fairly independent and that PR is very large.

Tree-SHA512: b9643b159e114df50a295f433e807afe6082db55a2a3a17401c1509b850c71bf5011ab3638863b46663709726be4445be6fde1dec514aec7696135497a9f0183
This commit is contained in:
MarcoFalke 2018-12-22 14:16:30 +01:00
commit a4564b9b07
No known key found for this signature in database
GPG key ID: D2EA4850E7528B25
4 changed files with 99 additions and 49 deletions

View file

@@ -17,7 +17,7 @@ static void ConstructGCSFilter(benchmark::State& state)
uint64_t siphash_k0 = 0; uint64_t siphash_k0 = 0;
while (state.KeepRunning()) { while (state.KeepRunning()) {
GCSFilter filter(siphash_k0, 0, 20, 1 << 20, elements); GCSFilter filter({siphash_k0, 0, 20, 1 << 20}, elements);
siphash_k0++; siphash_k0++;
} }
@@ -32,7 +32,7 @@ static void MatchGCSFilter(benchmark::State& state)
element[1] = static_cast<unsigned char>(i >> 8); element[1] = static_cast<unsigned char>(i >> 8);
elements.insert(std::move(element)); elements.insert(std::move(element));
} }
GCSFilter filter(0, 0, 20, 1 << 20, elements); GCSFilter filter({0, 0, 20, 1 << 20}, elements);
while (state.KeepRunning()) { while (state.KeepRunning()) {
filter.Match(GCSFilter::Element()); filter.Match(GCSFilter::Element());

View file

@@ -79,7 +79,7 @@ static uint64_t MapIntoRange(uint64_t x, uint64_t n)
uint64_t GCSFilter::HashToRange(const Element& element) const uint64_t GCSFilter::HashToRange(const Element& element) const
{ {
uint64_t hash = CSipHasher(m_siphash_k0, m_siphash_k1) uint64_t hash = CSipHasher(m_params.m_siphash_k0, m_params.m_siphash_k1)
.Write(element.data(), element.size()) .Write(element.data(), element.size())
.Finalize(); .Finalize();
return MapIntoRange(hash, m_F); return MapIntoRange(hash, m_F);
@@ -96,16 +96,13 @@ std::vector<uint64_t> GCSFilter::BuildHashedSet(const ElementSet& elements) cons
return hashed_elements; return hashed_elements;
} }
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M) GCSFilter::GCSFilter(const Params& params)
: m_siphash_k0(siphash_k0), m_siphash_k1(siphash_k1), m_P(P), m_M(M), m_N(0), m_F(0) : m_params(params), m_N(0), m_F(0), m_encoded{0}
{} {}
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, GCSFilter::GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter)
std::vector<unsigned char> encoded_filter) : m_params(params), m_encoded(std::move(encoded_filter))
: GCSFilter(siphash_k0, siphash_k1, P, M)
{ {
m_encoded = std::move(encoded_filter);
VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); VectorReader stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0);
uint64_t N = ReadCompactSize(stream); uint64_t N = ReadCompactSize(stream);
@@ -113,29 +110,28 @@ GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32
if (m_N != N) { if (m_N != N) {
throw std::ios_base::failure("N must be <2^32"); throw std::ios_base::failure("N must be <2^32");
} }
m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M); m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);
// Verify that the encoded filter contains exactly N elements. If it has too much or too little // Verify that the encoded filter contains exactly N elements. If it has too much or too little
// data, a std::ios_base::failure exception will be raised. // data, a std::ios_base::failure exception will be raised.
BitStreamReader<VectorReader> bitreader(stream); BitStreamReader<VectorReader> bitreader(stream);
for (uint64_t i = 0; i < m_N; ++i) { for (uint64_t i = 0; i < m_N; ++i) {
GolombRiceDecode(bitreader, m_P); GolombRiceDecode(bitreader, m_params.m_P);
} }
if (!stream.empty()) { if (!stream.empty()) {
throw std::ios_base::failure("encoded_filter contains excess data"); throw std::ios_base::failure("encoded_filter contains excess data");
} }
} }
GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, GCSFilter::GCSFilter(const Params& params, const ElementSet& elements)
const ElementSet& elements) : m_params(params)
: GCSFilter(siphash_k0, siphash_k1, P, M)
{ {
size_t N = elements.size(); size_t N = elements.size();
m_N = static_cast<uint32_t>(N); m_N = static_cast<uint32_t>(N);
if (m_N != N) { if (m_N != N) {
throw std::invalid_argument("N must be <2^32"); throw std::invalid_argument("N must be <2^32");
} }
m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_M); m_F = static_cast<uint64_t>(m_N) * static_cast<uint64_t>(m_params.m_M);
CVectorWriter stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0); CVectorWriter stream(GCS_SER_TYPE, GCS_SER_VERSION, m_encoded, 0);
@@ -150,7 +146,7 @@ GCSFilter::GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32
uint64_t last_value = 0; uint64_t last_value = 0;
for (uint64_t value : BuildHashedSet(elements)) { for (uint64_t value : BuildHashedSet(elements)) {
uint64_t delta = value - last_value; uint64_t delta = value - last_value;
GolombRiceEncode(bitwriter, m_P, delta); GolombRiceEncode(bitwriter, m_params.m_P, delta);
last_value = value; last_value = value;
} }
@@ -170,7 +166,7 @@ bool GCSFilter::MatchInternal(const uint64_t* element_hashes, size_t size) const
uint64_t value = 0; uint64_t value = 0;
size_t hashes_index = 0; size_t hashes_index = 0;
for (uint32_t i = 0; i < m_N; ++i) { for (uint32_t i = 0; i < m_N; ++i) {
uint64_t delta = GolombRiceDecode(bitreader, m_P); uint64_t delta = GolombRiceDecode(bitreader, m_params.m_P);
value += delta; value += delta;
while (true) { while (true) {
@@ -225,19 +221,39 @@ static GCSFilter::ElementSet BasicFilterElements(const CBlock& block,
return elements; return elements;
} }
BlockFilter::BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter)
: m_filter_type(filter_type), m_block_hash(block_hash)
{
GCSFilter::Params params;
if (!BuildParams(params)) {
throw std::invalid_argument("unknown filter_type");
}
m_filter = GCSFilter(params, std::move(filter));
}
BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo) BlockFilter::BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo)
: m_filter_type(filter_type), m_block_hash(block.GetHash()) : m_filter_type(filter_type), m_block_hash(block.GetHash())
{ {
switch (m_filter_type) { GCSFilter::Params params;
case BlockFilterType::BASIC: if (!BuildParams(params)) {
m_filter = GCSFilter(m_block_hash.GetUint64(0), m_block_hash.GetUint64(1),
BASIC_FILTER_P, BASIC_FILTER_M,
BasicFilterElements(block, block_undo));
break;
default:
throw std::invalid_argument("unknown filter_type"); throw std::invalid_argument("unknown filter_type");
} }
m_filter = GCSFilter(params, BasicFilterElements(block, block_undo));
}
bool BlockFilter::BuildParams(GCSFilter::Params& params) const
{
switch (m_filter_type) {
case BlockFilterType::BASIC:
params.m_siphash_k0 = m_block_hash.GetUint64(0);
params.m_siphash_k1 = m_block_hash.GetUint64(1);
params.m_P = BASIC_FILTER_P;
params.m_M = BASIC_FILTER_M;
return true;
}
return false;
} }
uint256 BlockFilter::GetHash() const uint256 BlockFilter::GetHash() const

View file

@@ -25,11 +25,20 @@ public:
typedef std::vector<unsigned char> Element; typedef std::vector<unsigned char> Element;
typedef std::unordered_set<Element, ByteVectorHash> ElementSet; typedef std::unordered_set<Element, ByteVectorHash> ElementSet;
private: struct Params
{
uint64_t m_siphash_k0; uint64_t m_siphash_k0;
uint64_t m_siphash_k1; uint64_t m_siphash_k1;
uint8_t m_P; //!< Golomb-Rice coding parameter uint8_t m_P; //!< Golomb-Rice coding parameter
uint32_t m_M; //!< Inverse false positive rate uint32_t m_M; //!< Inverse false positive rate
Params(uint64_t siphash_k0 = 0, uint64_t siphash_k1 = 0, uint8_t P = 0, uint32_t M = 1)
: m_siphash_k0(siphash_k0), m_siphash_k1(siphash_k1), m_P(P), m_M(M)
{}
};
private:
Params m_params;
uint32_t m_N; //!< Number of elements in the filter uint32_t m_N; //!< Number of elements in the filter
uint64_t m_F; //!< Range of element hashes, F = N * M uint64_t m_F; //!< Range of element hashes, F = N * M
std::vector<unsigned char> m_encoded; std::vector<unsigned char> m_encoded;
@@ -45,19 +54,16 @@ private:
public: public:
/** Constructs an empty filter. */ /** Constructs an empty filter. */
GCSFilter(uint64_t siphash_k0 = 0, uint64_t siphash_k1 = 0, uint8_t P = 0, uint32_t M = 0); explicit GCSFilter(const Params& params = Params());
/** Reconstructs an already-created filter from an encoding. */ /** Reconstructs an already-created filter from an encoding. */
GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, GCSFilter(const Params& params, std::vector<unsigned char> encoded_filter);
std::vector<unsigned char> encoded_filter);
/** Builds a new filter from the params and set of elements. */ /** Builds a new filter from the params and set of elements. */
GCSFilter(uint64_t siphash_k0, uint64_t siphash_k1, uint8_t P, uint32_t M, GCSFilter(const Params& params, const ElementSet& elements);
const ElementSet& elements);
uint8_t GetP() const { return m_P; }
uint32_t GetN() const { return m_N; } uint32_t GetN() const { return m_N; }
uint32_t GetM() const { return m_M; } const Params& GetParams() const { return m_params; }
const std::vector<unsigned char>& GetEncoded() const { return m_encoded; } const std::vector<unsigned char>& GetEncoded() const { return m_encoded; }
/** /**
@@ -93,13 +99,21 @@ private:
uint256 m_block_hash; uint256 m_block_hash;
GCSFilter m_filter; GCSFilter m_filter;
bool BuildParams(GCSFilter::Params& params) const;
public: public:
// Construct a new BlockFilter of the specified type from a block. BlockFilter() = default;
//! Reconstruct a BlockFilter from parts.
BlockFilter(BlockFilterType filter_type, const uint256& block_hash,
std::vector<unsigned char> filter);
//! Construct a new BlockFilter of the specified type from a block.
BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo); BlockFilter(BlockFilterType filter_type, const CBlock& block, const CBlockUndo& block_undo);
BlockFilterType GetFilterType() const { return m_filter_type; } BlockFilterType GetFilterType() const { return m_filter_type; }
const uint256& GetBlockHash() const { return m_block_hash; }
const GCSFilter& GetFilter() const { return m_filter; } const GCSFilter& GetFilter() const { return m_filter; }
const std::vector<unsigned char>& GetEncodedFilter() const const std::vector<unsigned char>& GetEncodedFilter() const
@@ -107,10 +121,10 @@ public:
return m_filter.GetEncoded(); return m_filter.GetEncoded();
} }
// Compute the filter hash. //! Compute the filter hash.
uint256 GetHash() const; uint256 GetHash() const;
// Compute the filter header given the previous one. //! Compute the filter header given the previous one.
uint256 ComputeHeader(const uint256& prev_header) const; uint256 ComputeHeader(const uint256& prev_header) const;
template <typename Stream> template <typename Stream>
@@ -131,15 +145,11 @@ public:
m_filter_type = static_cast<BlockFilterType>(filter_type); m_filter_type = static_cast<BlockFilterType>(filter_type);
switch (m_filter_type) { GCSFilter::Params params;
case BlockFilterType::BASIC: if (!BuildParams(params)) {
m_filter = GCSFilter(m_block_hash.GetUint64(0), m_block_hash.GetUint64(1),
BASIC_FILTER_P, BASIC_FILTER_M, std::move(encoded_filter));
break;
default:
throw std::ios_base::failure("unknown filter_type"); throw std::ios_base::failure("unknown filter_type");
} }
m_filter = GCSFilter(params, std::move(encoded_filter));
} }
}; };

View file

@@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(gcsfilter_test)
excluded_elements.insert(std::move(element2)); excluded_elements.insert(std::move(element2));
} }
GCSFilter filter(0, 0, 10, 1 << 10, included_elements); GCSFilter filter({0, 0, 10, 1 << 10}, included_elements);
for (const auto& element : included_elements) { for (const auto& element : included_elements) {
BOOST_CHECK(filter.Match(element)); BOOST_CHECK(filter.Match(element));
@@ -39,6 +39,19 @@ BOOST_AUTO_TEST_CASE(gcsfilter_test)
} }
} }
BOOST_AUTO_TEST_CASE(gcsfilter_default_constructor)
{
GCSFilter filter;
BOOST_CHECK_EQUAL(filter.GetN(), 0);
BOOST_CHECK_EQUAL(filter.GetEncoded().size(), 1);
const GCSFilter::Params& params = filter.GetParams();
BOOST_CHECK_EQUAL(params.m_siphash_k0, 0);
BOOST_CHECK_EQUAL(params.m_siphash_k1, 0);
BOOST_CHECK_EQUAL(params.m_P, 0);
BOOST_CHECK_EQUAL(params.m_M, 1);
}
BOOST_AUTO_TEST_CASE(blockfilter_basic_test) BOOST_AUTO_TEST_CASE(blockfilter_basic_test)
{ {
CScript included_scripts[5], excluded_scripts[3]; CScript included_scripts[5], excluded_scripts[3];
@@ -88,6 +101,17 @@ BOOST_AUTO_TEST_CASE(blockfilter_basic_test)
for (const CScript& script : excluded_scripts) { for (const CScript& script : excluded_scripts) {
BOOST_CHECK(!filter.Match(GCSFilter::Element(script.begin(), script.end()))); BOOST_CHECK(!filter.Match(GCSFilter::Element(script.begin(), script.end())));
} }
// Test serialization/unserialization.
BlockFilter block_filter2;
CDataStream stream(SER_NETWORK, PROTOCOL_VERSION);
stream << block_filter;
stream >> block_filter2;
BOOST_CHECK_EQUAL(block_filter.GetFilterType(), block_filter2.GetFilterType());
BOOST_CHECK_EQUAL(block_filter.GetBlockHash(), block_filter2.GetBlockHash());
BOOST_CHECK(block_filter.GetEncodedFilter() == block_filter2.GetEncodedFilter());
} }
BOOST_AUTO_TEST_CASE(blockfilters_json_test) BOOST_AUTO_TEST_CASE(blockfilters_json_test)