gcs: update fp and modulus values based on recent optimality analysis

In this commit, we decrease the default fp rate parameter P to 19, giving a
false positive rate of 1/2^19. We do this as recent analysis by sipa on the
bitcoin-dev mailing list has shown that, optimally, we can use a false
positive rate of 1/2^19 while using a modulus of M = round(1.497137 * 2^P)
rather than 2^P directly. As a result, we can shrink the filter size by
quite a bit, while still maintaining a sane false positive rate.
This commit is contained in:
Olaoluwa Osuntokun 2018-05-28 20:22:24 -07:00
parent 996307736e
commit 0ecd90b8d6
5 changed files with 105 additions and 114 deletions

View file

@ -8,6 +8,8 @@ package builder
import (
"crypto/rand"
"encoding/binary"
"fmt"
"math"
"github.com/btcsuite/btcd/chaincfg/chainhash"
"github.com/btcsuite/btcd/txscript"
@ -15,13 +17,20 @@ import (
"github.com/btcsuite/btcutil/gcs"
)
// DefaultP is the default collision probability (2^-20)
const DefaultP = 20
const (
// DefaultP is the default collision probability (2^-19)
DefaultP = 19
// DefaultM is the default value used for the hash range.
DefaultM uint64 = 784931
)
// GCSBuilder is a utility class that makes building GCS filters convenient.
type GCSBuilder struct {
p uint8
m uint64
key [gcs.KeySize]byte
// data is a set of entries represented as strings. This is done to
@ -120,6 +129,23 @@ func (b *GCSBuilder) SetP(p uint8) *GCSBuilder {
return b
}
// SetM sets the filter's modulus value after calling Builder(). The modulus
// must fit within a uint32; a larger value transitions the builder into an
// error state.
func (b *GCSBuilder) SetM(m uint64) *GCSBuilder {
	// Do nothing if the builder's already errored out.
	if b.err != nil {
		return b
	}

	// Basic sanity check: the modulus is reduced against 32-bit hash
	// values, so it must itself fit in 32 bits.
	//
	// NOTE(review): this reuses gcs.ErrPTooBig for an out-of-range M —
	// presumably no dedicated M error exists yet; confirm.
	if m > uint64(math.MaxUint32) {
		b.err = gcs.ErrPTooBig
		return b
	}

	b.m = m
	return b
}
// Preallocate sets the estimated filter size after calling Builder() to reduce
// the probability of memory reallocations. If the builder has already had data
// added to it, Preallocate has no effect.
@ -221,46 +247,57 @@ func (b *GCSBuilder) Build() (*gcs.Filter, error) {
return nil, b.err
}
// We'll ensure that all the parameters we need to actually build the
// filter properly are set.
if b.p == 0 {
return nil, fmt.Errorf("p value is not set, cannot build")
}
if b.m == 0 {
return nil, fmt.Errorf("m value is not set, cannot build")
}
dataSlice := make([][]byte, 0, len(b.data))
for item := range b.data {
dataSlice = append(dataSlice, []byte(item))
}
return gcs.BuildGCSFilter(b.p, b.key, dataSlice)
return gcs.BuildGCSFilter(b.p, b.m, b.key, dataSlice)
}
// WithKeyPN creates a GCSBuilder with specified key and the passed probability
// and estimated filter size.
func WithKeyPN(key [gcs.KeySize]byte, p uint8, n uint32) *GCSBuilder {
// WithKeyPNM creates a GCSBuilder with specified key and the passed
// probability, modulus and estimated filter size.
func WithKeyPNM(key [gcs.KeySize]byte, p uint8, n uint32, m uint64) *GCSBuilder {
b := GCSBuilder{}
return b.SetKey(key).SetP(p).Preallocate(n)
return b.SetKey(key).SetP(p).SetM(m).Preallocate(n)
}
// WithKeyP creates a GCSBuilder with specified key and the passed probability.
// Estimated filter size is set to zero, which means more reallocations are
// done when building the filter.
func WithKeyP(key [gcs.KeySize]byte, p uint8) *GCSBuilder {
return WithKeyPN(key, p, 0)
// WithKeyPM creates a GCSBuilder with specified key and the passed
// probability. Estimated filter size is set to zero, which means more
// reallocations are done when building the filter.
func WithKeyPM(key [gcs.KeySize]byte, p uint8, m uint64) *GCSBuilder {
return WithKeyPNM(key, p, 0, m)
}
// WithKey creates a GCSBuilder with specified key. Probability is set to 19
// (2^-19 collision probability). Estimated filter size is set to zero, which
// means more reallocations are done when building the filter.
func WithKey(key [gcs.KeySize]byte) *GCSBuilder {
return WithKeyPN(key, DefaultP, 0)
return WithKeyPNM(key, DefaultP, 0, DefaultM)
}
// WithKeyHashPN creates a GCSBuilder with key derived from the specified
// WithKeyHashPNM creates a GCSBuilder with key derived from the specified
// chainhash.Hash and the passed probability and estimated filter size.
func WithKeyHashPN(keyHash *chainhash.Hash, p uint8, n uint32) *GCSBuilder {
return WithKeyPN(DeriveKey(keyHash), p, n)
func WithKeyHashPNM(keyHash *chainhash.Hash, p uint8, n uint32,
m uint64) *GCSBuilder {
return WithKeyPNM(DeriveKey(keyHash), p, n, m)
}
// WithKeyHashP creates a GCSBuilder with key derived from the specified
// WithKeyHashPM creates a GCSBuilder with key derived from the specified
// chainhash.Hash and the passed probability. Estimated filter size is set to
// zero, which means more reallocations are done when building the filter.
func WithKeyHashP(keyHash *chainhash.Hash, p uint8) *GCSBuilder {
return WithKeyHashPN(keyHash, p, 0)
func WithKeyHashPM(keyHash *chainhash.Hash, p uint8, m uint64) *GCSBuilder {
return WithKeyHashPNM(keyHash, p, 0, m)
}
// WithKeyHash creates a GCSBuilder with key derived from the specified
@ -268,25 +305,25 @@ func WithKeyHashP(keyHash *chainhash.Hash, p uint8) *GCSBuilder {
// Estimated filter size is set to zero, which means more reallocations are
// done when building the filter.
func WithKeyHash(keyHash *chainhash.Hash) *GCSBuilder {
return WithKeyHashPN(keyHash, DefaultP, 0)
return WithKeyHashPNM(keyHash, DefaultP, 0, DefaultM)
}
// WithRandomKeyPN creates a GCSBuilder with a cryptographically random key and
// WithRandomKeyPNM creates a GCSBuilder with a cryptographically random key and
// the passed probability and estimated filter size.
func WithRandomKeyPN(p uint8, n uint32) *GCSBuilder {
func WithRandomKeyPNM(p uint8, n uint32, m uint64) *GCSBuilder {
key, err := RandomKey()
if err != nil {
b := GCSBuilder{err: err}
return &b
}
return WithKeyPN(key, p, n)
return WithKeyPNM(key, p, n, m)
}
// WithRandomKeyP creates a GCSBuilder with a cryptographically random key and
// WithRandomKeyPM creates a GCSBuilder with a cryptographically random key and
// the passed probability. Estimated filter size is set to zero, which means
// more reallocations are done when building the filter.
func WithRandomKeyP(p uint8) *GCSBuilder {
return WithRandomKeyPN(p, 0)
func WithRandomKeyPM(p uint8, m uint64) *GCSBuilder {
return WithRandomKeyPNM(p, 0, m)
}
// WithRandomKey creates a GCSBuilder with a cryptographically random key.
@ -294,7 +331,7 @@ func WithRandomKeyP(p uint8) *GCSBuilder {
// size is set to zero, which means more reallocations are done when
// building the filter.
func WithRandomKey() *GCSBuilder {
return WithRandomKeyPN(DefaultP, 0)
return WithRandomKeyPNM(DefaultP, 0, DefaultM)
}
// BuildBasicFilter builds a basic GCS filter from a block. A basic GCS filter

View file

@ -105,7 +105,7 @@ func TestUseBlockHash(t *testing.T) {
BuilderTest(b, hash, builder.DefaultP, outPoint, addrBytes, witness, t)
// Create a GCSBuilder with a key hash and non-default P and test it.
b = builder.WithKeyHashP(hash, 30)
b = builder.WithKeyHashPM(hash, 30, 90)
BuilderTest(b, hash, 30, outPoint, addrBytes, witness, t)
// Create a GCSBuilder with a random key, set the key from a hash
@ -135,7 +135,7 @@ func TestUseBlockHash(t *testing.T) {
BuilderTest(b, hash, builder.DefaultP, outPoint, addrBytes, witness, t)
// Create a GCSBuilder with a random key and non-default P and test it.
b = builder.WithRandomKeyP(30)
b = builder.WithRandomKeyPM(30, 90)
key2, err := b.Key()
if err != nil {
t.Fatalf("Builder instantiation with random key failed: %s",
@ -162,7 +162,7 @@ func TestUseBlockHash(t *testing.T) {
BuilderTest(b, hash, builder.DefaultP, outPoint, addrBytes, witness, t)
// Create a GCSBuilder with a known key and non-default P and test it.
b = builder.WithKeyP(testKey, 30)
b = builder.WithKeyPM(testKey, 30, 90)
key, err = b.Key()
if err != nil {
t.Fatalf("Builder instantiation with known key failed: %s",
@ -177,7 +177,7 @@ func TestUseBlockHash(t *testing.T) {
// Create a GCSBuilder with a known key and too-high P and ensure error
// works throughout all functions that use it.
b = builder.WithRandomKeyP(33).SetKeyFromHash(hash).SetKey(testKey)
b = builder.WithRandomKeyPM(33, 99).SetKeyFromHash(hash).SetKey(testKey)
b.SetP(30).AddEntry(hash.CloneBytes()).AddEntries(contents)
b.AddOutPoint(outPoint).AddHash(hash).AddScript(addrBytes)
_, err = b.Key()

View file

@ -52,7 +52,6 @@ const (
// number to reduce, and our modulus N divided into its high 32-bits and lower
// 32-bits.
func fastReduction(v, nHi, nLo uint64) uint64 {
// First, we'll split the item we need to reduce into its higher and
// lower bits.
vhi := v >> 32
@ -82,16 +81,17 @@ func fastReduction(v, nHi, nLo uint64) uint64 {
// in building the filter is required in order to match filter values and is
// not included in the serialized form.
type Filter struct {
n uint32
p uint8
modulusNP uint64
n uint32
p uint8
modulusNP uint64
filterData []byte
}
// BuildGCSFilter builds a new GCS filter with the collision probability of
// `1/(2**P)`, the modulus scalar `M`, key `key`, and including every `[]byte`
// in `data` as a member of the set.
func BuildGCSFilter(P uint8, key [KeySize]byte, data [][]byte) (*Filter, error) {
func BuildGCSFilter(P uint8, M uint64, key [KeySize]byte, data [][]byte) (*Filter, error) {
// Some initial parameter checks: make sure we have data from which to
// build the filter, and make sure our parameters will fit the hash
// function we're using.
@ -107,7 +107,11 @@ func BuildGCSFilter(P uint8, key [KeySize]byte, data [][]byte) (*Filter, error)
n: uint32(len(data)),
p: P,
}
f.modulusNP = uint64(f.n) << P
// Next, we'll compute the modulus we use within our finite field as
// n * M. The scalar M already encodes the (rounded) value of
// mScalar * 2^P, so no further rounding is required here.
f.modulusNP = uint64(f.n) * M
// Shortcut if the filter is empty.
if f.n == 0 {
@ -142,7 +146,7 @@ func BuildGCSFilter(P uint8, key [KeySize]byte, data [][]byte) (*Filter, error)
for _, v := range values {
// Calculate the difference between this value and the last,
// modulo P.
remainder = (v - lastValue) & ((uint64(1) << P) - 1)
remainder = (v - lastValue) & ((uint64(1) << f.p) - 1)
// Calculate the difference between this value and the last,
// divided by P.
@ -170,7 +174,7 @@ func BuildGCSFilter(P uint8, key [KeySize]byte, data [][]byte) (*Filter, error)
// FromBytes deserializes a GCS filter from a known N, P, M, and serialized
// filter as returned by Bytes().
func FromBytes(N uint32, P uint8, d []byte) (*Filter, error) {
func FromBytes(N uint32, P uint8, M uint64, d []byte) (*Filter, error) {
// Basic sanity check.
if P > 32 {
@ -182,7 +186,11 @@ func FromBytes(N uint32, P uint8, d []byte) (*Filter, error) {
n: N,
p: P,
}
f.modulusNP = uint64(f.n) << P
// Next, we'll compute the modulus we use within our finite field as
// n * M. The scalar M already encodes the (rounded) value of
// mScalar * 2^P, so no further rounding is required here.
f.modulusNP = uint64(f.n) * M
// Copy the filter.
f.filterData = make([]byte, len(d))
@ -193,7 +201,7 @@ func FromBytes(N uint32, P uint8, d []byte) (*Filter, error) {
// FromNBytes deserializes a GCS filter from known P and M values, and a
// serialized N and filter as returned by NBytes().
func FromNBytes(P uint8, d []byte) (*Filter, error) {
func FromNBytes(P uint8, M uint64, d []byte) (*Filter, error) {
buffer := bytes.NewBuffer(d)
N, err := wire.ReadVarInt(buffer, varIntProtoVer)
if err != nil {
@ -202,34 +210,7 @@ func FromNBytes(P uint8, d []byte) (*Filter, error) {
if N >= (1 << 32) {
return nil, ErrNTooBig
}
return FromBytes(uint32(N), P, buffer.Bytes())
}
// FromPBytes deserializes a GCS filter from a known N, and serialized P and
// filter as returned by NBytes().
func FromPBytes(N uint32, d []byte) (*Filter, error) {
return FromBytes(N, d[0], d[1:])
}
// FromNPBytes deserializes a GCS filter from a serialized N, P, and filter as
// returned by NPBytes().
func FromNPBytes(d []byte) (*Filter, error) {
buffer := bytes.NewBuffer(d)
N, err := wire.ReadVarInt(buffer, varIntProtoVer)
if err != nil {
return nil, err
}
if N >= (1 << 32) {
return nil, ErrNTooBig
}
P, err := buffer.ReadByte()
if err != nil {
return nil, err
}
return FromBytes(uint32(N), P, buffer.Bytes())
return FromBytes(uint32(N), P, M, buffer.Bytes())
}
// Bytes returns the serialized format of the GCS filter, which does not

View file

@ -18,8 +18,11 @@ var (
// No need to allocate an err variable in every test
err error
// Collision probability for the tests (1/2**20)
P = uint8(20)
// Collision probability for the tests (1/2**19)
P = uint8(19)
// Modulus value for the tests.
M uint64 = 784931
// Filters are conserved between tests but we must define with an
// interface which functions we're testing because the gcsFilter type
@ -79,7 +82,7 @@ func TestGCSFilterBuild(t *testing.T) {
for i := 0; i < gcs.KeySize; i += 4 {
binary.BigEndian.PutUint32(key[i:], rand.Uint32())
}
filter, err = gcs.BuildGCSFilter(P, key, contents)
filter, err = gcs.BuildGCSFilter(P, M, key, contents)
if err != nil {
t.Fatalf("Filter build failed: %s", err.Error())
}
@ -91,7 +94,7 @@ func TestGCSFilterCopy(t *testing.T) {
if err != nil {
t.Fatalf("Filter Bytes() failed: %v", err)
}
filter2, err = gcs.FromBytes(filter.N(), P, serialized2)
filter2, err = gcs.FromBytes(filter.N(), P, M, serialized2)
if err != nil {
t.Fatalf("Filter copy failed: %s", err.Error())
}
@ -99,23 +102,7 @@ func TestGCSFilterCopy(t *testing.T) {
if err != nil {
t.Fatalf("Filter NBytes() failed: %v", err)
}
filter3, err = gcs.FromNBytes(filter.P(), serialized3)
if err != nil {
t.Fatalf("Filter copy failed: %s", err.Error())
}
serialized4, err := filter.PBytes()
if err != nil {
t.Fatalf("Filter PBytes() failed: %v", err)
}
filter4, err = gcs.FromPBytes(filter.N(), serialized4)
if err != nil {
t.Fatalf("Filter copy failed: %s", err.Error())
}
serialized5, err := filter.NPBytes()
if err != nil {
t.Fatalf("Filter NPBytes() failed: %v", err)
}
filter5, err = gcs.FromNPBytes(serialized5)
filter3, err = gcs.FromNBytes(filter.P(), M, serialized3)
if err != nil {
t.Fatalf("Filter copy failed: %s", err.Error())
}
@ -136,24 +123,12 @@ func TestGCSFilterMetadata(t *testing.T) {
if filter.P() != filter3.P() {
t.Fatal("P doesn't match between copied filters")
}
if filter.P() != filter4.P() {
t.Fatal("P doesn't match between copied filters")
}
if filter.P() != filter5.P() {
t.Fatal("P doesn't match between copied filters")
}
if filter.N() != filter2.N() {
t.Fatal("N doesn't match between copied filters")
}
if filter.N() != filter3.N() {
t.Fatal("N doesn't match between copied filters")
}
if filter.N() != filter4.N() {
t.Fatal("N doesn't match between copied filters")
}
if filter.N() != filter5.N() {
t.Fatal("N doesn't match between copied filters")
}
serialized, err := filter.Bytes()
if err != nil {
t.Fatalf("Filter Bytes() failed: %v", err)
@ -179,13 +154,6 @@ func TestGCSFilterMetadata(t *testing.T) {
if !bytes.Equal(serialized, serialized4) {
t.Fatal("Bytes don't match between copied filters")
}
serialized5, err := filter5.Bytes()
if err != nil {
t.Fatalf("Filter Bytes() failed: %v", err)
}
if !bytes.Equal(serialized, serialized5) {
t.Fatal("Bytes don't match between copied filters")
}
}
// TestGCSFilterMatch checks that both the built and copied filters match

View file

@ -38,16 +38,18 @@ func BenchmarkGCSFilterBuild50000(b *testing.B) {
for i := 0; i < gcs.KeySize; i += 4 {
binary.BigEndian.PutUint32(testKey[i:], rand.Uint32())
}
randFilterElems, genErr := genRandFilterElements(50000)
if err != nil {
b.Fatalf("unable to generate random item: %v", genErr)
}
b.StartTimer()
var localFilter *gcs.Filter
for i := 0; i < b.N; i++ {
localFilter, err = gcs.BuildGCSFilter(
P, key, randFilterElems,
P, M, key, randFilterElems,
)
if err != nil {
b.Fatalf("unable to generate filter: %v", err)
@ -63,16 +65,19 @@ func BenchmarkGCSFilterBuild100000(b *testing.B) {
for i := 0; i < gcs.KeySize; i += 4 {
binary.BigEndian.PutUint32(testKey[i:], rand.Uint32())
}
randFilterElems, genErr := genRandFilterElements(100000)
if err != nil {
b.Fatalf("unable to generate random item: %v", genErr)
}
b.StartTimer()
var localFilter *gcs.Filter
for i := 0; i < b.N; i++ {
localFilter, err = gcs.BuildGCSFilter(P, key,
randFilterElems)
localFilter, err = gcs.BuildGCSFilter(
P, M, key, randFilterElems,
)
if err != nil {
b.Fatalf("unable to generate filter: %v", err)
}
@ -87,7 +92,7 @@ var (
// BenchmarkGCSFilterMatch benchmarks querying a filter for a single value.
func BenchmarkGCSFilterMatch(b *testing.B) {
b.StopTimer()
filter, err := gcs.BuildGCSFilter(P, key, contents)
filter, err := gcs.BuildGCSFilter(P, M, key, contents)
if err != nil {
b.Fatalf("Failed to build filter")
}
@ -114,7 +119,7 @@ func BenchmarkGCSFilterMatch(b *testing.B) {
// values.
func BenchmarkGCSFilterMatchAny(b *testing.B) {
b.StopTimer()
filter, err := gcs.BuildGCSFilter(P, key, contents)
filter, err := gcs.BuildGCSFilter(P, M, key, contents)
if err != nil {
b.Fatalf("Failed to build filter")
}