sketching pruning; it doesn't work yet

This commit is contained in:
Brannon King 2021-10-04 16:35:56 -04:00
parent 742881a29a
commit e056f8ac23
6 changed files with 104 additions and 19 deletions

View file

@ -94,7 +94,23 @@ func New(cfg config.Config) (*ClaimTrie, error) {
var trie merkletrie.MerkleTrie var trie merkletrie.MerkleTrie
if cfg.RamTrie { if cfg.RamTrie {
trie = merkletrie.NewRamTrie() f := func(prefix merkletrie.KeyType) ([]merkletrie.KeyType, []*chainhash.Hash) {
var names []merkletrie.KeyType
var hashes []*chainhash.Hash
_ = nodeRepo.IterateChildren(prefix, func(changes []change.Change) bool {
if len(changes) <= 0 {
return true
}
childName := make(merkletrie.KeyType, len(changes[0].Name))
copy(childName, changes[0].Name)
names = append(names, childName)
hash, _ := nodeManager.Hash(childName)
hashes = append(hashes, hash)
return true
})
return names, hashes
}
trie = merkletrie.NewRamTrie(f)
} else { } else {
// Initialize repository for MerkleTrie. The cleanup is delegated to MerkleTrie. // Initialize repository for MerkleTrie. The cleanup is delegated to MerkleTrie.

View file

@ -239,7 +239,10 @@ func TestRebuild(t *testing.T) {
r.NotNil(m) r.NotNil(m)
r.NotEqual(*merkletrie.EmptyTrieHash, *m) r.NotEqual(*merkletrie.EmptyTrieHash, *m)
ct.merkleTrie = merkletrie.NewRamTrie() ct.merkleTrie = merkletrie.NewRamTrie(func(name merkletrie.KeyType) ([]merkletrie.KeyType, []*chainhash.Hash) {
r.Fail("Unexpected entrance on %s", string(name))
return nil, nil
})
ct.runFullTrieRebuild(nil, nil) ct.runFullTrieRebuild(nil, nil)
m2 := ct.MerkleHash() m2 := ct.MerkleHash()

View file

@ -176,6 +176,8 @@ func NewNodeStatsCommand() *cobra.Command {
n := 0 n := 0
c := 0 c := 0
withUpdates := 0
changeGap := 0
err = repo.IterateChildren([]byte{}, func(changes []change.Change) bool { err = repo.IterateChildren([]byte{}, func(changes []change.Change) bool {
c += len(changes) c += len(changes)
n++ n++
@ -183,9 +185,14 @@ func NewNodeStatsCommand() *cobra.Command {
fmt.Printf("Name: %s, Hex: %s, Changes: %d\n", string(changes[0].Name), fmt.Printf("Name: %s, Hex: %s, Changes: %d\n", string(changes[0].Name),
hex.EncodeToString(changes[0].Name), len(changes)) hex.EncodeToString(changes[0].Name), len(changes))
} }
if len(changes) > 3 {
withUpdates++
changeGap += int(changes[3].Height - changes[0].Height)
}
return true return true
}) })
fmt.Printf("\nNames: %d, Average changes: %.2f\n", n, float64(c)/float64(n)) fmt.Printf("\nNames: %d, Average changes: %.2f\n", n, float64(c)/float64(n))
fmt.Printf("\nNames with updates: %d, Average 1st change gap: %.2f\n", withUpdates, float64(changeGap)/float64(withUpdates))
return errors.Wrapf(err, "iterate node repo") return errors.Wrapf(err, "iterate node repo")
}, },
} }

View file

@ -12,7 +12,7 @@ var DefaultConfig = Config{
RamTrie: true, // as it stands the other trie uses more RAM, more time, and 40GB+ of disk space RamTrie: true, // as it stands the other trie uses more RAM, more time, and 40GB+ of disk space
DataDir: filepath.Join(btcutil.AppDataDir("chain", false), "data"), DataDir: filepath.Join(btcutil.AppDataDir("lbcd", false), "data"),
BlockRepoPebble: pebbleConfig{ BlockRepoPebble: pebbleConfig{
Path: "blocks_pebble_db", Path: "blocks_pebble_db",

View file

@ -57,7 +57,7 @@ func sortSearch(nodes []*collapsedVertex, b byte) int {
} }
func (ptn *collapsedVertex) findNearest(key KeyType) (int, *collapsedVertex) { func (ptn *collapsedVertex) findNearest(key KeyType) (int, *collapsedVertex) {
// none of the children overlap on the first char or we would have a parent node with that char // none of the children overlap on the first char, or we would have a parent node with that char
index := sortSearch(ptn.children, key[0]) index := sortSearch(ptn.children, key[0])
hits := ptn.children[index:] hits := ptn.children[index:]
if len(hits) > 0 { if len(hits) > 0 {
@ -67,13 +67,18 @@ func (ptn *collapsedVertex) findNearest(key KeyType) (int, *collapsedVertex) {
} }
type collapsedTrie struct { type collapsedTrie struct {
Root *collapsedVertex Root *collapsedVertex
Nodes int Nodes int
Reload func(prefix KeyType, child *collapsedVertex)
} }
func NewCollapsedTrie() *collapsedTrie { func NewCollapsedTrie() *collapsedTrie {
// we never delete the Root node // we never delete the Root node
return &collapsedTrie{Root: &collapsedVertex{key: make(KeyType, 0)}, Nodes: 1} return &collapsedTrie{
Root: &collapsedVertex{key: make(KeyType, 0)},
Nodes: 1,
Reload: func(_ KeyType, child *collapsedVertex) {},
}
} }
func (pt *collapsedTrie) NodeCount() int { func (pt *collapsedTrie) NodeCount() int {
@ -93,7 +98,8 @@ func matchLength(a, b KeyType) int {
return minLen return minLen
} }
func (pt *collapsedTrie) insert(value KeyType, node *collapsedVertex) (bool, *collapsedVertex) { func (pt *collapsedTrie) insert(master, value KeyType, node *collapsedVertex) (bool, *collapsedVertex) {
pt.Reload(master[0:len(master)-len(value)], node)
index, child := node.findNearest(value) index, child := node.findNearest(value)
match := 0 match := 0
if index >= 0 { // if we found a child if index >= 0 { // if we found a child
@ -119,7 +125,7 @@ func (pt *collapsedTrie) insert(value KeyType, node *collapsedVertex) (bool, *co
return true, child return true, child
} }
} }
return pt.insert(value[match:], child) return pt.insert(master, value[match:], child)
} }
func (pt *collapsedTrie) InsertOrFind(value KeyType) (bool, *collapsedVertex) { func (pt *collapsedTrie) InsertOrFind(value KeyType) (bool, *collapsedVertex) {
@ -130,14 +136,15 @@ func (pt *collapsedTrie) InsertOrFind(value KeyType) (bool, *collapsedVertex) {
return false, pt.Root return false, pt.Root
} }
// we store the name so we need to make our own copy of it // we store the name, so we need to make our own copy of it.
// this avoids errors where this function is called via the DB iterator // this avoids errors where this function is called via the DB iterator
v2 := make([]byte, len(value)) v2 := make([]byte, len(value))
copy(v2, value) copy(v2, value)
return pt.insert(v2, pt.Root) return pt.insert(v2, v2, pt.Root)
} }
func find(value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*collapsedVertex) *collapsedVertex { func (pt *collapsedTrie) find(master, value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*collapsedVertex) *collapsedVertex {
pt.Reload(master[0:len(master)-len(value)], node)
index, child := node.findNearest(value) index, child := node.findNearest(value)
if index < 0 { if index < 0 {
return nil return nil
@ -161,22 +168,22 @@ func find(value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*col
if path != nil { if path != nil {
*path = append(*path, child) *path = append(*path, child)
} }
return find(value[match:], child, pathIndexes, path) return pt.find(master, value[match:], child, pathIndexes, path)
} }
func (pt *collapsedTrie) Find(value KeyType) *collapsedVertex { func (pt *collapsedTrie) Find(value KeyType) *collapsedVertex {
if len(value) <= 0 { if len(value) <= 0 {
return pt.Root return pt.Root
} }
return find(value, pt.Root, nil, nil) return pt.find(value, value, pt.Root, nil, nil)
} }
func (pt *collapsedTrie) FindPath(value KeyType) ([]int, []*collapsedVertex) { func (pt *collapsedTrie) FindPath(value KeyType) ([]int, []*collapsedVertex) {
pathIndexes := []int{-1} pathIndexes := []int{-1}
path := []*collapsedVertex{pt.Root} path := []*collapsedVertex{pt.Root}
if len(value) > 0 { if len(value) > 0 {
result := find(value, pt.Root, &pathIndexes, &path) result := pt.find(value, value, pt.Root, &pathIndexes, &path)
if result == nil { // not sure I want this line if result == nil { // not sure that I want this line
return nil, nil return nil, nil
} }
} }
@ -186,7 +193,7 @@ func (pt *collapsedTrie) FindPath(value KeyType) ([]int, []*collapsedVertex) {
// IterateFrom can be used to find a value and run a function on that value. // IterateFrom can be used to find a value and run a function on that value.
// If the handler returns true it continues to iterate through the children of value. // If the handler returns true it continues to iterate through the children of value.
func (pt *collapsedTrie) IterateFrom(start KeyType, handler func(name KeyType, value *collapsedVertex) bool) { func (pt *collapsedTrie) IterateFrom(start KeyType, handler func(name KeyType, value *collapsedVertex) bool) {
node := find(start, pt.Root, nil, nil) node := pt.find(start, start, pt.Root, nil, nil)
if node == nil { if node == nil {
return return
} }

View file

@ -23,20 +23,49 @@ type RamTrie struct {
bufs *sync.Pool bufs *sync.Pool
} }
func NewRamTrie() *RamTrie { func NewRamTrie(f func(name KeyType) ([]KeyType, []*chainhash.Hash)) *RamTrie {
return &RamTrie{ return &RamTrie{
bufs: &sync.Pool{ bufs: &sync.Pool{
New: func() interface{} { New: func() interface{} {
return new(bytes.Buffer) return new(bytes.Buffer)
}, },
}, },
collapsedTrie: collapsedTrie{Root: &collapsedVertex{}}, collapsedTrie: collapsedTrie{
Root: &collapsedVertex{},
Nodes: 1,
Reload: func(prefix KeyType, v *collapsedVertex) {
// Reload is always called before the node is used
if len(prefix) <= 0 {
return
}
p := getOrMakePayload(v)
if p.pruned { // it's been pruned
names, hashes := f(prefix)
for i, name := range names {
cv := &collapsedVertex{
children: nil,
key: name[len(prefix):],
payload: &ramTriePayload{
merkleHash: nil,
claimHash: hashes[i],
hit: 1,
},
}
// assuming that names come out in order:
v.children = append(v.children, cv)
}
p.pruned = false
}
},
},
} }
} }
type ramTriePayload struct { type ramTriePayload struct {
merkleHash *chainhash.Hash merkleHash *chainhash.Hash
claimHash *chainhash.Hash claimHash *chainhash.Hash
hit int32
pruned bool
} }
func (r *ramTriePayload) clear() { func (r *ramTriePayload) clear() {
@ -85,6 +114,7 @@ func (rt *RamTrie) Update(name []byte, h *chainhash.Hash, _ bool) {
} }
func (rt *RamTrie) MerkleHash() *chainhash.Hash { func (rt *RamTrie) MerkleHash() *chainhash.Hash {
rootHashCalls++
if h := rt.merkleHash(rt.Root); h == nil { if h := rt.merkleHash(rt.Root); h == nil {
return EmptyTrieHash return EmptyTrieHash
} }
@ -94,8 +124,10 @@ func (rt *RamTrie) MerkleHash() *chainhash.Hash {
func (rt *RamTrie) merkleHash(v *collapsedVertex) *chainhash.Hash { func (rt *RamTrie) merkleHash(v *collapsedVertex) *chainhash.Hash {
p := getOrMakePayload(v) p := getOrMakePayload(v)
if p.merkleHash != nil { if p.merkleHash != nil {
runCleanup(v)
return p.merkleHash return p.merkleHash
} }
p.hit++
b := rt.bufs.Get().(*bytes.Buffer) b := rt.bufs.Get().(*bytes.Buffer)
defer rt.bufs.Put(b) defer rt.bufs.Put(b)
@ -130,17 +162,37 @@ func (rt *RamTrie) completeHash(h *chainhash.Hash, childKey KeyType) []byte {
} }
func (rt *RamTrie) MerkleHashAllClaims() *chainhash.Hash { func (rt *RamTrie) MerkleHashAllClaims() *chainhash.Hash {
rootHashCalls++
if h := rt.merkleHashAllClaims(rt.Root); h == nil { if h := rt.merkleHashAllClaims(rt.Root); h == nil {
return EmptyTrieHash return EmptyTrieHash
} }
return getOrMakePayload(rt.Root).merkleHash return getOrMakePayload(rt.Root).merkleHash
} }
// rootHashCalls counts root-hash requests; MerkleHash and MerkleHashAllClaims
// each increment it once per call, and runCleanup reads it to decide when a
// pruning pass is due.
// TODO: put this in rt
var rootHashCalls int

const (
	// cleanupMod is the length, in root-hash calls, of one pruning cycle:
	// runCleanup only acts when rootHashCalls%cleanupMod == cleanupMod-1.
	cleanupMod = 1024

	// threshold is the hit count a child must exceed for its parent to keep
	// its children during a pruning pass. 42 out of 1024 calls means a child
	// has to be used roughly 4% of the time to stay in cache.
	threshold = 42
)
// runCleanup prunes the children of v when a pruning pass is due.
//
// A pass is due only on the last root-hash call of each cleanupMod-sized
// window (rootHashCalls%cleanupMod == cleanupMod-1); on every other call this
// is a no-op. During a pass, v keeps its children if any child carries a
// payload whose hit counter exceeds threshold. Otherwise the children slice is
// dropped and v's payload is flagged as pruned.
//
// NOTE(review): a non-nil child payload is assumed to be a *ramTriePayload;
// any other payload type would make the type assertion panic.
func runCleanup(v *collapsedVertex) {
	if rootHashCalls%cleanupMod != cleanupMod-1 {
		// Not the pruning slot of this cycle.
		return
	}

	for i := range v.children {
		childPayload := v.children[i].payload
		if childPayload == nil {
			continue
		}
		if childPayload.(*ramTriePayload).hit > threshold {
			// At least one child is hot enough; leave the whole set in place.
			return
		}
	}

	// No child was used often enough: release them and record that fact on
	// the vertex's payload.
	v.children = nil
	own := getOrMakePayload(v)
	own.pruned = true
}
func (rt *RamTrie) merkleHashAllClaims(v *collapsedVertex) *chainhash.Hash { func (rt *RamTrie) merkleHashAllClaims(v *collapsedVertex) *chainhash.Hash {
p := getOrMakePayload(v) p := getOrMakePayload(v)
if p.merkleHash != nil { if p.merkleHash != nil {
runCleanup(v)
return p.merkleHash return p.merkleHash
} }
p.hit++
childHashes := make([]*chainhash.Hash, 0, len(v.children)) childHashes := make([]*chainhash.Hash, 0, len(v.children))
for _, ch := range v.children { for _, ch := range v.children {