diff --git a/claimtrie/claimtrie.go b/claimtrie/claimtrie.go index f99a147e..23054c13 100644 --- a/claimtrie/claimtrie.go +++ b/claimtrie/claimtrie.go @@ -94,7 +94,23 @@ func New(cfg config.Config) (*ClaimTrie, error) { var trie merkletrie.MerkleTrie if cfg.RamTrie { - trie = merkletrie.NewRamTrie() + f := func(prefix merkletrie.KeyType) ([]merkletrie.KeyType, []*chainhash.Hash) { + var names []merkletrie.KeyType + var hashes []*chainhash.Hash + _ = nodeRepo.IterateChildren(prefix, func(changes []change.Change) bool { + if len(changes) <= 0 { + return true + } + childName := make(merkletrie.KeyType, len(changes[0].Name)) + copy(childName, changes[0].Name) + names = append(names, childName) + hash, _ := nodeManager.Hash(childName) + hashes = append(hashes, hash) + return true + }) + return names, hashes + } + trie = merkletrie.NewRamTrie(f) } else { // Initialize repository for MerkleTrie. The cleanup is delegated to MerkleTrie. diff --git a/claimtrie/claimtrie_test.go b/claimtrie/claimtrie_test.go index d93db227..056a9bf1 100644 --- a/claimtrie/claimtrie_test.go +++ b/claimtrie/claimtrie_test.go @@ -239,7 +239,10 @@ func TestRebuild(t *testing.T) { r.NotNil(m) r.NotEqual(*merkletrie.EmptyTrieHash, *m) - ct.merkleTrie = merkletrie.NewRamTrie() + ct.merkleTrie = merkletrie.NewRamTrie(func(name merkletrie.KeyType) ([]merkletrie.KeyType, []*chainhash.Hash) { + r.Fail("Unexpected entrance on %s", string(name)) + return nil, nil + }) ct.runFullTrieRebuild(nil, nil) m2 := ct.MerkleHash() diff --git a/claimtrie/cmd/cmd/node.go b/claimtrie/cmd/cmd/node.go index 08112e94..de9d333a 100644 --- a/claimtrie/cmd/cmd/node.go +++ b/claimtrie/cmd/cmd/node.go @@ -176,6 +176,8 @@ func NewNodeStatsCommand() *cobra.Command { n := 0 c := 0 + withUpdates := 0 + changeGap := 0 err = repo.IterateChildren([]byte{}, func(changes []change.Change) bool { c += len(changes) n++ @@ -183,9 +185,14 @@ func NewNodeStatsCommand() *cobra.Command { fmt.Printf("Name: %s, Hex: %s, Changes: %d\n", string(changes[0].Name), hex.EncodeToString(changes[0].Name), len(changes)) } + if len(changes) > 3 { + withUpdates++ + changeGap += int(changes[3].Height - changes[0].Height) + } return true }) fmt.Printf("\nNames: %d, Average changes: %.2f\n", n, float64(c)/float64(n)) + fmt.Printf("\nNames with updates: %d, Average 1st change gap: %.2f\n", withUpdates, float64(changeGap)/float64(withUpdates)) return errors.Wrapf(err, "iterate node repo") }, } diff --git a/claimtrie/config/config.go b/claimtrie/config/config.go index 4920ca17..a933b9f8 100644 --- a/claimtrie/config/config.go +++ b/claimtrie/config/config.go @@ -12,7 +12,7 @@ var DefaultConfig = Config{ RamTrie: true, // as it stands the other trie uses more RAM, more time, and 40GB+ of disk space - DataDir: filepath.Join(btcutil.AppDataDir("chain", false), "data"), + DataDir: filepath.Join(btcutil.AppDataDir("lbcd", false), "data"), BlockRepoPebble: pebbleConfig{ Path: "blocks_pebble_db", diff --git a/claimtrie/merkletrie/collapsedtrie.go b/claimtrie/merkletrie/collapsedtrie.go index 1c6f2c5f..f24d5e24 100644 --- a/claimtrie/merkletrie/collapsedtrie.go +++ b/claimtrie/merkletrie/collapsedtrie.go @@ -57,7 +57,7 @@ func sortSearch(nodes []*collapsedVertex, b byte) int { } func (ptn *collapsedVertex) findNearest(key KeyType) (int, *collapsedVertex) { - // none of the children overlap on the first char or we would have a parent node with that char + // none of the children overlap on the first char, or we would have a parent node with that char index := sortSearch(ptn.children, key[0]) hits := ptn.children[index:] if len(hits) > 0 { @@ -67,13 +67,18 @@ func (ptn *collapsedVertex) findNearest(key KeyType) (int, *collapsedVertex) { } type collapsedTrie struct { - Root *collapsedVertex - Nodes int + Root *collapsedVertex + Nodes int + Reload func(prefix KeyType, child *collapsedVertex) } func NewCollapsedTrie() *collapsedTrie { // we never delete the Root node - return &collapsedTrie{Root: &collapsedVertex{key: make(KeyType, 0)}, Nodes: 1} + return &collapsedTrie{ + Root: &collapsedVertex{key: make(KeyType, 0)}, + Nodes: 1, + Reload: func(_ KeyType, child *collapsedVertex) {}, + } } func (pt *collapsedTrie) NodeCount() int { @@ -93,7 +98,8 @@ func matchLength(a, b KeyType) int { return minLen } -func (pt *collapsedTrie) insert(value KeyType, node *collapsedVertex) (bool, *collapsedVertex) { +func (pt *collapsedTrie) insert(master, value KeyType, node *collapsedVertex) (bool, *collapsedVertex) { + pt.Reload(master[0:len(master)-len(value)], node) index, child := node.findNearest(value) match := 0 if index >= 0 { // if we found a child @@ -119,7 +125,7 @@ func (pt *collapsedTrie) insert(value KeyType, node *collapsedVertex) (bool, *co return true, child } } - return pt.insert(value[match:], child) + return pt.insert(master, value[match:], child) } func (pt *collapsedTrie) InsertOrFind(value KeyType) (bool, *collapsedVertex) { @@ -130,14 +136,15 @@ func (pt *collapsedTrie) InsertOrFind(value KeyType) (bool, *collapsedVertex) { return false, pt.Root } - // we store the name so we need to make our own copy of it + // we store the name, so we need to make our own copy of it. // this avoids errors where this function is called via the DB iterator v2 := make([]byte, len(value)) copy(v2, value) - return pt.insert(v2, pt.Root) + return pt.insert(v2, v2, pt.Root) } -func find(value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*collapsedVertex) *collapsedVertex { +func (pt *collapsedTrie) find(master, value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*collapsedVertex) *collapsedVertex { + pt.Reload(master[0:len(master)-len(value)], node) index, child := node.findNearest(value) if index < 0 { return nil @@ -161,22 +168,22 @@ func find(value KeyType, node *collapsedVertex, pathIndexes *[]int, path *[]*col if path != nil { *path = append(*path, child) } - return find(value[match:], child, pathIndexes, path) + return pt.find(master, value[match:], child, pathIndexes, path) } func (pt *collapsedTrie) Find(value KeyType) *collapsedVertex { if len(value) <= 0 { return pt.Root } - return find(value, pt.Root, nil, nil) + return pt.find(value, value, pt.Root, nil, nil) } func (pt *collapsedTrie) FindPath(value KeyType) ([]int, []*collapsedVertex) { pathIndexes := []int{-1} path := []*collapsedVertex{pt.Root} if len(value) > 0 { - result := find(value, pt.Root, &pathIndexes, &path) - if result == nil { // not sure I want this line + result := pt.find(value, value, pt.Root, &pathIndexes, &path) + if result == nil { // not sure that I want this line return nil, nil } } @@ -186,7 +193,7 @@ func (pt *collapsedTrie) FindPath(value KeyType) ([]int, []*collapsedVertex) { // IterateFrom can be used to find a value and run a function on that value. // If the handler returns true it continues to iterate through the children of value. func (pt *collapsedTrie) IterateFrom(start KeyType, handler func(name KeyType, value *collapsedVertex) bool) { - node := find(start, pt.Root, nil, nil) + node := pt.find(start, start, pt.Root, nil, nil) if node == nil { return } diff --git a/claimtrie/merkletrie/ramtrie.go b/claimtrie/merkletrie/ramtrie.go index 646a6b32..65fa8235 100644 --- a/claimtrie/merkletrie/ramtrie.go +++ b/claimtrie/merkletrie/ramtrie.go @@ -23,20 +23,49 @@ type RamTrie struct { bufs *sync.Pool } -func NewRamTrie() *RamTrie { +func NewRamTrie(f func(name KeyType) ([]KeyType, []*chainhash.Hash)) *RamTrie { return &RamTrie{ bufs: &sync.Pool{ New: func() interface{} { return new(bytes.Buffer) }, }, - collapsedTrie: collapsedTrie{Root: &collapsedVertex{}}, + collapsedTrie: collapsedTrie{ + Root: &collapsedVertex{}, + Nodes: 1, + Reload: func(prefix KeyType, v *collapsedVertex) { + // Reload is always called before the node is used + if len(prefix) <= 0 { + return + } + p := getOrMakePayload(v) + if p.pruned { // it's been pruned + names, hashes := f(prefix) + for i, name := range names { + cv := &collapsedVertex{ + children: nil, + key: name[len(prefix):], + payload: &ramTriePayload{ + merkleHash: nil, + claimHash: hashes[i], + hit: 1, + }, + } + // assuming that names come out in order: + v.children = append(v.children, cv) + } + p.pruned = false + } + }, + }, } } type ramTriePayload struct { merkleHash *chainhash.Hash claimHash *chainhash.Hash + hit int32 + pruned bool } func (r *ramTriePayload) clear() { @@ -85,6 +114,7 @@ func (rt *RamTrie) Update(name []byte, h *chainhash.Hash, _ bool) { } func (rt *RamTrie) MerkleHash() *chainhash.Hash { + rootHashCalls++ if h := rt.merkleHash(rt.Root); h == nil { return EmptyTrieHash } @@ -94,8 +124,10 @@ func (rt *RamTrie) MerkleHash() *chainhash.Hash { func (rt *RamTrie) merkleHash(v *collapsedVertex) *chainhash.Hash { p := getOrMakePayload(v) if p.merkleHash != nil { + runCleanup(v) return p.merkleHash } + p.hit++ b := rt.bufs.Get().(*bytes.Buffer) defer rt.bufs.Put(b) @@ -130,17 +162,37 @@ func (rt *RamTrie) completeHash(h *chainhash.Hash, childKey KeyType) []byte { } func (rt *RamTrie) MerkleHashAllClaims() *chainhash.Hash { + rootHashCalls++ if h := rt.merkleHashAllClaims(rt.Root); h == nil { return EmptyTrieHash } return getOrMakePayload(rt.Root).merkleHash } +var rootHashCalls = 0 // TODO: put this in rt +const cleanupMod = 1024 +const threshold = 42 // have to be used 4% of time to stay in cache + +func runCleanup(v *collapsedVertex) { + if (rootHashCalls % cleanupMod) == cleanupMod-1 { + for _, c := range v.children { + if c.payload != nil && c.payload.(*ramTriePayload).hit > threshold { + return + } + } + v.children = nil + p := getOrMakePayload(v) + p.pruned = true + } +} + func (rt *RamTrie) merkleHashAllClaims(v *collapsedVertex) *chainhash.Hash { p := getOrMakePayload(v) if p.merkleHash != nil { + runCleanup(v) return p.merkleHash } + p.hit++ childHashes := make([]*chainhash.Hash, 0, len(v.children)) for _, ch := range v.children {