claimtrie/commit.go

package claimtrie

import (
	"bytes"
	"encoding/gob"
"github.com/lbryio/claimtrie/claim"
"github.com/lbryio/claimtrie/trie"
"github.com/btcsuite/btcd/chaincfg/chainhash"
"github.com/pkg/errors"
"github.com/syndtr/goleveldb/leveldb"
)

// CommitVisit is a callback invoked on each commit visited by CommitMgr.Log.
type CommitVisit func(c *Commit)

// CommitMeta represents the metadata associated with each commit.
type CommitMeta struct {
	Height claim.Height
}

// newCommit creates a Commit carrying the given merkle root and metadata.
// The head argument is currently unused: the commit history is kept as a
// slice in CommitMgr rather than chained through individual commits.
func newCommit(head *Commit, meta CommitMeta, h *chainhash.Hash) *Commit {
	return &Commit{
		MerkleRoot: h,
		Meta:       meta,
	}
}

// Commit records the merkle root of the ClaimTrie as of a particular height.
type Commit struct {
	MerkleRoot *chainhash.Hash
	Meta       CommitMeta
}

// CommitMgr tracks the commit history of the ClaimTrie and persists it
// to a LevelDB instance.
type CommitMgr struct {
	db      *leveldb.DB
	commits []*Commit
	head    *Commit
}

// NewCommitMgr returns a CommitMgr initialized with a genesis commit at
// height 0 whose merkle root is the hash of an empty trie.
func NewCommitMgr(db *leveldb.DB) *CommitMgr {
	head := newCommit(nil, CommitMeta{0}, trie.EmptyTrieHash)
	cm := CommitMgr{
		db:   db,
		head: head,
	}
	cm.commits = append(cm.commits, head)
	return &cm
}

// Head returns the most recent commit.
func (cm *CommitMgr) Head() *Commit {
	return cm.head
}

// Commit appends a commit with the given height and merkle root and
// advances the head. Height 0 is reserved for the genesis commit, so
// committing at that height is a no-op.
func (cm *CommitMgr) Commit(ht claim.Height, merkle *chainhash.Hash) {
	if ht == 0 {
		return
	}
	c := newCommit(cm.head, CommitMeta{ht}, merkle)
	cm.commits = append(cm.commits, c)
	cm.head = c
}

// Reset rewinds the commit history to the specified height, dropping any
// commits above it. If no commit exists exactly at that height, one is
// appended that reuses the merkle root of the nearest commit below it.
func (cm *CommitMgr) Reset(ht claim.Height) {
	for i := len(cm.commits) - 1; i >= 0; i-- {
		c := cm.commits[i]
		if c.Meta.Height <= ht {
			cm.head = c
			cm.commits = cm.commits[:i+1]
			break
		}
	}
	if cm.head.Meta.Height == ht {
		return
	}
	cm.Commit(ht, cm.head.MerkleRoot)
}
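
// A worked example of Reset (illustrative, not from the original file):
// given commits at heights [0, 1, 5], Reset(3) truncates the history to
// [0, 1], since 1 is the highest height at or below 3, and then appends
// a commit at height 3 that reuses the height-1 merkle root, so the
// history always ends with an explicit commit at the reset height.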

// Save serializes the commit history with encoding/gob and stores it in
// the database under a single key.
func (cm *CommitMgr) Save() error {
	exported := struct {
		Commits []*Commit
		Head    *Commit
	}{
		Commits: cm.commits,
		Head:    cm.head,
	}
	buf := bytes.NewBuffer(nil)
	if err := gob.NewEncoder(buf).Encode(exported); err != nil {
		return errors.Wrap(err, "gob.Encode()")
	}
	if err := cm.db.Put([]byte("CommitMgr"), buf.Bytes(), nil); err != nil {
		return errors.Wrap(err, "db.Put(CommitMgr)")
	}
	return nil
}

// Load restores the commit history previously stored by Save.
func (cm *CommitMgr) Load() error {
	exported := struct {
		Commits []*Commit
		Head    *Commit
	}{}
	data, err := cm.db.Get([]byte("CommitMgr"), nil)
	if err != nil {
		return errors.Wrap(err, "db.Get(CommitMgr)")
	}
	if err := gob.NewDecoder(bytes.NewBuffer(data)).Decode(&exported); err != nil {
		return errors.Wrap(err, "gob.Decode()")
	}
	cm.commits = exported.Commits
	cm.head = exported.Head
	return nil
}

// Log visits the commits from the head back toward genesis, invoking the
// callback on each commit at or below the specified height.
func (cm *CommitMgr) Log(ht claim.Height, visit CommitVisit) {
	for i := len(cm.commits) - 1; i >= 0; i-- {
		c := cm.commits[i]
		if c.Meta.Height > ht {
			continue
		}
		visit(c)
	}
}
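
// The sketch below is illustrative and not part of the original file. It
// shows one way a caller might drive CommitMgr end to end, assuming the
// in-memory storage backend from github.com/syndtr/goleveldb/leveldb/storage:
//
//	package main
//
//	import (
//		"fmt"
//
//		"github.com/lbryio/claimtrie"
//		"github.com/lbryio/claimtrie/trie"
//		"github.com/syndtr/goleveldb/leveldb"
//		"github.com/syndtr/goleveldb/leveldb/storage"
//	)
//
//	func main() {
//		// Open a throwaway in-memory LevelDB instance.
//		db, err := leveldb.Open(storage.NewMemStorage(), nil)
//		if err != nil {
//			panic(err)
//		}
//		defer db.Close()
//
//		cm := claimtrie.NewCommitMgr(db)
//
//		// Record merkle roots at heights 1 and 5; EmptyTrieHash
//		// stands in for real roots here.
//		cm.Commit(1, trie.EmptyTrieHash)
//		cm.Commit(5, trie.EmptyTrieHash)
//
//		// Persist the history, then rewind to height 3.
//		if err := cm.Save(); err != nil {
//			panic(err)
//		}
//		cm.Reset(3)
//
//		// Walk the commits at or below height 3, newest first.
//		cm.Log(3, func(c *claimtrie.Commit) {
//			fmt.Println(c.Meta.Height, c.MerkleRoot)
//		})
//	}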