Add treapNode pool. Reduce cloneTreapNode() allocations. #47

Closed
moodyjon wants to merge 3 commits from treap_node_pool into master
7 changed files with 412 additions and 55 deletions

View file

@ -288,6 +288,9 @@ type dbCacheSnapshot struct {
dbSnapshot *leveldb.Snapshot
pendingKeys *treap.Immutable
pendingRemove *treap.Immutable
pendingKeysSnap *treap.SnapRecord
pendingRemoveSnap *treap.SnapRecord
cacheFlushed int
}
// Has returns whether or not the passed key exists.
@ -327,6 +330,18 @@ func (snap *dbCacheSnapshot) Get(key []byte) []byte {
// Release releases the snapshot.
//
// The underlying leveldb snapshot is released first.  When the cache was
// flushed while this snapshot was outstanding (cacheFlushed > 0), the treaps
// captured by the snapshot are handed to Recycle with this snapshot's own
// record excluded.  Afterwards the snapshot records are released and every
// treap and record reference held by the snapshot is dropped, so a repeated
// call does not recycle treap nodes or release snapshot records a second time.
func (snap *dbCacheSnapshot) Release() {
	snap.dbSnapshot.Release()

	// Recycle before releasing the records: Recycle needs the record in
	// order to exclude it from the snapshot count.
	if snap.cacheFlushed > 0 {
		if snap.pendingKeys != nil {
			snap.pendingKeys.Recycle(snap.pendingKeysSnap)
		}
		if snap.pendingRemove != nil {
			snap.pendingRemove.Recycle(snap.pendingRemoveSnap)
		}
	}

	// Release each record exactly once and forget it, so that a second
	// Release cannot walk a recycle list that has already been drained.
	if snap.pendingKeysSnap != nil {
		snap.pendingKeysSnap.Release()
		snap.pendingKeysSnap = nil
	}
	if snap.pendingRemoveSnap != nil {
		snap.pendingRemoveSnap.Release()
		snap.pendingRemoveSnap = nil
	}

	snap.pendingKeys = nil
	snap.pendingRemove = nil
}
@ -411,6 +426,12 @@ func (c *dbCache) Snapshot() (*dbCacheSnapshot, error) {
pendingKeys: c.cachedKeys,
pendingRemove: c.cachedRemove,
}
if cacheSnapshot.pendingKeys != nil {
cacheSnapshot.pendingKeysSnap = cacheSnapshot.pendingKeys.Snapshot()
}
if cacheSnapshot.pendingRemove != nil {
cacheSnapshot.pendingRemoveSnap = cacheSnapshot.pendingRemove.Snapshot()
}
c.cacheLock.RUnlock()
return cacheSnapshot, nil
}
@ -485,7 +506,7 @@ func (c *dbCache) commitTreaps(pendingKeys, pendingRemove TreapForEacher) error
// cache to the underlying database.
//
// This function MUST be called with the database write lock held.
func (c *dbCache) flush() error {
func (c *dbCache) flush(tx *transaction) error {
c.lastFlush = time.Now()
// Sync the current write file associated with the block store. This is
@ -499,12 +520,10 @@ func (c *dbCache) flush() error {
// Since the cached keys to be added and removed use an immutable treap,
// a snapshot is simply obtaining the root of the tree under the lock
// which is used to atomically swap the root.
c.cacheLock.Lock()
c.cacheLock.RLock()
cachedKeys := c.cachedKeys
cachedRemove := c.cachedRemove
c.cachedKeys = treap.NewImmutable()
c.cachedRemove = treap.NewImmutable()
c.cacheLock.Unlock()
c.cacheLock.RUnlock()
// Nothing to do if there is no data to flush.
if cachedKeys.Len() == 0 && cachedRemove.Len() == 0 {
@ -516,6 +535,20 @@ func (c *dbCache) flush() error {
return err
}
c.cacheLock.Lock()
c.cachedKeys = treap.NewImmutable()
c.cachedRemove = treap.NewImmutable()
c.cacheLock.Unlock()
cachedKeys.Recycle(nil)
cachedRemove.Recycle(nil)
// Make a note that cache was flushed so tx.snapshot.Release()
// can also call Recycle() to free more nodes.
if tx != nil && tx.snapshot != nil {
tx.snapshot.cacheFlushed++
}
return nil
}
@ -564,7 +597,7 @@ func (c *dbCache) commitTx(tx *transaction) error {
// Flush the cache and write the current transaction directly to the
// database if a flush is needed.
if c.needsFlush(tx) {
if err := c.flush(); err != nil {
if err := c.flush(tx); err != nil {
return err
}
@ -574,9 +607,16 @@ func (c *dbCache) commitTx(tx *transaction) error {
return err
}
pk := tx.pendingKeys
pr := tx.pendingRemove
// Clear the transaction entries since they have been committed.
tx.pendingKeys = nil
tx.pendingRemove = nil
pk.Recycle()
pr.Recycle()
return nil
}
@ -593,19 +633,23 @@ func (c *dbCache) commitTx(tx *transaction) error {
// Apply every key to add in the database transaction to the cache.
tx.pendingKeys.ForEach(func(k, v []byte) bool {
newCachedRemove = newCachedRemove.Delete(k)
newCachedKeys = newCachedKeys.Put(k, v)
treap.DeleteM(&newCachedRemove, k, tx.snapshot.pendingRemoveSnap)
treap.PutM(&newCachedKeys, k, v, tx.snapshot.pendingKeysSnap)
return true
})
pk := tx.pendingKeys
tx.pendingKeys = nil
pk.Recycle()
// Apply every key to remove in the database transaction to the cache.
tx.pendingRemove.ForEach(func(k, v []byte) bool {
newCachedKeys = newCachedKeys.Delete(k)
newCachedRemove = newCachedRemove.Put(k, nil)
treap.DeleteM(&newCachedKeys, k, tx.snapshot.pendingKeysSnap)
treap.PutM(&newCachedRemove, k, nil, tx.snapshot.pendingRemoveSnap)
return true
})
pr := tx.pendingRemove
tx.pendingRemove = nil
pr.Recycle()
// Atomically replace the immutable treaps which hold the cached keys to
// add and delete.
@ -613,6 +657,7 @@ func (c *dbCache) commitTx(tx *transaction) error {
c.cachedKeys = newCachedKeys
c.cachedRemove = newCachedRemove
c.cacheLock.Unlock()
return nil
}
@ -622,7 +667,7 @@ func (c *dbCache) commitTx(tx *transaction) error {
// This function MUST be called with the database write lock held.
func (c *dbCache) Close() error {
// Flush any outstanding cached entries to disk.
if err := c.flush(); err != nil {
if err := c.flush(nil); err != nil {
// Even if there is an error while flushing, attempt to close
// the underlying database. The error is ignored since it would
// mask the flush error.

View file

@ -319,7 +319,7 @@ func testWriteFailures(tc *testContext) bool {
file: &mockFile{forceSyncErr: true, maxSize: -1},
}
store.writeCursor.Unlock()
err := tc.db.(*db).cache.flush()
err := tc.db.(*db).cache.flush(nil)
if !checkDbError(tc.t, testName, err, database.ErrDriverSpecific) {
return false
}

View file

@ -6,6 +6,7 @@ package treap
import (
"math/rand"
"sync"
"time"
)
@ -23,7 +24,7 @@ const (
// size in that case is acceptable since it avoids the need to import
// unsafe. It consists of 24-bytes for each key and value + 8 bytes for
// each of the priority, left, and right fields (24*2 + 8*3).
nodeFieldsSize = 72
nodeFieldsSize = 96
)
var (
@ -33,6 +34,15 @@ var (
emptySlice = make([]byte, 0)
)
// Reserved generation numbers.  All of them are negative.
const (
	// mutableGeneration (-1) is the generation number for nodes in a
	// Mutable treap.
	mutableGeneration int = -(iota + 1)

	// recycleGeneration (-2) marks a node that is scheduled to be handed
	// back to nodePool.
	recycleGeneration

	// poolGeneration (-3) is the generation number of free nodes sitting
	// in nodePool.
	poolGeneration
)
// treapNode represents a node in the treap.
type treapNode struct {
key []byte
@ -40,6 +50,8 @@ type treapNode struct {
priority int
left *treapNode
right *treapNode
generation int
next *treapNode
}
// nodeSize returns the number of bytes the specified node occupies including
@ -48,10 +60,40 @@ func nodeSize(node *treapNode) uint64 {
return nodeFieldsSize + uint64(len(node.key)+len(node.value))
}
// newTreapNode returns a new node from the given key, value, and priority. The
// node is not initially linked to any others.
func newTreapNode(key, value []byte, priority int) *treapNode {
return &treapNode{key: key, value: value, priority: priority}
// poolNewTreapNode builds a blank node that is already stamped with
// poolGeneration.  All other fields are left at their zero values.
func poolNewTreapNode() interface{} {
	node := new(treapNode)
	node.generation = poolGeneration
	return node
}
// nodePool holds treapNode values that are available for reuse.  New nodes are
// produced by poolNewTreapNode when the pool has none to hand out.
var nodePool = sync.Pool{New: poolNewTreapNode}
// getTreapNode takes a node from nodePool and initialises it with the given
// key, value, priority, and generation.  The left, right, and next links of
// the returned node are nil, so it is not linked to any other node.
func getTreapNode(key, value []byte, priority int, generation int) *treapNode {
	node := nodePool.Get().(*treapNode)

	// Overwrite the node wholesale; the fields that are not listed (left,
	// right, next) are reset to nil.
	*node = treapNode{
		key:        key,
		value:      value,
		priority:   priority,
		generation: generation,
	}
	return node
}
// putTreapNode wipes a node and hands it back to nodePool for reuse.  It
// panics when the node's generation is poolGeneration or lower, which is how a
// node that has already been freed is recognised.
func putTreapNode(n *treapNode) {
	if n.generation <= poolGeneration {
		panic("double free of treapNode detected")
	}

	// Clear key, value, priority, and all links, and stamp the node as
	// belonging to the pool.
	*n = treapNode{generation: poolGeneration}
	nodePool.Put(n)
}
// parentStack represents a stack of parent treap nodes that are used during

View file

@ -49,7 +49,7 @@ testLoop:
for j := 0; j < test.numNodes; j++ {
var key [4]byte
binary.BigEndian.PutUint32(key[:], uint32(j))
node := newTreapNode(key[:], key[:], 0)
node := getTreapNode(key[:], key[:], 0, 0)
nodes = append(nodes, node)
}

View file

@ -7,17 +7,15 @@ package treap
import (
"bytes"
"math/rand"
"sync"
)
// cloneTreapNode returns a shallow copy of the passed node.
func cloneTreapNode(node *treapNode) *treapNode {
return &treapNode{
key: node.key,
value: node.value,
priority: node.priority,
left: node.left,
right: node.right,
}
clone := getTreapNode(node.key, node.value, node.priority, node.generation+1)
clone.left = node.left
clone.right = node.right
return clone
}
// Immutable represents a treap data structure which is used to hold ordered
@ -43,11 +41,19 @@ type Immutable struct {
// totalSize is the best estimate of the total size of all data in
// the treap including the keys, values, and node sizes.
totalSize uint64
// generation number starts at 0 after NewImmutable(), and
// is incremented with every Put()/Delete().
generation int
// snap is a pointer to a node in snapshot history linked list.
// A value nil means no snapshots are outstanding.
snap **SnapRecord
}
// newImmutable returns a new immutable treap given the passed parameters.
func newImmutable(root *treapNode, count int, totalSize uint64) *Immutable {
return &Immutable{root: root, count: count, totalSize: totalSize}
func newImmutable(root *treapNode, count int, totalSize uint64, generation int, snap **SnapRecord) *Immutable {
return &Immutable{root: root, count: count, totalSize: totalSize, generation: generation, snap: snap}
}
// Len returns the number of items stored in the treap.
@ -104,8 +110,8 @@ func (t *Immutable) Get(key []byte) []byte {
return nil
}
// Put inserts the passed key/value pair.
func (t *Immutable) Put(key, value []byte) *Immutable {
// put inserts the passed key/value pair.
func (t *Immutable) put(key, value []byte) (tp *Immutable, old parentStack) {
// Use an empty byte slice for the value when none was provided. This
// ultimately allows key existence to be determined from the value since
// an empty byte slice is distinguishable from nil.
@ -115,8 +121,8 @@ func (t *Immutable) Put(key, value []byte) *Immutable {
// The node is the root of the tree if there isn't already one.
if t.root == nil {
root := newTreapNode(key, value, rand.Int())
return newImmutable(root, 1, nodeSize(root))
root := getTreapNode(key, value, rand.Int(), t.generation+1)
return newImmutable(root, 1, nodeSize(root), t.generation+1, t.snap), parentStack{}
}
// Find the binary tree insertion point and construct a replaced list of
@ -128,9 +134,11 @@ func (t *Immutable) Put(key, value []byte) *Immutable {
// When the key matches an entry already in the treap, replace the node
// with a new one that has the new value set and return.
var parents parentStack
var oldParents parentStack
var compareResult int
for node := t.root; node != nil; {
// Clone the node and link its parent to it if needed.
oldParents.Push(node)
nodeCopy := cloneTreapNode(node)
if oldParent := parents.At(0); oldParent != nil {
if oldParent.left == node {
@ -161,11 +169,11 @@ func (t *Immutable) Put(key, value []byte) *Immutable {
newRoot := parents.At(parents.Len() - 1)
newTotalSize := t.totalSize - uint64(len(node.value)) +
uint64(len(value))
return newImmutable(newRoot, t.count, newTotalSize)
return newImmutable(newRoot, t.count, newTotalSize, t.generation+1, t.snap), oldParents
}
// Link the new node into the binary tree in the correct position.
node := newTreapNode(key, value, rand.Int())
node := getTreapNode(key, value, rand.Int(), t.generation+1)
parent := parents.At(0)
if compareResult < 0 {
parent.left = node
@ -205,19 +213,65 @@ func (t *Immutable) Put(key, value []byte) *Immutable {
}
}
return newImmutable(newRoot, t.count+1, t.totalSize+nodeSize(node))
return newImmutable(newRoot, t.count+1, t.totalSize+nodeSize(node), t.generation+1, t.snap), oldParents
}
// Delete removes the passed key from the treap and returns the resulting treap
// Put is the immutable variant of put: it inserts the passed key/value pair
// and returns the resulting treap.  The nodes replaced along the way are not
// recycled; they become garbage unless they are referenced elsewhere.
func (t *Immutable) Put(key, value []byte) *Immutable {
	result, _ := t.put(key, value)
	return result
}
// PutM is the mutable variant of put.  It inserts the passed key/value pair
// into the treap *dest refers to, stores the resulting treap back into *dest,
// and recycles the nodes that were replaced where that is possible.
//
// This is only safe in structured scenarios that use SnapRecord to track treap
// instances.  Outstanding SnapRecords protect nodes from being recycled while
// they might still be present in one or more snapshots.  It is useful when
// several Put/Delete operations are applied to a treap and the intermediate
// treap states are neither created nor desired.  For example:
//
//	for i := range keys {
//		t = t.Put(keys[i], nil)
//	}
//
// ...may be replaced with:
//
//	for i := range keys {
//		PutM(&t, keys[i], nil, nil)
//	}
//
// If "excluded" is provided, that snapshot record is ignored when counting
// outstanding snapshots.
//
// A replaced node is returned to the pool immediately when no snapshot is
// counted or when its generation is newer than that of the newest counted
// snapshot.  Otherwise it is queued on the oldest counted snapshot and is
// recycled when that snapshot is released.
func PutM(dest **Immutable, key, value []byte, excluded *SnapRecord) {
	tp, old := (*dest).put(key, value)

	// Examine old nodes and recycle if possible.
	snapRecordMutex.Lock()
	defer snapRecordMutex.Unlock()

	// Honour the caller's exclusion; it was previously dropped by passing
	// nil here, which made the parameter a no-op.
	snapCount, maxSnap, minSnap := (*dest).snapCount(excluded)
	for old.Len() > 0 {
		node := old.Pop()
		if snapCount == 0 || node.generation > maxSnap.generation {
			putTreapNode(node)
			continue
		}

		// Defer recycle until Release() on oldest snap (minSnap).
		node.generation = recycleGeneration
		node.next = minSnap.recycle
		minSnap.recycle = node
	}
	*dest = tp
}
// del removes the passed key from the treap and returns the resulting treap
// if it exists. The original immutable treap is returned if the key does not
// exist.
func (t *Immutable) Delete(key []byte) *Immutable {
func (t *Immutable) del(key []byte) (d *Immutable, old parentStack) {
// Find the node for the key while constructing a list of parents while
// doing so.
var parents parentStack
var oldParents parentStack
var delNode *treapNode
for node := t.root; node != nil; {
parents.Push(node)
oldParents.Push(node)
// Traverse left or right depending on the result of the
// comparison.
@ -238,14 +292,14 @@ func (t *Immutable) Delete(key []byte) *Immutable {
// There is nothing to do if the key does not exist.
if delNode == nil {
return t
return t, parentStack{}
}
// When the only node in the tree is the root node and it is the one
// being deleted, there is nothing else to do besides removing it.
parent := parents.At(1)
parent := oldParents.At(1)
if parent == nil && delNode.left == nil && delNode.right == nil {
return newImmutable(nil, 0, 0)
return newImmutable(nil, 0, 0, t.generation+1, t.snap), oldParents
}
// Construct a replaced list of parents and the node to delete itself.
@ -253,8 +307,8 @@ func (t *Immutable) Delete(key []byte) *Immutable {
// therefore all ancestors of the node that will be deleted, up to and
// including the root, need to be replaced.
var newParents parentStack
for i := parents.Len(); i > 0; i-- {
node := parents.At(i - 1)
for i := oldParents.Len(); i > 0; i-- {
node := oldParents.At(i - 1)
nodeCopy := cloneTreapNode(node)
if oldParent := newParents.At(0); oldParent != nil {
if oldParent.left == node {
@ -326,7 +380,53 @@ func (t *Immutable) Delete(key []byte) *Immutable {
parent.left = nil
}
return newImmutable(newRoot, t.count-1, t.totalSize-nodeSize(delNode))
return newImmutable(newRoot, t.count-1, t.totalSize-nodeSize(delNode), t.generation+1, t.snap), oldParents
}
// Delete is the immutable variant of del: it removes the passed key and
// returns the resulting treap.  The nodes replaced along the way are not
// recycled; they become garbage unless they are referenced elsewhere.
func (t *Immutable) Delete(key []byte) *Immutable {
	result, _ := t.del(key)
	return result
}
// DeleteM is the mutable variant of del.  It removes the passed key from the
// treap *dest refers to, stores the resulting treap back into *dest, and
// recycles the nodes that were replaced where that is possible.
//
// This is only safe in structured scenarios that use SnapRecord to track treap
// instances.  Outstanding SnapRecords protect nodes from being recycled while
// they might still be present in one or more snapshots.  It is useful when
// several Put/Delete operations are applied to a treap and the intermediate
// treap states are neither created nor desired.  For example:
//
//	for i := range keys {
//		t = t.Delete(keys[i])
//	}
//
// ...may be replaced with:
//
//	for i := range keys {
//		DeleteM(&t, keys[i], nil)
//	}
//
// If "excluded" is provided, that snapshot record is ignored when counting
// outstanding snapshots.
//
// A replaced node is returned to the pool immediately when no snapshot is
// counted or when its generation is newer than that of the newest counted
// snapshot.  Otherwise it is queued on the oldest counted snapshot and is
// recycled when that snapshot is released.
func DeleteM(dest **Immutable, key []byte, excluded *SnapRecord) {
	tp, old := (*dest).del(key)

	// Examine old nodes and recycle if possible.
	snapRecordMutex.Lock()
	defer snapRecordMutex.Unlock()

	// Honour the caller's exclusion; it was previously dropped by passing
	// nil here, which made the parameter a no-op.
	snapCount, maxSnap, minSnap := (*dest).snapCount(excluded)
	for old.Len() > 0 {
		node := old.Pop()
		if snapCount == 0 || node.generation > maxSnap.generation {
			putTreapNode(node)
			continue
		}

		// Defer recycle until Release() on oldest snap (minSnap).
		node.generation = recycleGeneration
		node.next = minSnap.recycle
		minSnap.recycle = node
	}
	*dest = tp
}
// ForEach invokes the passed function with every key/value pair in the treap
@ -358,3 +458,141 @@ func (t *Immutable) ForEach(fn func(k, v []byte) bool) {
func NewImmutable() *Immutable {
	// The zero value is what callers get: every field of the treap starts
	// out unset.
	return new(Immutable)
}
// SnapRecord tracks one outstanding snapshot of a treap.  While a SnapRecord
// is present and has not been released, treap nodes at or below its
// generation are protected from Recycle().
type SnapRecord struct {
	// generation is the generation of the treap at the moment Snapshot()
	// created this record.
	generation int

	// rp addresses the pointer cell that Snapshot() allocated for this
	// record.  Release() rewrites that cell when the record is unlinked.
	rp **SnapRecord

	// prev and next link the record into the snapshot history list.
	prev, next *SnapRecord

	// recycle is the head of a list, chained through treapNode.next, of
	// nodes whose recycling is postponed until Release() is called on
	// this record.
	recycle *treapNode
}

// snapRecordMutex is held by Snapshot(), Release(), PutM(), DeleteM(), and
// Recycle() while they read or modify snapshot records.
var snapRecordMutex sync.Mutex
// Snapshot makes a SnapRecord stamped with the treap's current generation,
// links it into the snapshot history of the treap, and returns it.
//
// The new record is inserted directly after the record the treap currently
// refers to, if any, and the treap is then pointed at the new record.  Both
// neighbours are updated so that the history stays a consistent doubly linked
// list.
func (t *Immutable) Snapshot() *SnapRecord {
	snapRecordMutex.Lock()
	defer snapRecordMutex.Unlock()

	// Locate the neighbours of the insertion point.
	var prev, next *SnapRecord
	if t.snap != nil && *t.snap != nil {
		prev = *t.snap
		next = prev.next
	}

	// Every record gets its own pointer cell; the treap refers to the
	// record through that cell.
	cell := new(*SnapRecord)
	rec := &SnapRecord{generation: t.generation, rp: cell, prev: prev, next: next}
	*cell = rec

	if prev != nil {
		prev.next = rec
	}
	// Without this back link a record inserted between two others would be
	// skipped by backward walks and could be dropped from the list when a
	// neighbour is released.
	if next != nil {
		next.prev = rec
	}

	t.snap = cell
	return rec
}
// Release unlinks the record from the snapshot history of a treap and returns
// every node on the record's deferred recycle list to the node pool.
//
// After unlinking, the record's pointer cell refers to the previous record if
// there is one, otherwise to the next record, otherwise to nil.  The recycle
// list is detached from the record before it is drained, so the same nodes are
// never handed back to the pool twice through this record.
func (r *SnapRecord) Release() {
	snapRecordMutex.Lock()
	defer snapRecordMutex.Unlock()

	// Unlink this record.  The order matters: when both neighbours exist
	// the cell must end up referring to prev.
	*(r.rp) = nil
	if r.next != nil {
		r.next.prev = r.prev
		*(r.rp) = r.next
	}
	if r.prev != nil {
		r.prev.next = r.next
		*(r.rp) = r.prev
	}

	// Handle deferred recycle list.  Detach it first so a repeated Release
	// finds nothing left to free.
	node := r.recycle
	r.recycle = nil
	for node != nil {
		// putTreapNode clears node.next, so read it beforehand.
		next := node.next
		putTreapNode(node)
		node = next
	}
}
// snapCount reports how many snapshot records are reachable from the record
// this treap refers to, together with the counted records that have the
// highest (maxSnap) and lowest (minSnap) generation.  The record "excluded" is
// never counted.  When snapshots are absent, mutable PutM()/DeleteM() can
// recycle nodes more aggressively.
//
// The caller must already hold snapRecordMutex.
func (t *Immutable) snapCount(excluded *SnapRecord) (count int, maxSnap, minSnap *SnapRecord) {
	// No snapshots.
	if t.snap == nil || *t.snap == nil {
		return 0, nil, nil
	}

	// tally folds one record into the running count and extremes.
	tally := func(rec *SnapRecord) {
		if rec == excluded {
			return
		}
		count++
		if maxSnap == nil || rec.generation > maxSnap.generation {
			maxSnap = rec
		}
		if minSnap == nil || rec.generation < minSnap.generation {
			minSnap = rec
		}
	}

	start := *t.snap

	// Snapshots taken BEFORE creation of this instance, including the
	// record the treap refers to.
	for rec := start; rec != nil; rec = rec.prev {
		tally(rec)
	}

	// Snapshots taken AFTER creation of this instance.
	for rec := start.next; rec != nil; rec = rec.next {
		tally(rec)
	}

	return count, maxSnap, minSnap
}
// Recycle walks the whole treap and returns to the node pool every node whose
// generation is greater than the generation of the newest outstanding
// snapshot, or greater than zero when no snapshot is counted.  The record
// "excluded" is not taken into account when looking for the newest snapshot.
func (t *Immutable) Recycle(excluded *SnapRecord) {
	// Determine the generation threshold under the snapshot lock.
	threshold := 0
	snapRecordMutex.Lock()
	if _, newest, _ := t.snapCount(excluded); newest != nil {
		threshold = newest.generation
	}
	snapRecordMutex.Unlock()

	var pending parentStack

	// pushLeftSpine stacks n and every node reachable from it by following
	// left links only.
	pushLeftSpine := func(n *treapNode) {
		for ; n != nil; n = n.left {
			pending.Push(n)
		}
	}

	pushLeftSpine(t.root)
	for pending.Len() > 0 {
		node := pending.Pop()

		// The right subtree has to be queued before the node is freed,
		// because freeing clears the node's links.
		pushLeftSpine(node.right)

		// Recycle node if it cannot be in a snapshot.  Nodes scheduled
		// for deferred recycling carry the negative recycleGeneration
		// and therefore never qualify.
		if node.generation > threshold {
			putTreapNode(node)
		}
	}
}

View file

@ -21,6 +21,10 @@ type Mutable struct {
// totalSize is the best estimate of the total size of all data in
// the treap including the keys, values, and node sizes.
totalSize uint64
// generation number is the constant mutableGeneration, unless
// creation of a treap.Iterator bumps it.
generation int
}
// Len returns the number of items stored in the treap.
@ -113,7 +117,7 @@ func (t *Mutable) Put(key, value []byte) {
// The node is the root of the tree if there isn't already one.
if t.root == nil {
node := newTreapNode(key, value, rand.Int())
node := getTreapNode(key, value, rand.Int(), t.generation)
t.count = 1
t.totalSize = nodeSize(node)
t.root = node
@ -145,7 +149,7 @@ func (t *Mutable) Put(key, value []byte) {
}
// Link the new node into the binary tree in the correct position.
node := newTreapNode(key, value, rand.Int())
node := getTreapNode(key, value, rand.Int(), t.generation)
t.count++
t.totalSize += nodeSize(node)
parent := parents.At(0)
@ -190,6 +194,9 @@ func (t *Mutable) Delete(key []byte) {
t.root = nil
t.count = 0
t.totalSize = 0
if node.generation == t.generation && node.generation == mutableGeneration {
putTreapNode(node)
}
return
}
@ -238,6 +245,9 @@ func (t *Mutable) Delete(key []byte) {
}
t.count--
t.totalSize -= nodeSize(node)
if node.generation == t.generation && node.generation == mutableGeneration {
putTreapNode(node)
}
}
// ForEach invokes the passed function with every key/value pair in the treap
@ -274,5 +284,26 @@ func (t *Mutable) Reset() {
// NewMutable returns a new empty mutable treap ready for use. See the
// documentation for the Mutable structure for more details.
func NewMutable() *Mutable {
return &Mutable{}
return &Mutable{generation: mutableGeneration}
}
// Recycle walks the whole treap and returns nodes to the node pool.  A node is
// only returned when its generation equals both the treap's generation and
// mutableGeneration; any other node is left untouched.
func (t *Mutable) Recycle() {
	var pending parentStack

	// pushLeftSpine stacks n and every node reachable from it by following
	// left links only.
	pushLeftSpine := func(n *treapNode) {
		for ; n != nil; n = n.left {
			pending.Push(n)
		}
	}

	pushLeftSpine(t.root)
	for pending.Len() > 0 {
		node := pending.Pop()

		// The right subtree has to be queued before the node is freed,
		// because freeing clears the node's links.
		pushLeftSpine(node.right)

		recyclable := node.generation == mutableGeneration &&
			t.generation == node.generation
		if recyclable {
			putTreapNode(node)
		}
	}
}

View file

@ -326,6 +326,7 @@ func (iter *Iterator) ForceReseek() {
// }
// }
func (t *Mutable) Iterator(startKey, limitKey []byte) *Iterator {
t.generation++
iter := &Iterator{
t: t,
root: t.root,