Allow it to configure the memtable representation

This commit is contained in:
hofmockel 2014-04-28 20:32:33 +02:00
parent fef21c8965
commit 1cb9ec4ee1
6 changed files with 155 additions and 1 deletions

View file

@ -645,6 +645,18 @@ Options object
| *Type:* ``int``
| *Default:* ``8``
.. py:attribute:: memtable_factory
This is a factory that provides MemTableRep objects.
Right now you can assign instances of the following classes.
* :py:class:`rocksdb.VectorMemtableFactory`
* :py:class:`rocksdb.SkipListMemtableFactory`
* :py:class:`rocksdb.HashSkipListMemtableFactory`
* :py:class:`rocksdb.HashLinkListMemtableFactory`
*Default:* :py:class:`rocksdb.SkipListMemtableFactory`
.. py:attribute:: table_factory
Factory for the files forming the persistent data storage.
@ -857,3 +869,67 @@ Tutorial of rocksdb table formats is available here:
:param int index_sparseness:
Need to build one index record for how many keys for binary search.
.. _memtable_factories_label:
MemtableFactories
=================
RocksDB has different classes to represent the in-memory buffer for the current
operations. You have to assign instances of the following classes to
:py:attr:`rocksdb.Options.memtable_factory`.
This page has a comparison of the most popular ones.
https://github.com/facebook/rocksdb/wiki/Hash-based-memtable-implementations
.. py:class:: rocksdb.VectorMemtableFactory
This creates MemTableReps that are backed by an std::vector.
On iteration, the vector is sorted. This is useful for workloads where
iteration is very rare and writes are generally not issued after reads begin.
.. py:method:: __init__(count=0)
:param int count:
Passed to the constructor of the underlying std::vector of each
VectorRep. On initialization, the underlying array will have space
reserved for at least ``count`` entries.
.. py:class:: rocksdb.SkipListMemtableFactory
This uses a skip list to store keys.
.. py:method:: __init__()
.. py:class:: rocksdb.HashSkipListMemtableFactory
This class contains a fixed array of buckets, each pointing
to a skiplist (null if the bucket is empty).
.. note::
:py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise
rocksdb falls back to skip-list.
.. py:method:: __init__(bucket_count = 1000000, skiplist_height = 4, skiplist_branching_factor = 4)
:param int bucket_count: number of fixed array buckets
:param int skiplist_height: the max height of the skiplist
:param int skiplist_branching_factor:
probabilistic size ratio between adjacent link lists in the skiplist
.. py:class:: rocksdb.HashLinkListMemtableFactory
The factory is to create memtables with a hashed linked list.
It contains a fixed array of buckets, each pointing to a sorted single
linked list (null if the bucket is empty).
.. note::
:py:attr:`rocksdb.Options.prefix_extractor` must be set, otherwise
rocksdb falls back to skip-list.
.. py:method:: __init__(bucket_count=50000)
:param int bucket_count: number of fixed array buckets

View file

@ -30,6 +30,8 @@ Target is to work with the next version of rocksdb (2.8.fb)
* https://github.com/facebook/rocksdb/wiki/PlainTable-Format
* https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database%3F
* Add :py:attr:`rocksdb.Options.memtable_factory` option.
Version 0.1
-----------

View file

@ -25,6 +25,7 @@ cimport iterator
cimport backup
cimport env
cimport table_factory
cimport memtablerep
from slice_ cimport Slice
from status cimport Status
@ -583,6 +584,41 @@ cdef class TotalOrderPlainTableFactory(PyTableFactory):
index_sparseness))
#############################################
### Here are the MemtableFactories
# Internal base class shared by the memtable factory wrappers below.
# It holds the C++ factory in a shared_ptr; the subclasses fill that pointer
# in from their __init__ via ``self.factory.reset(...)``.
@cython.internal
cdef class PyMemtableFactory(object):
# Shared pointer to the wrapped C++ MemTableRepFactory.
cdef shared_ptr[memtablerep.MemTableRepFactory] factory
# Return the stored shared pointer; the Options.memtable_factory setter
# assigns the result to ``opts.memtable_factory``.
cdef shared_ptr[memtablerep.MemTableRepFactory] get_memtable_factory(self):
return self.factory
cdef class SkipListMemtableFactory(PyMemtableFactory):
    """Memtable factory wrapping the object returned by ``NewSkipListFactory``."""

    def __init__(self):
        cdef memtablerep.MemTableRepFactory* skiplist_factory
        skiplist_factory = memtablerep.NewSkipListFactory()
        # Hand the raw pointer over to the inherited shared_ptr.
        self.factory.reset(skiplist_factory)
cdef class VectorMemtableFactory(PyMemtableFactory):
    """Memtable factory wrapping the object returned by ``NewVectorRepFactory``.

    :param int count: forwarded unchanged to ``NewVectorRepFactory``.
    """

    def __init__(self, count=0):
        cdef memtablerep.MemTableRepFactory* vector_factory
        vector_factory = memtablerep.NewVectorRepFactory(count)
        # Hand the raw pointer over to the inherited shared_ptr.
        self.factory.reset(vector_factory)
cdef class HashSkipListMemtableFactory(PyMemtableFactory):
    """Memtable factory wrapping ``NewHashSkipListRepFactory``.

    :param int bucket_count: first argument passed to the C++ factory function.
    :param int skiplist_height: second argument passed to the C++ factory function.
    :param int skiplist_branching_factor:
        third argument passed to the C++ factory function.
    """

    def __init__(
            self,
            bucket_count=1000000,
            skiplist_height=4,
            skiplist_branching_factor=4):
        cdef memtablerep.MemTableRepFactory* hash_skiplist_factory
        hash_skiplist_factory = memtablerep.NewHashSkipListRepFactory(
            bucket_count,
            skiplist_height,
            skiplist_branching_factor)
        # Hand the raw pointer over to the inherited shared_ptr.
        self.factory.reset(hash_skiplist_factory)
cdef class HashLinkListMemtableFactory(PyMemtableFactory):
    """Memtable factory wrapping ``NewHashLinkListRepFactory``.

    :param int bucket_count: forwarded unchanged to the C++ factory function.
    """

    def __init__(self, bucket_count=50000):
        cdef memtablerep.MemTableRepFactory* hash_linklist_factory
        hash_linklist_factory = memtablerep.NewHashLinkListRepFactory(bucket_count)
        # Hand the raw pointer over to the inherited shared_ptr.
        self.factory.reset(hash_linklist_factory)
##################################
cdef class CompressionType(object):
no_compression = u'no_compression'
snappy_compression = u'snappy_compression'
@ -598,6 +634,8 @@ cdef class Options(object):
cdef PyCache py_block_cache_compressed
cdef PySliceTransform py_prefix_extractor
cdef PyTableFactory py_table_factory
cdef PyMemtableFactory py_memtable_factory
# Used to protect sharing of Options with many DB-objects
cdef cpp_bool in_use
@ -618,6 +656,7 @@ cdef class Options(object):
self.py_block_cache_compressed = None
self.py_prefix_extractor = None
self.py_table_factory = None
self.py_memtable_factory = None
for key, value in kwargs.items():
setattr(self, key, value)
@ -1024,8 +1063,17 @@ cdef class Options(object):
return self.py_table_factory
def __set__(self, PyTableFactory value):
self.py_table_factory = value
self.opts.table_factory = value.get_table_factory()
property memtable_factory:
    # Memtable factory wrapper currently assigned to these options.
    # The getter returns whatever Python object was last assigned (the
    # attribute is initialised to None in __init__).
    def __get__(self):
        return self.py_memtable_factory

    # ``not None`` is required: Cython accepts None for extension-typed
    # arguments by default, and calling the cdef method
    # ``get_memtable_factory`` on None is not safe. With ``not None`` a
    # TypeError is raised before the method is reached.
    def __set__(self, PyMemtableFactory value not None):
        # Keep the Python wrapper so the getter can hand the same object back.
        self.py_memtable_factory = value
        # Copy the shared_ptr into the underlying C++ options struct.
        self.opts.memtable_factory = value.get_memtable_factory()
property inplace_update_num_locks:
def __get__(self):
return self.opts.inplace_update_num_locks

View file

@ -0,0 +1,15 @@
#include "rocksdb/memtablerep.h"
using rocksdb::MemTableRepFactory;
using rocksdb::VectorRepFactory;
using rocksdb::SkipListFactory;
namespace py_rocks {
    // Helper functions that heap-allocate the concrete rocksdb factories and
    // return them as MemTableRepFactory pointers; the caller takes ownership.
    //
    // These are defined in a header, so they are marked `inline`: without it,
    // including this header from more than one translation unit produces
    // multiple-definition errors at link time.

    // Create a VectorRepFactory; `count` is forwarded to its constructor.
    inline MemTableRepFactory* NewVectorRepFactory(size_t count = 0) {
        return new VectorRepFactory(count);
    }

    // Create a SkipListFactory.
    inline MemTableRepFactory* NewSkipListFactory() {
        return new SkipListFactory();
    }
}

12
rocksdb/memtablerep.pxd Normal file
View file

@ -0,0 +1,12 @@
from libc.stdint cimport int32_t
# Declarations taken from the rocksdb header "rocksdb/memtablerep.h"
# (C++ namespace "rocksdb").
cdef extern from "rocksdb/memtablerep.h" namespace "rocksdb":
# Factory type that the New*Factory functions below return pointers to.
cdef cppclass MemTableRepFactory:
MemTableRepFactory()
# Takes (size_t, int32_t, int32_t) and returns a raw factory pointer.
cdef MemTableRepFactory* NewHashSkipListRepFactory(size_t, int32_t, int32_t)
# Takes a single size_t and returns a raw factory pointer.
cdef MemTableRepFactory* NewHashLinkListRepFactory(size_t)
# Declarations taken from the project-local header
# "cpp/memtable_factories.hpp" (C++ namespace "py_rocks").
cdef extern from "cpp/memtable_factories.hpp" namespace "py_rocks":
# Takes a single size_t and returns a raw factory pointer.
cdef MemTableRepFactory* NewVectorRepFactory(size_t)
# Takes no arguments and returns a raw factory pointer.
cdef MemTableRepFactory* NewSkipListFactory()

View file

@ -12,6 +12,7 @@ from slice_ cimport Slice
from snapshot cimport Snapshot
from slice_transform cimport SliceTransform
from table_factory cimport TableFactory
from memtablerep cimport MemTableRepFactory
cdef extern from "rocksdb/options.h" namespace "rocksdb":
ctypedef enum CompressionType:
@ -104,7 +105,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
# TODO: CompactionOptionsUniversal compaction_options_universal
cpp_bool filter_deletes
uint64_t max_sequential_skip_in_iterations
# TODO: memtable_factory
shared_ptr[MemTableRepFactory] memtable_factory
shared_ptr[TableFactory] table_factory
# TODO: table_properties_collectors
cpp_bool inplace_update_support