Add support for the 'PlainTableFactories'

This commit is contained in:
hofmockel 2014-04-27 19:20:30 +02:00
parent 85fe7c095f
commit 482379cf94
5 changed files with 149 additions and 2 deletions

View file

@ -645,6 +645,18 @@ Options object
| *Type:* ``int``
| *Default:* ``8``
.. py:attribute:: table_factory
Factory for the files forming the persistent data storage.
Sometimes they are also named SST-Files. Right now you can assign
instances of the following classes
* :py:class:`rocksdb.BlockBasedTableFactory`
* :py:class:`rocksdb.PlainTableFactory`
* :py:class:`rocksdb.TotalOrderPlainTableFactory`
*Default:* :py:class:`rocksdb.BlockBasedTableFactory`
.. py:attribute:: inplace_update_support
Allows thread-safe inplace updates. Requires Updates if
@ -779,3 +791,69 @@ LRUCache
the least-used order. If not enough space is freed, further free the
entries in least used order.
TableFactories
==============
Currently RocksDB supports two types of tables: plain table and block-based table.
Instances of these classes can be assigned to :py:attr:`rocksdb.Options.table_factory`
* *Block-based table:* This is the default table type that RocksDB inherited from
LevelDB. It was designed for storing data in hard disk or flash device.
* *Plain table:* It is one of RocksDB's SST file formats, optimized
for low query latency on pure-memory or really low-latency media.
Tutorial of rocksdb table formats is available here:
https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
.. py:class:: rocksdb.BlockBasedTableFactory
Wraps BlockBasedTableFactory of RocksDB.
.. py:class:: rocksdb.PlainTableFactory
Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory.
For this factory, you need to set :py:attr:`rocksdb.Options.prefix_extractor`
properly to make it work. Look-up will start with prefix hash lookup for
key prefix. Inside the hash bucket found, a binary search is executed for
hash conflicts. Finally, a linear search is used.
.. py:method:: __init__(user_key_len=0, bloom_bits_per_prefix=10, hash_table_ratio=0.75, index_sparseness=10)
:param int user_key_len:
Plain table has optimization for fix-sized keys, which can be
specified via user_key_len.
Alternatively, you can pass `0` if your keys have variable lengths.
:param int bloom_bits_per_prefix:
The number of bits used for bloom filter per prefix.
You may disable it by passing `0`.
:param float hash_table_ratio:
The desired utilization of the hash table used for prefix hashing.
hash_table_ratio = number of prefixes / #buckets in the hash table.
:param int index_sparseness:
Number of keys per index record: inside each prefix, one index record
is built for this many keys, for binary search inside each hash bucket.
.. py:class:: rocksdb.TotalOrderPlainTableFactory
This factory of plain table ignores Options.prefix_extractor and assumes no
hashable prefix available to the key structure. Lookup will be based on
binary search index only. Total order seek() can be issued.
.. py:method:: __init__(user_key_len=0, bloom_bits_per_key=0, index_sparseness=16)
:param int user_key_len:
Plain table has optimization for fix-sized keys, which can be
specified via user_key_len.
Alternatively, you can pass `0` if your keys have variable lengths.
:param int bloom_bits_per_key:
The number of bits used for bloom filter per key.
You may disable it by passing a zero.
:param int index_sparseness:
Number of keys per index record used for binary search.

View file

@ -24,6 +24,7 @@ cimport db
cimport iterator
cimport backup
cimport env
cimport table_factory
from slice_ cimport Slice
from status cimport Status
@ -539,8 +540,49 @@ cdef cpp_bool slice_in_range_callback(
tb = traceback.format_exc()
logger.Log(log, "Error in slice transfrom callback: %s", <bytes>tb)
error_msg.assign(<bytes>str(error))
###########################################
## Here are the TableFactories
@cython.internal
cdef class PyTableFactory(object):
    """Internal base class holding a shared pointer to a RocksDB TableFactory.

    The concrete factory classes in this module populate ``factory`` in
    their ``__init__``.
    """
    # Shared-ownership handle to the wrapped C++ table factory.
    cdef shared_ptr[table_factory.TableFactory] factory

    cdef shared_ptr[table_factory.TableFactory] get_table_factory(self):
        # Returned by value, i.e. as a copy of the shared pointer.
        return self.factory
cdef class BlockBasedTableFactory(PyTableFactory):
    """Wraps the factory returned by RocksDB's ``NewBlockBasedTableFactory``."""

    def __init__(self):
        cdef table_factory.TableFactory* raw_factory
        raw_factory = table_factory.NewBlockBasedTableFactory()
        # Hand the freshly created factory over to the shared pointer.
        self.factory.reset(raw_factory)
cdef class PlainTableFactory(PyTableFactory):
    """Wraps the factory returned by RocksDB's ``NewPlainTableFactory``.

    :param user_key_len: forwarded as the first argument (``uint32_t``).
    :param bloom_bits_per_prefix: forwarded as the second argument (``int``).
    :param hash_table_ratio: forwarded as the third argument (``double``).
    :param index_sparseness: forwarded as the fourth argument (``size_t``).
    """

    def __init__(
            self,
            user_key_len=0,
            bloom_bits_per_prefix=10,
            hash_table_ratio=0.75,
            index_sparseness=10):
        cdef table_factory.TableFactory* raw_factory
        raw_factory = table_factory.NewPlainTableFactory(
            user_key_len,
            bloom_bits_per_prefix,
            hash_table_ratio,
            index_sparseness)
        # Hand the freshly created factory over to the shared pointer.
        self.factory.reset(raw_factory)
cdef class TotalOrderPlainTableFactory(PyTableFactory):
    """Wraps the factory returned by ``NewTotalOrderPlainTableFactory``.

    :param user_key_len: forwarded as the first argument (``uint32_t``).
    :param bloom_bits_per_key: forwarded as the second argument (``int``).
    :param index_sparseness: forwarded as the third argument (``size_t``).
    """

    def __init__(
            self,
            user_key_len=0,
            bloom_bits_per_key=0,
            index_sparseness=16):
        cdef table_factory.TableFactory* raw_factory
        raw_factory = table_factory.NewTotalOrderPlainTableFactory(
            user_key_len,
            bloom_bits_per_key,
            index_sparseness)
        # Hand the freshly created factory over to the shared pointer.
        self.factory.reset(raw_factory)
#############################################
cdef class CompressionType(object):
no_compression = u'no_compression'
snappy_compression = u'snappy_compression'
@ -555,6 +597,7 @@ cdef class Options(object):
cdef PyCache py_block_cache
cdef PyCache py_block_cache_compressed
cdef PySliceTransform py_prefix_extractor
cdef PyTableFactory py_table_factory
# Used to protect sharing of Options with many DB-objects
cdef cpp_bool in_use
@ -574,6 +617,7 @@ cdef class Options(object):
self.py_block_cache = None
self.py_block_cache_compressed = None
self.py_prefix_extractor = None
self.py_table_factory = None
for key, value in kwargs.items():
setattr(self, key, value)
@ -975,6 +1019,13 @@ cdef class Options(object):
def __set__(self, value):
self.opts.inplace_update_support = value
property table_factory:
    # Table factory used by these options, exposed as its Python wrapper.

    def __get__(self):
        # Returns the wrapper most recently assigned through the setter,
        # or None when no factory has been assigned yet.
        return self.py_table_factory

    def __set__(self, PyTableFactory value):
        # Cython accepts None for a typed extension-type argument; calling
        # the cdef method below on None is not safe, so reject it up front.
        if value is None:
            raise TypeError(
                "table_factory must be a table factory instance, not None")
        # Remember the Python wrapper: without this the getter always
        # returned None and the wrapper was not kept referenced by Options.
        self.py_table_factory = value
        self.opts.table_factory = value.get_table_factory()
property inplace_update_num_locks:
def __get__(self):
return self.opts.inplace_update_num_locks

View file

@ -11,6 +11,7 @@ from logger cimport Logger
from slice_ cimport Slice
from snapshot cimport Snapshot
from slice_transform cimport SliceTransform
from table_factory cimport TableFactory
cdef extern from "rocksdb/options.h" namespace "rocksdb":
ctypedef enum CompressionType:
@ -104,7 +105,7 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
cpp_bool filter_deletes
uint64_t max_sequential_skip_in_iterations
# TODO: memtable_factory
# TODO: table_factory
shared_ptr[TableFactory] table_factory
# TODO: table_properties_collectors
cpp_bool inplace_update_support
size_t inplace_update_num_locks

View file

@ -0,0 +1,9 @@
from libc.stdint cimport uint32_t
# Declarations taken from RocksDB's table.h header.
cdef extern from "rocksdb/table.h" namespace "rocksdb":
    cdef cppclass TableFactory:
        TableFactory()

    # Each function returns a raw TableFactory pointer; the callers in this
    # package hand that pointer to a shared_ptr.
    TableFactory* NewBlockBasedTableFactory()

    TableFactory* NewPlainTableFactory(
        uint32_t user_key_len,
        int bloom_bits_per_prefix,
        double hash_table_ratio,
        size_t index_sparseness)

    TableFactory* NewTotalOrderPlainTableFactory(
        uint32_t user_key_len,
        int bloom_bits_per_key,
        size_t index_sparseness)

View file

@ -61,3 +61,11 @@ class TestOptions(unittest.TestCase):
self.assertEqual(name, opts.db_log_dir)
self.assertEqual(name, opts.wal_dir)
def test_table_factory(self):
    """Options starts without a table factory and accepts each factory type."""
    options = rocksdb.Options()
    self.assertIsNone(options.table_factory)

    # Assign one instance of every factory class in turn; the setter must
    # accept each of them without raising.
    factory_classes = (
        rocksdb.BlockBasedTableFactory,
        rocksdb.PlainTableFactory,
        rocksdb.TotalOrderPlainTableFactory,
    )
    for make_factory in factory_classes:
        options.table_factory = make_factory()