Document the new init-methods for the SST-table-builders
This commit is contained in:
parent
13518d2680
commit
a3072c79b3
3 changed files with 145 additions and 13 deletions
|
@ -902,6 +902,26 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
||||||
|
|
||||||
Wraps BlockBasedTableFactory of RocksDB.
|
Wraps BlockBasedTableFactory of RocksDB.
|
||||||
|
|
||||||
|
.. py:method:: __init__(index_type='binary_search', hash_index_allow_collision=True, checksum='crc32')
|
||||||
|
|
||||||
|
:param string index_type:
|
||||||
|
* ``binary_search`` a space efficient index block that is optimized
|
||||||
|
for binary-search-based index.
|
||||||
|
* ``hash_search`` the hash index. If enabled, will do hash lookup
|
||||||
|
when `Options.prefix_extractor` is provided.
|
||||||
|
|
||||||
|
:param bool hash_index_allow_collision:
|
||||||
|
Influences the behavior when ``hash_search`` is used.
|
||||||
|
If ``False``, stores a precise prefix to block range mapping.
|
||||||
|
If ``True``, does not store prefix and allows prefix hash collision
|
||||||
|
(less memory consumption)
|
||||||
|
|
||||||
|
:param string checksum:
|
||||||
|
Use the specified checksum type. Newly created table files will be
|
||||||
|
protected with this checksum type. Old table files will still be readable,
|
||||||
|
even though they have a different checksum type.
|
||||||
|
Can be either ``crc32`` or ``xxhash``.
|
||||||
|
|
||||||
.. py:class:: rocksdb.PlainTableFactory
|
.. py:class:: rocksdb.PlainTableFactory
|
||||||
|
|
||||||
Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory.
|
Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory.
|
||||||
|
@ -911,7 +931,7 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
||||||
key prefix. Inside the hash bucket found, a binary search is executed for
|
key prefix. Inside the hash bucket found, a binary search is executed for
|
||||||
hash conflicts. Finally, a linear search is used.
|
hash conflicts. Finally, a linear search is used.
|
||||||
|
|
||||||
.. py:method:: __init__(user_key_len=0, bloom_bits_per_prefix=10, hash_table_ratio=0.75, index_sparseness=10)
|
.. py:method:: __init__(user_key_len=0, bloom_bits_per_key=10, hash_table_ratio=0.75, index_sparseness=10, huge_page_tlb_size=0, encoding_type='plain', full_scan_mode=False, store_index_in_file=False)
|
||||||
|
|
||||||
:param int user_key_len:
|
:param int user_key_len:
|
||||||
Plain table has optimization for fixed-size keys, which can be
|
Plain table has optimization for fixed-size keys, which can be
|
||||||
|
@ -929,6 +949,45 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
||||||
:param int index_sparseness:
|
:param int index_sparseness:
|
||||||
Inside each prefix, build one index record per this many keys,
|
Inside each prefix, build one index record per this many keys,
|
||||||
which is used for binary search inside each hash bucket.
|
which is used for binary search inside each hash bucket.
|
||||||
|
For encoding type ``prefix``, the value will be used when
|
||||||
|
writing to determine an interval to rewrite the full key.
|
||||||
|
It will also be used as a suggestion and satisfied when possible.
|
||||||
|
|
||||||
|
:param int huge_page_tlb_size:
|
||||||
|
If <=0, allocate hash indexes and blooms from malloc.
|
||||||
|
Otherwise from huge page TLB.
|
||||||
|
The user needs to reserve huge pages for it to be allocated, like:
|
||||||
|
``sysctl -w vm.nr_hugepages=20``
|
||||||
|
See the Linux kernel documentation: Documentation/vm/hugetlbpage.txt
|
||||||
|
|
||||||
|
:param string encoding_type:
|
||||||
|
How to encode the keys. The value will determine how to encode keys
|
||||||
|
when writing to a new SST file. This value will be stored
|
||||||
|
inside the SST file which will be used when reading from the
|
||||||
|
file, which makes it possible for users to choose a different
|
||||||
|
encoding type when reopening a DB. Files with different
|
||||||
|
encoding types can co-exist in the same DB and can be read.
|
||||||
|
|
||||||
|
* ``plain``: Always write full keys without any special encoding.
|
||||||
|
* ``prefix``: Find opportunity to write the same prefix once for multiple rows.
|
||||||
|
In some cases, when a key follows a previous key with the same prefix,
|
||||||
|
instead of writing out the full key, it just writes out the size of the
|
||||||
|
shared prefix, as well as other bytes, to save some bytes.
|
||||||
|
|
||||||
|
When using this option, the user is required to use the same prefix
|
||||||
|
extractor to make sure the same prefix will be extracted from the same key.
|
||||||
|
The Name() value of the prefix extractor will be stored in the file.
|
||||||
|
When reopening the file, the name of the options.prefix_extractor given
|
||||||
|
will be bitwise compared to the prefix extractors stored in the file.
|
||||||
|
An error will be returned if the two don't match.
|
||||||
|
|
||||||
|
:param bool full_scan_mode:
|
||||||
|
Mode for reading the whole file one record at a time, without using the index.
|
||||||
|
|
||||||
|
:param bool store_index_in_file:
|
||||||
|
Compute plain table index and bloom filter during file building
|
||||||
|
and store it in the file. When reading the file, the index will be mmapped
|
||||||
|
instead of being recomputed.
|
||||||
|
|
||||||
.. _memtable_factories_label:
|
.. _memtable_factories_label:
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@ import cython
|
||||||
from libcpp.string cimport string
|
from libcpp.string cimport string
|
||||||
from libcpp.deque cimport deque
|
from libcpp.deque cimport deque
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
|
from cpython cimport bool as py_bool
|
||||||
from libcpp cimport bool as cpp_bool
|
from libcpp cimport bool as cpp_bool
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t
|
||||||
from cython.operator cimport dereference as deref
|
from cython.operator cimport dereference as deref
|
||||||
|
@ -561,23 +562,65 @@ cdef class PyTableFactory(object):
|
||||||
return self.factory
|
return self.factory
|
||||||
|
|
||||||
cdef class BlockBasedTableFactory(PyTableFactory):
|
cdef class BlockBasedTableFactory(PyTableFactory):
|
||||||
def __init__(self):
|
def __init__(self,
|
||||||
self.factory.reset(table_factory.NewBlockBasedTableFactory())
|
index_type='binary_search',
|
||||||
|
py_bool hash_index_allow_collision=True,
|
||||||
|
checksum='crc32'):
|
||||||
|
|
||||||
|
cdef table_factory.BlockBasedTableOptions table_options
|
||||||
|
|
||||||
|
if index_type == 'binary_search':
|
||||||
|
table_options.index_type = table_factory.kBinarySearch
|
||||||
|
elif index_type == 'hash_search':
|
||||||
|
table_options.index_type = table_factory.kHashSearch
|
||||||
|
else:
|
||||||
|
raise ValueError("Unknown index_type: %s" % index_type)
|
||||||
|
|
||||||
|
if hash_index_allow_collision:
|
||||||
|
table_options.hash_index_allow_collision = True
|
||||||
|
else:
|
||||||
|
table_options.hash_index_allow_collision = False
|
||||||
|
|
||||||
|
if checksum == 'crc32':
|
||||||
|
table_options.checksum = table_factory.kCRC32c
|
||||||
|
elif checksum == 'xxhash':
|
||||||
|
table_options.checksum = table_factory.kxxHash
|
||||||
|
else:
|
||||||
|
raise ValueError("Unknown checksum: %s" % checksum)
|
||||||
|
|
||||||
|
self.factory.reset(table_factory.NewBlockBasedTableFactory(table_options))
|
||||||
|
|
||||||
cdef class PlainTableFactory(PyTableFactory):
|
cdef class PlainTableFactory(PyTableFactory):
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
user_key_len=0,
|
user_key_len=0,
|
||||||
bloom_bits_per_prefix=10,
|
bloom_bits_per_key=10,
|
||||||
hash_table_ratio=0.75,
|
hash_table_ratio=0.75,
|
||||||
index_sparseness=10):
|
index_sparseness=10,
|
||||||
|
huge_page_tlb_size=0,
|
||||||
|
encoding_type='plain',
|
||||||
|
py_bool full_scan_mode=False,
|
||||||
|
py_bool store_index_in_file=False):
|
||||||
|
|
||||||
self.factory.reset(
|
cdef table_factory.PlainTableOptions table_options
|
||||||
table_factory.NewPlainTableFactory(
|
|
||||||
user_key_len,
|
table_options.user_key_len = user_key_len
|
||||||
bloom_bits_per_prefix,
|
table_options.bloom_bits_per_key = bloom_bits_per_key
|
||||||
hash_table_ratio,
|
table_options.hash_table_ratio = hash_table_ratio
|
||||||
index_sparseness))
|
table_options.index_sparseness = index_sparseness
|
||||||
|
table_options.huge_page_tlb_size = huge_page_tlb_size
|
||||||
|
|
||||||
|
if encoding_type == 'plain':
|
||||||
|
table_options.encoding_type = table_factory.kPlain
|
||||||
|
elif encoding_type == 'prefix':
|
||||||
|
table_options.encoding_type = table_factory.kPrefix
|
||||||
|
else:
|
||||||
|
raise ValueError("Unknown encoding_type: %s" % encoding_type)
|
||||||
|
|
||||||
|
table_options.full_scan_mode = full_scan_mode
|
||||||
|
table_options.store_index_in_file = store_index_in_file
|
||||||
|
|
||||||
|
self.factory.reset( table_factory.NewPlainTableFactory(table_options))
|
||||||
#############################################
|
#############################################
|
||||||
|
|
||||||
### Here are the MemtableFactories
|
### Here are the MemtableFactories
|
||||||
|
|
|
@ -1,8 +1,38 @@
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t
|
||||||
|
from libcpp cimport bool as cpp_bool
|
||||||
|
|
||||||
cdef extern from "rocksdb/table.h" namespace "rocksdb":
|
cdef extern from "rocksdb/table.h" namespace "rocksdb":
|
||||||
cdef cppclass TableFactory:
|
cdef cppclass TableFactory:
|
||||||
TableFactory()
|
TableFactory()
|
||||||
|
|
||||||
cdef TableFactory* NewBlockBasedTableFactory()
|
ctypedef enum BlockBasedTableIndexType:
|
||||||
cdef TableFactory* NewPlainTableFactory(uint32_t, int, double, size_t)
|
kBinarySearch "rocksdb::BlockBasedTableOptions::IndexType::kBinarySearch"
|
||||||
|
kHashSearch "rocksdb::BlockBasedTableOptions::IndexType::kHashSearch"
|
||||||
|
|
||||||
|
ctypedef enum ChecksumType:
|
||||||
|
kCRC32c
|
||||||
|
kxxHash
|
||||||
|
|
||||||
|
cdef cppclass BlockBasedTableOptions:
|
||||||
|
BlockBasedTableOptions()
|
||||||
|
BlockBasedTableIndexType index_type
|
||||||
|
cpp_bool hash_index_allow_collision
|
||||||
|
ChecksumType checksum
|
||||||
|
|
||||||
|
cdef TableFactory* NewBlockBasedTableFactory(const BlockBasedTableOptions&)
|
||||||
|
|
||||||
|
ctypedef enum EncodingType:
|
||||||
|
kPlain
|
||||||
|
kPrefix
|
||||||
|
|
||||||
|
cdef cppclass PlainTableOptions:
|
||||||
|
uint32_t user_key_len
|
||||||
|
int bloom_bits_per_key
|
||||||
|
double hash_table_ratio
|
||||||
|
size_t index_sparseness
|
||||||
|
size_t huge_page_tlb_size
|
||||||
|
EncodingType encoding_type
|
||||||
|
cpp_bool full_scan_mode
|
||||||
|
cpp_bool store_index_in_file
|
||||||
|
|
||||||
|
cdef TableFactory* NewPlainTableFactory(const PlainTableOptions&)
|
||||||
|
|
Loading…
Reference in a new issue