Document the new init-methods for the SST-table-builders
This commit is contained in:
parent
13518d2680
commit
a3072c79b3
3 changed files with 145 additions and 13 deletions
|
@ -902,6 +902,26 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
|||
|
||||
Wraps BlockBasedTableFactory of RocksDB.
|
||||
|
||||
.. py:method:: __init__(index_type='binary_search', hash_index_allow_collision=True, checksum='crc32')
|
||||
|
||||
:param string index_type:
|
||||
* ``binary_search`` a space efficient index block that is optimized
|
||||
for binary-search-based index.
|
||||
* ``hash_search`` the hash index. If enabled, will do hash lookup
|
||||
when `Options.prefix_extractor` is provided.
|
||||
|
||||
:param bool hash_index_allow_collision:
|
||||
Influence the behavior when ``hash_search`` is used.
|
||||
If ``False``, stores a precise prefix to block range mapping.
|
||||
If ``True``, does not store prefix and allows prefix hash collision
|
||||
(less memory consumption)
|
||||
|
||||
:param string checksum:
|
||||
Use the specified checksum type. Newly created table files will be
|
||||
protected with this checksum type. Old table files will still be readable,
|
||||
even though they have different checksum type.
|
||||
Can be either ``crc32`` or ``xxhash``.
|
||||
|
||||
.. py:class:: rocksdb.PlainTableFactory
|
||||
|
||||
Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory.
|
||||
|
@ -911,7 +931,7 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
|||
key prefix. Inside the hash bucket found, a binary search is executed for
|
||||
hash conflicts. Finally, a linear search is used.
|
||||
|
||||
.. py:method:: __init__(user_key_len=0, bloom_bits_per_prefix=10, hash_table_ratio=0.75, index_sparseness=10)
|
||||
.. py:method:: __init__(user_key_len=0, bloom_bits_per_key=10, hash_table_ratio=0.75, index_sparseness=10, huge_page_tlb_size=0, encoding_type='plain', full_scan_mode=False, store_index_in_file=False)
|
||||
|
||||
:param int user_key_len:
|
||||
Plain table has optimization for fix-sized keys, which can be
|
||||
|
@ -929,6 +949,45 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats
|
|||
:param int index_sparseness:
|
||||
Inside each prefix, need to build one index record for how
|
||||
many keys for binary search inside each hash bucket.
|
||||
For encoding type ``prefix``, the value will be used when
|
||||
writing to determine an interval to rewrite the full key.
|
||||
It will also be used as a suggestion and satisfied when possible.
|
||||
|
||||
:param int huge_page_tlb_size:
|
||||
If <=0, allocate hash indexes and blooms from malloc.
|
||||
Otherwise from huge page TLB.
|
||||
The user needs to reserve huge pages for it to be allocated, like:
|
||||
``sysctl -w vm.nr_hugepages=20``
|
||||
See linux doc Documentation/vm/hugetlbpage.txt
|
||||
|
||||
:param string encoding_type:
|
||||
How to encode the keys. The value will determine how to encode keys
|
||||
when writing to a new SST file. This value will be stored
|
||||
inside the SST file which will be used when reading from the
|
||||
file, which makes it possible for users to choose different
|
||||
encoding type when reopening a DB. Files with different
|
||||
encoding types can co-exist in the same DB and can be read.
|
||||
|
||||
* ``plain``: Always write full keys without any special encoding.
|
||||
* ``prefix``: Find opportunity to write the same prefix once for multiple rows.
|
||||
In some cases, when a key follows a previous key with the same prefix,
|
||||
instead of writing out the full key, it just writes out the size of the
|
||||
shared prefix, as well as other bytes, to save some bytes.
|
||||
|
||||
When using this option, the user is required to use the same prefix
|
||||
extractor to make sure the same prefix will be extracted from the same key.
|
||||
The Name() value of the prefix extractor will be stored in the file.
|
||||
When reopening the file, the name of the options.prefix_extractor given
|
||||
will be bitwise compared to the prefix extractors stored in the file.
|
||||
An error will be returned if the two don't match.
|
||||
|
||||
:param bool full_scan_mode:
|
||||
Mode for reading the whole file one record by one without using the index.
|
||||
|
||||
:param bool store_index_in_file:
|
||||
Compute plain table index and bloom filter during file building
|
||||
and store it in file. When reading file, index will be mmaped
|
||||
instead of recomputation.
|
||||
|
||||
.. _memtable_factories_label:
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ import cython
|
|||
from libcpp.string cimport string
|
||||
from libcpp.deque cimport deque
|
||||
from libcpp.vector cimport vector
|
||||
from cpython cimport bool as py_bool
|
||||
from libcpp cimport bool as cpp_bool
|
||||
from libc.stdint cimport uint32_t
|
||||
from cython.operator cimport dereference as deref
|
||||
|
@ -561,23 +562,65 @@ cdef class PyTableFactory(object):
|
|||
return self.factory
|
||||
|
||||
cdef class BlockBasedTableFactory(PyTableFactory):
|
||||
def __init__(self):
|
||||
self.factory.reset(table_factory.NewBlockBasedTableFactory())
|
||||
def __init__(self,
|
||||
index_type='binary_search',
|
||||
py_bool hash_index_allow_collision=True,
|
||||
checksum='crc32'):
|
||||
|
||||
cdef table_factory.BlockBasedTableOptions table_options
|
||||
|
||||
if index_type == 'binary_search':
|
||||
table_options.index_type = table_factory.kBinarySearch
|
||||
elif index_type == 'hash_search':
|
||||
table_options.index_type = table_factory.kHashSearch
|
||||
else:
|
||||
raise ValueError("Unknown index_type: %s" % index_type)
|
||||
|
||||
if hash_index_allow_collision:
|
||||
table_options.hash_index_allow_collision = True
|
||||
else:
|
||||
table_options.hash_index_allow_collision = False
|
||||
|
||||
if checksum == 'crc32':
|
||||
table_options.checksum = table_factory.kCRC32c
|
||||
elif checksum == 'xxhash':
|
||||
table_options.checksum = table_factory.kxxHash
|
||||
else:
|
||||
raise ValueError("Unknown checksum: %s" % checksum)
|
||||
|
||||
self.factory.reset(table_factory.NewBlockBasedTableFactory(table_options))
|
||||
|
||||
cdef class PlainTableFactory(PyTableFactory):
|
||||
def __init__(
|
||||
self,
|
||||
user_key_len=0,
|
||||
bloom_bits_per_prefix=10,
|
||||
bloom_bits_per_key=10,
|
||||
hash_table_ratio=0.75,
|
||||
index_sparseness=10):
|
||||
index_sparseness=10,
|
||||
huge_page_tlb_size=0,
|
||||
encoding_type='plain',
|
||||
py_bool full_scan_mode=False,
|
||||
py_bool store_index_in_file=False):
|
||||
|
||||
self.factory.reset(
|
||||
table_factory.NewPlainTableFactory(
|
||||
user_key_len,
|
||||
bloom_bits_per_prefix,
|
||||
hash_table_ratio,
|
||||
index_sparseness))
|
||||
cdef table_factory.PlainTableOptions table_options
|
||||
|
||||
table_options.user_key_len = user_key_len
|
||||
table_options.bloom_bits_per_key = bloom_bits_per_key
|
||||
table_options.hash_table_ratio = hash_table_ratio
|
||||
table_options.index_sparseness = index_sparseness
|
||||
table_options.huge_page_tlb_size = huge_page_tlb_size
|
||||
|
||||
if encoding_type == 'plain':
|
||||
table_options.encoding_type = table_factory.kPlain
|
||||
elif encoding_type == 'prefix':
|
||||
table_options.encoding_type = table_factory.kPrefix
|
||||
else:
|
||||
raise ValueError("Unknown encoding_type: %s" % encoding_type)
|
||||
|
||||
table_options.full_scan_mode = full_scan_mode
|
||||
table_options.store_index_in_file = store_index_in_file
|
||||
|
||||
self.factory.reset( table_factory.NewPlainTableFactory(table_options))
|
||||
#############################################
|
||||
|
||||
### Here are the MemtableFactories
|
||||
|
|
|
@ -1,8 +1,38 @@
|
|||
from libc.stdint cimport uint32_t
|
||||
from libcpp cimport bool as cpp_bool
|
||||
|
||||
cdef extern from "rocksdb/table.h" namespace "rocksdb":
|
||||
cdef cppclass TableFactory:
|
||||
TableFactory()
|
||||
|
||||
cdef TableFactory* NewBlockBasedTableFactory()
|
||||
cdef TableFactory* NewPlainTableFactory(uint32_t, int, double, size_t)
|
||||
ctypedef enum BlockBasedTableIndexType:
|
||||
kBinarySearch "rocksdb::BlockBasedTableOptions::IndexType::kBinarySearch"
|
||||
kHashSearch "rocksdb::BlockBasedTableOptions::IndexType::kHashSearch"
|
||||
|
||||
ctypedef enum ChecksumType:
|
||||
kCRC32c
|
||||
kxxHash
|
||||
|
||||
cdef cppclass BlockBasedTableOptions:
|
||||
BlockBasedTableOptions()
|
||||
BlockBasedTableIndexType index_type
|
||||
cpp_bool hash_index_allow_collision
|
||||
ChecksumType checksum
|
||||
|
||||
cdef TableFactory* NewBlockBasedTableFactory(const BlockBasedTableOptions&)
|
||||
|
||||
ctypedef enum EncodingType:
|
||||
kPlain
|
||||
kPrefix
|
||||
|
||||
cdef cppclass PlainTableOptions:
|
||||
uint32_t user_key_len
|
||||
int bloom_bits_per_key
|
||||
double hash_table_ratio
|
||||
size_t index_sparseness
|
||||
size_t huge_page_tlb_size
|
||||
EncodingType encoding_type
|
||||
cpp_bool full_scan_mode
|
||||
cpp_bool store_index_in_file
|
||||
|
||||
cdef TableFactory* NewPlainTableFactory(const PlainTableOptions&)
|
||||
|
|
Loading…
Reference in a new issue