From a3072c79b33f1748acdd818eeedf8b24948739c9 Mon Sep 17 00:00:00 2001 From: hofmockel Date: Fri, 22 Aug 2014 19:58:17 +0200 Subject: [PATCH] Document the new init-methods for the SST-table-builders --- docs/api/options.rst | 61 ++++++++++++++++++++++++++++++++++++- rocksdb/_rocksdb.pyx | 63 ++++++++++++++++++++++++++++++++------- rocksdb/table_factory.pxd | 34 +++++++++++++++++++-- 3 files changed, 145 insertions(+), 13 deletions(-) diff --git a/docs/api/options.rst b/docs/api/options.rst index c328768..3b23e87 100644 --- a/docs/api/options.rst +++ b/docs/api/options.rst @@ -902,6 +902,26 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats Wraps BlockBasedTableFactory of RocksDB. + .. py:method:: __init__(index_type='binary_search', hash_index_allow_collision=True, checksum='crc32') + + :param string index_type: + * ``binary_search`` a space efficient index block that is optimized + for binary-search-based index. + * ``hash_search`` the hash index. If enabled, will do hash lookup + when `Options.prefix_extractor` is provided. + + :param bool hash_index_allow_collision: + Influences the behavior when ``hash_search`` is used. + If ``False``, stores a precise prefix to block range mapping. + If ``True``, does not store prefix and allows prefix hash collision + (less memory consumption) + + :param string checksum: + Use the specified checksum type. Newly created table files will be + protected with this checksum type. Old table files will still be readable, + even if they have a different checksum type. + Can be either ``crc32`` or ``xxhash``. + .. py:class:: rocksdb.PlainTableFactory Plain Table with prefix-only seek. It wraps rocksdb PlainTableFactory. @@ -911,7 +931,7 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats key prefix. Inside the hash bucket found, a binary search is executed for hash conflicts. Finally, a linear search is used. - .. 
py:method:: __init__(user_key_len=0, bloom_bits_per_prefix=10, hash_table_ratio=0.75, index_sparseness=10) + .. py:method:: __init__(user_key_len=0, bloom_bits_per_key=10, hash_table_ratio=0.75, index_sparseness=10, huge_page_tlb_size=0, encoding_type='plain', full_scan_mode=False, store_index_in_file=False) :param int user_key_len: Plain table has optimization for fix-sized keys, which can be @@ -929,6 +949,45 @@ https://github.com/facebook/rocksdb/wiki/A-Tutorial-of-RocksDB-SST-formats :param int index_sparseness: Inside each prefix, need to build one index record for how many keys for binary search inside each hash bucket. + For encoding type ``prefix``, the value will be used when + writing to determine an interval to rewrite the full key. + It will also be used as a suggestion and satisfied when possible. + + :param int huge_page_tlb_size: + If <=0, allocate hash indexes and blooms from malloc. + Otherwise from huge page TLB. + The user needs to reserve huge pages for it to be allocated, like: + ``sysctl -w vm.nr_hugepages=20`` + See the Linux doc Documentation/vm/hugetlbpage.txt + + :param string encoding_type: + How to encode the keys. The value will determine how to encode keys + when writing to a new SST file. This value will be stored + inside the SST file which will be used when reading from the + file, which makes it possible for users to choose a different + encoding type when reopening a DB. Files with different + encoding types can co-exist in the same DB and can be read. + + * ``plain``: Always write full keys without any special encoding. + * ``prefix``: Find opportunities to write the same prefix once for multiple rows. + In some cases, when a key follows a previous key with the same prefix, + instead of writing out the full key, it just writes out the size of the + shared prefix, as well as other bytes, to save some bytes. 
+ + When using this option, the user is required to use the same prefix + extractor to make sure the same prefix will be extracted from the same key. + The Name() value of the prefix extractor will be stored in the file. + When reopening the file, the name of the options.prefix_extractor given + will be bitwise compared to the prefix extractors stored in the file. + An error will be returned if the two don't match. + + :param bool full_scan_mode: + Mode for reading the whole file one record at a time without using the index. + + :param bool store_index_in_file: + Compute plain table index and bloom filter during file building + and store them in the file. When reading the file, the index will be mmapped + instead of being recomputed. .. _memtable_factories_label: diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx index 581faef..2f391b8 100644 --- a/rocksdb/_rocksdb.pyx +++ b/rocksdb/_rocksdb.pyx @@ -2,6 +2,7 @@ import cython from libcpp.string cimport string from libcpp.deque cimport deque from libcpp.vector cimport vector +from cpython cimport bool as py_bool from libcpp cimport bool as cpp_bool from libc.stdint cimport uint32_t from cython.operator cimport dereference as deref @@ -561,23 +562,65 @@ cdef class PyTableFactory(object): return self.factory cdef class BlockBasedTableFactory(PyTableFactory): - def __init__(self): - self.factory.reset(table_factory.NewBlockBasedTableFactory()) + def __init__(self, + index_type='binary_search', + py_bool hash_index_allow_collision=True, + checksum='crc32'): + + cdef table_factory.BlockBasedTableOptions table_options + + if index_type == 'binary_search': + table_options.index_type = table_factory.kBinarySearch + elif index_type == 'hash_search': + table_options.index_type = table_factory.kHashSearch + else: + raise ValueError("Unknown index_type: %s" % index_type) + + if hash_index_allow_collision: + table_options.hash_index_allow_collision = True + else: + table_options.hash_index_allow_collision = False + + if checksum == 'crc32': + 
table_options.checksum = table_factory.kCRC32c + elif checksum == 'xxhash': + table_options.checksum = table_factory.kxxHash + else: + raise ValueError("Unknown checksum: %s" % checksum) + + self.factory.reset(table_factory.NewBlockBasedTableFactory(table_options)) cdef class PlainTableFactory(PyTableFactory): def __init__( self, user_key_len=0, - bloom_bits_per_prefix=10, + bloom_bits_per_key=10, hash_table_ratio=0.75, - index_sparseness=10): + index_sparseness=10, + huge_page_tlb_size=0, + encoding_type='plain', + py_bool full_scan_mode=False, + py_bool store_index_in_file=False): - self.factory.reset( - table_factory.NewPlainTableFactory( - user_key_len, - bloom_bits_per_prefix, - hash_table_ratio, - index_sparseness)) + cdef table_factory.PlainTableOptions table_options + + table_options.user_key_len = user_key_len + table_options.bloom_bits_per_key = bloom_bits_per_key + table_options.hash_table_ratio = hash_table_ratio + table_options.index_sparseness = index_sparseness + table_options.huge_page_tlb_size = huge_page_tlb_size + + if encoding_type == 'plain': + table_options.encoding_type = table_factory.kPlain + elif encoding_type == 'prefix': + table_options.encoding_type = table_factory.kPrefix + else: + raise ValueError("Unknown encoding_type: %s" % encoding_type) + + table_options.full_scan_mode = full_scan_mode + table_options.store_index_in_file = store_index_in_file + + self.factory.reset( table_factory.NewPlainTableFactory(table_options)) ############################################# ### Here are the MemtableFactories diff --git a/rocksdb/table_factory.pxd b/rocksdb/table_factory.pxd index 0a61726..cdb713f 100644 --- a/rocksdb/table_factory.pxd +++ b/rocksdb/table_factory.pxd @@ -1,8 +1,38 @@ from libc.stdint cimport uint32_t +from libcpp cimport bool as cpp_bool cdef extern from "rocksdb/table.h" namespace "rocksdb": cdef cppclass TableFactory: TableFactory() - cdef TableFactory* NewBlockBasedTableFactory() - cdef TableFactory* 
NewPlainTableFactory(uint32_t, int, double, size_t) + ctypedef enum BlockBasedTableIndexType: + kBinarySearch "rocksdb::BlockBasedTableOptions::IndexType::kBinarySearch" + kHashSearch "rocksdb::BlockBasedTableOptions::IndexType::kHashSearch" + + ctypedef enum ChecksumType: + kCRC32c + kxxHash + + cdef cppclass BlockBasedTableOptions: + BlockBasedTableOptions() + BlockBasedTableIndexType index_type + cpp_bool hash_index_allow_collision + ChecksumType checksum + + cdef TableFactory* NewBlockBasedTableFactory(const BlockBasedTableOptions&) + + ctypedef enum EncodingType: + kPlain + kPrefix + + cdef cppclass PlainTableOptions: + uint32_t user_key_len + int bloom_bits_per_key + double hash_table_ratio + size_t index_sparseness + size_t huge_page_tlb_size + EncodingType encoding_type + cpp_bool full_scan_mode + cpp_bool store_index_in_file + + cdef TableFactory* NewPlainTableFactory(const PlainTableOptions&)