From f897bf4911b3557fc7be14597dd056926278ccc6 Mon Sep 17 00:00:00 2001 From: hofmockel Date: Sat, 31 May 2014 20:21:42 +0200 Subject: [PATCH] Adapt to the changes of 'prefix seek api' See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes --- docs/api/database.rst | 22 ++++++---------------- docs/changelog.rst | 28 ++++++++++++++++++++++++++++ docs/tutorial/index.rst | 8 +++++--- rocksdb/_rocksdb.pyx | 23 +++-------------------- rocksdb/options.pxd | 2 -- rocksdb/tests/test_db.py | 24 +++++++++++++++++++++--- 6 files changed, 63 insertions(+), 44 deletions(-) diff --git a/docs/api/database.rst b/docs/api/database.rst index 2a5efd2..3eac695 100644 --- a/docs/api/database.rst +++ b/docs/api/database.rst @@ -72,7 +72,7 @@ Database object :param sync: See :py:meth:`rocksdb.DB.put` :param disable_wal: See :py:meth:`rocksdb.DB.put` - .. py:method:: get(key, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. py:method:: get(key, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") :param bytes key: Name to get @@ -85,10 +85,6 @@ Database object read for this iteration be cached in memory? Callers may wish to set this field to ``False`` for bulk scans. - :param bool prefix_seek: - If this option is set and memtable implementation allows. - Seek might only return keys with the same prefix as the seek-key - :param snapshot: If not ``None``, read as of the supplied snapshot (which must belong to the DB that is being read and which must @@ -107,7 +103,7 @@ Database object :returns: ``None`` if not found, else the value for this key - .. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") :param keys: Keys to fetch :type keys: list of bytes @@ -123,7 +119,7 @@ Database object keys will not be "de-duplicated". 
Duplicate keys will return duplicate values in order. - .. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") If the key definitely does not exist in the database, then this method returns ``False``, else ``True``. If the caller wants to obtain value @@ -142,12 +138,10 @@ Database object * ``(True, <data>)`` if key is found and value in memory and ``fetch=True`` * ``(False, None)`` if key is not found - .. py:method:: iterkeys(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. py:method:: iterkeys(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") Iterate over the keys - :param bytes prefix: Not implemented yet - For other params see :py:meth:`rocksdb.DB.get` :returns: @@ -156,12 +150,10 @@ Database object :rtype: :py:class:`rocksdb.BaseIterator` - .. py:method:: itervalues(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. py:method:: itervalues(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") Iterate over the values - :param bytes prefix: Not implemented yet - For other params see :py:meth:`rocksdb.DB.get` :returns: @@ -170,12 +162,10 @@ Database object :rtype: :py:class:`rocksdb.BaseIterator` - .. py:method:: iteritems(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + .. 
py:method:: iteritems(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all") Iterate over the items - :param bytes prefix: Not implemented yet - For other params see :py:meth:`rocksdb.DB.get` :returns: diff --git a/docs/changelog.rst b/docs/changelog.rst index a6cfe4f..5626d95 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,6 +4,34 @@ Changelog Version 0.3 ----------- +Backward Incompatible Changes: +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +**Prefix Seeks:** + +According to this page https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes, +all the prefix-related parameters on ``ReadOptions`` have been removed. +RocksDB now detects whether ``Options.prefix_extractor`` is set and, if so, uses +prefix seeks automatically. This leads to the following changes in pyrocksdb. + +* DB.iterkeys, DB.itervalues, DB.iteritems have *no* ``prefix`` parameter anymore. +* DB.get, DB.multi_get, DB.key_may_exist, DB.iterkeys, DB.itervalues, DB.iteritems + have *no* ``prefix_seek`` parameter anymore. + +As a result, all iterators now always walk to the *end* of the database. +So if you need to stay within a prefix, write your own code to ensure that. +For DB.iterkeys and DB.iteritems, ``itertools.takewhile`` is a possible solution. 
:: + + from itertools import takewhile + + it = self.db.iterkeys() + it.seek(b'00002') + print list(takewhile(lambda key: key.startswith(b'00002'), it)) + + it = self.db.iteritems() + it.seek(b'00002') + print dict(takewhile(lambda item: item[0].startswith(b'00002'), it)) + Version 0.2 ----------- diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst index 7a5fdea..0457bf8 100644 --- a/docs/tutorial/index.rst +++ b/docs/tutorial/index.rst @@ -236,11 +236,13 @@ So always the first 5 bytes are used as the prefix :: db.put(b'00003.y', b'y') db.put(b'00003.z', b'z') - it = db.iteritems(prefix=b'00002') - it.seek(b'00002') + prefix = b'00002' + + it = db.iteritems() + it.seek(prefix) # prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'} - print dict(it) + print dict(itertools.takewhile(lambda item: item[0].startswith(prefix), it)) Backup And Restore diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx index 23b7f2c..97427f7 100644 --- a/rocksdb/_rocksdb.pyx +++ b/rocksdb/_rocksdb.pyx @@ -1461,40 +1461,36 @@ cdef class DB(object): return (exists, None) - def iterkeys(self, prefix=None, *args, **kwargs): + def iterkeys(self, *args, **kwargs): cdef options.ReadOptions opts cdef KeysIterator it opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) - it = KeysIterator(self) - it.set_prefix(opts, prefix) with nogil: it.ptr = self.db.NewIterator(opts) return it - def itervalues(self, prefix=None, *args, **kwargs): + def itervalues(self, *args, **kwargs): cdef options.ReadOptions opts cdef ValuesIterator it opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) it = ValuesIterator(self) - it.set_prefix(opts, prefix) with nogil: it.ptr = self.db.NewIterator(opts) return it - def iteritems(self, prefix=None, *args, **kwargs): + def iteritems(self, *args, **kwargs): cdef options.ReadOptions opts cdef ItemsIterator it opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) it = ItemsIterator(self) - it.set_prefix(opts, 
prefix) with nogil: it.ptr = self.db.NewIterator(opts) @@ -1541,7 +1537,6 @@ cdef class DB(object): def __parse_read_opts( verify_checksums=False, fill_cache=True, - prefix_seek=False, snapshot=None, read_tier="all"): @@ -1552,7 +1547,6 @@ cdef class DB(object): cdef options.ReadOptions opts opts.verify_checksums = py_opts['verify_checksums'] opts.fill_cache = py_opts['fill_cache'] - opts.prefix_seek = py_opts['prefix_seek'] if py_opts['snapshot'] is not None: opts.snapshot = ((py_opts['snapshot'])).ptr @@ -1591,9 +1585,6 @@ cdef class Snapshot(object): cdef class BaseIterator(object): cdef iterator.Iterator* ptr cdef DB db - # To keep a reference to the prefix - cdef object prefix - cdef Slice c_prefix def __cinit__(self, DB db): self.db = db @@ -1619,14 +1610,6 @@ cdef class BaseIterator(object): def __reversed__(self): return ReversedIterator(self) - cdef set_prefix(self, options.ReadOptions& opts, object prefix=None): - if prefix is None: - return - - self.c_prefix = bytes_to_slice(prefix) - self.prefix = prefix - opts.prefix = cython.address(self.c_prefix) - cpdef seek_to_first(self): with nogil: self.ptr.SeekToFirst() diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd index 0dd5e77..0c874dc 100644 --- a/rocksdb/options.pxd +++ b/rocksdb/options.pxd @@ -123,8 +123,6 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb": cdef cppclass ReadOptions: cpp_bool verify_checksums cpp_bool fill_cache - cpp_bool prefix_seek - const Slice* prefix const Snapshot* snapshot ReadTier read_tier diff --git a/rocksdb/tests/test_db.py b/rocksdb/tests/test_db.py index 1ade1f1..d54c446 100644 --- a/rocksdb/tests/test_db.py +++ b/rocksdb/tests/test_db.py @@ -3,6 +3,7 @@ import shutil import gc import unittest import rocksdb +from itertools import takewhile def int_to_bytes(ob): return str(ob).encode('ascii') @@ -300,7 +301,10 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper): self._clean() self.db = rocksdb.DB('/tmp/test', opts) - def test_prefix(self): + def 
tearDown(self): + self._close_db() + + def _fill_db(self): for x in range(3000): keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x' keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y' @@ -309,12 +313,26 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper): self.db.put(keyy, b'y') self.db.put(keyz, b'z') + + def test_prefix_iterkeys(self): + self._fill_db() self.assertEqual(b'x', self.db.get(b'00001.x')) self.assertEqual(b'y', self.db.get(b'00001.y')) self.assertEqual(b'z', self.db.get(b'00001.z')) - it = self.db.iterkeys(prefix=b'00002') + it = self.db.iterkeys() it.seek(b'00002') ref = [b'00002.x', b'00002.y', b'00002.z'] - self.assertEqual(ref, list(it)) + ret = takewhile(lambda key: key.startswith(b'00002'), it) + self.assertEqual(ref, list(ret)) + + def test_prefix_iteritems(self): + self._fill_db() + + it = self.db.iteritems() + it.seek(b'00002') + + ref = {'00002.z': 'z', '00002.y': 'y', '00002.x': 'x'} + ret = takewhile(lambda item: item[0].startswith(b'00002'), it) + self.assertEqual(ref, dict(ret))