Adapt to the changes of 'prefix seek api'

See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
This commit is contained in:
hofmockel 2014-05-31 20:21:42 +02:00
parent 5f8602391b
commit f897bf4911
6 changed files with 63 additions and 44 deletions

View file

@ -72,7 +72,7 @@ Database object
:param sync: See :py:meth:`rocksdb.DB.put`
:param disable_wal: See :py:meth:`rocksdb.DB.put`
.. py:method:: get(key, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: get(key, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
:param bytes key: Name to get
@ -85,10 +85,6 @@ Database object
read for this iteration be cached in memory?
Callers may wish to set this field to ``False`` for bulk scans.
:param bool prefix_seek:
If this option is set and memtable implementation allows.
Seek might only return keys with the same prefix as the seek-key
:param snapshot:
If not ``None``, read as of the supplied snapshot
(which must belong to the DB that is being read and which must
@ -107,7 +103,7 @@ Database object
:returns: ``None`` if not found, else the value for this key
.. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
:param keys: Keys to fetch
:type keys: list of bytes
@ -123,7 +119,7 @@ Database object
keys will not be "de-duplicated".
Duplicate keys will return duplicate values in order.
.. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
If the key definitely does not exist in the database, then this method
returns ``False``, else ``True``. If the caller wants to obtain value
@ -142,12 +138,10 @@ Database object
* ``(True, <data>)`` if key is found and value in memory and ``fetch=True``
* ``(False, None)`` if key is not found
.. py:method:: iterkeys(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: iterkeys(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the keys
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get`
:returns:
@ -156,12 +150,10 @@ Database object
:rtype: :py:class:`rocksdb.BaseIterator`
.. py:method:: itervalues(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: itervalues(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the values
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get`
:returns:
@ -170,12 +162,10 @@ Database object
:rtype: :py:class:`rocksdb.BaseIterator`
.. py:method:: iteritems(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
.. py:method:: iteritems(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the items
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get`
:returns:

View file

@ -4,6 +4,34 @@ Changelog
Version 0.3
-----------
Backward Incompatible Changes:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Prefix Seeks:**
According to this page https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes,
all the prefix-related parameters on ``ReadOptions`` have been removed.
RocksDB now detects whether ``Options.prefix_extractor`` is set and, if so, uses
prefix seeks automatically. This leads to the following changes in pyrocksdb.
* DB.iterkeys, DB.itervalues, DB.iteritems have *no* ``prefix`` parameter anymore.
* DB.get, DB.multi_get, DB.key_may_exist, DB.iterkeys, DB.itervalues, DB.iteritems
have *no* ``prefix_seek`` parameter anymore.
This means that all iterators now always walk to the *end* of the database.
So if you need to stay within a prefix, write your own code to ensure that.
For DB.iterkeys and DB.iteritems ``itertools.takewhile`` is a possible solution. ::
from itertools import takewhile
it = self.db.iterkeys()
it.seek(b'00002')
print list(takewhile(lambda key: key.startswith(b'00002'), it))
it = self.db.iteritems()
it.seek(b'00002')
print dict(takewhile(lambda item: item[0].startswith(b'00002'), it))
Version 0.2
-----------

View file

@ -236,11 +236,13 @@ So always the first 5 bytes are used as the prefix ::
db.put(b'00003.y', b'y')
db.put(b'00003.z', b'z')
it = db.iteritems(prefix=b'00002')
it.seek(b'00002')
prefix = b'00002'
it = db.iteritems()
it.seek(prefix)
# prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'}
print dict(it)
print dict(itertools.takewhile(lambda item: item[0].startswith(prefix), it))
Backup And Restore

View file

@ -1461,40 +1461,36 @@ cdef class DB(object):
return (exists, None)
def iterkeys(self, prefix=None, *args, **kwargs):
def iterkeys(self, *args, **kwargs):
cdef options.ReadOptions opts
cdef KeysIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = KeysIterator(self)
it.set_prefix(opts, prefix)
with nogil:
it.ptr = self.db.NewIterator(opts)
return it
def itervalues(self, prefix=None, *args, **kwargs):
def itervalues(self, *args, **kwargs):
cdef options.ReadOptions opts
cdef ValuesIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = ValuesIterator(self)
it.set_prefix(opts, prefix)
with nogil:
it.ptr = self.db.NewIterator(opts)
return it
def iteritems(self, prefix=None, *args, **kwargs):
def iteritems(self, *args, **kwargs):
cdef options.ReadOptions opts
cdef ItemsIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = ItemsIterator(self)
it.set_prefix(opts, prefix)
with nogil:
it.ptr = self.db.NewIterator(opts)
@ -1541,7 +1537,6 @@ cdef class DB(object):
def __parse_read_opts(
verify_checksums=False,
fill_cache=True,
prefix_seek=False,
snapshot=None,
read_tier="all"):
@ -1552,7 +1547,6 @@ cdef class DB(object):
cdef options.ReadOptions opts
opts.verify_checksums = py_opts['verify_checksums']
opts.fill_cache = py_opts['fill_cache']
opts.prefix_seek = py_opts['prefix_seek']
if py_opts['snapshot'] is not None:
opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr
@ -1591,9 +1585,6 @@ cdef class Snapshot(object):
cdef class BaseIterator(object):
cdef iterator.Iterator* ptr
cdef DB db
# To keep a reference to the prefix
cdef object prefix
cdef Slice c_prefix
def __cinit__(self, DB db):
self.db = db
@ -1619,14 +1610,6 @@ cdef class BaseIterator(object):
def __reversed__(self):
return ReversedIterator(self)
cdef set_prefix(self, options.ReadOptions& opts, object prefix=None):
if prefix is None:
return
self.c_prefix = bytes_to_slice(prefix)
self.prefix = prefix
opts.prefix = cython.address(self.c_prefix)
cpdef seek_to_first(self):
with nogil:
self.ptr.SeekToFirst()

View file

@ -123,8 +123,6 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
cdef cppclass ReadOptions:
cpp_bool verify_checksums
cpp_bool fill_cache
cpp_bool prefix_seek
const Slice* prefix
const Snapshot* snapshot
ReadTier read_tier

View file

@ -3,6 +3,7 @@ import shutil
import gc
import unittest
import rocksdb
from itertools import takewhile
def int_to_bytes(ob):
return str(ob).encode('ascii')
@ -300,7 +301,10 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
self._clean()
self.db = rocksdb.DB('/tmp/test', opts)
def test_prefix(self):
def tearDown(self):
self._close_db()
def _fill_db(self):
for x in range(3000):
keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x'
keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y'
@ -309,12 +313,26 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
self.db.put(keyy, b'y')
self.db.put(keyz, b'z')
def test_prefix_iterkeys(self):
self._fill_db()
self.assertEqual(b'x', self.db.get(b'00001.x'))
self.assertEqual(b'y', self.db.get(b'00001.y'))
self.assertEqual(b'z', self.db.get(b'00001.z'))
it = self.db.iterkeys(prefix=b'00002')
it = self.db.iterkeys()
it.seek(b'00002')
ref = [b'00002.x', b'00002.y', b'00002.z']
self.assertEqual(ref, list(it))
ret = takewhile(lambda key: key.startswith(b'00002'), it)
self.assertEqual(ref, list(ret))
def test_prefix_iteritems(self):
self._fill_db()
it = self.db.iteritems()
it.seek(b'00002')
ref = {'00002.z': 'z', '00002.y': 'y', '00002.x': 'x'}
ret = takewhile(lambda item: item[0].startswith(b'00002'), it)
self.assertEqual(ref, dict(ret))