Adapt to the changes of 'prefix seek api'

See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
This commit is contained in:
hofmockel 2014-05-31 20:21:42 +02:00
parent 5f8602391b
commit f897bf4911
6 changed files with 63 additions and 44 deletions

View file

@ -72,7 +72,7 @@ Database object
:param sync: See :py:meth:`rocksdb.DB.put` :param sync: See :py:meth:`rocksdb.DB.put`
:param disable_wal: See :py:meth:`rocksdb.DB.put` :param disable_wal: See :py:meth:`rocksdb.DB.put`
.. py:method:: get(key, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: get(key, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
:param bytes key: Name to get :param bytes key: Name to get
@ -85,10 +85,6 @@ Database object
read for this iteration be cached in memory? read for this iteration be cached in memory?
Callers may wish to set this field to ``False`` for bulk scans. Callers may wish to set this field to ``False`` for bulk scans.
:param bool prefix_seek:
If this option is set and memtable implementation allows.
Seek might only return keys with the same prefix as the seek-key
:param snapshot: :param snapshot:
If not ``None``, read as of the supplied snapshot If not ``None``, read as of the supplied snapshot
(which must belong to the DB that is being read and which must (which must belong to the DB that is being read and which must
@ -107,7 +103,7 @@ Database object
:returns: ``None`` if not found, else the value for this key :returns: ``None`` if not found, else the value for this key
.. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
:param keys: Keys to fetch :param keys: Keys to fetch
:type keys: list of bytes :type keys: list of bytes
@ -123,7 +119,7 @@ Database object
keys will not be "de-duplicated". keys will not be "de-duplicated".
Duplicate keys will return duplicate values in order. Duplicate keys will return duplicate values in order.
.. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
If the key definitely does not exist in the database, then this method If the key definitely does not exist in the database, then this method
returns ``False``, else ``True``. If the caller wants to obtain value returns ``False``, else ``True``. If the caller wants to obtain value
@ -142,12 +138,10 @@ Database object
* ``(True, <data>)`` if key is found and value in memory and ``fetch=True`` * ``(True, <data>)`` if key is found and value in memory and ``fetch=True``
* ``(False, None)`` if key is not found * ``(False, None)`` if key is not found
.. py:method:: iterkeys(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: iterkeys(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the keys Iterate over the keys
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get` For other params see :py:meth:`rocksdb.DB.get`
:returns: :returns:
@ -156,12 +150,10 @@ Database object
:rtype: :py:class:`rocksdb.BaseIterator` :rtype: :py:class:`rocksdb.BaseIterator`
.. py:method:: itervalues(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: itervalues(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the values Iterate over the values
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get` For other params see :py:meth:`rocksdb.DB.get`
:returns: :returns:
@ -170,12 +162,10 @@ Database object
:rtype: :py:class:`rocksdb.BaseIterator` :rtype: :py:class:`rocksdb.BaseIterator`
.. py:method:: iteritems(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") .. py:method:: iteritems(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
Iterate over the items Iterate over the items
:param bytes prefix: Not implemented yet
For other params see :py:meth:`rocksdb.DB.get` For other params see :py:meth:`rocksdb.DB.get`
:returns: :returns:

View file

@ -4,6 +4,34 @@ Changelog
Version 0.3 Version 0.3
----------- -----------
Backward Incompatible Changes:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
**Prefix Seeks:**
According to https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes,
all prefix-related parameters on ``ReadOptions`` have been removed.
RocksDB now detects whether ``Options.prefix_extractor`` is set and, if so, uses
prefix seeks automatically. This leads to the following changes in pyrocksdb.
* DB.iterkeys, DB.itervalues, DB.iteritems have *no* ``prefix`` parameter anymore.
* DB.get, DB.multi_get, DB.key_may_exist, DB.iterkeys, DB.itervalues, DB.iteritems
have *no* ``prefix_seek`` parameter anymore.
As a consequence, all iterators now always walk to the *end* of the database.
If you need to stay within a prefix, you have to stop the iteration yourself.
For DB.iterkeys and DB.iteritems, ``itertools.takewhile`` is a possible solution. ::
from itertools import takewhile
it = self.db.iterkeys()
it.seek(b'00002')
print list(takewhile(lambda key: key.startswith(b'00002'), it))
it = self.db.iteritems()
it.seek(b'00002')
print dict(takewhile(lambda item: item[0].startswith(b'00002'), it))
Version 0.2 Version 0.2
----------- -----------

View file

@ -236,11 +236,13 @@ So always the first 5 bytes are used as the prefix ::
db.put(b'00003.y', b'y') db.put(b'00003.y', b'y')
db.put(b'00003.z', b'z') db.put(b'00003.z', b'z')
it = db.iteritems(prefix=b'00002') prefix = b'00002'
it.seek(b'00002')
it = db.iteritems()
it.seek(prefix)
# prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'} # prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'}
print dict(it) print dict(itertools.takewhile(lambda item: item[0].startswith(prefix), it))
Backup And Restore Backup And Restore

View file

@ -1461,40 +1461,36 @@ cdef class DB(object):
return (exists, None) return (exists, None)
def iterkeys(self, prefix=None, *args, **kwargs): def iterkeys(self, *args, **kwargs):
cdef options.ReadOptions opts cdef options.ReadOptions opts
cdef KeysIterator it cdef KeysIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = KeysIterator(self) it = KeysIterator(self)
it.set_prefix(opts, prefix)
with nogil: with nogil:
it.ptr = self.db.NewIterator(opts) it.ptr = self.db.NewIterator(opts)
return it return it
def itervalues(self, prefix=None, *args, **kwargs): def itervalues(self, *args, **kwargs):
cdef options.ReadOptions opts cdef options.ReadOptions opts
cdef ValuesIterator it cdef ValuesIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = ValuesIterator(self) it = ValuesIterator(self)
it.set_prefix(opts, prefix)
with nogil: with nogil:
it.ptr = self.db.NewIterator(opts) it.ptr = self.db.NewIterator(opts)
return it return it
def iteritems(self, prefix=None, *args, **kwargs): def iteritems(self, *args, **kwargs):
cdef options.ReadOptions opts cdef options.ReadOptions opts
cdef ItemsIterator it cdef ItemsIterator it
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
it = ItemsIterator(self) it = ItemsIterator(self)
it.set_prefix(opts, prefix)
with nogil: with nogil:
it.ptr = self.db.NewIterator(opts) it.ptr = self.db.NewIterator(opts)
@ -1541,7 +1537,6 @@ cdef class DB(object):
def __parse_read_opts( def __parse_read_opts(
verify_checksums=False, verify_checksums=False,
fill_cache=True, fill_cache=True,
prefix_seek=False,
snapshot=None, snapshot=None,
read_tier="all"): read_tier="all"):
@ -1552,7 +1547,6 @@ cdef class DB(object):
cdef options.ReadOptions opts cdef options.ReadOptions opts
opts.verify_checksums = py_opts['verify_checksums'] opts.verify_checksums = py_opts['verify_checksums']
opts.fill_cache = py_opts['fill_cache'] opts.fill_cache = py_opts['fill_cache']
opts.prefix_seek = py_opts['prefix_seek']
if py_opts['snapshot'] is not None: if py_opts['snapshot'] is not None:
opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr
@ -1591,9 +1585,6 @@ cdef class Snapshot(object):
cdef class BaseIterator(object): cdef class BaseIterator(object):
cdef iterator.Iterator* ptr cdef iterator.Iterator* ptr
cdef DB db cdef DB db
# To keep a reference to the prefix
cdef object prefix
cdef Slice c_prefix
def __cinit__(self, DB db): def __cinit__(self, DB db):
self.db = db self.db = db
@ -1619,14 +1610,6 @@ cdef class BaseIterator(object):
def __reversed__(self): def __reversed__(self):
return ReversedIterator(self) return ReversedIterator(self)
cdef set_prefix(self, options.ReadOptions& opts, object prefix=None):
if prefix is None:
return
self.c_prefix = bytes_to_slice(prefix)
self.prefix = prefix
opts.prefix = cython.address(self.c_prefix)
cpdef seek_to_first(self): cpdef seek_to_first(self):
with nogil: with nogil:
self.ptr.SeekToFirst() self.ptr.SeekToFirst()

View file

@ -123,8 +123,6 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
cdef cppclass ReadOptions: cdef cppclass ReadOptions:
cpp_bool verify_checksums cpp_bool verify_checksums
cpp_bool fill_cache cpp_bool fill_cache
cpp_bool prefix_seek
const Slice* prefix
const Snapshot* snapshot const Snapshot* snapshot
ReadTier read_tier ReadTier read_tier

View file

@ -3,6 +3,7 @@ import shutil
import gc import gc
import unittest import unittest
import rocksdb import rocksdb
from itertools import takewhile
def int_to_bytes(ob): def int_to_bytes(ob):
return str(ob).encode('ascii') return str(ob).encode('ascii')
@ -300,7 +301,10 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
self._clean() self._clean()
self.db = rocksdb.DB('/tmp/test', opts) self.db = rocksdb.DB('/tmp/test', opts)
def test_prefix(self): def tearDown(self):
self._close_db()
def _fill_db(self):
for x in range(3000): for x in range(3000):
keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x' keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x'
keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y' keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y'
@ -309,12 +313,26 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
self.db.put(keyy, b'y') self.db.put(keyy, b'y')
self.db.put(keyz, b'z') self.db.put(keyz, b'z')
def test_prefix_iterkeys(self):
self._fill_db()
self.assertEqual(b'x', self.db.get(b'00001.x')) self.assertEqual(b'x', self.db.get(b'00001.x'))
self.assertEqual(b'y', self.db.get(b'00001.y')) self.assertEqual(b'y', self.db.get(b'00001.y'))
self.assertEqual(b'z', self.db.get(b'00001.z')) self.assertEqual(b'z', self.db.get(b'00001.z'))
it = self.db.iterkeys(prefix=b'00002') it = self.db.iterkeys()
it.seek(b'00002') it.seek(b'00002')
ref = [b'00002.x', b'00002.y', b'00002.z'] ref = [b'00002.x', b'00002.y', b'00002.z']
self.assertEqual(ref, list(it)) ret = takewhile(lambda key: key.startswith(b'00002'), it)
self.assertEqual(ref, list(ret))
def test_prefix_iteritems(self):
    """Seek to a prefix and verify that only the items of that prefix are seen.

    The iterator itself is not bounded by the prefix, so the iteration is
    cut off with ``takewhile`` as soon as a key no longer starts with it.
    """
    self._fill_db()

    prefix = b'00002'
    it = self.db.iteritems()
    it.seek(prefix)

    # The database is filled and read with bytes, so the reference has to
    # use bytes literals as well; str literals never compare equal to bytes
    # on Python 3.
    ref = {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'}
    ret = takewhile(lambda item: item[0].startswith(prefix), it)
    self.assertEqual(ref, dict(ret))