Adapt to the changes of 'prefix seek api'
See https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes
This commit is contained in:
parent
5f8602391b
commit
f897bf4911
6 changed files with 63 additions and 44 deletions
|
@ -72,7 +72,7 @@ Database object
|
||||||
:param sync: See :py:meth:`rocksdb.DB.put`
|
:param sync: See :py:meth:`rocksdb.DB.put`
|
||||||
:param disable_wal: See :py:meth:`rocksdb.DB.put`
|
:param disable_wal: See :py:meth:`rocksdb.DB.put`
|
||||||
|
|
||||||
.. py:method:: get(key, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: get(key, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
:param bytes key: Name to get
|
:param bytes key: Name to get
|
||||||
|
|
||||||
|
@ -85,10 +85,6 @@ Database object
|
||||||
read for this iteration be cached in memory?
|
read for this iteration be cached in memory?
|
||||||
Callers may wish to set this field to ``False`` for bulk scans.
|
Callers may wish to set this field to ``False`` for bulk scans.
|
||||||
|
|
||||||
:param bool prefix_seek:
|
|
||||||
If this option is set and memtable implementation allows.
|
|
||||||
Seek might only return keys with the same prefix as the seek-key
|
|
||||||
|
|
||||||
:param snapshot:
|
:param snapshot:
|
||||||
If not ``None``, read as of the supplied snapshot
|
If not ``None``, read as of the supplied snapshot
|
||||||
(which must belong to the DB that is being read and which must
|
(which must belong to the DB that is being read and which must
|
||||||
|
@ -107,7 +103,7 @@ Database object
|
||||||
|
|
||||||
:returns: ``None`` if not found, else the value for this key
|
:returns: ``None`` if not found, else the value for this key
|
||||||
|
|
||||||
.. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
:param keys: Keys to fetch
|
:param keys: Keys to fetch
|
||||||
:type keys: list of bytes
|
:type keys: list of bytes
|
||||||
|
@ -123,7 +119,7 @@ Database object
|
||||||
keys will not be "de-duplicated".
|
keys will not be "de-duplicated".
|
||||||
Duplicate keys will return duplicate values in order.
|
Duplicate keys will return duplicate values in order.
|
||||||
|
|
||||||
.. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
If the key definitely does not exist in the database, then this method
|
If the key definitely does not exist in the database, then this method
|
||||||
returns ``False``, else ``True``. If the caller wants to obtain value
|
returns ``False``, else ``True``. If the caller wants to obtain value
|
||||||
|
@ -142,12 +138,10 @@ Database object
|
||||||
* ``(True, <data>)`` if key is found and value in memory and ``fetch=True``
|
* ``(True, <data>)`` if key is found and value in memory and ``fetch=True``
|
||||||
* ``(False, None)`` if key is not found
|
* ``(False, None)`` if key is not found
|
||||||
|
|
||||||
.. py:method:: iterkeys(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: iterkeys(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
Iterate over the keys
|
Iterate over the keys
|
||||||
|
|
||||||
:param bytes prefix: Not implemented yet
|
|
||||||
|
|
||||||
For other params see :py:meth:`rocksdb.DB.get`
|
For other params see :py:meth:`rocksdb.DB.get`
|
||||||
|
|
||||||
:returns:
|
:returns:
|
||||||
|
@ -156,12 +150,10 @@ Database object
|
||||||
|
|
||||||
:rtype: :py:class:`rocksdb.BaseIterator`
|
:rtype: :py:class:`rocksdb.BaseIterator`
|
||||||
|
|
||||||
.. py:method:: itervalues(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: itervalues(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
Iterate over the values
|
Iterate over the values
|
||||||
|
|
||||||
:param bytes prefix: Not implemented yet
|
|
||||||
|
|
||||||
For other params see :py:meth:`rocksdb.DB.get`
|
For other params see :py:meth:`rocksdb.DB.get`
|
||||||
|
|
||||||
:returns:
|
:returns:
|
||||||
|
@ -170,12 +162,10 @@ Database object
|
||||||
|
|
||||||
:rtype: :py:class:`rocksdb.BaseIterator`
|
:rtype: :py:class:`rocksdb.BaseIterator`
|
||||||
|
|
||||||
.. py:method:: iteritems(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
|
.. py:method:: iteritems(fetch=False, verify_checksums=False, fill_cache=True, snapshot=None, read_tier="all")
|
||||||
|
|
||||||
Iterate over the items
|
Iterate over the items
|
||||||
|
|
||||||
:param bytes prefix: Not implemented yet
|
|
||||||
|
|
||||||
For other params see :py:meth:`rocksdb.DB.get`
|
For other params see :py:meth:`rocksdb.DB.get`
|
||||||
|
|
||||||
:returns:
|
:returns:
|
||||||
|
|
|
@ -4,6 +4,34 @@ Changelog
|
||||||
Version 0.3
|
Version 0.3
|
||||||
-----------
|
-----------
|
||||||
|
|
||||||
|
Backward Incompatible Changes:
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
**Prefix Seeks:**
|
||||||
|
|
||||||
|
According to this page https://github.com/facebook/rocksdb/wiki/Prefix-Seek-API-Changes,
|
||||||
|
all the prefix-related parameters on ``ReadOptions`` have been removed.
|
||||||
|
RocksDB now detects whether ``Options.prefix_extractor`` is set and, if so, uses
|
||||||
|
prefix-seeks automatically. This results in the following changes to pyrocksdb.
|
||||||
|
|
||||||
|
* DB.iterkeys, DB.itervalues, DB.iteritems have *no* ``prefix`` parameter anymore.
|
||||||
|
* DB.get, DB.multi_get, DB.key_may_exist, DB.iterkeys, DB.itervalues, DB.iteritems
|
||||||
|
have *no* ``prefix_seek`` parameter anymore.
|
||||||
|
|
||||||
|
This means that all iterators now always walk to the *end* of the database.
|
||||||
|
So if you need to stay within a prefix, write your own code to ensure that.
|
||||||
|
For DB.iterkeys and DB.iteritems ``itertools.takewhile`` is a possible solution. ::
|
||||||
|
|
||||||
|
from itertools import takewhile
|
||||||
|
|
||||||
|
it = self.db.iterkeys()
|
||||||
|
it.seek(b'00002')
|
||||||
|
print list(takewhile(lambda key: key.startswith(b'00002'), it))
|
||||||
|
|
||||||
|
it = self.db.iteritems()
|
||||||
|
it.seek(b'00002')
|
||||||
|
print dict(takewhile(lambda item: item[0].startswith(b'00002'), it))
|
||||||
|
|
||||||
|
|
||||||
Version 0.2
|
Version 0.2
|
||||||
-----------
|
-----------
|
||||||
|
|
|
@ -236,11 +236,13 @@ So always the first 5 bytes are used as the prefix ::
|
||||||
db.put(b'00003.y', b'y')
|
db.put(b'00003.y', b'y')
|
||||||
db.put(b'00003.z', b'z')
|
db.put(b'00003.z', b'z')
|
||||||
|
|
||||||
it = db.iteritems(prefix=b'00002')
|
prefix = b'00002'
|
||||||
it.seek(b'00002')
|
|
||||||
|
it = db.iteritems()
|
||||||
|
it.seek(prefix)
|
||||||
|
|
||||||
# prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'}
|
# prints {b'00002.z': b'z', b'00002.y': b'y', b'00002.x': b'x'}
|
||||||
print dict(it)
|
print dict(itertools.takewhile(lambda item: item[0].startswith(prefix), it))
|
||||||
|
|
||||||
|
|
||||||
Backup And Restore
|
Backup And Restore
|
||||||
|
|
|
@ -1461,40 +1461,36 @@ cdef class DB(object):
|
||||||
|
|
||||||
return (exists, None)
|
return (exists, None)
|
||||||
|
|
||||||
def iterkeys(self, prefix=None, *args, **kwargs):
|
def iterkeys(self, *args, **kwargs):
|
||||||
cdef options.ReadOptions opts
|
cdef options.ReadOptions opts
|
||||||
cdef KeysIterator it
|
cdef KeysIterator it
|
||||||
|
|
||||||
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
||||||
|
|
||||||
it = KeysIterator(self)
|
it = KeysIterator(self)
|
||||||
it.set_prefix(opts, prefix)
|
|
||||||
|
|
||||||
with nogil:
|
with nogil:
|
||||||
it.ptr = self.db.NewIterator(opts)
|
it.ptr = self.db.NewIterator(opts)
|
||||||
return it
|
return it
|
||||||
|
|
||||||
def itervalues(self, prefix=None, *args, **kwargs):
|
def itervalues(self, *args, **kwargs):
|
||||||
cdef options.ReadOptions opts
|
cdef options.ReadOptions opts
|
||||||
cdef ValuesIterator it
|
cdef ValuesIterator it
|
||||||
|
|
||||||
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
||||||
|
|
||||||
it = ValuesIterator(self)
|
it = ValuesIterator(self)
|
||||||
it.set_prefix(opts, prefix)
|
|
||||||
|
|
||||||
with nogil:
|
with nogil:
|
||||||
it.ptr = self.db.NewIterator(opts)
|
it.ptr = self.db.NewIterator(opts)
|
||||||
return it
|
return it
|
||||||
|
|
||||||
def iteritems(self, prefix=None, *args, **kwargs):
|
def iteritems(self, *args, **kwargs):
|
||||||
cdef options.ReadOptions opts
|
cdef options.ReadOptions opts
|
||||||
cdef ItemsIterator it
|
cdef ItemsIterator it
|
||||||
|
|
||||||
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs))
|
||||||
|
|
||||||
it = ItemsIterator(self)
|
it = ItemsIterator(self)
|
||||||
it.set_prefix(opts, prefix)
|
|
||||||
|
|
||||||
with nogil:
|
with nogil:
|
||||||
it.ptr = self.db.NewIterator(opts)
|
it.ptr = self.db.NewIterator(opts)
|
||||||
|
@ -1541,7 +1537,6 @@ cdef class DB(object):
|
||||||
def __parse_read_opts(
|
def __parse_read_opts(
|
||||||
verify_checksums=False,
|
verify_checksums=False,
|
||||||
fill_cache=True,
|
fill_cache=True,
|
||||||
prefix_seek=False,
|
|
||||||
snapshot=None,
|
snapshot=None,
|
||||||
read_tier="all"):
|
read_tier="all"):
|
||||||
|
|
||||||
|
@ -1552,7 +1547,6 @@ cdef class DB(object):
|
||||||
cdef options.ReadOptions opts
|
cdef options.ReadOptions opts
|
||||||
opts.verify_checksums = py_opts['verify_checksums']
|
opts.verify_checksums = py_opts['verify_checksums']
|
||||||
opts.fill_cache = py_opts['fill_cache']
|
opts.fill_cache = py_opts['fill_cache']
|
||||||
opts.prefix_seek = py_opts['prefix_seek']
|
|
||||||
if py_opts['snapshot'] is not None:
|
if py_opts['snapshot'] is not None:
|
||||||
opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr
|
opts.snapshot = (<Snapshot?>(py_opts['snapshot'])).ptr
|
||||||
|
|
||||||
|
@ -1591,9 +1585,6 @@ cdef class Snapshot(object):
|
||||||
cdef class BaseIterator(object):
|
cdef class BaseIterator(object):
|
||||||
cdef iterator.Iterator* ptr
|
cdef iterator.Iterator* ptr
|
||||||
cdef DB db
|
cdef DB db
|
||||||
# To keep a reference to the prefix
|
|
||||||
cdef object prefix
|
|
||||||
cdef Slice c_prefix
|
|
||||||
|
|
||||||
def __cinit__(self, DB db):
|
def __cinit__(self, DB db):
|
||||||
self.db = db
|
self.db = db
|
||||||
|
@ -1619,14 +1610,6 @@ cdef class BaseIterator(object):
|
||||||
def __reversed__(self):
|
def __reversed__(self):
|
||||||
return ReversedIterator(self)
|
return ReversedIterator(self)
|
||||||
|
|
||||||
cdef set_prefix(self, options.ReadOptions& opts, object prefix=None):
|
|
||||||
if prefix is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
self.c_prefix = bytes_to_slice(prefix)
|
|
||||||
self.prefix = prefix
|
|
||||||
opts.prefix = cython.address(self.c_prefix)
|
|
||||||
|
|
||||||
cpdef seek_to_first(self):
|
cpdef seek_to_first(self):
|
||||||
with nogil:
|
with nogil:
|
||||||
self.ptr.SeekToFirst()
|
self.ptr.SeekToFirst()
|
||||||
|
|
|
@ -123,8 +123,6 @@ cdef extern from "rocksdb/options.h" namespace "rocksdb":
|
||||||
cdef cppclass ReadOptions:
|
cdef cppclass ReadOptions:
|
||||||
cpp_bool verify_checksums
|
cpp_bool verify_checksums
|
||||||
cpp_bool fill_cache
|
cpp_bool fill_cache
|
||||||
cpp_bool prefix_seek
|
|
||||||
const Slice* prefix
|
|
||||||
const Snapshot* snapshot
|
const Snapshot* snapshot
|
||||||
ReadTier read_tier
|
ReadTier read_tier
|
||||||
|
|
||||||
|
|
|
@ -3,6 +3,7 @@ import shutil
|
||||||
import gc
|
import gc
|
||||||
import unittest
|
import unittest
|
||||||
import rocksdb
|
import rocksdb
|
||||||
|
from itertools import takewhile
|
||||||
|
|
||||||
def int_to_bytes(ob):
|
def int_to_bytes(ob):
|
||||||
return str(ob).encode('ascii')
|
return str(ob).encode('ascii')
|
||||||
|
@ -300,7 +301,10 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
|
||||||
self._clean()
|
self._clean()
|
||||||
self.db = rocksdb.DB('/tmp/test', opts)
|
self.db = rocksdb.DB('/tmp/test', opts)
|
||||||
|
|
||||||
def test_prefix(self):
|
def tearDown(self):
|
||||||
|
self._close_db()
|
||||||
|
|
||||||
|
def _fill_db(self):
|
||||||
for x in range(3000):
|
for x in range(3000):
|
||||||
keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x'
|
keyx = hex(x)[2:].zfill(5).encode('utf8') + b'.x'
|
||||||
keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y'
|
keyy = hex(x)[2:].zfill(5).encode('utf8') + b'.y'
|
||||||
|
@ -309,12 +313,26 @@ class TestPrefixExtractor(unittest.TestCase, TestHelper):
|
||||||
self.db.put(keyy, b'y')
|
self.db.put(keyy, b'y')
|
||||||
self.db.put(keyz, b'z')
|
self.db.put(keyz, b'z')
|
||||||
|
|
||||||
|
|
||||||
|
def test_prefix_iterkeys(self):
|
||||||
|
self._fill_db()
|
||||||
self.assertEqual(b'x', self.db.get(b'00001.x'))
|
self.assertEqual(b'x', self.db.get(b'00001.x'))
|
||||||
self.assertEqual(b'y', self.db.get(b'00001.y'))
|
self.assertEqual(b'y', self.db.get(b'00001.y'))
|
||||||
self.assertEqual(b'z', self.db.get(b'00001.z'))
|
self.assertEqual(b'z', self.db.get(b'00001.z'))
|
||||||
|
|
||||||
it = self.db.iterkeys(prefix=b'00002')
|
it = self.db.iterkeys()
|
||||||
it.seek(b'00002')
|
it.seek(b'00002')
|
||||||
|
|
||||||
ref = [b'00002.x', b'00002.y', b'00002.z']
|
ref = [b'00002.x', b'00002.y', b'00002.z']
|
||||||
self.assertEqual(ref, list(it))
|
ret = takewhile(lambda key: key.startswith(b'00002'), it)
|
||||||
|
self.assertEqual(ref, list(ret))
|
||||||
|
|
||||||
|
def test_prefix_iteritems(self):
|
||||||
|
self._fill_db()
|
||||||
|
|
||||||
|
it = self.db.iteritems()
|
||||||
|
it.seek(b'00002')
|
||||||
|
|
||||||
|
ref = {'00002.z': 'z', '00002.y': 'y', '00002.x': 'x'}
|
||||||
|
ret = takewhile(lambda item: item[0].startswith(b'00002'), it)
|
||||||
|
self.assertEqual(ref, dict(ret))
|
||||||
|
|
Loading…
Reference in a new issue