diff --git a/docs/api/database.rst b/docs/api/database.rst new file mode 100644 index 0000000..2ebab20 --- /dev/null +++ b/docs/api/database.rst @@ -0,0 +1,358 @@ +Database interactions +********************* + +Database object +=============== + +.. py:class:: rocksdb.DB + + .. py:method:: __init__(db_name, Options opts, read_only=False) + + :param string db_name: Name of the database to open + :param opts: Options for this specific database + :type opts: :py:class:`rocksdb.Options` + :param bool read_only: If ``True`` the database is opened read-only. + All DB calls which modify data will raise an + Exception. + + + .. py:method:: put(key, value, sync=False, disable_wal=False) + + Set the database entry for "key" to "value". + + :param string key: Name for this entry + :param string value: Data for this entry + :param bool sync: + If ``True``, the write will be flushed from the operating system + buffer cache (by calling WritableFile::Sync()) before the write + is considered complete. If this flag is true, writes will be + slower. + + If this flag is ``False``, and the machine crashes, some recent + writes may be lost. Note that if it is just the process that + crashes (i.e., the machine does not reboot), no writes will be + lost even if ``sync == False``. + + In other words, a DB write with ``sync == False`` has similar + crash semantics as the "write()" system call. A DB write + with ``sync == True`` has similar crash semantics to a "write()" + system call followed by "fdatasync()". + + :param bool disable_wal: + If ``True``, writes will not first go to the write ahead log, + and the write may got lost after a crash. + + .. py:method:: delete(key, sync=False, disable_wal=False) + + Remove the database entry for "key". + + :param string key: Name to delete + :param sync: See :py:meth:`rocksdb.DB.put` + :param disable_wal: See :py:meth:`rocksdb.DB.put` + :raises rocksdb.errors.NotFound: If the key did not exists + + .. py:method:: merge(key, value, sync=False, disable_wal=False) + + Merge the database entry for "key" with "value". + The semantics of this operation is determined by the user provided + merge_operator when opening DB. + + See :py:meth:`rocksdb.DB.put` for the parameters + + :raises: + :py:exc:`rocksdb.errors.NotSupported` if this is called and + no :py:attr:`rocksdb.Options.merge_operator` was set at creation + + + .. py:method:: write(batch, sync=False, disable_wal=False) + + Apply the specified updates to the database. + + :param rocksdb.WriteBatch batch: Batch to apply + :param sync: See :py:meth:`rocksdb.DB.put` + :param disable_wal: See :py:meth:`rocksdb.DB.put` + + .. py:method:: get(key, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all") + + :param string key: Name to get + + :param bool verify_checksums: + If ``True``, all data read from underlying storage will be + verified against corresponding checksums. + + :param bool fill_cache: + Should the "data block", "index block" or "filter block" + read for this iteration be cached in memory? + Callers may wish to set this field to ``False`` for bulk scans. + + :param bool prefix_seek: + If this option is set and memtable implementation allows. + Seek might only return keys with the same prefix as the seek-key + + :param snapshot: + If not ``None``, read as of the supplied snapshot + (which must belong to the DB that is being read and which must + not have been released). 
If it is ``None``, an implicit snapshot of the
+            state at the beginning of this read operation is used.
+        :type snapshot: :py:class:`rocksdb.Snapshot`
+
+        :param string read_tier:
+            Specify if this read request should process data that ALREADY
+            resides on a particular cache. If the required data is not
+            found in the specified cache,
+            then :py:exc:`rocksdb.errors.Incomplete` is raised.
+
+            | Use ``all`` if a fetch from disk is allowed.
+            | Use ``cache`` if only data from the cache is allowed.
+
+        :returns: ``None`` if not found, else the value for this key
+
+    .. py:method:: multi_get(keys, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
+
+        :param keys: Keys to fetch
+        :type keys: list of strings
+
+        For the other params see :py:meth:`rocksdb.DB.get`
+
+        :returns:
+            A ``dict`` mapping each requested key to its value, or to ``None``
+            if the key was not found
+
+        :raises: An error if the fetch for a single key fails
+
+        .. note::
+            keys will not be "de-duplicated".
+            Duplicate keys will return duplicate values, in order.
+
+    .. py:method:: key_may_exist(key, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
+
+        If the key definitely does not exist in the database, then this method
+        returns ``False``, else ``True``. If the caller wants to obtain the
+        value when the key is found in memory, ``fetch`` should be set to
+        ``True``.
+        This check is potentially lighter-weight than invoking DB::get().
+        One way to make this lighter-weight is to avoid doing any IO.
+
+        :param string key: Key to check
+        :param bool fetch: Also obtain the value if it is found in memory
+
+        For the other params see :py:meth:`rocksdb.DB.get`
+
+        :returns:
+            * ``(True, None)`` if key is found but value not in memory
+            * ``(True, None)`` if key is found and ``fetch=False``
+            * ``(True, value)`` if key is found, the value is in memory and ``fetch=True``
+            * ``(False, None)`` if key is not found
+
+    .. py:method:: iterkeys(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
+
+        Iterate over the keys.
+
+        :param string prefix: Not implemented yet
+
+        For other params see :py:meth:`rocksdb.DB.get`
+
+        :returns:
+            An iterator object which is not valid yet.
+            Call one of the seek methods of the iterator first to position it.
+
+        :rtype: :py:class:`rocksdb.BaseIterator`
+
+    .. py:method:: itervalues(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
+
+        Iterate over the values.
+
+        :param string prefix: Not implemented yet
+
+        For other params see :py:meth:`rocksdb.DB.get`
+
+        :returns:
+            An iterator object which is not valid yet.
+            Call one of the seek methods of the iterator first to position it.
+
+        :rtype: :py:class:`rocksdb.BaseIterator`
+
+    .. py:method:: iteritems(prefix=None, fetch=False, verify_checksums=False, fill_cache=True, prefix_seek=False, snapshot=None, read_tier="all")
+
+        Iterate over the items.
+
+        :param string prefix: Not implemented yet
+
+        For other params see :py:meth:`rocksdb.DB.get`
+
+        :returns:
+            An iterator object which is not valid yet.
+            Call one of the seek methods of the iterator first to position it.
+
+        :rtype: :py:class:`rocksdb.BaseIterator`
+
+    .. py:method:: snapshot()
+
+        Return a handle to the current DB state.
+        Iterators created with this handle will all observe a stable snapshot
+        of the current DB state.
+
+        :rtype: :py:class:`rocksdb.Snapshot`
+
+
+    .. 
py:method:: get_property(prop) + + DB implementations can export properties about their state + via this method. If "property" is a valid property understood by this + DB implementation, a string with its value is returned. + Otherwise ``None`` + + Valid property names include: + + * ``"rocksdb.num-files-at-level"``: return the number of files at level , + where is an ASCII representation of a level number (e.g. "0"). + + * ``"rocksdb.stats"``: returns a multi-line string that describes statistics + about the internal operation of the DB. + + * ``"rocksdb.sstables"``: returns a multi-line string that describes all + of the sstables that make up the db contents. + + .. py:method:: get_live_files_metadata() + + Returns a list of all table files. + + It returns a list of dict's were each dict has the following keys. + + ``name`` + Name of the file + + ``level`` + Level at which this file resides + + ``size`` + File size in bytes + + ``smallestkey`` + Smallest user defined key in the file + + ``largestkey`` + Largest user defined key in the file + + ``smallest_seqno`` + smallest seqno in file + + ``largest_seqno`` + largest seqno in file + + .. py:attribute:: options + + Returns the associated :py:class:`rocksdb.Options` instance. + + .. note:: + + Changes to this object have no effect anymore. + Consider this as read-only + +Iterator +======== + +.. py:class:: rocksdb.BaseIterator + + Base class for all iterators in this module. After creation a iterator is + invalid. Call one of the seek methods first before starting iteration + + .. py:method:: seek_to_first() + + Position at the first key in the source + + .. py:method:: seek_to_last() + + Position at the last key in the source + + .. py:method:: seek(key) + + :param string key: Position at the first key in the source that at or past + + Methods to support the python iterator protocol + + .. py:method:: __iter__() + .. py:method:: __next__() + .. py:method:: __reversed__() + +Snapshot +======== + +.. py:class:: rocksdb.Snapshot + + Opaque handler for a single Snapshot. + Snapshot is released if nobody holds a reference on it. + Retrieved via :py:meth:`rocksdb.DB.snapshot` + +WriteBatch +========== + +.. py:class:: rocksdb.WriteBatch + + WriteBatch holds a collection of updates to apply atomically to a DB. + + The updates are applied in the order in which they are added + to the WriteBatch. For example, the value of "key" will be "v3" + after the following batch is written:: + + batch = rocksdb.WriteBatch() + batch.put("key", "v1") + batch.delete("key") + batch.put("key", "v2") + batch.put("key", "v3") + + .. py:method:: __init__(data=None) + + Creates a WriteBatch. + + :param string data: + A serialized version of a previous WriteBatch. As retrieved + from a previous .data() call. If ``None`` a empty WriteBatch is + generated + + .. py:method:: put(key, value) + + Store the mapping "key->value" in the database. + + :param string key: Name of the entry to store + :param string value: Data of this entry + + .. py:method:: merge(key, value) + + Merge "value" with the existing value of "key" in the database. + + :param string key: Name of the entry to merge + :param string value: Data to merge + + .. py:method:: delete(key) + + If the database contains a mapping for "key", erase it. Else do nothing. + + :param string key: Key to erase + + .. py:method:: clear() + + Clear all updates buffered in this batch. + + .. py:method:: data() + + Retrieve the serialized version of this batch. + + :rtype: string + + .. 
py:method:: count() + + Returns the number of updates in the batch + + :rtype: int + +Errors +====== + +.. py:exception:: rocksdb.errors.NotFound +.. py:exception:: rocksdb.errors.Corruption +.. py:exception:: rocksdb.errors.NotSupported +.. py:exception:: rocksdb.errors.InvalidArgument +.. py:exception:: rocksdb.errors.RocksIOError +.. py:exception:: rocksdb.errors.MergeInProgress +.. py:exception:: rocksdb.errors.Incomplete + + diff --git a/docs/api/index.rst b/docs/api/index.rst new file mode 100644 index 0000000..8d13b20 --- /dev/null +++ b/docs/api/index.rst @@ -0,0 +1,10 @@ +Python driver for RocksDB +========================= + + .. py:module:: rocksdb + +.. toctree:: + + Options + Database + Interfaces diff --git a/docs/api/interfaces.rst b/docs/api/interfaces.rst new file mode 100644 index 0000000..804b7af --- /dev/null +++ b/docs/api/interfaces.rst @@ -0,0 +1,210 @@ +Interfaces +********** + +Comparator +========== + +.. py:class:: rocksdb.interfaces.Comparator + + A Comparator object provides a total order across slices that are + used as keys in an sstable or a database. A Comparator implementation + must be thread-safe since rocksdb may invoke its methods concurrently + from multiple threads. + + .. py:method:: compare(a, b) + + Three-way comparison. + + :param string a: First field to compare + :param string b: Second field to compare + :returns: * -1 if a < b + * 0 if a == b + * 1 if a > b + :rtype: ``int`` + + .. py:method:: name() + + The name of the comparator. Used to check for comparator + mismatches (i.e., a DB created with one comparator is + accessed using a different comparator). + + The client of this package should switch to a new name whenever + the comparator implementation changes in a way that will cause + the relative ordering of any two keys to change. + + Names starting with "rocksdb." are reserved and should not be used + by any clients of this package. + + :rtype: ``string`` + +Merge Operator +============== + + Essentially, a MergeOperator specifies the SEMANTICS of a merge, which only + client knows. It could be numeric addition, list append, string + concatenation, edit data structure, whatever. + The library, on the other hand, is concerned with the exercise of this + interface, at the right time (during get, iteration, compaction...) + + To use merge, the client needs to provide an object implementing one of + the following interfaces: + + * AssociativeMergeOperator - for most simple semantics (always take + two values, and merge them into one value, which is then put back + into rocksdb). + numeric addition and string concatenation are examples. + + * MergeOperator - the generic class for all the more complex operations. + One method (FullMerge) to merge a Put/Delete value with a merge operand. + Another method (PartialMerge) that merges two operands together. + This is especially useful if your key values have a complex structure but + you would still like to support client-specific incremental updates. + + AssociativeMergeOperator is simpler to implement. + MergeOperator is simply more powerful. + + See this page for more details + https://github.com/facebook/rocksdb/wiki/Merge-Operator + +AssociativeMergeOperator +------------------------ + +.. py:class:: rocksdb.interfaces.AssociativeMergeOperator + + .. 
py:method:: merge(key, existing_value, value) + + Gives the client a way to express the read -> modify -> write semantics + + :param string key: The key that's associated with this merge operation + :param string existing_value: The current value in the db. + ``None`` indicates the key does not exist + before this op + :param string value: The value to update/merge the existing_value with + + :returns: ``True`` and the new value on success. + All values passed in will be client-specific values. + So if this method returns false, it is because client + specified bad data or there was internal corruption. + The client should assume that this will be treated as an + error by the library. + + :rtype: ``(bool, string)`` + + .. py:method:: name() + + The name of the MergeOperator. Used to check for MergeOperator mismatches. + For example a DB created with one MergeOperator is accessed using a + different MergeOperator. + + :rtype: ``string`` + +MergeOperator +------------- + +.. py:class:: rocksdb.interfaces.MergeOperator + + .. py:method:: full_merge(key, existing_value, operand_list) + + Gives the client a way to express the read -> modify -> write semantics + + :param string key: The key that's associated with this merge operation. + Client could multiplex the merge operator based on it + if the key space is partitioned and different subspaces + refer to different types of data which have different + merge operation semantics + + :param string existing_value: The current value in the db. + ``None`` indicates the key does not exist + before this op + + :param operand_list: The sequence of merge operations to apply. + :type operand_list: list of strings + + :returns: ``True`` and the new value on success. + All values passed in will be client-specific values. + So if this method returns false, it is because client + specified bad data or there was internal corruption. + The client should assume that this will be treated as an + error by the library. + + :rtype: ``(bool, string)`` + + .. py:method:: partial_merge(key, left_operand, right_operand) + + This function performs merge(left_op, right_op) + when both the operands are themselves merge operation types + that you would have passed to a DB::Merge() call in the same order. + For example DB::Merge(key,left_op), followed by DB::Merge(key,right_op)). + + PartialMerge should combine them into a single merge operation that is + returned together with ``True`` + This new value should be constructed such that a call to + DB::Merge(key, new_value) would yield the same result as a call + to DB::Merge(key, left_op) followed by DB::Merge(key, right_op). + + If it is impossible or infeasible to combine the two operations, + return ``(False, None)`` The library will internally keep track of the + operations, and apply them in the correct order once a base-value + (a Put/Delete/End-of-Database) is seen. + + :param string key: the key that is associated with this merge operation. + :param string left_operand: First operand to merge + :param string right_operand: Second operand to merge + :rtype: ``(bool, string)`` + + .. note:: + + Presently there is no way to differentiate between error/corruption + and simply "return false". For now, the client should simply return + false in any case it cannot perform partial-merge, regardless of reason. + If there is corruption in the data, handle it in the FullMerge() function, + and return false there. + + .. py:method:: name() + + The name of the MergeOperator. Used to check for MergeOperator mismatches. 
+ For example a DB created with one MergeOperator is accessed using a + different MergeOperator. + + :rtype: ``string`` + +FilterPolicy +============ + +.. py:class:: rocksdb.interfaces.FilterPolicy + + .. py:method:: create_filter(keys) + + Create a bytestring which can act as a filter for keys. + + :param keys: list of keys (potentially with duplicates) + that are ordered according to the user supplied + comparator. + :type keys: list of strings + + :returns: A filter that summarizes keys + :rtype: ``string`` + + .. py:method:: key_may_match(key, filter) + + Check if the key is maybe in the filter. + + :param string key: Key for a single entry inside the database + :param string filter: Contains the data returned by a preceding call + to create_filter on this class + :returns: This method must return ``True`` if the key was in the list + of keys passed to create_filter(). + This method may return ``True`` or ``False`` if the key was + not on the list, but it should aim to return ``False`` with + a high probability. + :rtype: ``bool`` + + + .. py:method:: name() + + Return the name of this policy. Note that if the filter encoding + changes in an incompatible way, the name returned by this method + must be changed. Otherwise, old incompatible filters may be + passed to methods of this type. + + :rtype: ``string`` diff --git a/docs/api/options.rst b/docs/api/options.rst new file mode 100644 index 0000000..14d3579 --- /dev/null +++ b/docs/api/options.rst @@ -0,0 +1,748 @@ +Options creation +**************** + +Options object +============== + + +.. py:class:: rocksdb.Options + + .. IMPORTANT:: + + The default values mentioned here, describe the values of the + C++ library only. This wrapper does not set any default value + itself. So as soon as the rocksdb developers change a default value + this document could be outdated. So if you really depend on a default + value, double check it with the according version of the C++ library. + + | Most recent default values should be here + | https://github.com/facebook/rocksdb/blob/master/include/rocksdb/options.h + | https://github.com/facebook/rocksdb/blob/master/util/options.cc + + .. py:method:: __init__(**kwargs) + + All options mentioned below can also be passed as keyword-arguments in + the constructor. For example:: + + import rocksdb + + opts = rocksdb.Options(create_if_missing=True) + # is the same as + opts = rocksdb.Options() + opts.create_if_missing = True + + + .. py:attribute:: create_if_missing + + If ``True``, the database will be created if it is missing. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: error_if_exists + + If ``True``, an error is raised if the database already exists. + + | *Type:* ``bool`` + | *Default:* ``False`` + + + .. py:attribute:: paranoid_checks + + If ``True``, the implementation will do aggressive checking of the + data it is processing and will stop early if it detects any + errors. This may have unforeseen ramifications: for example, a + corruption of one DB entry may cause a large number of entries to + become unreadable or for the entire DB to become unopenable. + If any of the writes to the database fails (Put, Delete, Merge, Write), + the database will switch to read-only mode and fail all other + Write operations. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: write_buffer_size + + Amount of data to build up in memory (backed by an unsorted log + on disk) before converting to a sorted on-disk file. 
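+
+      A minimal sketch of setting this attribute; the 64 MB figure below is
+      only an illustration::
+
+          import rocksdb
+
+          opts = rocksdb.Options(create_if_missing=True)
+          # keep roughly 64 MB in the memtable before it is flushed
+          # to a sorted on-disk file
+          opts.write_buffer_size = 64 * 1024 * 1024
+          db = rocksdb.DB("test.db", opts)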
+ + Larger values increase performance, especially during bulk loads. + Up to max_write_buffer_number write buffers may be held in memory + at the same time, so you may wish to adjust this parameter to control + memory usage. Also, a larger write buffer will result in a longer recovery + time the next time the database is opened. + + | *Type:* ``int`` + | *Default:* ``4194304`` + + .. py:attribute:: max_write_buffer_number + + The maximum number of write buffers that are built up in memory. + The default is 2, so that when 1 write buffer is being flushed to + storage, new writes can continue to the other write buffer. + + | *Type:* ``int`` + | *Default:* ``2`` + + .. py:attribute:: min_write_buffer_number_to_merge + + The minimum number of write buffers that will be merged together + before writing to storage. If set to 1, then + all write buffers are fushed to L0 as individual files and this increases + read amplification because a get request has to check in all of these + files. Also, an in-memory merge may result in writing lesser + data to storage if there are duplicate records in each of these + individual write buffers. + + | *Type:* ``int`` + | *Default:* ``1`` + + .. py:attribute:: max_open_files + + Number of open files that can be used by the DB. You may need to + increase this if your database has a large working set (budget + one open file per 2MB of working set). + + | *Type:* ``int`` + | *Default:* ``1000`` + + .. py:attribute:: block_cache + + Control over blocks (user data is stored in a set of blocks, and + a block is the unit of reading from disk). + + If not ``None`` use the specified cache for blocks. + If ``None``, rocksdb will automatically create and use an 8MB internal cache. + + | *Type:* Instace of :py:class:`rocksdb.LRUCache` + | *Default:* ``None`` + + .. py:attribute:: block_cache_compressed + + If not ``None`` use the specified cache for compressed blocks. + If ``None``, rocksdb will not use a compressed block cache. + + | *Type:* Instace of :py:class:`rocksdb.LRUCache` + | *Default:* ``None`` + + .. py:attribute:: block_size + + Approximate size of user data packed per block. Note that the + block size specified here corresponds to uncompressed data. The + actual size of the unit read from disk may be smaller if + compression is enabled. This parameter can be changed dynamically. + + | *Type:* ``int`` + | *Default:* ``4096`` + + + .. py:attribute:: block_restart_interval + + Number of keys between restart points for delta encoding of keys. + This parameter can be changed dynamically. Most clients should + leave this parameter alone. + + | *Type:* ``int`` + | *Default:* ``16`` + + .. py:attribute:: compression + + Compress blocks using the specified compression algorithm. + This parameter can be changed dynamically. + + | *Type:* Member of :py:class:`rocksdb.CompressionType` + | *Default:* :py:attr:`rocksdb.CompressionType.snappy_compression` + + .. py:attribute:: whole_key_filtering + + If ``True``, place whole keys in the filter (not just prefixes). + This must generally be true for gets to be efficient. + + | *Type:* ``bool`` + | *Default:* ``True`` + + + .. py:attribute:: num_levels + + Number of levels for this database + + | *Type:* ``int`` + | *Default:* ``7`` + + + .. py:attribute:: level0_file_num_compaction_trigger + + Number of files to trigger level-0 compaction. A value <0 means that + level-0 compaction will not be triggered by number of files at all. + + | *Type:* ``int`` + | *Default:* ``4`` + + .. 
py:attribute:: level0_slowdown_writes_trigger + + Soft limit on number of level-0 files. We start slowing down writes at this + point. A value <0 means that no writing slow down will be triggered by + number of files in level-0. + + | *Type:* ``int`` + | *Default:* ``8`` + + .. py:attribute:: level0_stop_writes_trigger + + Maximum number of level-0 files. We stop writes at this point. + + | *Type:* ``int`` + | *Default:* ``12`` + + .. py:attribute:: max_mem_compaction_level + + Maximum level to which a new compacted memtable is pushed if it + does not create overlap. We try to push to level 2 to avoid the + relatively expensive level 0=>1 compactions and to avoid some + expensive manifest file operations. We do not push all the way to + the largest level since that can generate a lot of wasted disk + space if the same key space is being repeatedly overwritten. + + | *Type:* ``int`` + | *Default:* ``2`` + + + .. py:attribute:: target_file_size_base + + | Target file size for compaction. + | target_file_size_base is per-file size for level-1. + | Target file size for level L can be calculated by + | target_file_size_base * (target_file_size_multiplier ^ (L-1)). + + For example, if target_file_size_base is 2MB and + target_file_size_multiplier is 10, then each file on level-1 will + be 2MB, and each file on level 2 will be 20MB, + and each file on level-3 will be 200MB. + + | *Type:* ``int`` + | *Default:* ``2097152`` + + .. py:attribute:: target_file_size_multiplier + + | by default target_file_size_multiplier is 1, which means + | by default files in different levels will have similar size. + + | *Type:* ``int`` + | *Default:* ``1`` + + .. py:attribute:: max_bytes_for_level_base + + Control maximum total data size for a level. + *max_bytes_for_level_base* is the max total for level-1. + Maximum number of bytes for level L can be calculated as + (*max_bytes_for_level_base*) * (*max_bytes_for_level_multiplier* ^ (L-1)) + For example, if *max_bytes_for_level_base* is 20MB, and if + *max_bytes_for_level_multiplier* is 10, total data size for level-1 + will be 20MB, total file size for level-2 will be 200MB, + and total file size for level-3 will be 2GB. + + | *Type:* ``int`` + | *Default:* ``10485760`` + + .. py:attribute:: max_bytes_for_level_multiplier + + See :py:attr:`max_bytes_for_level_base` + + | *Type:* ``int`` + | *Default:* ``10`` + + .. py:attribute:: max_bytes_for_level_multiplier_additional + + Different max-size multipliers for different levels. + These are multiplied by max_bytes_for_level_multiplier to arrive + at the max-size of each level. + + | *Type:* ``[int]`` + | *Default:* ``[1, 1, 1, 1, 1, 1, 1]`` + + .. py:attribute:: expanded_compaction_factor + + Maximum number of bytes in all compacted files. We avoid expanding + the lower level file set of a compaction if it would make the + total compaction cover more than + (expanded_compaction_factor * targetFileSizeLevel()) many bytes. + + | *Type:* ``int`` + | *Default:* ``25`` + + .. py:attribute:: source_compaction_factor + + Maximum number of bytes in all source files to be compacted in a + single compaction run. We avoid picking too many files in the + source level so that we do not exceed the total source bytes + for compaction to exceed + (source_compaction_factor * targetFileSizeLevel()) many bytes. + If 1 pick maxfilesize amount of data as the source of + a compaction. + + | *Type:* ``int`` + | *Default:* ``1`` + + .. 
py:attribute:: max_grandparent_overlap_factor + + Control maximum bytes of overlaps in grandparent (i.e., level+2) before we + stop building a single file in a level->level+1 compaction. + + | *Type:* ``int`` + | *Default:* ``10`` + + .. py:attribute:: disable_data_sync + + If true, then the contents of data files are not synced + to stable storage. Their contents remain in the OS buffers till the + OS decides to flush them. This option is good for bulk-loading + of data. Once the bulk-loading is complete, please issue a + sync to the OS to flush all dirty buffesrs to stable storage. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: use_fsync + + If true, then every store to stable storage will issue a fsync. + If false, then every store to stable storage will issue a fdatasync. + This parameter should be set to true while storing data to + filesystem like ext3 that can lose files after a reboot. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: db_stats_log_interval + + This number controls how often a new scribe log about + db deploy stats is written out. + -1 indicates no logging at all. + + | *Type:* ``int`` + | *Default:* ``1800`` + + .. py:attribute:: db_log_dir + + This specifies the info LOG dir. + If it is empty, the log files will be in the same dir as data. + If it is non empty, the log files will be in the specified dir, + and the db data dir's absolute path will be used as the log file + name's prefix. + + | *Type:* ``string`` + | *Default:* ``""`` + + .. py:attribute:: wal_dir + + This specifies the absolute dir path for write-ahead logs (WAL). + If it is empty, the log files will be in the same dir as data, + dbname is used as the data dir by default. + If it is non empty, the log files will be in kept the specified dir. + When destroying the db, all log files in wal_dir and the dir itself is deleted + + | *Type:* ``string`` + | *Default:* ``""`` + + .. py:attribute:: disable_seek_compaction + + Disable compaction triggered by seek. + With bloomfilter and fast storage, a miss on one level + is very cheap if the file handle is cached in table cache + (which is true if max_open_files is large). + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: delete_obsolete_files_period_micros + + The periodicity when obsolete files get deleted. The default + value is 6 hours. The files that get out of scope by compaction + process will still get automatically delete on every compaction, + regardless of this setting + + | *Type:* ``int`` + | *Default:* ``21600000000`` + + .. py:attribute:: max_background_compactions + + Maximum number of concurrent background jobs, submitted to + the default LOW priority thread pool + + | *Type:* ``int`` + | *Default:* ``1`` + + .. py:attribute:: max_background_flushes + + Maximum number of concurrent background memtable flush jobs, submitted to + the HIGH priority thread pool. + By default, all background jobs (major compaction and memtable flush) go + to the LOW priority pool. If this option is set to a positive number, + memtable flush jobs will be submitted to the HIGH priority pool. + It is important when the same Env is shared by multiple db instances. + Without a separate pool, long running major compaction jobs could + potentially block memtable flush jobs of other db instances, leading to + unnecessary Put stalls. + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: max_log_file_size + + Specify the maximal size of the info log file. 
If the log file + is larger than `max_log_file_size`, a new info log file will + be created. + If max_log_file_size == 0, all logs will be written to one + log file. + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: log_file_time_to_roll + + Time for the info log file to roll (in seconds). + If specified with non-zero value, log file will be rolled + if it has been active longer than `log_file_time_to_roll`. + A value of ``0`` means disabled. + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: keep_log_file_num + + Maximal info log files to be kept. + + | *Type:* ``int`` + | *Default:* ``1000`` + + .. py:attribute:: soft_rate_limit + + Puts are delayed 0-1 ms when any level has a compaction score that exceeds + soft_rate_limit. This is ignored when == 0.0. + CONSTRAINT: soft_rate_limit <= hard_rate_limit. If this constraint does not + hold, RocksDB will set soft_rate_limit = hard_rate_limit. + A value of ``0`` means disabled. + + | *Type:* ``float`` + | *Default:* ``0`` + + .. py:attribute:: hard_rate_limit + + Puts are delayed 1ms at a time when any level has a compaction score that + exceeds hard_rate_limit. This is ignored when <= 1.0. + A value fo ``0`` means disabled. + + | *Type:* ``float`` + | *Default:* ``0`` + + .. py:attribute:: rate_limit_delay_max_milliseconds + + Max time a put will be stalled when hard_rate_limit is enforced. If 0, then + there is no limit. + + | *Type:* ``int`` + | *Default:* ``1000`` + + .. py:attribute:: max_manifest_file_size + + manifest file is rolled over on reaching this limit. + The older manifest file be deleted. + The default value is MAX_INT so that roll-over does not take place. + + | *Type:* ``int`` + | *Default:* ``(2**64) - 1`` + + .. py:attribute:: no_block_cache + + Disable block cache. If this is set to true, + then no block cache should be used, and the block_cache should + point to ``None`` + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: table_cache_numshardbits + + Number of shards used for table cache. + + | *Type:* ``int`` + | *Default:* ``4`` + + .. py:attribute:: table_cache_remove_scan_count_limit + + During data eviction of table's LRU cache, it would be inefficient + to strictly follow LRU because this piece of memory will not really + be released unless its refcount falls to zero. Instead, make two + passes: the first pass will release items with refcount = 1, + and if not enough space releases after scanning the number of + elements specified by this parameter, we will remove items in LRU + order. + + | *Type:* ``int`` + | *Default:* ``16`` + + .. py:attribute:: arena_block_size + + size of one block in arena memory allocation. + If <= 0, a proper value is automatically calculated (usually 1/10 of + writer_buffer_size). + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: disable_auto_compactions + + Disable automatic compactions. Manual compactions can still + be issued on this database. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: wal_ttl_seconds, wal_size_limit_mb + + The following two fields affect how archived logs will be deleted. + + 1. If both set to 0, logs will be deleted asap and will not get into + the archive. + 2. If wal_ttl_seconds is 0 and wal_size_limit_mb is not 0, + WAL files will be checked every 10 min and if total size is greater + then wal_size_limit_mb, they will be deleted starting with the + earliest until size_limit is met. All empty files will be deleted. + 3. 
If wal_ttl_seconds is not 0 and wal_size_limit_mb is 0, then + WAL files will be checked every wal_ttl_secondsi / 2 and those that + are older than wal_ttl_seconds will be deleted. + 4. If both are not 0, WAL files will be checked every 10 min and both + checks will be performed with ttl being first. + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: manifest_preallocation_size + + Number of bytes to preallocate (via fallocate) the manifest + files. Default is 4mb, which is reasonable to reduce random IO + as well as prevent overallocation for mounts that preallocate + large amounts of data (such as xfs's allocsize option). + + | *Type:* ``int`` + | *Default:* ``4194304`` + + .. py:attribute:: purge_redundant_kvs_while_flush + + Purge duplicate/deleted keys when a memtable is flushed to storage. + + | *Type:* ``bool`` + | *Default:* ``True`` + + .. py:attribute:: allow_os_buffer + + Data being read from file storage may be buffered in the OS + + | *Type:* ``bool`` + | *Default:* ``True`` + + .. py:attribute:: allow_mmap_reads + + Allow the OS to mmap file for reading sst tables + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: allow_mmap_writes + + Allow the OS to mmap file for writing + + | *Type:* ``bool`` + | *Default:* ``True`` + + .. py:attribute:: is_fd_close_on_exec + + Disable child process inherit open files + + | *Type:* ``bool`` + | *Default:* ``True`` + + .. py:attribute:: skip_log_error_on_recovery + + Skip log corruption error on recovery + (If client is ok with losing most recent changes) + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: stats_dump_period_sec + + If not zero, dump rocksdb.stats to LOG every stats_dump_period_sec + + | *Type:* ``int`` + | *Default:* ``3600`` + + .. py:attribute:: block_size_deviation + + This is used to close a block before it reaches the configured + 'block_size'. If the percentage of free space in the current block is less + than this specified number and adding a new record to the block will + exceed the configured block size, then this block will be closed and the + new record will be written to the next block. + + | *Type:* ``int`` + | *Default:* ``10`` + + .. py:attribute:: advise_random_on_open + + If set true, will hint the underlying file system that the file + access pattern is random, when a sst file is opened. + + | *Type:* ``bool`` + | *Default:* ``True`` + + .. py:attribute:: use_adaptive_mutex + + Use adaptive mutex, which spins in the user space before resorting + to kernel. This could reduce context switch when the mutex is not + heavily contended. However, if the mutex is hot, we could end up + wasting spin time. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: bytes_per_sync + + Allows OS to incrementally sync files to disk while they are being + written, asynchronously, in the background. + Issue one request for every bytes_per_sync written. 0 turns it off. + + | *Type:* ``int`` + | *Default:* ``0`` + + .. py:attribute:: filter_deletes + + Use KeyMayExist API to filter deletes when this is true. + If KeyMayExist returns false, i.e. the key definitely does not exist, then + the delete is a noop. KeyMayExist only incurs in-memory look up. + This optimization avoids writing the delete to storage when appropriate. + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: max_sequential_skip_in_iterations + + An iteration->Next() sequentially skips over keys with the same + user-key unless this option is set. 
This number specifies the number + of keys (with the same userkey) that will be sequentially + skipped before a reseek is issued. + + | *Type:* ``int`` + | *Default:* ``8`` + + .. py:attribute:: inplace_update_support + + Allows thread-safe inplace updates. Requires Updates if + + * key exists in current memtable + * new sizeof(new_value) <= sizeof(old_value) + * old_value for that key is a put i.e. kTypeValue + + | *Type:* ``bool`` + | *Default:* ``False`` + + .. py:attribute:: inplace_update_num_locks + + | Number of locks used for inplace update. + | Default: 10000, if inplace_update_support = true, else 0. + + | *Type:* ``int`` + | *Default:* ``10000`` + + .. py:attribute:: comparator + + Comparator used to define the order of keys in the table. + A python comparator must implement the :py:class:`rocksdb.interfaces.Comparator` + interface. + + *Requires*: The client must ensure that the comparator supplied + here has the same name and orders keys *exactly* the same as the + comparator provided to previous open calls on the same DB. + + *Default:* :py:class:`rocksdb.BytewiseComparator` + + .. py:attribute:: merge_operator + + The client must provide a merge operator if Merge operation + needs to be accessed. Calling Merge on a DB without a merge operator + would result in :py:exc:`rocksdb.errors.NotSupported`. The client must + ensure that the merge operator supplied here has the same name and + *exactly* the same semantics as the merge operator provided to + previous open calls on the same DB. The only exception is reserved + for upgrade, where a DB previously without a merge operator is + introduced to Merge operation for the first time. It's necessary to + specify a merge operator when openning the DB in this case. + + A python merge operator must implement the + :py:class:`rocksdb.interfaces.MergeOperator` or + :py:class:`rocksdb.interfaces.AssociativeMergeOperator` + interface. + + *Default:* ``None`` + + .. py:attribute:: filter_policy + + If not ``None`` use the specified filter policy to reduce disk reads. + A python filter policy must implement the + :py:class:`rocksdb.interfaces.FilterPolicy` interface. + Recommendes is a instance of :py:class:`rocksdb.BloomFilterPolicy` + + *Default:* ``None`` + +CompressionTypes +================ + +.. py:class:: rocksdb.CompressionType + + Defines the support compression types + + .. py:attribute:: no_compression + .. py:attribute:: snappy_compression + .. py:attribute:: zlib_compression + .. py:attribute:: bzip2_compression + +BytewiseComparator +================== + +.. py:class:: rocksdb.BytewiseComparator + + Wraps the rocksdb Bytewise Comparator, it uses lexicographic byte-wise + ordering + +BloomFilterPolicy +================= + +.. py:class:: rocksdb.BloomFilterPolicy + + Wraps the rocksdb BloomFilter Policy + + .. py:method:: __init__(bits_per_key) + + :param int bits_per_key: + Specifies the approximately number of bits per key. + A good value for bits_per_key is 10, which yields a filter with + ~ 1% false positive rate. + + +LRUCache +======== + +.. py:class:: rocksdb.LRUCache + + Wraps the rocksdb LRUCache + + .. py:method:: __init__(capacity, shard_bits=None, rm_scan_count_limit=None) + + Create a new cache with a fixed size capacity. The cache is sharded + to 2^numShardBits shards, by hash of the key. The total capacity + is divided and evenly assigned to each shard. 
Inside each shard, + the eviction is done in two passes: first try to free spaces by + evicting entries that are among the most least used removeScanCountLimit + entries and do not have reference other than by the cache itself, in + the least-used order. If not enough space is freed, further free the + entries in least used order. + diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..02eeb64 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +# +# pyrocksdb documentation build configuration file, created by +# sphinx-quickstart on Tue Dec 31 12:50:54 2013. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.todo', + 'sphinx.ext.viewcode', +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'pyrocksdb' +copyright = u'2013, sh' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = '0.1' +# The full version, including alpha/beta/rc tags. +release = '0.1' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. 
+#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'default' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. +#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'pyrocksdbdoc' + + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', +} + +# Grouping the document tree into LaTeX files. 
List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + ('index', 'pyrocksdb.tex', u'pyrocksdb Documentation', + u'sh', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'pyrocksdb', u'pyrocksdb Documentation', + [u'sh'], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + ('index', 'pyrocksdb', u'pyrocksdb Documentation', + u'sh', 'pyrocksdb', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..53add5a --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,42 @@ +Welcome to pyrocksdb's documentation! +===================================== + +Overview +-------- +Python bindings to the C++ interface of http://rocksdb.org/ using cython:: + + import rocksdb + db = rocksdb.DB("test.db", rocksdb.Options(create_if_missing=True)) + db.put("a", "b") + print db.get("a") + +Tested with python2.7 + +.. toctree:: + :maxdepth: 2 + + Instructions how to install + Tutorial + API + + +RoadMap/TODO +------------ + +* Links from tutorial to API pages (for example merge operator) +* support python3.3. + Make it fix what kind of strings are allow. + + * Arbitrary ``unicode`` and then do some encoding/decoding, like + `redis-driver `_ + + * Or just ASCII ``bytes`` and let the user handle unicode. + +* support prefix API + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/installation.rst b/docs/installation.rst new file mode 100644 index 0000000..d70a1b9 --- /dev/null +++ b/docs/installation.rst @@ -0,0 +1,35 @@ +Installing +********** +.. highlight:: bash + + +Building rocksdb +---------------- + +Briefly describes how to build rocksdb under a ordinary debian/ubuntu. 
+For more details consider https://github.com/facebook/rocksdb/blob/master/INSTALL.md::
+
+    $ apt-get install build-essential
+    $ apt-get install libsnappy-dev zlib1g-dev libbz2-dev libgflags-dev
+    $ git clone https://github.com/facebook/rocksdb.git
+    $ cd rocksdb
+    $ make librocksdb.so librocksdb.so.2 librocksdb.so.2.0
+
+If you do not want to call ``make install``, export the following environment
+variables::
+
+    $ export CPLUS_INCLUDE_PATH=${CPLUS_INCLUDE_PATH}:`pwd`/include
+    $ export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:`pwd`
+    $ export LIBRARY_PATH=${LIBRARY_PATH}:`pwd`
+
+Building pyrocksdb
+------------------
+
+.. code-block:: bash
+
+    $ apt-get install python-virtualenv python-dev
+    $ virtualenv pyrocks_test
+    $ cd pyrocks_test
+    $ . bin/activate
+    $ pip install Cython
+    $ pip install .....
diff --git a/docs/tutorial/index.rst b/docs/tutorial/index.rst
new file mode 100644
index 0000000..90f4ef6
--- /dev/null
+++ b/docs/tutorial/index.rst
@@ -0,0 +1,168 @@
+Basic Usage of pyrocksdb
+************************
+
+Open
+====
+
+The most basic open call is ::
+
+    import rocksdb
+
+    db = rocksdb.DB("test.db", rocksdb.Options(create_if_missing=True))
+
+A more production-ready open can look like this ::
+
+    import rocksdb
+
+    opts = rocksdb.Options()
+    opts.create_if_missing = True
+    opts.max_open_files = 300000
+    opts.write_buffer_size = 67108864
+    opts.max_write_buffer_number = 3
+    opts.target_file_size_base = 67108864
+    opts.filter_policy = rocksdb.BloomFilterPolicy(10)
+    opts.block_cache = rocksdb.LRUCache(2 * (1024 ** 3))
+    opts.block_cache_compressed = rocksdb.LRUCache(500 * (1024 ** 2))
+
+    db = rocksdb.DB("test.db", opts)
+
+This assigns a 2.5 GB cache, uses a bloom filter for faster lookups and keeps
+more data (64 MB) in memory before writing an .sst file.
+
+Access
+======
+
+Store, get and delete are straightforward ::
+
+    # Store
+    db.put("key", "value")
+
+    # Get
+    db.get("key")
+
+    # Delete
+    db.delete("key")
+
+It is also possible to gather modifications and
+apply them in a single operation ::
+
+    batch = rocksdb.WriteBatch()
+    batch.put("key", "v1")
+    batch.delete("key")
+    batch.put("key", "v2")
+    batch.put("key", "v3")
+
+    db.write(batch)
+
+Fetching multiple values at once ::
+
+    db.put("key1", "v1")
+    db.put("key2", "v2")
+
+    ret = db.multi_get(["key1", "key2", "key3"])
+
+    # prints "v1"
+    print ret["key1"]
+
+    # prints None
+    print ret["key3"]
+
+Iteration
+=========
+
+Iterators behave slightly differently than expected. By default they are not
+valid. 
So you have to call one of its seek methods first :: + + db.put("key1", "v1") + db.put("key2", "v2") + db.put("key3", "v3") + + it = db.iterkeys() + it.seek_to_first() + + # prints ['key1', 'key2', 'key3'] + print list(it) + + it.seek_to_last() + # prints ['key3'] + print list(it) + + it.seek('key2') + # prints ['key2', 'key3'] + print list(it) + +There are also methods to iterate over values/items :: + + it = db.itervalues() + it.seek_to_first() + + # prints ['v1', 'v2', 'v3'] + print list(it) + + it = db.iteritems() + it.seek_to_first() + + # prints [('key1', 'v1'), ('key2, 'v2'), ('key3', 'v3')] + print list(it) + +Reversed iteration :: + + it = db.iteritems() + it.seek_to_last() + + # prints [('key3', 'v3'), ('key2', 'v2'), ('key1', 'v1')] + print list(reversed(it)) + + +Snapshots +========= + +Snapshots are nice to get a consistent view on the database :: + + self.db.put("a", "1") + self.db.put("b", "2") + + snapshot = self.db.snapshot() + self.db.put("a", "2") + self.db.delete("b") + + it = self.db.iteritems() + it.seek_to_first() + + # prints {'a': '2'} + print dict(it) + + it = self.db.iteritems(snapshot=snapshot) + it.seek_to_first() + + # prints {'a': '1', 'b': '2'} + print dict(it) + + +MergeOperator +============= + +Merge operators are useful for efficient read-modify-write operations. + +The simple Associative merge :: + + class AssocCounter(rocksdb.interfaces.AssociativeMergeOperator): + def merge(self, key, existing_value, value): + if existing_value: + return (True, str(int(existing_value) + int(value))) + return (True, value) + + def name(self): + return 'AssocCounter' + + + opts = rocksdb.Options() + opts.create_if_missing = True + opts.merge_operator = AssocCounter() + db = rocksdb.DB('test.db', opts) + + db.merge("a", "1") + db.merge("a", "1") + + # prints '2' + print db.get("a") diff --git a/rocksdb/__init__.py b/rocksdb/__init__.py new file mode 100644 index 0000000..66495e8 --- /dev/null +++ b/rocksdb/__init__.py @@ -0,0 +1 @@ +from _rocksdb import * diff --git a/rocksdb/_rocksdb.pyx b/rocksdb/_rocksdb.pyx new file mode 100644 index 0000000..b0d2518 --- /dev/null +++ b/rocksdb/_rocksdb.pyx @@ -0,0 +1,1245 @@ +import cython +from libcpp.string cimport string +from libcpp.deque cimport deque +from libcpp.vector cimport vector +from libcpp cimport bool as cpp_bool +from cython.operator cimport dereference as deref +from cpython.string cimport PyString_AsString +from cpython.string cimport PyString_Size +from cpython.string cimport PyString_FromString + +from std_memory cimport shared_ptr +cimport options +cimport merge_operator +cimport filter_policy +cimport comparator +cimport slice_ +cimport cache +cimport logger +cimport snapshot +cimport db +cimport iterator + +from slice_ cimport slice_to_str +from slice_ cimport str_to_slice +from status cimport Status + +from interfaces import MergeOperator as IMergeOperator +from interfaces import AssociativeMergeOperator as IAssociativeMergeOperator +from interfaces import FilterPolicy as IFilterPolicy +from interfaces import Comparator as IComparator +import traceback +import errors + +cdef extern from "cpp/utils.hpp" namespace "py_rocks": + cdef const slice_.Slice* vector_data(vector[slice_.Slice]&) + +## Here comes the stuff to wrap the status to exception +cdef check_status(const Status& st): + if st.ok(): + return + + if st.IsNotFound(): + raise errors.NotFound(st.ToString()) + + if st.IsCorruption(): + raise errors.Corruption(st.ToString()) + + if st.IsNotSupported(): + raise errors.NotSupported(st.ToString()) + + if 
st.IsInvalidArgument(): + raise errors.InvalidArgument(st.ToString()) + + if st.IsIOError(): + raise errors.RocksIOError(st.ToString()) + + if st.IsMergeInProgress(): + raise errors.MergeInProgress(st.ToString()) + + if st.IsIncomplete(): + raise errors.Incomplete(st.ToString()) + + raise Exception("Unknown error: %s" % st.ToString()) +###################################################### + + +## Here comes the stuff for the comparator +@cython.internal +cdef class PyComparator(object): + cdef object get_ob(self): + return None + + cdef const comparator.Comparator* get_comparator(self): + return NULL + +@cython.internal +cdef class PyGenericComparator(PyComparator): + cdef const comparator.Comparator* comparator_ptr + cdef object ob + + def __cinit__(self, object ob): + if not isinstance(ob, IComparator): + # TODO: raise wrong subclass error + raise TypeError("Cannot set comparator: %s" % ob) + + self.ob = ob + self.comparator_ptr = ( + new comparator.ComparatorWrapper( + ob.name(), + ob, + compare_callback)) + + def __dealloc__(self): + del self.comparator_ptr + + cdef object get_ob(self): + return self.ob + + cdef const comparator.Comparator* get_comparator(self): + return self.comparator_ptr + +@cython.internal +cdef class PyBytewiseComparator(PyComparator): + cdef const comparator.Comparator* comparator_ptr + + def __cinit__(self): + self.comparator_ptr = comparator.BytewiseComparator() + + def name(self): + return PyString_FromString(self.comparator_ptr.Name()) + + def compare(self, str a, str b): + return self.comparator_ptr.Compare( + str_to_slice(a), + str_to_slice(b)) + + cdef object get_ob(self): + return self + + cdef const comparator.Comparator* get_comparator(self): + return self.comparator_ptr + +cdef int compare_callback( + void* ctx, + const slice_.Slice& a, + const slice_.Slice& b) with gil: + + return (ctx).compare(slice_to_str(a), slice_to_str(b)) + +BytewiseComparator = PyBytewiseComparator +######################################### + + + +## Here comes the stuff for the filter policy +@cython.internal +cdef class PyFilterPolicy(object): + cdef object get_ob(self): + return None + + cdef const filter_policy.FilterPolicy* get_policy(self): + return NULL + +@cython.internal +cdef class PyGenericFilterPolicy(PyFilterPolicy): + cdef filter_policy.FilterPolicy* policy + cdef object ob + + def __cinit__(self, object ob): + if not isinstance(ob, IFilterPolicy): + raise TypeError("Cannot set filter policy: %s" % ob) + + self.ob = ob + self.policy = new filter_policy.FilterPolicyWrapper( + ob.name(), + ob, + ob, + create_filter_callback, + key_may_match_callback) + + def __dealloc__(self): + del self.policy + + cdef object get_ob(self): + return self.ob + + cdef const filter_policy.FilterPolicy* get_policy(self): + return self.policy + +cdef void create_filter_callback( + void* ctx, + const slice_.Slice* keys, + int n, + string* dst) with gil: + + cdef string ret = (ctx).create_filter( + [slice_to_str(keys[i]) for i in range(n)]) + dst.append(ret) + +cdef cpp_bool key_may_match_callback( + void* ctx, + const slice_.Slice& key, + const slice_.Slice& filt) with gil: + + return (ctx).key_may_match(slice_to_str(key), slice_to_str(filt)) + +@cython.internal +cdef class PyBloomFilterPolicy(PyFilterPolicy): + cdef const filter_policy.FilterPolicy* policy + + def __cinit__(self, int bits_per_key): + self.policy = filter_policy.NewBloomFilterPolicy(bits_per_key) + + def __dealloc__(self): + del self.policy + + def name(self): + return PyString_FromString(self.policy.Name()) + + def 
create_filter(self, keys): + cdef string dst + cdef vector[slice_.Slice] c_keys + + for key in keys: + c_keys.push_back(str_to_slice(key)) + + self.policy.CreateFilter( + vector_data(c_keys), + c_keys.size(), + cython.address(dst)) + + return dst + + def key_may_match(self, key, filter_): + return self.policy.KeyMayMatch( + str_to_slice(key), + str_to_slice(filter_)) + + cdef object get_ob(self): + return self + + cdef const filter_policy.FilterPolicy* get_policy(self): + return self.policy + +BloomFilterPolicy = PyBloomFilterPolicy +############################################# + + + +## Here comes the stuff for the merge operator +@cython.internal +cdef class PyMergeOperator(object): + cdef shared_ptr[merge_operator.MergeOperator] merge_op + cdef object ob + + def __cinit__(self, object ob): + if isinstance(ob, IAssociativeMergeOperator): + self.ob = ob + self.merge_op.reset( + + new merge_operator.AssociativeMergeOperatorWrapper( + ob.name(), + (ob), + merge_callback)) + + elif isinstance(ob, IMergeOperator): + self.ob = ob + self.merge_op.reset( + + new merge_operator.MergeOperatorWrapper( + ob.name(), + ob, + ob, + full_merge_callback, + partial_merge_callback)) + else: + raise TypeError("Cannot set MergeOperator: %s" % ob) + + cdef object get_ob(self): + return self.ob + + cdef shared_ptr[merge_operator.MergeOperator] get_operator(self): + return self.merge_op + +cdef cpp_bool merge_callback( + void* ctx, + const slice_.Slice& key, + const slice_.Slice* existing_value, + const slice_.Slice& value, + string* new_value, + logger.Logger* log) with gil: + + if existing_value == NULL: + py_existing_value = None + else: + py_existing_value = slice_to_str(deref(existing_value)) + + try: + ret = (ctx).merge( + slice_to_str(key), + py_existing_value, + slice_to_str(value)) + + if ret[0]: + new_value.assign( + PyString_AsString(ret[1]), + PyString_Size(ret[1])) + return True + return False + + except Exception: + logger.Log( + log, + "Error in merge_callback: %s", + PyString_AsString(traceback.format_exc())) + return False + +cdef cpp_bool full_merge_callback( + void* ctx, + const slice_.Slice& key, + const slice_.Slice* existing_value, + const deque[string]& operand_list, + string* new_value, + logger.Logger* log) with gil: + + if existing_value == NULL: + py_existing_value = None + else: + py_existing_value = slice_to_str(deref(existing_value)) + + try: + ret = (ctx).full_merge( + slice_to_str(key), + py_existing_value, + [operand_list[i] for i in range(operand_list.size())]) + + if ret[0]: + new_value.assign( + PyString_AsString(ret[1]), + PyString_Size(ret[1])) + return True + return False + + except Exception: + logger.Log( + log, + "Error in full_merge_callback: %s", + PyString_AsString(traceback.format_exc())) + return False + +cdef cpp_bool partial_merge_callback( + void* ctx, + const slice_.Slice& key, + const slice_.Slice& left_op, + const slice_.Slice& right_op, + string* new_value, + logger.Logger* log) with gil: + + try: + ret = (ctx).partial_merge( + slice_to_str(key), + slice_to_str(left_op), + slice_to_str(right_op)) + + if ret[0]: + new_value.assign( + PyString_AsString(ret[1]), + PyString_Size(ret[1])) + return True + return False + + except Exception: + logger.Log( + log, + "Error in partial_merge_callback: %s", + PyString_AsString(traceback.format_exc())) + + return False +############################################## + +#### Here comes the Cache stuff +@cython.internal +cdef class PyCache(object): + cdef object get_ob(self): + return None + + cdef shared_ptr[cache.Cache] 
get_cache(self): + return shared_ptr[cache.Cache]() + +@cython.internal +cdef class PyLRUCache(PyCache): + cdef shared_ptr[cache.Cache] cache_ob + + def __cinit__(self, capacity, shard_bits=None, rm_scan_count_limit=None): + if shard_bits is not None: + if rm_scan_count_limit is not None: + self.cache_ob = cache.NewLRUCache( + capacity, + shard_bits, + rm_scan_count_limit) + else: + self.cache_ob = cache.NewLRUCache(capacity, shard_bits) + else: + self.cache_ob = cache.NewLRUCache(capacity) + + cdef object get_ob(self): + return self + + cdef shared_ptr[cache.Cache] get_cache(self): + return self.cache_ob + +LRUCache = PyLRUCache +############################### + + +cdef class CompressionType(object): + no_compression = 'no_compression' + snappy_compression = 'snappy_compression' + zlib_compression = 'zlib_compression' + bzip2_compression = 'bzip2_compression' + +cdef class Options(object): + cdef options.Options* opts + cdef PyComparator py_comparator + cdef PyMergeOperator py_merge_operator + cdef PyFilterPolicy py_filter_policy + cdef PyCache py_block_cache + cdef PyCache py_block_cache_compressed + + def __cinit__(self): + self.opts = new options.Options() + + def __dealloc__(self): + del self.opts + + def __init__(self, **kwargs): + self.py_comparator = BytewiseComparator() + self.py_merge_operator = None + self.py_filter_policy = None + self.py_block_cache = None + self.py_block_cache_compressed = None + + for key, value in kwargs.items(): + setattr(self, key, value) + + property create_if_missing: + def __get__(self): + return self.opts.create_if_missing + def __set__(self, value): + self.opts.create_if_missing = value + + property error_if_exists: + def __get__(self): + return self.opts.error_if_exists + def __set__(self, value): + self.opts.error_if_exists = value + + property paranoid_checks: + def __get__(self): + return self.opts.paranoid_checks + def __set__(self, value): + self.opts.paranoid_checks = value + + property write_buffer_size: + def __get__(self): + return self.opts.write_buffer_size + def __set__(self, value): + self.opts.write_buffer_size = value + + property max_write_buffer_number: + def __get__(self): + return self.opts.max_write_buffer_number + def __set__(self, value): + self.opts.max_write_buffer_number = value + + property min_write_buffer_number_to_merge: + def __get__(self): + return self.opts.min_write_buffer_number_to_merge + def __set__(self, value): + self.opts.min_write_buffer_number_to_merge = value + + property max_open_files: + def __get__(self): + return self.opts.max_open_files + def __set__(self, value): + self.opts.max_open_files = value + + property block_size: + def __get__(self): + return self.opts.block_size + def __set__(self, value): + self.opts.block_size = value + + property block_restart_interval: + def __get__(self): + return self.opts.block_restart_interval + def __set__(self, value): + self.opts.block_restart_interval = value + + property compression: + def __get__(self): + if self.opts.compression == options.kNoCompression: + return CompressionType.no_compression + elif self.opts.compression == options.kSnappyCompression: + return CompressionType.snappy_compression + elif self.opts.compression == options.kZlibCompression: + return CompressionType.zlib_compression + elif self.opts.compression == options.kBZip2Compression: + return CompressionType.bzip2_compression + else: + raise Exception("Unknonw type: %s" % self.opts.compression) + + def __set__(self, value): + if value == CompressionType.no_compression: + 
self.opts.compression = options.kNoCompression + elif value == CompressionType.snappy_compression: + self.opts.compression = options.kSnappyCompression + elif value == CompressionType.zlib_compression: + self.opts.compression = options.kZlibCompression + elif value == CompressionType.bzip2_compression: + self.opts.compression = options.kBZip2Compression + else: + raise TypeError("Unknown compression: %s" % value) + + property whole_key_filtering: + def __get__(self): + return self.opts.whole_key_filtering + def __set__(self, value): + self.opts.whole_key_filtering = value + + property num_levels: + def __get__(self): + return self.opts.num_levels + def __set__(self, value): + self.opts.num_levels = value + + property level0_file_num_compaction_trigger: + def __get__(self): + return self.opts.level0_file_num_compaction_trigger + def __set__(self, value): + self.opts.level0_file_num_compaction_trigger = value + + property level0_slowdown_writes_trigger: + def __get__(self): + return self.opts.level0_slowdown_writes_trigger + def __set__(self, value): + self.opts.level0_slowdown_writes_trigger = value + + property level0_stop_writes_trigger: + def __get__(self): + return self.opts.level0_stop_writes_trigger + def __set__(self, value): + self.opts.level0_stop_writes_trigger = value + + property max_mem_compaction_level: + def __get__(self): + return self.opts.max_mem_compaction_level + def __set__(self, value): + self.opts.max_mem_compaction_level = value + + property target_file_size_base: + def __get__(self): + return self.opts.target_file_size_base + def __set__(self, value): + self.opts.target_file_size_base = value + + property target_file_size_multiplier: + def __get__(self): + return self.opts.target_file_size_multiplier + def __set__(self, value): + self.opts.target_file_size_multiplier = value + + property max_bytes_for_level_base: + def __get__(self): + return self.opts.max_bytes_for_level_base + def __set__(self, value): + self.opts.max_bytes_for_level_base = value + + property max_bytes_for_level_multiplier: + def __get__(self): + return self.opts.max_bytes_for_level_multiplier + def __set__(self, value): + self.opts.max_bytes_for_level_multiplier = value + + property max_bytes_for_level_multiplier_additional: + def __get__(self): + return self.opts.max_bytes_for_level_multiplier_additional + def __set__(self, value): + self.opts.max_bytes_for_level_multiplier_additional = value + + property expanded_compaction_factor: + def __get__(self): + return self.opts.expanded_compaction_factor + def __set__(self, value): + self.opts.expanded_compaction_factor = value + + property source_compaction_factor: + def __get__(self): + return self.opts.source_compaction_factor + def __set__(self, value): + self.opts.source_compaction_factor = value + + property max_grandparent_overlap_factor: + def __get__(self): + return self.opts.max_grandparent_overlap_factor + def __set__(self, value): + self.opts.max_grandparent_overlap_factor = value + + property disable_data_sync: + def __get__(self): + return self.opts.disableDataSync + def __set__(self, value): + self.opts.disableDataSync = value + + property use_fsync: + def __get__(self): + return self.opts.use_fsync + def __set__(self, value): + self.opts.use_fsync = value + + property db_stats_log_interval: + def __get__(self): + return self.opts.db_stats_log_interval + def __set__(self, value): + self.opts.db_stats_log_interval = value + + property db_log_dir: + def __get__(self): + return self.opts.db_log_dir + def __set__(self, value): + 
self.opts.db_log_dir = value + + property wal_dir: + def __get__(self): + return self.opts.wal_dir + def __set__(self, value): + self.opts.wal_dir = value + + property disable_seek_compaction: + def __get__(self): + return self.opts.disable_seek_compaction + def __set__(self, value): + self.opts.disable_seek_compaction = value + + property delete_obsolete_files_period_micros: + def __get__(self): + return self.opts.delete_obsolete_files_period_micros + def __set__(self, value): + self.opts.delete_obsolete_files_period_micros = value + + property max_background_compactions: + def __get__(self): + return self.opts.max_background_compactions + def __set__(self, value): + self.opts.max_background_compactions = value + + property max_background_flushes: + def __get__(self): + return self.opts.max_background_flushes + def __set__(self, value): + self.opts.max_background_flushes = value + + property max_log_file_size: + def __get__(self): + return self.opts.max_log_file_size + def __set__(self, value): + self.opts.max_log_file_size = value + + property log_file_time_to_roll: + def __get__(self): + return self.opts.log_file_time_to_roll + def __set__(self, value): + self.opts.log_file_time_to_roll = value + + property keep_log_file_num: + def __get__(self): + return self.opts.keep_log_file_num + def __set__(self, value): + self.opts.keep_log_file_num = value + + property soft_rate_limit: + def __get__(self): + return self.opts.soft_rate_limit + def __set__(self, value): + self.opts.soft_rate_limit = value + + property hard_rate_limit: + def __get__(self): + return self.opts.hard_rate_limit + def __set__(self, value): + self.opts.hard_rate_limit = value + + property rate_limit_delay_max_milliseconds: + def __get__(self): + return self.opts.rate_limit_delay_max_milliseconds + def __set__(self, value): + self.opts.rate_limit_delay_max_milliseconds = value + + property max_manifest_file_size: + def __get__(self): + return self.opts.max_manifest_file_size + def __set__(self, value): + self.opts.max_manifest_file_size = value + + property no_block_cache: + def __get__(self): + return self.opts.no_block_cache + def __set__(self, value): + self.opts.no_block_cache = value + + property table_cache_numshardbits: + def __get__(self): + return self.opts.table_cache_numshardbits + def __set__(self, value): + self.opts.table_cache_numshardbits = value + + property table_cache_remove_scan_count_limit: + def __get__(self): + return self.opts.table_cache_remove_scan_count_limit + def __set__(self, value): + self.opts.table_cache_remove_scan_count_limit = value + + property arena_block_size: + def __get__(self): + return self.opts.arena_block_size + def __set__(self, value): + self.opts.arena_block_size = value + + property disable_auto_compactions: + def __get__(self): + return self.opts.disable_auto_compactions + def __set__(self, value): + self.opts.disable_auto_compactions = value + + property wal_ttl_seconds: + def __get__(self): + return self.opts.WAL_ttl_seconds + def __set__(self, value): + self.opts.WAL_ttl_seconds = value + + property wal_size_limit_mb: + def __get__(self): + return self.opts.WAL_size_limit_MB + def __set__(self, value): + self.opts.WAL_size_limit_MB = value + + property manifest_preallocation_size: + def __get__(self): + return self.opts.manifest_preallocation_size + def __set__(self, value): + self.opts.manifest_preallocation_size = value + + property purge_redundant_kvs_while_flush: + def __get__(self): + return self.opts.purge_redundant_kvs_while_flush + def __set__(self, value): + 
self.opts.purge_redundant_kvs_while_flush = value + + property allow_os_buffer: + def __get__(self): + return self.opts.allow_os_buffer + def __set__(self, value): + self.opts.allow_os_buffer = value + + property allow_mmap_reads: + def __get__(self): + return self.opts.allow_mmap_reads + def __set__(self, value): + self.opts.allow_mmap_reads = value + + property allow_mmap_writes: + def __get__(self): + return self.opts.allow_mmap_writes + def __set__(self, value): + self.opts.allow_mmap_writes = value + + property is_fd_close_on_exec: + def __get__(self): + return self.opts.is_fd_close_on_exec + def __set__(self, value): + self.opts.is_fd_close_on_exec = value + + property skip_log_error_on_recovery: + def __get__(self): + return self.opts.skip_log_error_on_recovery + def __set__(self, value): + self.opts.skip_log_error_on_recovery = value + + property stats_dump_period_sec: + def __get__(self): + return self.opts.stats_dump_period_sec + def __set__(self, value): + self.opts.stats_dump_period_sec = value + + property block_size_deviation: + def __get__(self): + return self.opts.block_size_deviation + def __set__(self, value): + self.opts.block_size_deviation = value + + property advise_random_on_open: + def __get__(self): + return self.opts.advise_random_on_open + def __set__(self, value): + self.opts.advise_random_on_open = value + + property use_adaptive_mutex: + def __get__(self): + return self.opts.use_adaptive_mutex + def __set__(self, value): + self.opts.use_adaptive_mutex = value + + property bytes_per_sync: + def __get__(self): + return self.opts.bytes_per_sync + def __set__(self, value): + self.opts.bytes_per_sync = value + + property filter_deletes: + def __get__(self): + return self.opts.filter_deletes + def __set__(self, value): + self.opts.filter_deletes = value + + property max_sequential_skip_in_iterations: + def __get__(self): + return self.opts.max_sequential_skip_in_iterations + def __set__(self, value): + self.opts.max_sequential_skip_in_iterations = value + + property inplace_update_support: + def __get__(self): + return self.opts.inplace_update_support + def __set__(self, value): + self.opts.inplace_update_support = value + + property inplace_update_num_locks: + def __get__(self): + return self.opts.inplace_update_num_locks + def __set__(self, value): + self.opts.inplace_update_num_locks = value + + property comparator: + def __get__(self): + return self.py_comparator.get_ob() + + def __set__(self, value): + if isinstance(value, PyComparator): + if (value).get_comparator() == NULL: + raise Exception("Cannot set %s as comparator" % value) + else: + self.py_comparator = value + else: + self.py_comparator = PyGenericComparator(value) + + self.opts.comparator = self.py_comparator.get_comparator() + + property merge_operator: + def __get__(self): + if self.py_merge_operator is None: + return None + return self.py_merge_operator.get_ob() + + def __set__(self, value): + self.py_merge_operator = PyMergeOperator(value) + self.opts.merge_operator = self.py_merge_operator.get_operator() + + property filter_policy: + def __get__(self): + if self.py_filter_policy is None: + return None + return self.py_filter_policy.get_ob() + + def __set__(self, value): + if isinstance(value, PyFilterPolicy): + if (value).get_policy() == NULL: + raise Exception("Cannot set filter policy: %s" % value) + self.py_filter_policy = value + else: + self.py_filter_policy = PyGenericFilterPolicy(value) + + self.opts.filter_policy = self.py_filter_policy.get_policy() + + property block_cache: + def 
__get__(self): + if self.py_block_cache is None: + return None + return self.py_block_cache.get_ob() + + def __set__(self, value): + if value is None: + self.py_block_cache = None + self.opts.block_cache.reset() + else: + if not isinstance(value, PyCache): + raise TypeError("%s is not a Cache" % value) + + self.py_block_cache = value + self.opts.block_cache = self.py_block_cache.get_cache() + + property block_cache_compressed: + def __get__(self): + if self.py_block_cache_compressed is None: + return None + return self.py_block_cache_compressed.get_ob() + + def __set__(self, value): + if value is None: + self.py_block_cache_compressed = None + self.opts.block_cache_compressed.reset() + return + + if not isinstance(value, PyCache): + raise TypeError("%s is not a Cache" % value) + + self.py_block_cache_compressed = value + self.opts.block_cache_compressed = (value).get_cache() + +# Forward declaration +cdef class Snapshot + +cdef class KeysIterator +cdef class ValuesIterator +cdef class ItemsIterator +cdef class ReversedIterator + +cdef class WriteBatch(object): + cdef db.WriteBatch* batch + + def __cinit__(self, data=None): + if data is not None: + self.batch = new db.WriteBatch(data) + else: + self.batch = new db.WriteBatch() + + def __dealloc__(self): + del self.batch + + def put(self, key, value): + self.batch.Put(str_to_slice(key), str_to_slice(value)) + + def merge(self, key, value): + self.batch.Merge(str_to_slice(key), str_to_slice(value)) + + def delete(self, key): + self.batch.Delete(str_to_slice(key)) + + def clear(self): + self.batch.Clear() + + def data(self): + return self.batch.Data() + + def count(self): + return self.batch.Count() + +cdef class DB(object): + cdef Options opts + cdef db.DB* db + + def __cinit__(self, db_name, Options opts, read_only=False): + if read_only: + check_status( + db.DB_OpenForReadOnly( + deref(opts.opts), + db_name, + cython.address(self.db), + False)) + else: + check_status( + db.DB_Open( + deref(opts.opts), + db_name, + cython.address(self.db))) + + self.opts = opts + + def __dealloc__(self): + del self.db + + def put(self, key, value, sync=False, disable_wal=False): + cdef options.WriteOptions opts + opts.sync = sync + opts.disableWAL = disable_wal + + check_status( + self.db.Put(opts, str_to_slice(key), str_to_slice(value))) + + def delete(self, key, sync=False, disable_wal=False): + cdef options.WriteOptions opts + opts.sync = sync + opts.disableWAL = disable_wal + + check_status( + self.db.Delete(opts, str_to_slice(key))) + + def merge(self, key, value, sync=False, disable_wal=False): + cdef options.WriteOptions opts + opts.sync = sync + opts.disableWAL = disable_wal + + check_status( + self.db.Merge(opts, str_to_slice(key), str_to_slice(value))) + + def write(self, WriteBatch batch, sync=False, disable_wal=False): + cdef options.WriteOptions opts + opts.sync = sync + opts.disableWAL = disable_wal + + check_status( + self.db.Write(opts, batch.batch)) + + def get(self, key, *args, **kwargs): + cdef string res + cdef Status st + + st = self.db.Get( + self.build_read_opts(self.__parse_read_opts(*args, **kwargs)), + str_to_slice(key), + cython.address(res)) + + if st.ok(): + return res + elif st.IsNotFound(): + return None + else: + check_status(st) + + def multi_get(self, keys, *args, **kwargs): + cdef vector[string] values + values.resize(len(keys)) + + cdef vector[slice_.Slice] c_keys + for key in keys: + c_keys.push_back(str_to_slice(key)) + + cdef vector[Status] res = self.db.MultiGet( + self.build_read_opts(self.__parse_read_opts(*args, 
**kwargs)), + c_keys, + cython.address(values)) + + cdef dict ret_dict = {} + for index in range(len(keys)): + if res[index].ok(): + ret_dict[keys[index]] = values[index] + elif res[index].IsNotFound(): + ret_dict[keys[index]] = None + else: + check_status(res[index]) + + return ret_dict + + def key_may_exist(self, key, fetch=False, *args, **kwargs): + cdef string value + cdef cpp_bool value_found + cdef cpp_bool exists + cdef options.ReadOptions opts + opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) + + if fetch: + value_found = False + exists = self.db.KeyMayExist( + opts, + str_to_slice(key), + cython.address(value), + cython.address(value_found)) + + if exists: + if value_found: + return (True, value) + else: + return (True, None) + else: + return (False, None) + else: + exists = self.db.KeyMayExist( + opts, + str_to_slice(key), + cython.address(value)) + + return (exists, None) + + def iterkeys(self, prefix=None, *args, **kwargs): + cdef options.ReadOptions opts + cdef KeysIterator it + opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) + it = KeysIterator(self) + it.ptr = self.db.NewIterator(opts) + return it + + def itervalues(self, prefix=None, *args, **kwargs): + cdef options.ReadOptions opts + cdef ValuesIterator it + opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) + it = ValuesIterator(self) + it.ptr = self.db.NewIterator(opts) + return it + + def iteritems(self, prefix=None, *args, **kwargs): + cdef options.ReadOptions opts + cdef ItemsIterator it + opts = self.build_read_opts(self.__parse_read_opts(*args, **kwargs)) + it = ItemsIterator(self) + it.ptr = self.db.NewIterator(opts) + return it + + def snapshot(self): + return Snapshot(self) + + def get_property(self, prop): + cdef string value + + if self.db.GetProperty(str_to_slice(prop), cython.address(value)): + return value + else: + return None + + def get_live_files_metadata(self): + cdef vector[db.LiveFileMetaData] metadata + + self.db.GetLiveFilesMetaData(cython.address(metadata)) + + ret = [] + for ob in metadata: + t = {} + t['name'] = ob.name + t['level'] = ob.level + t['size'] = ob.size + t['smallestkey'] = ob.smallestkey + t['largestkey'] = ob.largestkey + t['smallest_seqno'] = ob.smallest_seqno + t['largest_seqno'] = ob.largest_seqno + + ret.append(t) + + return ret + + @staticmethod + def __parse_read_opts( + verify_checksums=False, + fill_cache=True, + prefix_seek=False, + snapshot=None, + read_tier="all"): + + # TODO: Is this really effiencet ? 
+ return locals() + + cdef options.ReadOptions build_read_opts(self, dict py_opts): + cdef options.ReadOptions opts + opts.verify_checksums = py_opts['verify_checksums'] + opts.fill_cache = py_opts['fill_cache'] + opts.prefix_seek = py_opts['prefix_seek'] + if py_opts['snapshot'] is not None: + opts.snapshot = ((py_opts['snapshot'])).ptr + + if py_opts['read_tier'] == "all": + opts.read_tier = options.kReadAllTier + elif py_opts['read_tier'] == 'cache': + opts.read_tier = options.kBlockCacheTier + else: + raise ValueError("Invalid read_tier") + + return opts + + property options: + def __get__(self): + return self.opts + +@cython.internal +cdef class Snapshot(object): + cdef const snapshot.Snapshot* ptr + cdef DB db + + def __cinit__(self, DB db): + self.db = db + self.ptr = db.db.GetSnapshot() + + def __dealloc__(self): + self.db.db.ReleaseSnapshot(self.ptr) + + +@cython.internal +cdef class BaseIterator(object): + cdef iterator.Iterator* ptr + cdef DB db + + def __cinit__(self, DB db): + self.db = db + self.ptr = NULL + + def __dealloc__(self): + if self.ptr != NULL: + del self.ptr + + def __iter__(self): + return self + + def __next__(self): + if not self.ptr.Valid(): + raise StopIteration() + + cdef object ret = self.get_ob() + self.ptr.Next() + return ret + + def __reversed__(self): + return ReversedIterator(self) + + cpdef seek_to_first(self): + self.ptr.SeekToFirst() + + cpdef seek_to_last(self): + self.ptr.SeekToLast() + + cpdef seek(self, key): + self.ptr.Seek(str_to_slice(key)) + + cdef object get_ob(self): + return None + +@cython.internal +cdef class KeysIterator(BaseIterator): + cdef object get_ob(self): + return slice_to_str(self.ptr.key()) + +@cython.internal +cdef class ValuesIterator(BaseIterator): + cdef object get_ob(self): + return slice_to_str(self.ptr.value()) + +@cython.internal +cdef class ItemsIterator(BaseIterator): + cdef object get_ob(self): + return (slice_to_str(self.ptr.key()), slice_to_str(self.ptr.value())) + +@cython.internal +cdef class ReversedIterator(object): + cdef BaseIterator it + + def __cinit__(self, BaseIterator it): + self.it = it + + def seek_to_first(self): + self.it.seek_to_first() + + def seek_to_last(self): + self.it.seek_to_last() + + def seek(self, key): + self.it.seek(key) + + def __iter__(self): + return self + + def __reversed__(self): + return self.it + + def __next__(self): + if not self.it.ptr.Valid(): + raise StopIteration() + + cdef object ret = self.it.get_ob() + self.it.ptr.Prev() + return ret diff --git a/rocksdb/cache.pxd b/rocksdb/cache.pxd new file mode 100644 index 0000000..080b99d --- /dev/null +++ b/rocksdb/cache.pxd @@ -0,0 +1,9 @@ +from std_memory cimport shared_ptr + +cdef extern from "rocksdb/cache.h" namespace "rocksdb": + cdef cppclass Cache: + pass + + cdef extern shared_ptr[Cache] NewLRUCache(size_t) + cdef extern shared_ptr[Cache] NewLRUCache(size_t, int) + cdef extern shared_ptr[Cache] NewLRUCache(size_t, int, int) diff --git a/rocksdb/comparator.pxd b/rocksdb/comparator.pxd new file mode 100644 index 0000000..c440a7b --- /dev/null +++ b/rocksdb/comparator.pxd @@ -0,0 +1,14 @@ +from libcpp.string cimport string +from slice_ cimport Slice +cdef extern from "rocksdb/comparator.h" namespace "rocksdb": + cdef cppclass Comparator: + const char* Name() + int Compare(const Slice&, const Slice&) const + + cdef extern const Comparator* BytewiseComparator() + +ctypedef int (*compare_func)(void*, const Slice&, const Slice&) + +cdef extern from "cpp/comparator_wrapper.hpp" namespace "py_rocks": + cdef cppclass 
ComparatorWrapper: + ComparatorWrapper(string, void*, compare_func) diff --git a/rocksdb/cpp/comparator_wrapper.hpp b/rocksdb/cpp/comparator_wrapper.hpp new file mode 100644 index 0000000..7f17a04 --- /dev/null +++ b/rocksdb/cpp/comparator_wrapper.hpp @@ -0,0 +1,37 @@ +#include "rocksdb/comparator.h" + +using std::string; +using rocksdb::Comparator; +using rocksdb::Slice; + +namespace py_rocks { + class ComparatorWrapper: public Comparator { + public: + typedef int (*compare_func)(void*, const Slice&, const Slice&); + + ComparatorWrapper( + string name, + void* compare_context, + compare_func compare_callback): + name(name), + compare_context(compare_context), + compare_callback(compare_callback) + {} + + int Compare(const Slice& a, const Slice& b) const { + return this->compare_callback(this->compare_context, a, b); + } + + const char* Name() const { + return this->name.c_str(); + } + + void FindShortestSeparator(string* start, const Slice& limit) const {} + void FindShortSuccessor(string* key) const {} + + private: + string name; + void* compare_context; + compare_func compare_callback; + }; +} diff --git a/rocksdb/cpp/filter_policy_wrapper.hpp b/rocksdb/cpp/filter_policy_wrapper.hpp new file mode 100644 index 0000000..3b3e92d --- /dev/null +++ b/rocksdb/cpp/filter_policy_wrapper.hpp @@ -0,0 +1,62 @@ +#include "rocksdb/filter_policy.h" + +using std::string; +using rocksdb::FilterPolicy; +using rocksdb::Slice; + +namespace py_rocks { + class FilterPolicyWrapper: public FilterPolicy { + public: + typedef void (*create_filter_func)( + void* ctx, + const Slice* keys, + int n, + string* dst); + + typedef bool (*key_may_match_func)( + void* ctx, + const Slice& key, + const Slice& filter); + + FilterPolicyWrapper( + string name, + void* create_filter_context, + void* key_may_match_context, + create_filter_func create_filter_callback, + key_may_match_func key_may_match_callback): + name(name), + create_filter_context(create_filter_context), + key_may_match_context(key_may_match_context), + create_filter_callback(create_filter_callback), + key_may_match_callback(key_may_match_callback) + {} + + void + CreateFilter(const Slice* keys, int n, std::string* dst) const { + this->create_filter_callback( + this->create_filter_context, + keys, + n, + dst); + } + + bool + KeyMayMatch(const Slice& key, const Slice& filter) const { + return this->key_may_match_callback( + this->key_may_match_context, + key, + filter); + } + + const char* Name() const { + return this->name.c_str(); + } + + private: + string name; + void* create_filter_context; + void* key_may_match_context; + create_filter_func create_filter_callback; + key_may_match_func key_may_match_callback; + }; +} diff --git a/rocksdb/cpp/merge_operator_wrapper.hpp b/rocksdb/cpp/merge_operator_wrapper.hpp new file mode 100644 index 0000000..f7205c9 --- /dev/null +++ b/rocksdb/cpp/merge_operator_wrapper.hpp @@ -0,0 +1,132 @@ +#include "rocksdb/merge_operator.h" + +using std::string; +using std::deque; +using rocksdb::Slice; +using rocksdb::Logger; +using rocksdb::MergeOperator; +using rocksdb::AssociativeMergeOperator; + +namespace py_rocks { + class AssociativeMergeOperatorWrapper: public AssociativeMergeOperator { + public: + typedef bool (*merge_func)( + void*, + const Slice& key, + const Slice* existing_value, + const Slice& value, + std::string* new_value, + Logger* logger); + + + AssociativeMergeOperatorWrapper( + string name, + void* merge_context, + merge_func merge_callback): + name(name), + merge_context(merge_context), + 
merge_callback(merge_callback) + {} + + bool Merge( + const Slice& key, + const Slice* existing_value, + const Slice& value, + std::string* new_value, + Logger* logger) const + { + return this->merge_callback( + this->merge_context, + key, + existing_value, + value, + new_value, + logger); + } + + const char* Name() const { + return this->name.c_str(); + } + + private: + string name; + void* merge_context; + merge_func merge_callback; + }; + + class MergeOperatorWrapper: public MergeOperator { + public: + typedef bool (*full_merge_func)( + void* ctx, + const Slice& key, + const Slice* existing_value, + const deque& operand_list, + string* new_value, + Logger* logger); + + typedef bool (*partial_merge_func)( + void* ctx, + const Slice& key, + const Slice& left_op, + const Slice& right_op, + string* new_value, + Logger* logger); + + MergeOperatorWrapper( + string name, + void* full_merge_context, + void* partial_merge_context, + full_merge_func full_merge_callback, + partial_merge_func partial_merge_callback): + name(name), + full_merge_context(full_merge_context), + partial_merge_context(partial_merge_context), + full_merge_callback(full_merge_callback), + partial_merge_callback(partial_merge_callback) + {} + + bool FullMerge( + const Slice& key, + const Slice* existing_value, + const deque& operand_list, + string* new_value, + Logger* logger) const + { + return this->full_merge_callback( + this->full_merge_context, + key, + existing_value, + operand_list, + new_value, + logger); + } + + bool PartialMerge ( + const Slice& key, + const Slice& left_operand, + const Slice& right_operand, + string* new_value, + Logger* logger) const + { + return this->partial_merge_callback( + this->partial_merge_context, + key, + left_operand, + right_operand, + new_value, + logger); + } + + const char* Name() const { + return this->name.c_str(); + } + + private: + string name; + void* full_merge_context; + void* partial_merge_context; + full_merge_func full_merge_callback; + partial_merge_func partial_merge_callback; + + }; +} diff --git a/rocksdb/cpp/utils.hpp b/rocksdb/cpp/utils.hpp new file mode 100644 index 0000000..43d2093 --- /dev/null +++ b/rocksdb/cpp/utils.hpp @@ -0,0 +1,8 @@ +#include + +namespace py_rocks { + template + const T* vector_data(std::vector& v) { + return v.data(); + } +} diff --git a/rocksdb/db.pxd b/rocksdb/db.pxd new file mode 100644 index 0000000..18fd7de --- /dev/null +++ b/rocksdb/db.pxd @@ -0,0 +1,128 @@ +cimport options +from libc.stdint cimport uint64_t +from status cimport Status +from libcpp cimport bool as cpp_bool +from libcpp.string cimport string +from libcpp.vector cimport vector +from slice_ cimport Slice +from snapshot cimport Snapshot +from iterator cimport Iterator + +# TODO: Move this to a separate .pxd file +cdef extern from "rocksdb/write_batch.h" namespace "rocksdb": + cdef cppclass WriteBatch: + WriteBatch() except + + WriteBatch(string) except + + void Put(const Slice&, const Slice&) + void Merge(const Slice&, const Slice&) + void Delete(const Slice&) + void PutLogData(const Slice&) + void Clear() + string Data() + int Count() const + +cdef extern from "rocksdb/db.h" namespace "rocksdb": + ctypedef uint64_t SequenceNumber + + cdef struct LiveFileMetaData: + string name + int level + size_t size + string smallestkey + string largestkey + SequenceNumber smallest_seqno + SequenceNumber largest_seqno + + cdef cppclass Range: + Range(const Slice&, const Slice&) + + cdef cppclass DB: + Status Put( + const options.WriteOptions&, + const Slice&, + const Slice&) + + 
Status Delete( + const options.WriteOptions&, + const Slice&) + + Status Merge( + const options.WriteOptions&, + const Slice&, + const Slice&) + + Status Write( + const options.WriteOptions&, + WriteBatch*) + + Status Get( + const options.ReadOptions&, + const Slice&, + string*) + + vector[Status] MultiGet( + const options.ReadOptions&, + const vector[Slice]&, + vector[string]*) + + cpp_bool KeyMayExist( + const options.ReadOptions&, + Slice&, + string*, + cpp_bool*) + + cpp_bool KeyMayExist( + const options.ReadOptions&, + Slice&, + string*) + + Iterator* NewIterator( + const options.ReadOptions&) + + const Snapshot* GetSnapshot() + + void ReleaseSnapshot(const Snapshot*) + + cpp_bool GetProperty( + const Slice&, + string*) + + void GetApproximateSizes( + const Range* + int, + uint64_t*) + + void CompactRange( + const Slice*, + const Slice*, + bool, + int) + + int NumberLevels() + int MaxMemCompactionLevel() + int Level0StopWriteTrigger() + const string& GetName() const + Status Flush(const options.FlushOptions&) + Status DisableFileDeletions() + Status EnableFileDeletions() + + # TODO: Status GetSortedWalFiles(VectorLogPtr& files) + # TODO: SequenceNumber GetLatestSequenceNumber() + # TODO: Status GetUpdatesSince( + # SequenceNumber seq_number, + # unique_ptr[TransactionLogIterator]*) + + Status DeleteFile(string) + void GetLiveFilesMetaData(vector[LiveFileMetaData]*) + + + cdef Status DB_Open "rocksdb::DB::Open"( + const options.Options&, + const string&, + DB**) + + cdef Status DB_OpenForReadOnly "rocksdb::DB::OpenForReadOnly"( + const options.Options&, + const string&, + DB**, + cpp_bool) diff --git a/rocksdb/errors.py b/rocksdb/errors.py new file mode 100644 index 0000000..9c7a208 --- /dev/null +++ b/rocksdb/errors.py @@ -0,0 +1,20 @@ +class NotFound(Exception): + pass + +class Corruption(Exception): + pass + +class NotSupported(Exception): + pass + +class InvalidArgument(Exception): + pass + +class RocksIOError(Exception): + pass + +class MergeInProgress(Exception): + pass + +class Incomplete(Exception): + pass diff --git a/rocksdb/filter_policy.pxd b/rocksdb/filter_policy.pxd new file mode 100644 index 0000000..c99510c --- /dev/null +++ b/rocksdb/filter_policy.pxd @@ -0,0 +1,23 @@ +from libcpp cimport bool as cpp_bool +from libcpp.string cimport string +from slice_ cimport Slice + +cdef extern from "rocksdb/filter_policy.h" namespace "rocksdb": + cdef cppclass FilterPolicy: + void CreateFilter(const Slice*, int, string*) const + cpp_bool KeyMayMatch(const Slice&, const Slice&) const + const char* Name() const + + cdef extern const FilterPolicy* NewBloomFilterPolicy(int) + +ctypedef void (*create_filter_func)(void*, const Slice*, int, string*) +ctypedef cpp_bool (*key_may_match_func)(void*, const Slice&, const Slice&) + +cdef extern from "cpp/filter_policy_wrapper.hpp" namespace "py_rocks": + cdef cppclass FilterPolicyWrapper: + FilterPolicyWrapper( + string, + void*, + void*, + create_filter_func, + key_may_match_func) diff --git a/rocksdb/interfaces.py b/rocksdb/interfaces.py new file mode 100644 index 0000000..19bf658 --- /dev/null +++ b/rocksdb/interfaces.py @@ -0,0 +1,58 @@ +from abc import ABCMeta +from abc import abstractmethod + + +class Comparator: + __metaclass__ = ABCMeta + + @abstractmethod + def compare(self, a, b): + pass + + @abstractmethod + def name(self): + pass + + +class AssociativeMergeOperator: + __metaclass__ = ABCMeta + + @abstractmethod + def merge(self, key, existing_value, value): + pass + + @abstractmethod + def name(self): + pass + + +class 
MergeOperator: + __metaclass__ = ABCMeta + + @abstractmethod + def full_merge(self, key, existing_value, operand_list): + pass + + @abstractmethod + def partial_merge(self, key, left_operand, right_operand): + pass + + @abstractmethod + def name(self): + pass + + +class FilterPolicy: + __metaclass__ = ABCMeta + + @abstractmethod + def name(self): + pass + + @abstractmethod + def create_filter(self, keys): + pass + + @abstractmethod + def key_may_match(self, key, filter_): + pass diff --git a/rocksdb/iterator.pxd b/rocksdb/iterator.pxd new file mode 100644 index 0000000..b147a1c --- /dev/null +++ b/rocksdb/iterator.pxd @@ -0,0 +1,15 @@ +from libcpp cimport bool as cpp_bool +from slice_ cimport Slice +from status cimport Status + +cdef extern from "rocksdb/iterator.h" namespace "rocksdb": + cdef cppclass Iterator: + cpp_bool Valid() const + void SeekToFirst() + void SeekToLast() + void Seek(const Slice&) + void Next() + void Prev() + Slice key() const + Slice value() const + Status status() const diff --git a/rocksdb/logger.pxd b/rocksdb/logger.pxd new file mode 100644 index 0000000..6468d61 --- /dev/null +++ b/rocksdb/logger.pxd @@ -0,0 +1,5 @@ +cdef extern from "rocksdb/env.h" namespace "rocksdb": + cdef cppclass Logger: + pass + + void Log(Logger*, const char*, ...) diff --git a/rocksdb/merge_operator.pxd b/rocksdb/merge_operator.pxd new file mode 100644 index 0000000..fcbeabb --- /dev/null +++ b/rocksdb/merge_operator.pxd @@ -0,0 +1,45 @@ +from libcpp.string cimport string +from libcpp cimport bool as cpp_bool +from libcpp.deque cimport deque +from slice_ cimport Slice +from logger cimport Logger + +cdef extern from "rocksdb/merge_operator.h" namespace "rocksdb": + cdef cppclass MergeOperator: + pass + +ctypedef cpp_bool (*merge_func)( + void*, + const Slice&, + const Slice*, + const Slice&, + string*, + Logger*) + +ctypedef cpp_bool (*full_merge_func)( + void* ctx, + const Slice& key, + const Slice* existing_value, + const deque[string]& operand_list, + string* new_value, + Logger* logger) + +ctypedef cpp_bool (*partial_merge_func)( + void* ctx, + const Slice& key, + const Slice& left_op, + const Slice& right_op, + string* new_value, + Logger* logger) + +cdef extern from "cpp/merge_operator_wrapper.hpp" namespace "py_rocks": + cdef cppclass AssociativeMergeOperatorWrapper: + AssociativeMergeOperatorWrapper(string, void*, merge_func) + + cdef cppclass MergeOperatorWrapper: + MergeOperatorWrapper( + string, + void*, + void*, + full_merge_func, + partial_merge_func) diff --git a/rocksdb/options.pxd b/rocksdb/options.pxd new file mode 100644 index 0000000..4bc30f8 --- /dev/null +++ b/rocksdb/options.pxd @@ -0,0 +1,123 @@ +from libcpp cimport bool as cpp_bool +from libcpp.string cimport string +from libcpp.vector cimport vector +from libc.stdint cimport uint64_t +from std_memory cimport shared_ptr +from comparator cimport Comparator +from merge_operator cimport MergeOperator +from filter_policy cimport FilterPolicy +from cache cimport Cache +from logger cimport Logger +from slice_ cimport Slice +from snapshot cimport Snapshot + +cdef extern from "rocksdb/options.h" namespace "rocksdb": + ctypedef enum CompressionType: + kNoCompression + kSnappyCompression + kZlibCompression + kBZip2Compression + + ctypedef enum ReadTier: + kReadAllTier + kBlockCacheTier + + cdef cppclass Options: + const Comparator* comparator + shared_ptr[MergeOperator] merge_operator + const FilterPolicy* filter_policy + # TODO: compaction_filter + # TODO: compaction_filter_factory + cpp_bool create_if_missing + cpp_bool 
error_if_exists + cpp_bool paranoid_checks + # TODO: env + # TODO: info_log + size_t write_buffer_size + int max_write_buffer_number + int min_write_buffer_number_to_merge + int max_open_files + shared_ptr[Cache] block_cache + shared_ptr[Cache] block_cache_compressed + size_t block_size + int block_restart_interval + CompressionType compression + # TODO: compression_per_level + # TODO: compression_opts + # TODO: prefix_extractor + cpp_bool whole_key_filtering + int num_levels + int level0_file_num_compaction_trigger + int level0_slowdown_writes_trigger + int level0_stop_writes_trigger + int max_mem_compaction_level + int target_file_size_base + int target_file_size_multiplier + uint64_t max_bytes_for_level_base + int max_bytes_for_level_multiplier + vector[int] max_bytes_for_level_multiplier_additional + int expanded_compaction_factor + int source_compaction_factor + int max_grandparent_overlap_factor + # TODO: statistics + cpp_bool disableDataSync + cpp_bool use_fsync + int db_stats_log_interval + string db_log_dir + string wal_dir + cpp_bool disable_seek_compaction + uint64_t delete_obsolete_files_period_micros + int max_background_compactions + int max_background_flushes + size_t max_log_file_size + size_t log_file_time_to_roll + size_t keep_log_file_num + double soft_rate_limit + double hard_rate_limit + unsigned int rate_limit_delay_max_milliseconds + uint64_t max_manifest_file_size + cpp_bool no_block_cache + int table_cache_numshardbits + int table_cache_remove_scan_count_limit + size_t arena_block_size + # TODO: PrepareForBulkLoad() + cpp_bool disable_auto_compactions + uint64_t WAL_ttl_seconds + uint64_t WAL_size_limit_MB + size_t manifest_preallocation_size + cpp_bool purge_redundant_kvs_while_flush + cpp_bool allow_os_buffer + cpp_bool allow_mmap_reads + cpp_bool allow_mmap_writes + cpp_bool is_fd_close_on_exec + cpp_bool skip_log_error_on_recovery + unsigned int stats_dump_period_sec + int block_size_deviation + cpp_bool advise_random_on_open + # TODO: enum { NONE, NORMAL, SEQUENTIAL, WILLNEED } access_hint_on_compaction_start + cpp_bool use_adaptive_mutex + uint64_t bytes_per_sync + # TODO: CompactionStyle compaction_style + # TODO: CompactionOptionsUniversal compaction_options_universal + cpp_bool filter_deletes + uint64_t max_sequential_skip_in_iterations + # TODO: memtable_factory + # TODO: table_factory + # TODO: table_properties_collectors + cpp_bool inplace_update_support + size_t inplace_update_num_locks + + cdef cppclass WriteOptions: + cpp_bool sync + cpp_bool disableWAL + + cdef cppclass ReadOptions: + cpp_bool verify_checksums + cpp_bool fill_cache + cpp_bool prefix_seek + const Slice* prefix + const Snapshot* snapshot + ReadTier read_tier + + cdef cppclass FlushOptions: + cpp_bool wait diff --git a/rocksdb/slice_.pxd b/rocksdb/slice_.pxd new file mode 100644 index 0000000..c4e1180 --- /dev/null +++ b/rocksdb/slice_.pxd @@ -0,0 +1,29 @@ +from libcpp.string cimport string +from libcpp cimport bool as cpp_bool +from cpython.string cimport PyString_Size +from cpython.string cimport PyString_AsString +from cpython.string cimport PyString_FromStringAndSize + +cdef extern from "rocksdb/slice.h" namespace "rocksdb": + cdef cppclass Slice: + Slice() + Slice(const char*, size_t) + Slice(const string&) + Slice(const char*) + + const char* data() + size_t size() + cpp_bool empty() + char operator[](int) + void clear() + void remove_prefix(size_t) + string ToString() + string ToString(cpp_bool) + int compare(const Slice&) + cpp_bool starts_with(const Slice&) + +cdef inline 
Slice str_to_slice(str ob): + return Slice(PyString_AsString(ob), PyString_Size(ob)) + +cdef inline str slice_to_str(Slice ob): + return PyString_FromStringAndSize(ob.data(), ob.size()) diff --git a/rocksdb/snapshot.pxd b/rocksdb/snapshot.pxd new file mode 100644 index 0000000..9cea04d --- /dev/null +++ b/rocksdb/snapshot.pxd @@ -0,0 +1,3 @@ +cdef extern from "rocksdb/db.h" namespace "rocksdb": + cdef cppclass Snapshot: + pass diff --git a/rocksdb/status.pxd b/rocksdb/status.pxd new file mode 100644 index 0000000..9e60a21 --- /dev/null +++ b/rocksdb/status.pxd @@ -0,0 +1,15 @@ +from libcpp cimport bool as cpp_bool +from libcpp.string cimport string + +cdef extern from "rocksdb/status.h" namespace "rocksdb": + cdef cppclass Status: + Status() + cpp_bool ok() const + cpp_bool IsNotFound() const + cpp_bool IsCorruption() const + cpp_bool IsNotSupported() const + cpp_bool IsInvalidArgument() const + cpp_bool IsIOError() const + cpp_bool IsMergeInProgress() const + cpp_bool IsIncomplete() const + string ToString() const diff --git a/rocksdb/std_memory.pxd b/rocksdb/std_memory.pxd new file mode 100644 index 0000000..537e3e9 --- /dev/null +++ b/rocksdb/std_memory.pxd @@ -0,0 +1,7 @@ +cdef extern from "" namespace "std": + cdef cppclass shared_ptr[T]: + shared_ptr() + shared_ptr(T*) + void reset() + void reset(T*) + T* get() diff --git a/rocksdb/tests/__init__.py b/rocksdb/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/rocksdb/tests/test_db.py b/rocksdb/tests/test_db.py new file mode 100644 index 0000000..202cc31 --- /dev/null +++ b/rocksdb/tests/test_db.py @@ -0,0 +1,256 @@ +import os +import shutil +import gc +import unittest +import rocksdb + + +class TestHelper(object): + def _clean(self): + if os.path.exists('/tmp/test'): + shutil.rmtree("/tmp/test") + + def _close_db(self): + del self.db + gc.collect() + + +class TestDB(unittest.TestCase, TestHelper): + def setUp(self): + opts = rocksdb.Options(create_if_missing=True) + self._clean() + self.db = rocksdb.DB("/tmp/test", opts) + + def tearDown(self): + self._close_db() + + def test_get_none(self): + self.assertIsNone(self.db.get('xxx')) + + def test_put_get(self): + self.db.put("a", "b") + self.assertEqual("b", self.db.get("a")) + + def test_multi_get(self): + self.db.put("a", "1") + self.db.put("b", "2") + self.db.put("c", "3") + + ret = self.db.multi_get(['a', 'b', 'c']) + ref = {'a': '1', 'c': '3', 'b': '2'} + self.assertEqual(ref, ret) + + def test_delete(self): + self.db.put("a", "b") + self.assertEqual("b", self.db.get("a")) + self.db.delete("a") + self.assertIsNone(self.db.get("a")) + + def test_write_batch(self): + batch = rocksdb.WriteBatch() + batch.put("key", "v1") + batch.delete("key") + batch.put("key", "v2") + batch.put("key", "v3") + batch.put("a", "b") + + self.db.write(batch) + ref = {'a': 'b', 'key': 'v3'} + ret = self.db.multi_get(['key', 'a']) + self.assertEqual(ref, ret) + + def test_key_may_exists(self): + self.db.put("a", '1') + + self.assertEqual((False, None), self.db.key_may_exist("x")) + self.assertEqual((False, None), self.db.key_may_exist('x', True)) + self.assertEqual((True, None), self.db.key_may_exist('a')) + self.assertEqual((True, '1'), self.db.key_may_exist('a', True)) + + def test_iter_keys(self): + for x in range(300): + self.db.put(str(x), str(x)) + + it = self.db.iterkeys() + + self.assertEqual([], list(it)) + + it.seek_to_last() + self.assertEqual(['99'], list(it)) + + ref = sorted([str(x) for x in range(300)]) + it.seek_to_first() + self.assertEqual(ref, list(it)) + + 
it.seek('90') + ref = ['90', '91', '92', '93', '94', '95', '96', '97', '98', '99'] + self.assertEqual(ref, list(it)) + + def test_iter_values(self): + for x in range(300): + self.db.put(str(x), str(x * 1000)) + + it = self.db.itervalues() + + self.assertEqual([], list(it)) + + it.seek_to_last() + self.assertEqual(['99000'], list(it)) + + ref = sorted([str(x) for x in range(300)]) + ref = [str(int(x) * 1000) for x in ref] + it.seek_to_first() + self.assertEqual(ref, list(it)) + + it.seek('90') + ref = [str(x * 1000) for x in range(90, 100)] + self.assertEqual(ref, list(it)) + + def test_iter_items(self): + for x in range(300): + self.db.put(str(x), str(x * 1000)) + + it = self.db.iteritems() + + self.assertEqual([], list(it)) + + it.seek_to_last() + self.assertEqual([('99', '99000')], list(it)) + + ref = sorted([str(x) for x in range(300)]) + ref = [(x, str(int(x) * 1000)) for x in ref] + it.seek_to_first() + self.assertEqual(ref, list(it)) + + it.seek('90') + ref = [(str(x), str(x * 1000)) for x in range(90, 100)] + self.assertEqual(ref, list(it)) + + def test_reverse_iter(self): + for x in range(100): + self.db.put(str(x), str(x * 1000)) + + it = self.db.iteritems() + it.seek_to_last() + + ref = reversed(sorted([str(x) for x in range(100)])) + ref = [(x, str(int(x) * 1000)) for x in ref] + + self.assertEqual(ref, list(reversed(it))) + + def test_snapshot(self): + self.db.put("a", "1") + self.db.put("b", "2") + + snapshot = self.db.snapshot() + self.db.put("a", "2") + self.db.delete("b") + + it = self.db.iteritems() + it.seek_to_first() + self.assertEqual({'a': '2'}, dict(it)) + + it = self.db.iteritems(snapshot=snapshot) + it.seek_to_first() + self.assertEqual({'a': '1', 'b': '2'}, dict(it)) + + def test_get_property(self): + for x in range(300): + self.db.put(str(x), str(x)) + + self.assertIsNotNone(self.db.get_property('rocksdb.stats')) + self.assertIsNotNone(self.db.get_property('rocksdb.sstables')) + self.assertIsNotNone(self.db.get_property('rocksdb.num-files-at-level0')) + self.assertIsNone(self.db.get_property('does not exsits')) + + +class AssocCounter(rocksdb.interfaces.AssociativeMergeOperator): + def merge(self, key, existing_value, value): + if existing_value: + return (True, str(int(existing_value) + int(value))) + return (True, value) + + def name(self): + return 'AssocCounter' + + +class TestAssocMerge(unittest.TestCase, TestHelper): + def setUp(self): + opts = rocksdb.Options() + opts.create_if_missing = True + opts.merge_operator = AssocCounter() + self._clean() + self.db = rocksdb.DB('/tmp/test', opts) + + def tearDown(self): + self._close_db() + + def test_merge(self): + for x in range(1000): + self.db.merge("a", str(x)) + self.assertEqual(str(sum(range(1000))), self.db.get('a')) + + +class FullCounter(rocksdb.interfaces.MergeOperator): + def name(self): + return 'fullcounter' + + def full_merge(self, key, existing_value, operand_list): + ret = sum([int(x) for x in operand_list]) + if existing_value: + ret += int(existing_value) + + return (True, str(ret)) + + def partial_merge(self, key, left, right): + return (True, str(int(left) + int(right))) + + +class TestFullMerge(unittest.TestCase, TestHelper): + def setUp(self): + opts = rocksdb.Options() + opts.create_if_missing = True + opts.merge_operator = FullCounter() + self._clean() + self.db = rocksdb.DB('/tmp/test', opts) + + def tearDown(self): + self._close_db() + + def test_merge(self): + for x in range(1000): + self.db.merge("a", str(x)) + self.assertEqual(str(sum(range(1000))), self.db.get('a')) + + +class 
SimpleComparator(rocksdb.interfaces.Comparator): + def name(self): + return 'mycompare' + + def compare(self, a, b): + a = int(a) + b = int(b) + if a < b: + return -1 + if a == b: + return 0 + if a > b: + return 1 + + +class TestComparator(unittest.TestCase, TestHelper): + def setUp(self): + opts = rocksdb.Options() + opts.create_if_missing = True + opts.comparator = SimpleComparator() + self._clean() + self.db = rocksdb.DB('/tmp/test', opts) + + def tearDown(self): + self._close_db() + + def test_compare(self): + for x in range(1000): + self.db.put(str(x), str(x)) + + self.assertEqual('300', self.db.get('300')) diff --git a/rocksdb/tests/test_options.py b/rocksdb/tests/test_options.py new file mode 100644 index 0000000..599f3f3 --- /dev/null +++ b/rocksdb/tests/test_options.py @@ -0,0 +1,54 @@ +import unittest +import rocksdb + +class TestFilterPolicy(rocksdb.interfaces.FilterPolicy): + def create_filter(self, keys): + return 'nix' + + def key_may_match(self, key, fil): + return True + + def name(self): + return 'testfilter' + +class TestMergeOperator(rocksdb.interfaces.MergeOperator): + def full_merge(self, *args, **kwargs): + return (False, None) + + def partial_merge(self, *args, **kwargs): + return (False, None) + + def name(self): + return 'testmergeop' + +class TestOptions(unittest.TestCase): + def test_simple(self): + opts = rocksdb.Options() + self.assertEqual(False, opts.paranoid_checks) + opts.paranoid_checks = True + self.assertEqual(True, opts.paranoid_checks) + + self.assertIsNone(opts.filter_policy) + ob = TestFilterPolicy() + opts.filter_policy = ob + self.assertEqual(opts.filter_policy, ob) + + self.assertIsNone(opts.merge_operator) + ob = TestMergeOperator() + opts.merge_operator = ob + self.assertEqual(opts.merge_operator, ob) + + self.assertIsInstance( + opts.comparator, + rocksdb.BytewiseComparator) + + self.assertEqual('snappy_compression', opts.compression) + opts.compression = rocksdb.CompressionType.no_compression + self.assertEqual('no_compression', opts.compression) + + self.assertEqual(opts.block_size, 4096) + + self.assertIsNone(opts.block_cache) + ob = rocksdb.LRUCache(100) + opts.block_cache = ob + self.assertEqual(ob, opts.block_cache) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..6eec643 --- /dev/null +++ b/setup.py @@ -0,0 +1,39 @@ +from setuptools import setup, find_packages +from distutils.extension import Extension +from Cython.Build import cythonize + +extension_defaults = { + 'extra_compile_args': [ + '-std=gnu++11', + '-O3', + '-Wall', + '-Wextra', + '-Wconversion', + '-fno-strict-aliasing' + ], + 'language': 'c++', + 'libraries': [ + 'bz2', + 'z', + 'rocksdb' + ] +} + +mod1 = Extension( + 'rocksdb._rocksdb', + ['rocksdb/_rocksdb.pyx'], + **extension_defaults +) + +setup( + name="pyrocksdb", + install_requires=[ + 'setuptools', + 'Cython', + ], + package_dir={'rocksdb': 'rocksdb'}, + packages=find_packages('.'), + ext_modules=cythonize([mod1]), + test_suite='rocksdb.tests', + include_package_data=True +)
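+
+# One possible way to build and exercise the bindings, assuming librocksdb and
+# its headers are reachable (for example via the environment variables shown in
+# the installation docs):
+#
+#   $ python setup.py install
+#   $ python setup.py test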