Squashed 'src/leveldb/' changes from 20ca81f..a31c8aa

a31c8aa Add NewAppendableFile for win32 environment
1913d71 Merge upstream LevelDB 1.19
3080a45 Increase leveldb version to 1.19.
fa6dc01 A zippy change broke test assumptions about the size of compressed output. Fix the tests by allowing more slop in zippy's behavior. ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=123432472
06a191b fix problems in LevelDB's caching code
a7bff69 Fix LevelDB build when asserts are enabled in release builds. (#367)
ea992b4 Change std::uint64_t to uint64_t (#354)
e84b5bd This CL fixes a bug encountered when reading records from leveldb files that have been split, as in a [] input task split.
3211343 Deleted redundant null ptr check prior to delete.
7306ef8 Merge pull request #348 from randomascii/master
6b18316 Fix signed/unsigned mismatch on VC++ builds
adbe3eb Putting build artifacts in subdirectory.
2d0320a Merge pull request #329 from ralphtheninja/travis-badge
dd1c3c3 add travis build badge
43fcf23 Merge pull request #328 from cmumford/master
9fcae61 Added a Travis CI build file.
dac40d2 Merge pull request #284 from ideawu/master
8ec241a Merge pull request #317 from falvojr/patch-1
5d36bed Merge pull request #272 from vapier/master
4753c9b Added a contributors section to README.md
e2446d0 Merge pull request #275 from paulirish/patch-1
706b7f8 Resolve race when getting approximate-memory-usage property
3c9ff3c Only compiling TrimSpace on linux.
f8d205c Including atomic_pointer.h in port_posix
889de31 Let LevelDB use xcrun to determine Xcode.app path instead of using a hardcoded path.
528c2bc Add "approximate-memory-usage" property to leveldb::DB::GetProperty
359b6bc Add leveldb::Cache::Prune
50e77a8 Fix size_t/int comparison/conversion issues in leveldb.
5208e79 Added leveldb::Status::IsInvalidArgument() method.
ce45404 Suppress error reporting after seeking but before a valid First or Full record is encountered.
b9afa1f include <assert> -> <cassert>
edf2939 Update README.md
65190ac Will not reuse manifest if reuse_logs options is false.
ac1d69d LevelDB now attempts to reuse the preceding MANIFEST and log file when re-opened.
76bba13 fix indent
8fcceb2 log compaction output file's level along with number
0e0f074 documentation. improved link
c85addc readme: improved documentation link
ceff6f1 Fix Android/MIPS build.
77948e7 Add benchmark that measures cost of repeatedly opening the database.
34ad72e Move header guard below copyright banner.
a75d435 Clean up layering of storage/leveldb/...
b234f65 Added a new fault injection test.
c4c38f9 Add arm64 support to leveldb.
cea9b10 Fixed incorrect comment wording for Iterator::Seek.
c00c569 Deleted old README file.

git-subtree-dir: src/leveldb
git-subtree-split: a31c8aa408d5594830f7cb20ead1ef1dff51b79e
This commit is contained in:
Pieter Wuille 2016-12-01 16:14:45 -08:00
parent fb9857bfd6
commit 634ad51703
52 changed files with 1941 additions and 371 deletions

13
.travis.yml Normal file
View file

@ -0,0 +1,13 @@
language: cpp
compiler:
- clang
- gcc
os:
- linux
- osx
sudo: false
before_install:
- echo $LANG
- echo $LC_ALL
script:
- make -j 4 check

463
Makefile
View file

@ -20,208 +20,395 @@ $(shell CC="$(CC)" CXX="$(CXX)" TARGET_OS="$(TARGET_OS)" \
# this file is generated by the previous line to set build flags and sources # this file is generated by the previous line to set build flags and sources
include build_config.mk include build_config.mk
TESTS = \
db/autocompact_test \
db/c_test \
db/corruption_test \
db/db_test \
db/dbformat_test \
db/fault_injection_test \
db/filename_test \
db/log_test \
db/recovery_test \
db/skiplist_test \
db/version_edit_test \
db/version_set_test \
db/write_batch_test \
helpers/memenv/memenv_test \
issues/issue178_test \
issues/issue200_test \
table/filter_block_test \
table/table_test \
util/arena_test \
util/bloom_test \
util/cache_test \
util/coding_test \
util/crc32c_test \
util/env_test \
util/hash_test
UTILS = \
db/db_bench \
db/leveldbutil
# Put the object files in a subdirectory, but the application at the top of the object dir.
PROGNAMES := $(notdir $(TESTS) $(UTILS))
# On Linux may need libkyotocabinet-dev for dependency.
BENCHMARKS = \
doc/bench/db_bench_sqlite3 \
doc/bench/db_bench_tree_db
CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT)
CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT)
LDFLAGS += $(PLATFORM_LDFLAGS) LDFLAGS += $(PLATFORM_LDFLAGS)
LIBS += $(PLATFORM_LIBS) LIBS += $(PLATFORM_LIBS)
LIBOBJECTS = $(SOURCES:.cc=.o) SIMULATOR_OUTDIR=out-ios-x86
MEMENVOBJECTS = $(MEMENV_SOURCES:.cc=.o) DEVICE_OUTDIR=out-ios-arm
TESTUTIL = ./util/testutil.o
TESTHARNESS = ./util/testharness.o $(TESTUTIL)
# Note: iOS should probably be using libtool, not ar.
ifeq ($(PLATFORM), IOS) ifeq ($(PLATFORM), IOS)
# Note: iOS should probably be using libtool, not ar.
AR=xcrun ar AR=xcrun ar
SIMULATORSDK=$(shell xcrun -sdk iphonesimulator --show-sdk-path)
DEVICESDK=$(shell xcrun -sdk iphoneos --show-sdk-path)
DEVICE_CFLAGS = -isysroot "$(DEVICESDK)" -arch armv6 -arch armv7 -arch armv7s -arch arm64
SIMULATOR_CFLAGS = -isysroot "$(SIMULATORSDK)" -arch i686 -arch x86_64
STATIC_OUTDIR=out-ios-universal
else
STATIC_OUTDIR=out-static
SHARED_OUTDIR=out-shared
STATIC_PROGRAMS := $(addprefix $(STATIC_OUTDIR)/, $(PROGNAMES))
SHARED_PROGRAMS := $(addprefix $(SHARED_OUTDIR)/, db_bench)
endif endif
TESTS = \ STATIC_LIBOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(SOURCES:.cc=.o))
arena_test \ STATIC_MEMENVOBJECTS := $(addprefix $(STATIC_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
autocompact_test \
bloom_test \
c_test \
cache_test \
coding_test \
corruption_test \
crc32c_test \
db_test \
dbformat_test \
env_test \
filename_test \
filter_block_test \
hash_test \
issue178_test \
issue200_test \
log_test \
memenv_test \
skiplist_test \
table_test \
version_edit_test \
version_set_test \
write_batch_test
PROGRAMS = db_bench leveldbutil $(TESTS) DEVICE_LIBOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(SOURCES:.cc=.o))
BENCHMARKS = db_bench_sqlite3 db_bench_tree_db DEVICE_MEMENVOBJECTS := $(addprefix $(DEVICE_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
LIBRARY = libleveldb.a SIMULATOR_LIBOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(SOURCES:.cc=.o))
MEMENVLIBRARY = libmemenv.a SIMULATOR_MEMENVOBJECTS := $(addprefix $(SIMULATOR_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
SHARED_LIBOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(SOURCES:.cc=.o))
SHARED_MEMENVOBJECTS := $(addprefix $(SHARED_OUTDIR)/, $(MEMENV_SOURCES:.cc=.o))
TESTUTIL := $(STATIC_OUTDIR)/util/testutil.o
TESTHARNESS := $(STATIC_OUTDIR)/util/testharness.o $(TESTUTIL)
STATIC_TESTOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(TESTS)))
STATIC_UTILOBJS := $(addprefix $(STATIC_OUTDIR)/, $(addsuffix .o, $(UTILS)))
STATIC_ALLOBJS := $(STATIC_LIBOBJECTS) $(STATIC_MEMENVOBJECTS) $(STATIC_TESTOBJS) $(STATIC_UTILOBJS) $(TESTHARNESS)
DEVICE_ALLOBJS := $(DEVICE_LIBOBJECTS) $(DEVICE_MEMENVOBJECTS)
SIMULATOR_ALLOBJS := $(SIMULATOR_LIBOBJECTS) $(SIMULATOR_MEMENVOBJECTS)
default: all default: all
# Should we build shared libraries? # Should we build shared libraries?
ifneq ($(PLATFORM_SHARED_EXT),) ifneq ($(PLATFORM_SHARED_EXT),)
# Many leveldb test apps use non-exported API's. Only build a subset for testing.
SHARED_ALLOBJS := $(SHARED_LIBOBJECTS) $(SHARED_MEMENVOBJECTS) $(TESTHARNESS)
ifneq ($(PLATFORM_SHARED_VERSIONED),true) ifneq ($(PLATFORM_SHARED_VERSIONED),true)
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1) SHARED_LIB2 = $(SHARED_LIB1)
SHARED3 = $(SHARED1) SHARED_LIB3 = $(SHARED_LIB1)
SHARED = $(SHARED1) SHARED_LIBS = $(SHARED_LIB1)
SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
else else
# Update db.h if you change these. # Update db.h if you change these.
SHARED_MAJOR = 1 SHARED_VERSION_MAJOR = 1
SHARED_MINOR = 18 SHARED_VERSION_MINOR = 19
SHARED1 = libleveldb.$(PLATFORM_SHARED_EXT) SHARED_LIB1 = libleveldb.$(PLATFORM_SHARED_EXT)
SHARED2 = $(SHARED1).$(SHARED_MAJOR) SHARED_LIB2 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR)
SHARED3 = $(SHARED1).$(SHARED_MAJOR).$(SHARED_MINOR) SHARED_LIB3 = $(SHARED_LIB1).$(SHARED_VERSION_MAJOR).$(SHARED_VERSION_MINOR)
SHARED = $(SHARED1) $(SHARED2) $(SHARED3) SHARED_LIBS = $(SHARED_OUTDIR)/$(SHARED_LIB1) $(SHARED_OUTDIR)/$(SHARED_LIB2) $(SHARED_OUTDIR)/$(SHARED_LIB3)
$(SHARED1): $(SHARED3) $(SHARED_OUTDIR)/$(SHARED_LIB1): $(SHARED_OUTDIR)/$(SHARED_LIB3)
ln -fs $(SHARED3) $(SHARED1) ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB1)
$(SHARED2): $(SHARED3) $(SHARED_OUTDIR)/$(SHARED_LIB2): $(SHARED_OUTDIR)/$(SHARED_LIB3)
ln -fs $(SHARED3) $(SHARED2) ln -fs $(SHARED_LIB3) $(SHARED_OUTDIR)/$(SHARED_LIB2)
SHARED_MEMENVLIB = $(SHARED_OUTDIR)/libmemenv.a
endif endif
$(SHARED3): $(SHARED_OUTDIR)/$(SHARED_LIB3): $(SHARED_LIBOBJECTS)
$(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED2) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SOURCES) -o $(SHARED3) $(LIBS) $(CXX) $(LDFLAGS) $(PLATFORM_SHARED_LDFLAGS)$(SHARED_LIB2) $(SHARED_LIBOBJECTS) -o $(SHARED_OUTDIR)/$(SHARED_LIB3) $(LIBS)
endif # PLATFORM_SHARED_EXT endif # PLATFORM_SHARED_EXT
all: $(SHARED) $(LIBRARY) all: $(SHARED_LIBS) $(SHARED_PROGRAMS) $(STATIC_OUTDIR)/libleveldb.a $(STATIC_OUTDIR)/libmemenv.a $(STATIC_PROGRAMS)
check: all $(PROGRAMS) $(TESTS) check: $(STATIC_PROGRAMS)
for t in $(TESTS); do echo "***** Running $$t"; ./$$t || exit 1; done for t in $(notdir $(TESTS)); do echo "***** Running $$t"; $(STATIC_OUTDIR)/$$t || exit 1; done
clean: clean:
-rm -f $(PROGRAMS) $(BENCHMARKS) $(LIBRARY) $(SHARED) $(MEMENVLIBRARY) */*.o */*/*.o ios-x86/*/*.o ios-arm/*/*.o build_config.mk -rm -rf out-static out-shared out-ios-x86 out-ios-arm out-ios-universal
-rm -rf ios-x86/* ios-arm/* -rm -f build_config.mk
-rm -rf ios-x86 ios-arm
$(LIBRARY): $(LIBOBJECTS) $(STATIC_OUTDIR):
rm -f $@ mkdir $@
$(AR) -rs $@ $(LIBOBJECTS)
db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) $(STATIC_OUTDIR)/db: | $(STATIC_OUTDIR)
$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS) mkdir $@
db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) $(STATIC_OUTDIR)/helpers/memenv: | $(STATIC_OUTDIR)
$(CXX) $(LDFLAGS) doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS) mkdir -p $@
db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) $(STATIC_OUTDIR)/port: | $(STATIC_OUTDIR)
$(CXX) $(LDFLAGS) doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS) mkdir $@
leveldbutil: db/leveldb_main.o $(LIBOBJECTS) $(STATIC_OUTDIR)/table: | $(STATIC_OUTDIR)
$(CXX) $(LDFLAGS) db/leveldb_main.o $(LIBOBJECTS) -o $@ $(LIBS) mkdir $@
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) $(STATIC_OUTDIR)/util: | $(STATIC_OUTDIR)
$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
autocompact_test: db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) .PHONY: STATIC_OBJDIRS
$(CXX) $(LDFLAGS) db/autocompact_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) STATIC_OBJDIRS: \
$(STATIC_OUTDIR)/db \
$(STATIC_OUTDIR)/port \
$(STATIC_OUTDIR)/table \
$(STATIC_OUTDIR)/util \
$(STATIC_OUTDIR)/helpers/memenv
bloom_test: util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR):
$(CXX) $(LDFLAGS) util/bloom_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR)/db: | $(SHARED_OUTDIR)
$(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR)/helpers/memenv: | $(SHARED_OUTDIR)
$(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir -p $@
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR)/port: | $(SHARED_OUTDIR)
$(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR)/table: | $(SHARED_OUTDIR)
$(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SHARED_OUTDIR)/util: | $(SHARED_OUTDIR)
$(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) .PHONY: SHARED_OBJDIRS
$(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) SHARED_OBJDIRS: \
$(SHARED_OUTDIR)/db \
$(SHARED_OUTDIR)/port \
$(SHARED_OUTDIR)/table \
$(SHARED_OUTDIR)/util \
$(SHARED_OUTDIR)/helpers/memenv
dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR):
$(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR)/db: | $(DEVICE_OUTDIR)
$(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR)/helpers/memenv: | $(DEVICE_OUTDIR)
$(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir -p $@
filter_block_test: table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR)/port: | $(DEVICE_OUTDIR)
$(CXX) $(LDFLAGS) table/filter_block_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
hash_test: util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR)/table: | $(DEVICE_OUTDIR)
$(CXX) $(LDFLAGS) util/hash_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
issue178_test: issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) $(DEVICE_OUTDIR)/util: | $(DEVICE_OUTDIR)
$(CXX) $(LDFLAGS) issues/issue178_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
issue200_test: issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) .PHONY: DEVICE_OBJDIRS
$(CXX) $(LDFLAGS) issues/issue200_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) DEVICE_OBJDIRS: \
$(DEVICE_OUTDIR)/db \
$(DEVICE_OUTDIR)/port \
$(DEVICE_OUTDIR)/table \
$(DEVICE_OUTDIR)/util \
$(DEVICE_OUTDIR)/helpers/memenv
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR):
$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR)/db: | $(SIMULATOR_OUTDIR)
$(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR)/helpers/memenv: | $(SIMULATOR_OUTDIR)
$(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir -p $@
version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR)/port: | $(SIMULATOR_OUTDIR)
$(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR)/table: | $(SIMULATOR_OUTDIR)
$(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) $(SIMULATOR_OUTDIR)/util: | $(SIMULATOR_OUTDIR)
$(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS) mkdir $@
$(MEMENVLIBRARY) : $(MEMENVOBJECTS) .PHONY: SIMULATOR_OBJDIRS
rm -f $@ SIMULATOR_OBJDIRS: \
$(AR) -rs $@ $(MEMENVOBJECTS) $(SIMULATOR_OUTDIR)/db \
$(SIMULATOR_OUTDIR)/port \
$(SIMULATOR_OUTDIR)/table \
$(SIMULATOR_OUTDIR)/util \
$(SIMULATOR_OUTDIR)/helpers/memenv
memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) $(STATIC_ALLOBJS): | STATIC_OBJDIRS
$(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LIBS) $(DEVICE_ALLOBJS): | DEVICE_OBJDIRS
$(SIMULATOR_ALLOBJS): | SIMULATOR_OBJDIRS
$(SHARED_ALLOBJS): | SHARED_OBJDIRS
ifeq ($(PLATFORM), IOS) ifeq ($(PLATFORM), IOS)
# For iOS, create universal object files to be used on both the simulator and $(DEVICE_OUTDIR)/libleveldb.a: $(DEVICE_LIBOBJECTS)
rm -f $@
$(AR) -rs $@ $(DEVICE_LIBOBJECTS)
$(SIMULATOR_OUTDIR)/libleveldb.a: $(SIMULATOR_LIBOBJECTS)
rm -f $@
$(AR) -rs $@ $(SIMULATOR_LIBOBJECTS)
$(DEVICE_OUTDIR)/libmemenv.a: $(DEVICE_MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(DEVICE_MEMENVOBJECTS)
$(SIMULATOR_OUTDIR)/libmemenv.a: $(SIMULATOR_MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(SIMULATOR_MEMENVOBJECTS)
# For iOS, create universal object libraries to be used on both the simulator and
# a device. # a device.
PLATFORMSROOT=/Applications/Xcode.app/Contents/Developer/Platforms $(STATIC_OUTDIR)/libleveldb.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a
SIMULATORROOT=$(PLATFORMSROOT)/iPhoneSimulator.platform/Developer lipo -create $(DEVICE_OUTDIR)/libleveldb.a $(SIMULATOR_OUTDIR)/libleveldb.a -output $@
DEVICEROOT=$(PLATFORMSROOT)/iPhoneOS.platform/Developer
IOSVERSION=$(shell defaults read $(PLATFORMSROOT)/iPhoneOS.platform/version CFBundleShortVersionString)
IOSARCH=-arch armv6 -arch armv7 -arch armv7s -arch arm64
.cc.o:
mkdir -p ios-x86/$(dir $@)
xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
.c.o:
mkdir -p ios-x86/$(dir $@)
xcrun -sdk iphonesimulator $(CC) $(CFLAGS) -isysroot $(SIMULATORROOT)/SDKs/iPhoneSimulator$(IOSVERSION).sdk -arch i686 -arch x86_64 -c $< -o ios-x86/$@
mkdir -p ios-arm/$(dir $@)
xcrun -sdk iphoneos $(CC) $(CFLAGS) -isysroot $(DEVICEROOT)/SDKs/iPhoneOS$(IOSVERSION).sdk $(IOSARCH) -c $< -o ios-arm/$@
xcrun lipo ios-x86/$@ ios-arm/$@ -create -output $@
$(STATIC_OUTDIR)/libmemenv.a: $(STATIC_OUTDIR) $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a
lipo -create $(DEVICE_OUTDIR)/libmemenv.a $(SIMULATOR_OUTDIR)/libmemenv.a -output $@
else else
.cc.o: $(STATIC_OUTDIR)/libleveldb.a:$(STATIC_LIBOBJECTS)
rm -f $@
$(AR) -rs $@ $(STATIC_LIBOBJECTS)
$(STATIC_OUTDIR)/libmemenv.a:$(STATIC_MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(STATIC_MEMENVOBJECTS)
endif
$(SHARED_MEMENVLIB):$(SHARED_MEMENVOBJECTS)
rm -f $@
$(AR) -rs $@ $(SHARED_MEMENVOBJECTS)
$(STATIC_OUTDIR)/db_bench:db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_bench.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ $(LIBS)
$(STATIC_OUTDIR)/db_bench_sqlite3:doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_sqlite3.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lsqlite3 $(LIBS)
$(STATIC_OUTDIR)/db_bench_tree_db:doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) $(CXXFLAGS) doc/bench/db_bench_tree_db.cc $(STATIC_LIBOBJECTS) $(TESTUTIL) -o $@ -lkyotocabinet $(LIBS)
$(STATIC_OUTDIR)/leveldbutil:db/leveldbutil.cc $(STATIC_LIBOBJECTS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/leveldbutil.cc $(STATIC_LIBOBJECTS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/arena_test:util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/arena_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/autocompact_test:db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/autocompact_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/bloom_test:util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/bloom_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/c_test:$(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/db/c_test.o $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/cache_test:util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/cache_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/coding_test:util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/coding_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/corruption_test:db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/corruption_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/crc32c_test:util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/crc32c_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/db_test:db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/db_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/dbformat_test:db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/dbformat_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/env_test:util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/env_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/fault_injection_test:db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/fault_injection_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/filename_test:db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/filename_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/filter_block_test:table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) table/filter_block_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/hash_test:util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) util/hash_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/issue178_test:issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue178_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/issue200_test:issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) issues/issue200_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/log_test:db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/log_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/recovery_test:db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/recovery_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/table_test:table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) table/table_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/skiplist_test:db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/skiplist_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/version_edit_test:db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_edit_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/version_set_test:db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/version_set_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/write_batch_test:db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) $(CXXFLAGS) db/write_batch_test.cc $(STATIC_LIBOBJECTS) $(TESTHARNESS) -o $@ $(LIBS)
$(STATIC_OUTDIR)/memenv_test:$(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS)
$(XCRUN) $(CXX) $(LDFLAGS) $(STATIC_OUTDIR)/helpers/memenv/memenv_test.o $(STATIC_OUTDIR)/libmemenv.a $(STATIC_OUTDIR)/libleveldb.a $(TESTHARNESS) -o $@ $(LIBS)
$(SHARED_OUTDIR)/db_bench:$(SHARED_OUTDIR)/db/db_bench.o $(SHARED_LIBS) $(TESTUTIL)
$(XCRUN) $(CXX) $(LDFLAGS) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) $(SHARED_OUTDIR)/db/db_bench.o $(TESTUTIL) $(SHARED_OUTDIR)/$(SHARED_LIB3) -o $@ $(LIBS)
.PHONY: run-shared
run-shared: $(SHARED_OUTDIR)/db_bench
LD_LIBRARY_PATH=$(SHARED_OUTDIR) $(SHARED_OUTDIR)/db_bench
$(SIMULATOR_OUTDIR)/%.o: %.cc
xcrun -sdk iphonesimulator $(CXX) $(CXXFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
$(DEVICE_OUTDIR)/%.o: %.cc
xcrun -sdk iphoneos $(CXX) $(CXXFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
$(SIMULATOR_OUTDIR)/%.o: %.c
xcrun -sdk iphonesimulator $(CC) $(CFLAGS) $(SIMULATOR_CFLAGS) -c $< -o $@
$(DEVICE_OUTDIR)/%.o: %.c
xcrun -sdk iphoneos $(CC) $(CFLAGS) $(DEVICE_CFLAGS) -c $< -o $@
$(STATIC_OUTDIR)/%.o: %.cc
$(CXX) $(CXXFLAGS) -c $< -o $@ $(CXX) $(CXXFLAGS) -c $< -o $@
.c.o: $(STATIC_OUTDIR)/%.o: %.c
$(CC) $(CFLAGS) -c $< -o $@ $(CC) $(CFLAGS) -c $< -o $@
endif
$(SHARED_OUTDIR)/%.o: %.cc
$(CXX) $(CXXFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@
$(SHARED_OUTDIR)/%.o: %.c
$(CC) $(CFLAGS) $(PLATFORM_SHARED_CFLAGS) -c $< -o $@

51
README
View file

@ -1,51 +0,0 @@
leveldb: A key-value store
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
The code under this directory implements a system for maintaining a
persistent key/value store.
See doc/index.html for more explanation.
See doc/impl.html for a brief overview of the implementation.
The public interface is in include/*.h. Callers should not include or
rely on the details of any other header files in this package. Those
internal APIs may be changed without warning.
Guide to header files:
include/db.h
Main interface to the DB: Start here
include/options.h
Control over the behavior of an entire database, and also
control over the behavior of individual reads and writes.
include/comparator.h
Abstraction for user-specified comparison function. If you want
just bytewise comparison of keys, you can use the default comparator,
but clients can write their own comparator implementations if they
want custom ordering (e.g. to handle different character
encodings, etc.)
include/iterator.h
Interface for iterating over data. You can get an iterator
from a DB object.
include/write_batch.h
Interface for atomically applying multiple updates to a database.
include/slice.h
A simple module for maintaining a pointer and a length into some
other byte array.
include/status.h
Status is returned from many of the public interfaces and is used
to report success and various kinds of errors.
include/env.h
Abstraction of the OS environment. A posix implementation of
this interface is in util/env_posix.cc
include/table.h
include/table_builder.h
Lower-level modules that most clients probably won't use directly

View file

@ -1,5 +1,7 @@
**LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.** **LevelDB is a fast key-value storage library written at Google that provides an ordered mapping from string keys to string values.**
[![Build Status](https://travis-ci.org/google/leveldb.svg?branch=master)](https://travis-ci.org/google/leveldb)
Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com) Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
# Features # Features
@ -10,9 +12,11 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
* Multiple changes can be made in one atomic batch. * Multiple changes can be made in one atomic batch.
* Users can create a transient snapshot to get a consistent view of data. * Users can create a transient snapshot to get a consistent view of data.
* Forward and backward iteration is supported over the data. * Forward and backward iteration is supported over the data.
* Data is automatically compressed using the [Snappy compression library](http://code.google.com/p/snappy). * Data is automatically compressed using the [Snappy compression library](http://google.github.io/snappy/).
* External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions. * External activity (file system operations etc.) is relayed through a virtual interface so users can customize the operating system interactions.
* [Detailed documentation](http://htmlpreview.github.io/?https://github.com/google/leveldb/blob/master/doc/index.html) about how to use the library is included with the source code.
# Documentation
[LevelDB library documentation](https://rawgit.com/google/leveldb/master/doc/index.html) is online and bundled with the source code.
# Limitations # Limitations
@ -20,6 +24,37 @@ Authors: Sanjay Ghemawat (sanjay@google.com) and Jeff Dean (jeff@google.com)
* Only a single process (possibly multi-threaded) can access a particular database at a time. * Only a single process (possibly multi-threaded) can access a particular database at a time.
* There is no client-server support builtin to the library. An application that needs such support will have to wrap their own server around the library. * There is no client-server support builtin to the library. An application that needs such support will have to wrap their own server around the library.
# Contributing to the leveldb Project
The leveldb project welcomes contributions. leveldb's primary goal is to be
a reliable and fast key/value store. Changes that are in line with the
features/limitations outlined above, and meet the requirements below,
will be considered.
Contribution requirements:
1. **POSIX only**. We _generally_ will only accept changes that are both
compiled, and tested on a POSIX platform - usually Linux. Very small
changes will sometimes be accepted, but consider that more of an
exception than the rule.
2. **Stable API**. We strive very hard to maintain a stable API. Changes that
require changes for projects using leveldb _might_ be rejected without
sufficient benefit to the project.
3. **Tests**: All changes must be accompanied by a new (or changed) test, or
a sufficient explanation as to why a new (or changed) test is not required.
## Submitting a Pull Request
Before any pull request will be accepted the author must first sign a
Contributor License Agreement (CLA) at https://cla.developers.google.com/.
In order to keep the commit timeline linear
[squash](https://git-scm.com/book/en/v2/Git-Tools-Rewriting-History#Squashing-Commits)
your changes down to a single commit and [rebase](https://git-scm.com/docs/git-rebase)
on google/leveldb/master. This keeps the commit timeline linear and more easily sync'ed
with the internal repository at Google. More information at GitHub's
[About Git rebase](https://help.github.com/articles/about-git-rebase/) page.
# Performance # Performance
Here is a performance report (with explanations) from the run of the Here is a performance report (with explanations) from the run of the

View file

@ -175,7 +175,7 @@ DIRS="$PREFIX/db $PREFIX/util $PREFIX/table"
set -f # temporarily disable globbing so that our patterns aren't expanded set -f # temporarily disable globbing so that our patterns aren't expanded
PRUNE_TEST="-name *test*.cc -prune" PRUNE_TEST="-name *test*.cc -prune"
PRUNE_BENCH="-name *_bench.cc -prune" PRUNE_BENCH="-name *_bench.cc -prune"
PRUNE_TOOL="-name leveldb_main.cc -prune" PRUNE_TOOL="-name leveldbutil.cc -prune"
PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "` PORTABLE_FILES=`find $DIRS $PRUNE_TEST -o $PRUNE_BENCH -o $PRUNE_TOOL -o -name '*.cc' -print | sort | sed "s,^$PREFIX/,," | tr "\n" " "`
set +f # re-enable globbing set +f # re-enable globbing

View file

@ -36,7 +36,7 @@ class CorruptionTest {
tiny_cache_ = NewLRUCache(100); tiny_cache_ = NewLRUCache(100);
options_.env = &env_; options_.env = &env_;
options_.block_cache = tiny_cache_; options_.block_cache = tiny_cache_;
dbname_ = test::TmpDir() + "/db_test"; dbname_ = test::TmpDir() + "/corruption_test";
DestroyDB(dbname_, options_); DestroyDB(dbname_, options_);
db_ = NULL; db_ = NULL;

View file

@ -33,6 +33,7 @@
// readmissing -- read N missing keys in random order // readmissing -- read N missing keys in random order
// readhot -- read N times in random order from 1% section of DB // readhot -- read N times in random order from 1% section of DB
// seekrandom -- N random seeks // seekrandom -- N random seeks
// open -- cost of opening a DB
// crc32c -- repeated crc32c of 4K of data // crc32c -- repeated crc32c of 4K of data
// acquireload -- load N*1000 times // acquireload -- load N*1000 times
// Meta operations: // Meta operations:
@ -99,6 +100,9 @@ static int FLAGS_bloom_bits = -1;
// benchmark will fail. // benchmark will fail.
static bool FLAGS_use_existing_db = false; static bool FLAGS_use_existing_db = false;
// If true, reuse existing log/MANIFEST files when re-opening a database.
static bool FLAGS_reuse_logs = false;
// Use the db with the following name. // Use the db with the following name.
static const char* FLAGS_db = NULL; static const char* FLAGS_db = NULL;
@ -138,6 +142,7 @@ class RandomGenerator {
} }
}; };
#if defined(__linux)
static Slice TrimSpace(Slice s) { static Slice TrimSpace(Slice s) {
size_t start = 0; size_t start = 0;
while (start < s.size() && isspace(s[start])) { while (start < s.size() && isspace(s[start])) {
@ -149,6 +154,7 @@ static Slice TrimSpace(Slice s) {
} }
return Slice(s.data() + start, limit - start); return Slice(s.data() + start, limit - start);
} }
#endif
static void AppendWithSpace(std::string* str, Slice msg) { static void AppendWithSpace(std::string* str, Slice msg) {
if (msg.empty()) return; if (msg.empty()) return;
@ -442,7 +448,11 @@ class Benchmark {
bool fresh_db = false; bool fresh_db = false;
int num_threads = FLAGS_threads; int num_threads = FLAGS_threads;
if (name == Slice("fillseq")) { if (name == Slice("open")) {
method = &Benchmark::OpenBench;
num_ /= 10000;
if (num_ < 1) num_ = 1;
} else if (name == Slice("fillseq")) {
fresh_db = true; fresh_db = true;
method = &Benchmark::WriteSeq; method = &Benchmark::WriteSeq;
} else if (name == Slice("fillbatch")) { } else if (name == Slice("fillbatch")) {
@ -695,6 +705,7 @@ class Benchmark {
options.write_buffer_size = FLAGS_write_buffer_size; options.write_buffer_size = FLAGS_write_buffer_size;
options.max_open_files = FLAGS_open_files; options.max_open_files = FLAGS_open_files;
options.filter_policy = filter_policy_; options.filter_policy = filter_policy_;
options.reuse_logs = FLAGS_reuse_logs;
Status s = DB::Open(options, FLAGS_db, &db_); Status s = DB::Open(options, FLAGS_db, &db_);
if (!s.ok()) { if (!s.ok()) {
fprintf(stderr, "open error: %s\n", s.ToString().c_str()); fprintf(stderr, "open error: %s\n", s.ToString().c_str());
@ -702,6 +713,14 @@ class Benchmark {
} }
} }
void OpenBench(ThreadState* thread) {
for (int i = 0; i < num_; i++) {
delete db_;
Open();
thread->stats.FinishedSingleOp();
}
}
void WriteSeq(ThreadState* thread) { void WriteSeq(ThreadState* thread) {
DoWrite(thread, true); DoWrite(thread, true);
} }
@ -941,6 +960,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 && } else if (sscanf(argv[i], "--use_existing_db=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) { (n == 0 || n == 1)) {
FLAGS_use_existing_db = n; FLAGS_use_existing_db = n;
} else if (sscanf(argv[i], "--reuse_logs=%d%c", &n, &junk) == 1 &&
(n == 0 || n == 1)) {
FLAGS_reuse_logs = n;
} else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) { } else if (sscanf(argv[i], "--num=%d%c", &n, &junk) == 1) {
FLAGS_num = n; FLAGS_num = n;
} else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) { } else if (sscanf(argv[i], "--reads=%d%c", &n, &junk) == 1) {

View file

@ -125,7 +125,7 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
db_lock_(NULL), db_lock_(NULL),
shutting_down_(NULL), shutting_down_(NULL),
bg_cv_(&mutex_), bg_cv_(&mutex_),
mem_(new MemTable(internal_comparator_)), mem_(NULL),
imm_(NULL), imm_(NULL),
logfile_(NULL), logfile_(NULL),
logfile_number_(0), logfile_number_(0),
@ -134,7 +134,6 @@ DBImpl::DBImpl(const Options& raw_options, const std::string& dbname)
tmp_batch_(new WriteBatch), tmp_batch_(new WriteBatch),
bg_compaction_scheduled_(false), bg_compaction_scheduled_(false),
manual_compaction_(NULL) { manual_compaction_(NULL) {
mem_->Ref();
has_imm_.Release_Store(NULL); has_imm_.Release_Store(NULL);
// Reserve ten files or so for other uses and give the rest to TableCache. // Reserve ten files or so for other uses and give the rest to TableCache.
@ -271,7 +270,7 @@ void DBImpl::DeleteObsoleteFiles() {
} }
} }
Status DBImpl::Recover(VersionEdit* edit) { Status DBImpl::Recover(VersionEdit* edit, bool *save_manifest) {
mutex_.AssertHeld(); mutex_.AssertHeld();
// Ignore error from CreateDir since the creation of the DB is // Ignore error from CreateDir since the creation of the DB is
@ -301,8 +300,10 @@ Status DBImpl::Recover(VersionEdit* edit) {
} }
} }
s = versions_->Recover(); s = versions_->Recover(save_manifest);
if (s.ok()) { if (!s.ok()) {
return s;
}
SequenceNumber max_sequence(0); SequenceNumber max_sequence(0);
// Recover from all newer log files than the ones named in the // Recover from all newer log files than the ones named in the
@ -341,7 +342,11 @@ Status DBImpl::Recover(VersionEdit* edit) {
// Recover in the order in which the logs were generated // Recover in the order in which the logs were generated
std::sort(logs.begin(), logs.end()); std::sort(logs.begin(), logs.end());
for (size_t i = 0; i < logs.size(); i++) { for (size_t i = 0; i < logs.size(); i++) {
s = RecoverLogFile(logs[i], edit, &max_sequence); s = RecoverLogFile(logs[i], (i == logs.size() - 1), save_manifest, edit,
&max_sequence);
if (!s.ok()) {
return s;
}
// The previous incarnation may not have written any MANIFEST // The previous incarnation may not have written any MANIFEST
// records after allocating this log number. So we manually // records after allocating this log number. So we manually
@ -349,18 +354,15 @@ Status DBImpl::Recover(VersionEdit* edit) {
versions_->MarkFileNumberUsed(logs[i]); versions_->MarkFileNumberUsed(logs[i]);
} }
if (s.ok()) {
if (versions_->LastSequence() < max_sequence) { if (versions_->LastSequence() < max_sequence) {
versions_->SetLastSequence(max_sequence); versions_->SetLastSequence(max_sequence);
} }
}
}
return s; return Status::OK();
} }
Status DBImpl::RecoverLogFile(uint64_t log_number, Status DBImpl::RecoverLogFile(uint64_t log_number, bool last_log,
VersionEdit* edit, bool* save_manifest, VersionEdit* edit,
SequenceNumber* max_sequence) { SequenceNumber* max_sequence) {
struct LogReporter : public log::Reader::Reporter { struct LogReporter : public log::Reader::Reporter {
Env* env; Env* env;
@ -405,6 +407,7 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
std::string scratch; std::string scratch;
Slice record; Slice record;
WriteBatch batch; WriteBatch batch;
int compactions = 0;
MemTable* mem = NULL; MemTable* mem = NULL;
while (reader.ReadRecord(&record, &scratch) && while (reader.ReadRecord(&record, &scratch) &&
status.ok()) { status.ok()) {
@ -432,25 +435,52 @@ Status DBImpl::RecoverLogFile(uint64_t log_number,
} }
if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) { if (mem->ApproximateMemoryUsage() > options_.write_buffer_size) {
compactions++;
*save_manifest = true;
status = WriteLevel0Table(mem, edit, NULL); status = WriteLevel0Table(mem, edit, NULL);
mem->Unref();
mem = NULL;
if (!status.ok()) { if (!status.ok()) {
// Reflect errors immediately so that conditions like full // Reflect errors immediately so that conditions like full
// file-systems cause the DB::Open() to fail. // file-systems cause the DB::Open() to fail.
break; break;
} }
mem->Unref();
mem = NULL;
} }
} }
if (status.ok() && mem != NULL) {
status = WriteLevel0Table(mem, edit, NULL);
// Reflect errors immediately so that conditions like full
// file-systems cause the DB::Open() to fail.
}
if (mem != NULL) mem->Unref();
delete file; delete file;
// See if we should keep reusing the last log file.
if (status.ok() && options_.reuse_logs && last_log && compactions == 0) {
assert(logfile_ == NULL);
assert(log_ == NULL);
assert(mem_ == NULL);
uint64_t lfile_size;
if (env_->GetFileSize(fname, &lfile_size).ok() &&
env_->NewAppendableFile(fname, &logfile_).ok()) {
Log(options_.info_log, "Reusing old log %s \n", fname.c_str());
log_ = new log::Writer(logfile_, lfile_size);
logfile_number_ = log_number;
if (mem != NULL) {
mem_ = mem;
mem = NULL;
} else {
// mem can be NULL if lognum exists but was empty.
mem_ = new MemTable(internal_comparator_);
mem_->Ref();
}
}
}
if (mem != NULL) {
// mem did not get reused; compact it.
if (status.ok()) {
*save_manifest = true;
status = WriteLevel0Table(mem, edit, NULL);
}
mem->Unref();
}
return status; return status;
} }
@ -821,8 +851,9 @@ Status DBImpl::FinishCompactionOutputFile(CompactionState* compact,
delete iter; delete iter;
if (s.ok()) { if (s.ok()) {
Log(options_.info_log, Log(options_.info_log,
"Generated table #%llu: %lld keys, %lld bytes", "Generated table #%llu@%d: %lld keys, %lld bytes",
(unsigned long long) output_number, (unsigned long long) output_number,
compact->compaction->level(),
(unsigned long long) current_entries, (unsigned long long) current_entries,
(unsigned long long) current_bytes); (unsigned long long) current_bytes);
} }
@ -1395,6 +1426,19 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) {
} else if (in == "sstables") { } else if (in == "sstables") {
*value = versions_->current()->DebugString(); *value = versions_->current()->DebugString();
return true; return true;
} else if (in == "approximate-memory-usage") {
size_t total_usage = options_.block_cache->TotalCharge();
if (mem_) {
total_usage += mem_->ApproximateMemoryUsage();
}
if (imm_) {
total_usage += imm_->ApproximateMemoryUsage();
}
char buf[50];
snprintf(buf, sizeof(buf), "%llu",
static_cast<unsigned long long>(total_usage));
value->append(buf);
return true;
} }
return false; return false;
@ -1449,8 +1493,11 @@ Status DB::Open(const Options& options, const std::string& dbname,
DBImpl* impl = new DBImpl(options, dbname); DBImpl* impl = new DBImpl(options, dbname);
impl->mutex_.Lock(); impl->mutex_.Lock();
VersionEdit edit; VersionEdit edit;
Status s = impl->Recover(&edit); // Handles create_if_missing, error_if_exists // Recover handles create_if_missing, error_if_exists
if (s.ok()) { bool save_manifest = false;
Status s = impl->Recover(&edit, &save_manifest);
if (s.ok() && impl->mem_ == NULL) {
// Create new log and a corresponding memtable.
uint64_t new_log_number = impl->versions_->NewFileNumber(); uint64_t new_log_number = impl->versions_->NewFileNumber();
WritableFile* lfile; WritableFile* lfile;
s = options.env->NewWritableFile(LogFileName(dbname, new_log_number), s = options.env->NewWritableFile(LogFileName(dbname, new_log_number),
@ -1460,15 +1507,22 @@ Status DB::Open(const Options& options, const std::string& dbname,
impl->logfile_ = lfile; impl->logfile_ = lfile;
impl->logfile_number_ = new_log_number; impl->logfile_number_ = new_log_number;
impl->log_ = new log::Writer(lfile); impl->log_ = new log::Writer(lfile);
impl->mem_ = new MemTable(impl->internal_comparator_);
impl->mem_->Ref();
}
}
if (s.ok() && save_manifest) {
edit.SetPrevLogNumber(0); // No older logs needed after recovery.
edit.SetLogNumber(impl->logfile_number_);
s = impl->versions_->LogAndApply(&edit, &impl->mutex_); s = impl->versions_->LogAndApply(&edit, &impl->mutex_);
} }
if (s.ok()) { if (s.ok()) {
impl->DeleteObsoleteFiles(); impl->DeleteObsoleteFiles();
impl->MaybeScheduleCompaction(); impl->MaybeScheduleCompaction();
} }
}
impl->mutex_.Unlock(); impl->mutex_.Unlock();
if (s.ok()) { if (s.ok()) {
assert(impl->mem_ != NULL);
*dbptr = impl; *dbptr = impl;
} else { } else {
delete impl; delete impl;

View file

@ -78,7 +78,8 @@ class DBImpl : public DB {
// Recover the descriptor from persistent storage. May do a significant // Recover the descriptor from persistent storage. May do a significant
// amount of work to recover recently logged updates. Any changes to // amount of work to recover recently logged updates. Any changes to
// be made to the descriptor are added to *edit. // be made to the descriptor are added to *edit.
Status Recover(VersionEdit* edit) EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status Recover(VersionEdit* edit, bool* save_manifest)
EXCLUSIVE_LOCKS_REQUIRED(mutex_);
void MaybeIgnoreError(Status* s) const; void MaybeIgnoreError(Status* s) const;
@ -90,9 +91,8 @@ class DBImpl : public DB {
// Errors are recorded in bg_error_. // Errors are recorded in bg_error_.
void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_); void CompactMemTable() EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status RecoverLogFile(uint64_t log_number, Status RecoverLogFile(uint64_t log_number, bool last_log, bool* save_manifest,
VersionEdit* edit, VersionEdit* edit, SequenceNumber* max_sequence)
SequenceNumber* max_sequence)
EXCLUSIVE_LOCKS_REQUIRED(mutex_); EXCLUSIVE_LOCKS_REQUIRED(mutex_);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base)

View file

@ -193,6 +193,7 @@ class DBTest {
// Sequence of option configurations to try // Sequence of option configurations to try
enum OptionConfig { enum OptionConfig {
kDefault, kDefault,
kReuse,
kFilter, kFilter,
kUncompressed, kUncompressed,
kEnd kEnd
@ -237,7 +238,11 @@ class DBTest {
// Return the current option configuration. // Return the current option configuration.
Options CurrentOptions() { Options CurrentOptions() {
Options options; Options options;
options.reuse_logs = false;
switch (option_config_) { switch (option_config_) {
case kReuse:
options.reuse_logs = true;
break;
case kFilter: case kFilter:
options.filter_policy = filter_policy_; options.filter_policy = filter_policy_;
break; break;
@ -558,6 +563,17 @@ TEST(DBTest, GetFromVersions) {
} while (ChangeOptions()); } while (ChangeOptions());
} }
TEST(DBTest, GetMemUsage) {
do {
ASSERT_OK(Put("foo", "v1"));
std::string val;
ASSERT_TRUE(db_->GetProperty("leveldb.approximate-memory-usage", &val));
int mem_usage = atoi(val.c_str());
ASSERT_GT(mem_usage, 0);
ASSERT_LT(mem_usage, 5*1024*1024);
} while (ChangeOptions());
}
TEST(DBTest, GetSnapshot) { TEST(DBTest, GetSnapshot) {
do { do {
// Try with both a short key and a long key // Try with both a short key and a long key
@ -1080,6 +1096,14 @@ TEST(DBTest, ApproximateSizes) {
// 0 because GetApproximateSizes() does not account for memtable space // 0 because GetApproximateSizes() does not account for memtable space
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0)); ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
if (options.reuse_logs) {
// Recovery will reuse memtable, and GetApproximateSizes() does not
// account for memtable usage;
Reopen(&options);
ASSERT_TRUE(Between(Size("", Key(50)), 0, 0));
continue;
}
// Check sizes across recovery by reopening a few times // Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) { for (int run = 0; run < 3; run++) {
Reopen(&options); Reopen(&options);
@ -1123,6 +1147,11 @@ TEST(DBTest, ApproximateSizes_MixOfSmallAndLarge) {
ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000))); ASSERT_OK(Put(Key(6), RandomString(&rnd, 300000)));
ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000))); ASSERT_OK(Put(Key(7), RandomString(&rnd, 10000)));
if (options.reuse_logs) {
// Need to force a memtable compaction since recovery does not do so.
ASSERT_OK(dbfull()->TEST_CompactMemTable());
}
// Check sizes across recovery by reopening a few times // Check sizes across recovery by reopening a few times
for (int run = 0; run < 3; run++) { for (int run = 0; run < 3; run++) {
Reopen(&options); Reopen(&options);
@ -2084,7 +2113,8 @@ void BM_LogAndApply(int iters, int num_base_files) {
InternalKeyComparator cmp(BytewiseComparator()); InternalKeyComparator cmp(BytewiseComparator());
Options options; Options options;
VersionSet vset(dbname, &options, NULL, &cmp); VersionSet vset(dbname, &options, NULL, &cmp);
ASSERT_OK(vset.Recover()); bool save_manifest;
ASSERT_OK(vset.Recover(&save_manifest));
VersionEdit vbase; VersionEdit vbase;
uint64_t fnum = 1; uint64_t fnum = 1;
for (int i = 0; i < num_base_files; i++) { for (int i = 0; i < num_base_files; i++) {

554
db/fault_injection_test.cc Normal file
View file

@ -0,0 +1,554 @@
// Copyright 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
// This test uses a custom Env to keep track of the state of a filesystem as of
// the last "sync". It then checks for data loss errors by purposely dropping
// file data (or entire files) not protected by a "sync".
#include "leveldb/db.h"
#include <map>
#include <set>
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/log_format.h"
#include "db/version_set.h"
#include "leveldb/cache.h"
#include "leveldb/env.h"
#include "leveldb/table.h"
#include "leveldb/write_batch.h"
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
static const int kValueSize = 1000;
static const int kMaxNumValues = 2000;
static const size_t kNumIterations = 3;
class FaultInjectionTestEnv;
namespace {
// Assume a filename, and not a directory name like "/foo/bar/"
static std::string GetDirName(const std::string filename) {
size_t found = filename.find_last_of("/\\");
if (found == std::string::npos) {
return "";
} else {
return filename.substr(0, found);
}
}
Status SyncDir(const std::string& dir) {
// As this is a test it isn't required to *actually* sync this directory.
return Status::OK();
}
// A basic file truncation function suitable for this test.
Status Truncate(const std::string& filename, uint64_t length) {
leveldb::Env* env = leveldb::Env::Default();
SequentialFile* orig_file;
Status s = env->NewSequentialFile(filename, &orig_file);
if (!s.ok())
return s;
char* scratch = new char[length];
leveldb::Slice result;
s = orig_file->Read(length, &result, scratch);
delete orig_file;
if (s.ok()) {
std::string tmp_name = GetDirName(filename) + "/truncate.tmp";
WritableFile* tmp_file;
s = env->NewWritableFile(tmp_name, &tmp_file);
if (s.ok()) {
s = tmp_file->Append(result);
delete tmp_file;
if (s.ok()) {
s = env->RenameFile(tmp_name, filename);
} else {
env->DeleteFile(tmp_name);
}
}
}
delete[] scratch;
return s;
}
struct FileState {
std::string filename_;
ssize_t pos_;
ssize_t pos_at_last_sync_;
ssize_t pos_at_last_flush_;
FileState(const std::string& filename)
: filename_(filename),
pos_(-1),
pos_at_last_sync_(-1),
pos_at_last_flush_(-1) { }
FileState() : pos_(-1), pos_at_last_sync_(-1), pos_at_last_flush_(-1) {}
bool IsFullySynced() const { return pos_ <= 0 || pos_ == pos_at_last_sync_; }
Status DropUnsyncedData() const;
};
} // anonymous namespace
// A wrapper around WritableFile which informs another Env whenever this file
// is written to or sync'ed.
class TestWritableFile : public WritableFile {
public:
TestWritableFile(const FileState& state,
WritableFile* f,
FaultInjectionTestEnv* env);
virtual ~TestWritableFile();
virtual Status Append(const Slice& data);
virtual Status Close();
virtual Status Flush();
virtual Status Sync();
private:
FileState state_;
WritableFile* target_;
bool writable_file_opened_;
FaultInjectionTestEnv* env_;
Status SyncParent();
};
class FaultInjectionTestEnv : public EnvWrapper {
public:
FaultInjectionTestEnv() : EnvWrapper(Env::Default()), filesystem_active_(true) {}
virtual ~FaultInjectionTestEnv() { }
virtual Status NewWritableFile(const std::string& fname,
WritableFile** result);
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result);
virtual Status DeleteFile(const std::string& f);
virtual Status RenameFile(const std::string& s, const std::string& t);
void WritableFileClosed(const FileState& state);
Status DropUnsyncedFileData();
Status DeleteFilesCreatedAfterLastDirSync();
void DirWasSynced();
bool IsFileCreatedSinceLastDirSync(const std::string& filename);
void ResetState();
void UntrackFile(const std::string& f);
// Setting the filesystem to inactive is the test equivalent to simulating a
// system reset. Setting to inactive will freeze our saved filesystem state so
// that it will stop being recorded. It can then be reset back to the state at
// the time of the reset.
bool IsFilesystemActive() const { return filesystem_active_; }
void SetFilesystemActive(bool active) { filesystem_active_ = active; }
private:
port::Mutex mutex_;
std::map<std::string, FileState> db_file_state_;
std::set<std::string> new_files_since_last_dir_sync_;
bool filesystem_active_; // Record flushes, syncs, writes
};
TestWritableFile::TestWritableFile(const FileState& state,
WritableFile* f,
FaultInjectionTestEnv* env)
: state_(state),
target_(f),
writable_file_opened_(true),
env_(env) {
assert(f != NULL);
}
TestWritableFile::~TestWritableFile() {
if (writable_file_opened_) {
Close();
}
delete target_;
}
Status TestWritableFile::Append(const Slice& data) {
Status s = target_->Append(data);
if (s.ok() && env_->IsFilesystemActive()) {
state_.pos_ += data.size();
}
return s;
}
Status TestWritableFile::Close() {
writable_file_opened_ = false;
Status s = target_->Close();
if (s.ok()) {
env_->WritableFileClosed(state_);
}
return s;
}
Status TestWritableFile::Flush() {
Status s = target_->Flush();
if (s.ok() && env_->IsFilesystemActive()) {
state_.pos_at_last_flush_ = state_.pos_;
}
return s;
}
Status TestWritableFile::SyncParent() {
Status s = SyncDir(GetDirName(state_.filename_));
if (s.ok()) {
env_->DirWasSynced();
}
return s;
}
Status TestWritableFile::Sync() {
if (!env_->IsFilesystemActive()) {
return Status::OK();
}
// Ensure new files referred to by the manifest are in the filesystem.
Status s = target_->Sync();
if (s.ok()) {
state_.pos_at_last_sync_ = state_.pos_;
}
if (env_->IsFileCreatedSinceLastDirSync(state_.filename_)) {
Status ps = SyncParent();
if (s.ok() && !ps.ok()) {
s = ps;
}
}
return s;
}
Status FaultInjectionTestEnv::NewWritableFile(const std::string& fname,
WritableFile** result) {
WritableFile* actual_writable_file;
Status s = target()->NewWritableFile(fname, &actual_writable_file);
if (s.ok()) {
FileState state(fname);
state.pos_ = 0;
*result = new TestWritableFile(state, actual_writable_file, this);
// NewWritableFile doesn't append to files, so if the same file is
// opened again then it will be truncated - so forget our saved
// state.
UntrackFile(fname);
MutexLock l(&mutex_);
new_files_since_last_dir_sync_.insert(fname);
}
return s;
}
Status FaultInjectionTestEnv::NewAppendableFile(const std::string& fname,
WritableFile** result) {
WritableFile* actual_writable_file;
Status s = target()->NewAppendableFile(fname, &actual_writable_file);
if (s.ok()) {
FileState state(fname);
state.pos_ = 0;
{
MutexLock l(&mutex_);
if (db_file_state_.count(fname) == 0) {
new_files_since_last_dir_sync_.insert(fname);
} else {
state = db_file_state_[fname];
}
}
*result = new TestWritableFile(state, actual_writable_file, this);
}
return s;
}
Status FaultInjectionTestEnv::DropUnsyncedFileData() {
Status s;
MutexLock l(&mutex_);
for (std::map<std::string, FileState>::const_iterator it =
db_file_state_.begin();
s.ok() && it != db_file_state_.end(); ++it) {
const FileState& state = it->second;
if (!state.IsFullySynced()) {
s = state.DropUnsyncedData();
}
}
return s;
}
void FaultInjectionTestEnv::DirWasSynced() {
MutexLock l(&mutex_);
new_files_since_last_dir_sync_.clear();
}
bool FaultInjectionTestEnv::IsFileCreatedSinceLastDirSync(
const std::string& filename) {
MutexLock l(&mutex_);
return new_files_since_last_dir_sync_.find(filename) !=
new_files_since_last_dir_sync_.end();
}
void FaultInjectionTestEnv::UntrackFile(const std::string& f) {
MutexLock l(&mutex_);
db_file_state_.erase(f);
new_files_since_last_dir_sync_.erase(f);
}
Status FaultInjectionTestEnv::DeleteFile(const std::string& f) {
Status s = EnvWrapper::DeleteFile(f);
ASSERT_OK(s);
if (s.ok()) {
UntrackFile(f);
}
return s;
}
Status FaultInjectionTestEnv::RenameFile(const std::string& s,
const std::string& t) {
Status ret = EnvWrapper::RenameFile(s, t);
if (ret.ok()) {
MutexLock l(&mutex_);
if (db_file_state_.find(s) != db_file_state_.end()) {
db_file_state_[t] = db_file_state_[s];
db_file_state_.erase(s);
}
if (new_files_since_last_dir_sync_.erase(s) != 0) {
assert(new_files_since_last_dir_sync_.find(t) ==
new_files_since_last_dir_sync_.end());
new_files_since_last_dir_sync_.insert(t);
}
}
return ret;
}
void FaultInjectionTestEnv::ResetState() {
// Since we are not destroying the database, the existing files
// should keep their recorded synced/flushed state. Therefore
// we do not reset db_file_state_ and new_files_since_last_dir_sync_.
MutexLock l(&mutex_);
SetFilesystemActive(true);
}
Status FaultInjectionTestEnv::DeleteFilesCreatedAfterLastDirSync() {
// Because DeleteFile access this container make a copy to avoid deadlock
mutex_.Lock();
std::set<std::string> new_files(new_files_since_last_dir_sync_.begin(),
new_files_since_last_dir_sync_.end());
mutex_.Unlock();
Status s;
std::set<std::string>::const_iterator it;
for (it = new_files.begin(); s.ok() && it != new_files.end(); ++it) {
s = DeleteFile(*it);
}
return s;
}
void FaultInjectionTestEnv::WritableFileClosed(const FileState& state) {
MutexLock l(&mutex_);
db_file_state_[state.filename_] = state;
}
Status FileState::DropUnsyncedData() const {
ssize_t sync_pos = pos_at_last_sync_ == -1 ? 0 : pos_at_last_sync_;
return Truncate(filename_, sync_pos);
}
class FaultInjectionTest {
public:
enum ExpectedVerifResult { VAL_EXPECT_NO_ERROR, VAL_EXPECT_ERROR };
enum ResetMethod { RESET_DROP_UNSYNCED_DATA, RESET_DELETE_UNSYNCED_FILES };
FaultInjectionTestEnv* env_;
std::string dbname_;
Cache* tiny_cache_;
Options options_;
DB* db_;
FaultInjectionTest()
: env_(new FaultInjectionTestEnv),
tiny_cache_(NewLRUCache(100)),
db_(NULL) {
dbname_ = test::TmpDir() + "/fault_test";
DestroyDB(dbname_, Options()); // Destroy any db from earlier run
options_.reuse_logs = true;
options_.env = env_;
options_.paranoid_checks = true;
options_.block_cache = tiny_cache_;
options_.create_if_missing = true;
}
~FaultInjectionTest() {
CloseDB();
DestroyDB(dbname_, Options());
delete tiny_cache_;
delete env_;
}
void ReuseLogs(bool reuse) {
options_.reuse_logs = reuse;
}
void Build(int start_idx, int num_vals) {
std::string key_space, value_space;
WriteBatch batch;
for (int i = start_idx; i < start_idx + num_vals; i++) {
Slice key = Key(i, &key_space);
batch.Clear();
batch.Put(key, Value(i, &value_space));
WriteOptions options;
ASSERT_OK(db_->Write(options, &batch));
}
}
Status ReadValue(int i, std::string* val) const {
std::string key_space, value_space;
Slice key = Key(i, &key_space);
Value(i, &value_space);
ReadOptions options;
return db_->Get(options, key, val);
}
Status Verify(int start_idx, int num_vals,
ExpectedVerifResult expected) const {
std::string val;
std::string value_space;
Status s;
for (int i = start_idx; i < start_idx + num_vals && s.ok(); i++) {
Value(i, &value_space);
s = ReadValue(i, &val);
if (expected == VAL_EXPECT_NO_ERROR) {
if (s.ok()) {
ASSERT_EQ(value_space, val);
}
} else if (s.ok()) {
fprintf(stderr, "Expected an error at %d, but was OK\n", i);
s = Status::IOError(dbname_, "Expected value error:");
} else {
s = Status::OK(); // An expected error
}
}
return s;
}
// Return the ith key
Slice Key(int i, std::string* storage) const {
char buf[100];
snprintf(buf, sizeof(buf), "%016d", i);
storage->assign(buf, strlen(buf));
return Slice(*storage);
}
// Return the value to associate with the specified key
Slice Value(int k, std::string* storage) const {
Random r(k);
return test::RandomString(&r, kValueSize, storage);
}
Status OpenDB() {
delete db_;
db_ = NULL;
env_->ResetState();
return DB::Open(options_, dbname_, &db_);
}
void CloseDB() {
delete db_;
db_ = NULL;
}
void DeleteAllData() {
Iterator* iter = db_->NewIterator(ReadOptions());
WriteOptions options;
for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
ASSERT_OK(db_->Delete(WriteOptions(), iter->key()));
}
delete iter;
}
void ResetDBState(ResetMethod reset_method) {
switch (reset_method) {
case RESET_DROP_UNSYNCED_DATA:
ASSERT_OK(env_->DropUnsyncedFileData());
break;
case RESET_DELETE_UNSYNCED_FILES:
ASSERT_OK(env_->DeleteFilesCreatedAfterLastDirSync());
break;
default:
assert(false);
}
}
void PartialCompactTestPreFault(int num_pre_sync, int num_post_sync) {
DeleteAllData();
Build(0, num_pre_sync);
db_->CompactRange(NULL, NULL);
Build(num_pre_sync, num_post_sync);
}
void PartialCompactTestReopenWithFault(ResetMethod reset_method,
int num_pre_sync,
int num_post_sync) {
env_->SetFilesystemActive(false);
CloseDB();
ResetDBState(reset_method);
ASSERT_OK(OpenDB());
ASSERT_OK(Verify(0, num_pre_sync, FaultInjectionTest::VAL_EXPECT_NO_ERROR));
ASSERT_OK(Verify(num_pre_sync, num_post_sync, FaultInjectionTest::VAL_EXPECT_ERROR));
}
void NoWriteTestPreFault() {
}
void NoWriteTestReopenWithFault(ResetMethod reset_method) {
CloseDB();
ResetDBState(reset_method);
ASSERT_OK(OpenDB());
}
void DoTest() {
Random rnd(0);
ASSERT_OK(OpenDB());
for (size_t idx = 0; idx < kNumIterations; idx++) {
int num_pre_sync = rnd.Uniform(kMaxNumValues);
int num_post_sync = rnd.Uniform(kMaxNumValues);
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
PartialCompactTestReopenWithFault(RESET_DROP_UNSYNCED_DATA,
num_pre_sync,
num_post_sync);
NoWriteTestPreFault();
NoWriteTestReopenWithFault(RESET_DROP_UNSYNCED_DATA);
PartialCompactTestPreFault(num_pre_sync, num_post_sync);
// No new files created so we expect all values since no files will be
// dropped.
PartialCompactTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES,
num_pre_sync + num_post_sync,
0);
NoWriteTestPreFault();
NoWriteTestReopenWithFault(RESET_DELETE_UNSYNCED_FILES);
}
}
};
TEST(FaultInjectionTest, FaultTestNoLogReuse) {
ReuseLogs(false);
DoTest();
}
TEST(FaultInjectionTest, FaultTestWithLogReuse) {
ReuseLogs(true);
DoTest();
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

View file

@ -25,7 +25,8 @@ Reader::Reader(SequentialFile* file, Reporter* reporter, bool checksum,
eof_(false), eof_(false),
last_record_offset_(0), last_record_offset_(0),
end_of_buffer_offset_(0), end_of_buffer_offset_(0),
initial_offset_(initial_offset) { initial_offset_(initial_offset),
resyncing_(initial_offset > 0) {
} }
Reader::~Reader() { Reader::~Reader() {
@ -72,8 +73,25 @@ bool Reader::ReadRecord(Slice* record, std::string* scratch) {
Slice fragment; Slice fragment;
while (true) { while (true) {
uint64_t physical_record_offset = end_of_buffer_offset_ - buffer_.size();
const unsigned int record_type = ReadPhysicalRecord(&fragment); const unsigned int record_type = ReadPhysicalRecord(&fragment);
// ReadPhysicalRecord may have only had an empty trailer remaining in its
// internal buffer. Calculate the offset of the next physical record now
// that it has returned, properly accounting for its header size.
uint64_t physical_record_offset =
end_of_buffer_offset_ - buffer_.size() - kHeaderSize - fragment.size();
if (resyncing_) {
if (record_type == kMiddleType) {
continue;
} else if (record_type == kLastType) {
resyncing_ = false;
continue;
} else {
resyncing_ = false;
}
}
switch (record_type) { switch (record_type) {
case kFullType: case kFullType:
if (in_fragmented_record) { if (in_fragmented_record) {

View file

@ -73,6 +73,11 @@ class Reader {
// Offset at which to start looking for the first record to return // Offset at which to start looking for the first record to return
uint64_t const initial_offset_; uint64_t const initial_offset_;
// True if we are resynchronizing after a seek (initial_offset_ > 0). In
// particular, a run of kMiddleType and kLastType records can be silently
// skipped in this mode
bool resyncing_;
// Extend record types with the following special values // Extend record types with the following special values
enum { enum {
kEof = kMaxRecordType + 1, kEof = kMaxRecordType + 1,

View file

@ -79,7 +79,7 @@ class LogTest {
virtual Status Skip(uint64_t n) { virtual Status Skip(uint64_t n) {
if (n > contents_.size()) { if (n > contents_.size()) {
contents_.clear(); contents_.clear();
return Status::NotFound("in-memory file skipepd past end"); return Status::NotFound("in-memory file skipped past end");
} }
contents_.remove_prefix(n); contents_.remove_prefix(n);
@ -104,23 +104,34 @@ class LogTest {
StringSource source_; StringSource source_;
ReportCollector report_; ReportCollector report_;
bool reading_; bool reading_;
Writer writer_; Writer* writer_;
Reader reader_; Reader* reader_;
// Record metadata for testing initial offset functionality // Record metadata for testing initial offset functionality
static size_t initial_offset_record_sizes_[]; static size_t initial_offset_record_sizes_[];
static uint64_t initial_offset_last_record_offsets_[]; static uint64_t initial_offset_last_record_offsets_[];
static int num_initial_offset_records_;
public: public:
LogTest() : reading_(false), LogTest() : reading_(false),
writer_(&dest_), writer_(new Writer(&dest_)),
reader_(&source_, &report_, true/*checksum*/, reader_(new Reader(&source_, &report_, true/*checksum*/,
0/*initial_offset*/) { 0/*initial_offset*/)) {
}
~LogTest() {
delete writer_;
delete reader_;
}
void ReopenForAppend() {
delete writer_;
writer_ = new Writer(&dest_, dest_.contents_.size());
} }
void Write(const std::string& msg) { void Write(const std::string& msg) {
ASSERT_TRUE(!reading_) << "Write() after starting to read"; ASSERT_TRUE(!reading_) << "Write() after starting to read";
writer_.AddRecord(Slice(msg)); writer_->AddRecord(Slice(msg));
} }
size_t WrittenBytes() const { size_t WrittenBytes() const {
@ -134,7 +145,7 @@ class LogTest {
} }
std::string scratch; std::string scratch;
Slice record; Slice record;
if (reader_.ReadRecord(&record, &scratch)) { if (reader_->ReadRecord(&record, &scratch)) {
return record.ToString(); return record.ToString();
} else { } else {
return "EOF"; return "EOF";
@ -182,13 +193,18 @@ class LogTest {
} }
void WriteInitialOffsetLog() { void WriteInitialOffsetLog() {
for (int i = 0; i < 4; i++) { for (int i = 0; i < num_initial_offset_records_; i++) {
std::string record(initial_offset_record_sizes_[i], std::string record(initial_offset_record_sizes_[i],
static_cast<char>('a' + i)); static_cast<char>('a' + i));
Write(record); Write(record);
} }
} }
void StartReadingAt(uint64_t initial_offset) {
delete reader_;
reader_ = new Reader(&source_, &report_, true/*checksum*/, initial_offset);
}
void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) { void CheckOffsetPastEndReturnsNoRecords(uint64_t offset_past_end) {
WriteInitialOffsetLog(); WriteInitialOffsetLog();
reading_ = true; reading_ = true;
@ -208,6 +224,11 @@ class LogTest {
source_.contents_ = Slice(dest_.contents_); source_.contents_ = Slice(dest_.contents_);
Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/, Reader* offset_reader = new Reader(&source_, &report_, true/*checksum*/,
initial_offset); initial_offset);
// Read all records from expected_record_offset through the last one.
ASSERT_LT(expected_record_offset, num_initial_offset_records_);
for (; expected_record_offset < num_initial_offset_records_;
++expected_record_offset) {
Slice record; Slice record;
std::string scratch; std::string scratch;
ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch)); ASSERT_TRUE(offset_reader->ReadRecord(&record, &scratch));
@ -216,24 +237,35 @@ class LogTest {
ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset], ASSERT_EQ(initial_offset_last_record_offsets_[expected_record_offset],
offset_reader->LastRecordOffset()); offset_reader->LastRecordOffset());
ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]); ASSERT_EQ((char)('a' + expected_record_offset), record.data()[0]);
}
delete offset_reader; delete offset_reader;
} }
}; };
size_t LogTest::initial_offset_record_sizes_[] = size_t LogTest::initial_offset_record_sizes_[] =
{10000, // Two sizable records in first block {10000, // Two sizable records in first block
10000, 10000,
2 * log::kBlockSize - 1000, // Span three blocks 2 * log::kBlockSize - 1000, // Span three blocks
1}; 1,
13716, // Consume all but two bytes of block 3.
log::kBlockSize - kHeaderSize, // Consume the entirety of block 4.
};
uint64_t LogTest::initial_offset_last_record_offsets_[] = uint64_t LogTest::initial_offset_last_record_offsets_[] =
{0, {0,
kHeaderSize + 10000, kHeaderSize + 10000,
2 * (kHeaderSize + 10000), 2 * (kHeaderSize + 10000),
2 * (kHeaderSize + 10000) + 2 * (kHeaderSize + 10000) +
(2 * log::kBlockSize - 1000) + 3 * kHeaderSize}; (2 * log::kBlockSize - 1000) + 3 * kHeaderSize,
2 * (kHeaderSize + 10000) +
(2 * log::kBlockSize - 1000) + 3 * kHeaderSize
+ kHeaderSize + 1,
3 * log::kBlockSize,
};
// LogTest::initial_offset_last_record_offsets_ must be defined before this.
int LogTest::num_initial_offset_records_ =
sizeof(LogTest::initial_offset_last_record_offsets_)/sizeof(uint64_t);
TEST(LogTest, Empty) { TEST(LogTest, Empty) {
ASSERT_EQ("EOF", Read()); ASSERT_EQ("EOF", Read());
@ -318,6 +350,15 @@ TEST(LogTest, AlignedEof) {
ASSERT_EQ("EOF", Read()); ASSERT_EQ("EOF", Read());
} }
TEST(LogTest, OpenForAppend) {
Write("hello");
ReopenForAppend();
Write("world");
ASSERT_EQ("hello", Read());
ASSERT_EQ("world", Read());
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, RandomRead) { TEST(LogTest, RandomRead) {
const int N = 500; const int N = 500;
Random write_rnd(301); Random write_rnd(301);
@ -445,6 +486,22 @@ TEST(LogTest, PartialLastIsIgnored) {
ASSERT_EQ(0, DroppedBytes()); ASSERT_EQ(0, DroppedBytes());
} }
TEST(LogTest, SkipIntoMultiRecord) {
// Consider a fragmented record:
// first(R1), middle(R1), last(R1), first(R2)
// If initial_offset points to a record after first(R1) but before first(R2)
// incomplete fragment errors are not actual errors, and must be suppressed
// until a new first or full record is encountered.
Write(BigString("foo", 3*kBlockSize));
Write("correct");
StartReadingAt(kBlockSize);
ASSERT_EQ("correct", Read());
ASSERT_EQ("", ReportMessage());
ASSERT_EQ(0, DroppedBytes());
ASSERT_EQ("EOF", Read());
}
TEST(LogTest, ErrorJoinsRecords) { TEST(LogTest, ErrorJoinsRecords) {
// Consider two fragmented records: // Consider two fragmented records:
// first(R1) last(R1) first(R2) last(R2) // first(R1) last(R1) first(R2) last(R2)
@ -514,6 +571,10 @@ TEST(LogTest, ReadFourthStart) {
3); 3);
} }
TEST(LogTest, ReadInitialOffsetIntoBlockPadding) {
CheckInitialOffsetRecord(3 * log::kBlockSize - 3, 5);
}
TEST(LogTest, ReadEnd) { TEST(LogTest, ReadEnd) {
CheckOffsetPastEndReturnsNoRecords(0); CheckOffsetPastEndReturnsNoRecords(0);
} }

View file

@ -12,13 +12,22 @@
namespace leveldb { namespace leveldb {
namespace log { namespace log {
static void InitTypeCrc(uint32_t* type_crc) {
for (int i = 0; i <= kMaxRecordType; i++) {
char t = static_cast<char>(i);
type_crc[i] = crc32c::Value(&t, 1);
}
}
Writer::Writer(WritableFile* dest) Writer::Writer(WritableFile* dest)
: dest_(dest), : dest_(dest),
block_offset_(0) { block_offset_(0) {
for (int i = 0; i <= kMaxRecordType; i++) { InitTypeCrc(type_crc_);
char t = static_cast<char>(i); }
type_crc_[i] = crc32c::Value(&t, 1);
} Writer::Writer(WritableFile* dest, uint64_t dest_length)
: dest_(dest), block_offset_(dest_length % kBlockSize) {
InitTypeCrc(type_crc_);
} }
Writer::~Writer() { Writer::~Writer() {

View file

@ -22,6 +22,12 @@ class Writer {
// "*dest" must be initially empty. // "*dest" must be initially empty.
// "*dest" must remain live while this Writer is in use. // "*dest" must remain live while this Writer is in use.
explicit Writer(WritableFile* dest); explicit Writer(WritableFile* dest);
// Create a writer that will append data to "*dest".
// "*dest" must have initial length "dest_length".
// "*dest" must remain live while this Writer is in use.
Writer(WritableFile* dest, uint64_t dest_length);
~Writer(); ~Writer();
Status AddRecord(const Slice& slice); Status AddRecord(const Slice& slice);

View file

@ -36,10 +36,7 @@ class MemTable {
} }
// Returns an estimate of the number of bytes of data in use by this // Returns an estimate of the number of bytes of data in use by this
// data structure. // data structure. It is safe to call when MemTable is being modified.
//
// REQUIRES: external synchronization to prevent simultaneous
// operations on the same MemTable.
size_t ApproximateMemoryUsage(); size_t ApproximateMemoryUsage();
// Return an iterator that yields the contents of the memtable. // Return an iterator that yields the contents of the memtable.

324
db/recovery_test.cc Normal file
View file

@ -0,0 +1,324 @@
// Copyright (c) 2014 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include "db/db_impl.h"
#include "db/filename.h"
#include "db/version_set.h"
#include "db/write_batch_internal.h"
#include "leveldb/db.h"
#include "leveldb/env.h"
#include "leveldb/write_batch.h"
#include "util/logging.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace leveldb {
class RecoveryTest {
public:
RecoveryTest() : env_(Env::Default()), db_(NULL) {
dbname_ = test::TmpDir() + "/recovery_test";
DestroyDB(dbname_, Options());
Open();
}
~RecoveryTest() {
Close();
DestroyDB(dbname_, Options());
}
DBImpl* dbfull() const { return reinterpret_cast<DBImpl*>(db_); }
Env* env() const { return env_; }
bool CanAppend() {
WritableFile* tmp;
Status s = env_->NewAppendableFile(CurrentFileName(dbname_), &tmp);
delete tmp;
if (s.IsNotSupportedError()) {
return false;
} else {
return true;
}
}
void Close() {
delete db_;
db_ = NULL;
}
void Open(Options* options = NULL) {
Close();
Options opts;
if (options != NULL) {
opts = *options;
} else {
opts.reuse_logs = true; // TODO(sanjay): test both ways
opts.create_if_missing = true;
}
if (opts.env == NULL) {
opts.env = env_;
}
ASSERT_OK(DB::Open(opts, dbname_, &db_));
ASSERT_EQ(1, NumLogs());
}
Status Put(const std::string& k, const std::string& v) {
return db_->Put(WriteOptions(), k, v);
}
std::string Get(const std::string& k, const Snapshot* snapshot = NULL) {
std::string result;
Status s = db_->Get(ReadOptions(), k, &result);
if (s.IsNotFound()) {
result = "NOT_FOUND";
} else if (!s.ok()) {
result = s.ToString();
}
return result;
}
std::string ManifestFileName() {
std::string current;
ASSERT_OK(ReadFileToString(env_, CurrentFileName(dbname_), &current));
size_t len = current.size();
if (len > 0 && current[len-1] == '\n') {
current.resize(len - 1);
}
return dbname_ + "/" + current;
}
std::string LogName(uint64_t number) {
return LogFileName(dbname_, number);
}
size_t DeleteLogFiles() {
std::vector<uint64_t> logs = GetFiles(kLogFile);
for (size_t i = 0; i < logs.size(); i++) {
ASSERT_OK(env_->DeleteFile(LogName(logs[i]))) << LogName(logs[i]);
}
return logs.size();
}
uint64_t FirstLogFile() {
return GetFiles(kLogFile)[0];
}
std::vector<uint64_t> GetFiles(FileType t) {
std::vector<std::string> filenames;
ASSERT_OK(env_->GetChildren(dbname_, &filenames));
std::vector<uint64_t> result;
for (size_t i = 0; i < filenames.size(); i++) {
uint64_t number;
FileType type;
if (ParseFileName(filenames[i], &number, &type) && type == t) {
result.push_back(number);
}
}
return result;
}
int NumLogs() {
return GetFiles(kLogFile).size();
}
int NumTables() {
return GetFiles(kTableFile).size();
}
uint64_t FileSize(const std::string& fname) {
uint64_t result;
ASSERT_OK(env_->GetFileSize(fname, &result)) << fname;
return result;
}
void CompactMemTable() {
dbfull()->TEST_CompactMemTable();
}
// Directly construct a log file that sets key to val.
void MakeLogFile(uint64_t lognum, SequenceNumber seq, Slice key, Slice val) {
std::string fname = LogFileName(dbname_, lognum);
WritableFile* file;
ASSERT_OK(env_->NewWritableFile(fname, &file));
log::Writer writer(file);
WriteBatch batch;
batch.Put(key, val);
WriteBatchInternal::SetSequence(&batch, seq);
ASSERT_OK(writer.AddRecord(WriteBatchInternal::Contents(&batch)));
ASSERT_OK(file->Flush());
delete file;
}
private:
std::string dbname_;
Env* env_;
DB* db_;
};
TEST(RecoveryTest, ManifestReused) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
return;
}
ASSERT_OK(Put("foo", "bar"));
Close();
std::string old_manifest = ManifestFileName();
Open();
ASSERT_EQ(old_manifest, ManifestFileName());
ASSERT_EQ("bar", Get("foo"));
Open();
ASSERT_EQ(old_manifest, ManifestFileName());
ASSERT_EQ("bar", Get("foo"));
}
TEST(RecoveryTest, LargeManifestCompacted) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
return;
}
ASSERT_OK(Put("foo", "bar"));
Close();
std::string old_manifest = ManifestFileName();
// Pad with zeroes to make manifest file very big.
{
uint64_t len = FileSize(old_manifest);
WritableFile* file;
ASSERT_OK(env()->NewAppendableFile(old_manifest, &file));
std::string zeroes(3*1048576 - static_cast<size_t>(len), 0);
ASSERT_OK(file->Append(zeroes));
ASSERT_OK(file->Flush());
delete file;
}
Open();
std::string new_manifest = ManifestFileName();
ASSERT_NE(old_manifest, new_manifest);
ASSERT_GT(10000, FileSize(new_manifest));
ASSERT_EQ("bar", Get("foo"));
Open();
ASSERT_EQ(new_manifest, ManifestFileName());
ASSERT_EQ("bar", Get("foo"));
}
TEST(RecoveryTest, NoLogFiles) {
ASSERT_OK(Put("foo", "bar"));
ASSERT_EQ(1, DeleteLogFiles());
Open();
ASSERT_EQ("NOT_FOUND", Get("foo"));
Open();
ASSERT_EQ("NOT_FOUND", Get("foo"));
}
TEST(RecoveryTest, LogFileReuse) {
if (!CanAppend()) {
fprintf(stderr, "skipping test because env does not support appending\n");
return;
}
for (int i = 0; i < 2; i++) {
ASSERT_OK(Put("foo", "bar"));
if (i == 0) {
// Compact to ensure current log is empty
CompactMemTable();
}
Close();
ASSERT_EQ(1, NumLogs());
uint64_t number = FirstLogFile();
if (i == 0) {
ASSERT_EQ(0, FileSize(LogName(number)));
} else {
ASSERT_LT(0, FileSize(LogName(number)));
}
Open();
ASSERT_EQ(1, NumLogs());
ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
ASSERT_EQ("bar", Get("foo"));
Open();
ASSERT_EQ(1, NumLogs());
ASSERT_EQ(number, FirstLogFile()) << "did not reuse log file";
ASSERT_EQ("bar", Get("foo"));
}
}
TEST(RecoveryTest, MultipleMemTables) {
// Make a large log.
const int kNum = 1000;
for (int i = 0; i < kNum; i++) {
char buf[100];
snprintf(buf, sizeof(buf), "%050d", i);
ASSERT_OK(Put(buf, buf));
}
ASSERT_EQ(0, NumTables());
Close();
ASSERT_EQ(0, NumTables());
ASSERT_EQ(1, NumLogs());
uint64_t old_log_file = FirstLogFile();
// Force creation of multiple memtables by reducing the write buffer size.
Options opt;
opt.reuse_logs = true;
opt.write_buffer_size = (kNum*100) / 2;
Open(&opt);
ASSERT_LE(2, NumTables());
ASSERT_EQ(1, NumLogs());
ASSERT_NE(old_log_file, FirstLogFile()) << "must not reuse log";
for (int i = 0; i < kNum; i++) {
char buf[100];
snprintf(buf, sizeof(buf), "%050d", i);
ASSERT_EQ(buf, Get(buf));
}
}
TEST(RecoveryTest, MultipleLogFiles) {
ASSERT_OK(Put("foo", "bar"));
Close();
ASSERT_EQ(1, NumLogs());
// Make a bunch of uncompacted log files.
uint64_t old_log = FirstLogFile();
MakeLogFile(old_log+1, 1000, "hello", "world");
MakeLogFile(old_log+2, 1001, "hi", "there");
MakeLogFile(old_log+3, 1002, "foo", "bar2");
// Recover and check that all log files were processed.
Open();
ASSERT_LE(1, NumTables());
ASSERT_EQ(1, NumLogs());
uint64_t new_log = FirstLogFile();
ASSERT_LE(old_log+3, new_log);
ASSERT_EQ("bar2", Get("foo"));
ASSERT_EQ("world", Get("hello"));
ASSERT_EQ("there", Get("hi"));
// Test that previous recovery produced recoverable state.
Open();
ASSERT_LE(1, NumTables());
ASSERT_EQ(1, NumLogs());
if (CanAppend()) {
ASSERT_EQ(new_log, FirstLogFile());
}
ASSERT_EQ("bar2", Get("foo"));
ASSERT_EQ("world", Get("hello"));
ASSERT_EQ("there", Get("hi"));
// Check that introducing an older log file does not cause it to be re-read.
Close();
MakeLogFile(old_log+1, 2000, "hello", "stale write");
Open();
ASSERT_LE(1, NumTables());
ASSERT_EQ(1, NumLogs());
if (CanAppend()) {
ASSERT_EQ(new_log, FirstLogFile());
}
ASSERT_EQ("bar2", Get("foo"));
ASSERT_EQ("world", Get("hello"));
ASSERT_EQ("there", Get("hi"));
}
} // namespace leveldb
int main(int argc, char** argv) {
return leveldb::test::RunAllTests();
}

View file

@ -1,10 +1,10 @@
#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
#define STORAGE_LEVELDB_DB_SKIPLIST_H_
// Copyright (c) 2011 The LevelDB Authors. All rights reserved. // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. // found in the LICENSE file. See the AUTHORS file for names of contributors.
//
#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
#define STORAGE_LEVELDB_DB_SKIPLIST_H_
// Thread safety // Thread safety
// ------------- // -------------
// //

View file

@ -250,7 +250,7 @@ class ConcurrentTest {
// Note that generation 0 is never inserted, so it is ok if // Note that generation 0 is never inserted, so it is ok if
// <*,0,*> is missing. // <*,0,*> is missing.
ASSERT_TRUE((gen(pos) == 0) || ASSERT_TRUE((gen(pos) == 0) ||
(gen(pos) > initial_state.Get(key(pos))) (gen(pos) > static_cast<Key>(initial_state.Get(key(pos))))
) << "key: " << key(pos) ) << "key: " << key(pos)
<< "; gen: " << gen(pos) << "; gen: " << gen(pos)
<< "; initgen: " << "; initgen: "

View file

@ -5,6 +5,7 @@
#ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_ #ifndef STORAGE_LEVELDB_DB_SNAPSHOT_H_
#define STORAGE_LEVELDB_DB_SNAPSHOT_H_ #define STORAGE_LEVELDB_DB_SNAPSHOT_H_
#include "db/dbformat.h"
#include "leveldb/db.h" #include "leveldb/db.h"
namespace leveldb { namespace leveldb {

View file

@ -893,7 +893,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) {
return s; return s;
} }
Status VersionSet::Recover() { Status VersionSet::Recover(bool *save_manifest) {
struct LogReporter : public log::Reader::Reporter { struct LogReporter : public log::Reader::Reporter {
Status* status; Status* status;
virtual void Corruption(size_t bytes, const Status& s) { virtual void Corruption(size_t bytes, const Status& s) {
@ -1003,11 +1003,49 @@ Status VersionSet::Recover() {
last_sequence_ = last_sequence; last_sequence_ = last_sequence;
log_number_ = log_number; log_number_ = log_number;
prev_log_number_ = prev_log_number; prev_log_number_ = prev_log_number;
// See if we can reuse the existing MANIFEST file.
if (ReuseManifest(dscname, current)) {
// No need to save new manifest
} else {
*save_manifest = true;
}
} }
return s; return s;
} }
bool VersionSet::ReuseManifest(const std::string& dscname,
const std::string& dscbase) {
if (!options_->reuse_logs) {
return false;
}
FileType manifest_type;
uint64_t manifest_number;
uint64_t manifest_size;
if (!ParseFileName(dscbase, &manifest_number, &manifest_type) ||
manifest_type != kDescriptorFile ||
!env_->GetFileSize(dscname, &manifest_size).ok() ||
// Make new compacted MANIFEST if old one is too big
manifest_size >= kTargetFileSize) {
return false;
}
assert(descriptor_file_ == NULL);
assert(descriptor_log_ == NULL);
Status r = env_->NewAppendableFile(dscname, &descriptor_file_);
if (!r.ok()) {
Log(options_->info_log, "Reuse MANIFEST: %s\n", r.ToString().c_str());
assert(descriptor_file_ == NULL);
return false;
}
Log(options_->info_log, "Reusing MANIFEST %s\n", dscname.c_str());
descriptor_log_ = new log::Writer(descriptor_file_, manifest_size);
manifest_file_number_ = manifest_number;
return true;
}
void VersionSet::MarkFileNumberUsed(uint64_t number) { void VersionSet::MarkFileNumberUsed(uint64_t number) {
if (next_file_number_ <= number) { if (next_file_number_ <= number) {
next_file_number_ = number + 1; next_file_number_ = number + 1;

View file

@ -179,7 +179,7 @@ class VersionSet {
EXCLUSIVE_LOCKS_REQUIRED(mu); EXCLUSIVE_LOCKS_REQUIRED(mu);
// Recover the last saved descriptor from persistent storage. // Recover the last saved descriptor from persistent storage.
Status Recover(); Status Recover(bool *save_manifest);
// Return the current version. // Return the current version.
Version* current() const { return current_; } Version* current() const { return current_; }
@ -274,6 +274,8 @@ class VersionSet {
friend class Compaction; friend class Compaction;
friend class Version; friend class Version;
bool ReuseManifest(const std::string& dscname, const std::string& dscbase);
void Finalize(Version* v); void Finalize(Version* v);
void GetRange(const std::vector<FileMetaData*>& inputs, void GetRange(const std::vector<FileMetaData*>& inputs,

View file

@ -5,6 +5,7 @@
#ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ #ifndef STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
#define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_ #define STORAGE_LEVELDB_DB_WRITE_BATCH_INTERNAL_H_
#include "db/dbformat.h"
#include "leveldb/write_batch.h" #include "leveldb/write_batch.h"
namespace leveldb { namespace leveldb {

View file

@ -22,7 +22,7 @@ directory. The following example shows how to open a database,
creating it if necessary: creating it if necessary:
<p> <p>
<pre> <pre>
#include &lt;assert&gt; #include &lt;cassert&gt;
#include "leveldb/db.h" #include "leveldb/db.h"
leveldb::DB* db; leveldb::DB* db;

View file

@ -277,6 +277,19 @@ class InMemoryEnv : public EnvWrapper {
return Status::OK(); return Status::OK();
} }
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result) {
MutexLock lock(&mutex_);
FileState** sptr = &file_map_[fname];
FileState* file = *sptr;
if (file == NULL) {
file = new FileState();
file->Ref();
}
*result = new WritableFileImpl(file);
return Status::OK();
}
virtual bool FileExists(const std::string& fname) { virtual bool FileExists(const std::string& fname) {
MutexLock lock(&mutex_); MutexLock lock(&mutex_);
return file_map_.find(fname) != file_map_.end(); return file_map_.find(fname) != file_map_.end();

View file

@ -40,6 +40,8 @@ TEST(MemEnvTest, Basics) {
// Create a file. // Create a file.
ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file)); ASSERT_OK(env_->NewWritableFile("/dir/f", &writable_file));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(0, file_size);
delete writable_file; delete writable_file;
// Check that the file exists. // Check that the file exists.
@ -55,9 +57,16 @@ TEST(MemEnvTest, Basics) {
ASSERT_OK(writable_file->Append("abc")); ASSERT_OK(writable_file->Append("abc"));
delete writable_file; delete writable_file;
// Check for expected size. // Check that append works.
ASSERT_OK(env_->NewAppendableFile("/dir/f", &writable_file));
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size)); ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(3, file_size); ASSERT_EQ(3, file_size);
ASSERT_OK(writable_file->Append("hello"));
delete writable_file;
// Check for expected size.
ASSERT_OK(env_->GetFileSize("/dir/f", &file_size));
ASSERT_EQ(8, file_size);
// Check that renaming works. // Check that renaming works.
ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok()); ASSERT_TRUE(!env_->RenameFile("/dir/non_existent", "/dir/g").ok());
@ -65,7 +74,7 @@ TEST(MemEnvTest, Basics) {
ASSERT_TRUE(!env_->FileExists("/dir/f")); ASSERT_TRUE(!env_->FileExists("/dir/f"));
ASSERT_TRUE(env_->FileExists("/dir/g")); ASSERT_TRUE(env_->FileExists("/dir/g"));
ASSERT_OK(env_->GetFileSize("/dir/g", &file_size)); ASSERT_OK(env_->GetFileSize("/dir/g", &file_size));
ASSERT_EQ(3, file_size); ASSERT_EQ(8, file_size);
// Check that opening non-existent file fails. // Check that opening non-existent file fails.
SequentialFile* seq_file; SequentialFile* seq_file;

View file

@ -81,6 +81,17 @@ class Cache {
// its cache keys. // its cache keys.
virtual uint64_t NewId() = 0; virtual uint64_t NewId() = 0;
// Remove all cache entries that are not actively in use. Memory-constrained
// applications may wish to call this method to reduce memory usage.
// Default implementation of Prune() does nothing. Subclasses are strongly
// encouraged to override the default implementation. A future release of
// leveldb may change Prune() to a pure abstract method.
virtual void Prune() {}
// Return an estimate of the combined charges of all elements stored in the
// cache.
virtual size_t TotalCharge() const = 0;
private: private:
void LRU_Remove(Handle* e); void LRU_Remove(Handle* e);
void LRU_Append(Handle* e); void LRU_Append(Handle* e);

View file

@ -14,7 +14,7 @@ namespace leveldb {
// Update Makefile if you change these // Update Makefile if you change these
static const int kMajorVersion = 1; static const int kMajorVersion = 1;
static const int kMinorVersion = 18; static const int kMinorVersion = 19;
struct Options; struct Options;
struct ReadOptions; struct ReadOptions;
@ -115,6 +115,8 @@ class DB {
// about the internal operation of the DB. // about the internal operation of the DB.
// "leveldb.sstables" - returns a multi-line string that describes all // "leveldb.sstables" - returns a multi-line string that describes all
// of the sstables that make up the db contents. // of the sstables that make up the db contents.
// "leveldb.approximate-memory-usage" - returns the approximate number of
// bytes of memory in use by the DB.
virtual bool GetProperty(const Slice& property, std::string* value) = 0; virtual bool GetProperty(const Slice& property, std::string* value) = 0;
// For each i in [0,n-1], store in "sizes[i]", the approximate // For each i in [0,n-1], store in "sizes[i]", the approximate

View file

@ -69,6 +69,21 @@ class Env {
virtual Status NewWritableFile(const std::string& fname, virtual Status NewWritableFile(const std::string& fname,
WritableFile** result) = 0; WritableFile** result) = 0;
// Create an object that either appends to an existing file, or
// writes to a new file (if the file does not exist to begin with).
// On success, stores a pointer to the new file in *result and
// returns OK. On failure stores NULL in *result and returns
// non-OK.
//
// The returned file will only be accessed by one thread at a time.
//
// May return an IsNotSupportedError error if this Env does
// not allow appending to an existing file. Users of Env (including
// the leveldb implementation) must be prepared to deal with
// an Env that does not support appending.
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result);
// Returns true iff the named file exists. // Returns true iff the named file exists.
virtual bool FileExists(const std::string& fname) = 0; virtual bool FileExists(const std::string& fname) = 0;
@ -289,6 +304,9 @@ class EnvWrapper : public Env {
Status NewWritableFile(const std::string& f, WritableFile** r) { Status NewWritableFile(const std::string& f, WritableFile** r) {
return target_->NewWritableFile(f, r); return target_->NewWritableFile(f, r);
} }
Status NewAppendableFile(const std::string& f, WritableFile** r) {
return target_->NewAppendableFile(f, r);
}
bool FileExists(const std::string& f) { return target_->FileExists(f); } bool FileExists(const std::string& f) { return target_->FileExists(f); }
Status GetChildren(const std::string& dir, std::vector<std::string>* r) { Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
return target_->GetChildren(dir, r); return target_->GetChildren(dir, r);

View file

@ -37,7 +37,7 @@ class Iterator {
// Valid() after this call iff the source is not empty. // Valid() after this call iff the source is not empty.
virtual void SeekToLast() = 0; virtual void SeekToLast() = 0;
// Position at the first key in the source that at or past target // Position at the first key in the source that is at or past target.
// The iterator is Valid() after this call iff the source contains // The iterator is Valid() after this call iff the source contains
// an entry that comes at or past target. // an entry that comes at or past target.
virtual void Seek(const Slice& target) = 0; virtual void Seek(const Slice& target) = 0;

View file

@ -128,6 +128,12 @@ struct Options {
// efficiently detect that and will switch to uncompressed mode. // efficiently detect that and will switch to uncompressed mode.
CompressionType compression; CompressionType compression;
// EXPERIMENTAL: If true, append to existing MANIFEST and log files
// when a database is opened. This can significantly speed up open.
//
// Default: currently false, but may become true later.
bool reuse_logs;
// If non-NULL, use the specified filter policy to reduce disk reads. // If non-NULL, use the specified filter policy to reduce disk reads.
// Many applications will benefit from passing the result of // Many applications will benefit from passing the result of
// NewBloomFilterPolicy() here. // NewBloomFilterPolicy() here.

View file

@ -60,6 +60,12 @@ class Status {
// Returns true iff the status indicates an IOError. // Returns true iff the status indicates an IOError.
bool IsIOError() const { return code() == kIOError; } bool IsIOError() const { return code() == kIOError; }
// Returns true iff the status indicates a NotSupportedError.
bool IsNotSupportedError() const { return code() == kNotSupported; }
// Returns true iff the status indicates an InvalidArgument.
bool IsInvalidArgument() const { return code() == kInvalidArgument; }
// Return a string representation of this status suitable for printing. // Return a string representation of this status suitable for printing.
// Returns the string "OK" for success. // Returns the string "OK" for success.
std::string ToString() const; std::string ToString() const;

View file

@ -35,8 +35,12 @@
#define ARCH_CPU_X86_FAMILY 1 #define ARCH_CPU_X86_FAMILY 1
#elif defined(__ARMEL__) #elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1 #define ARCH_CPU_ARM_FAMILY 1
#elif defined(__aarch64__)
#define ARCH_CPU_ARM64_FAMILY 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__) #elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
#define ARCH_CPU_PPC_FAMILY 1 #define ARCH_CPU_PPC_FAMILY 1
#elif defined(__mips__)
#define ARCH_CPU_MIPS_FAMILY 1
#endif #endif
namespace leveldb { namespace leveldb {
@ -92,6 +96,13 @@ inline void MemoryBarrier() {
} }
#define LEVELDB_HAVE_MEMORY_BARRIER #define LEVELDB_HAVE_MEMORY_BARRIER
// ARM64
#elif defined(ARCH_CPU_ARM64_FAMILY)
inline void MemoryBarrier() {
asm volatile("dmb sy" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
// PPC // PPC
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__) #elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() { inline void MemoryBarrier() {
@ -101,6 +112,13 @@ inline void MemoryBarrier() {
} }
#define LEVELDB_HAVE_MEMORY_BARRIER #define LEVELDB_HAVE_MEMORY_BARRIER
// MIPS
#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
__asm__ __volatile__("sync" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER
#endif #endif
// AtomicPointer built using platform-specific MemoryBarrier() // AtomicPointer built using platform-specific MemoryBarrier()
@ -215,6 +233,7 @@ class AtomicPointer {
#undef LEVELDB_HAVE_MEMORY_BARRIER #undef LEVELDB_HAVE_MEMORY_BARRIER
#undef ARCH_CPU_X86_FAMILY #undef ARCH_CPU_X86_FAMILY
#undef ARCH_CPU_ARM_FAMILY #undef ARCH_CPU_ARM_FAMILY
#undef ARCH_CPU_ARM64_FAMILY
#undef ARCH_CPU_PPC_FAMILY #undef ARCH_CPU_PPC_FAMILY
} // namespace port } // namespace port

View file

@ -7,7 +7,6 @@
#include <cstdlib> #include <cstdlib>
#include <stdio.h> #include <stdio.h>
#include <string.h> #include <string.h>
#include "util/logging.h"
namespace leveldb { namespace leveldb {
namespace port { namespace port {

View file

@ -68,7 +68,7 @@ void FilterBlockBuilder::GenerateFilter() {
// Generate filter for current set of keys and append to result_. // Generate filter for current set of keys and append to result_.
filter_offsets_.push_back(result_.size()); filter_offsets_.push_back(result_.size());
policy_->CreateFilter(&tmp_keys_[0], num_keys, &result_); policy_->CreateFilter(&tmp_keys_[0], static_cast<int>(num_keys), &result_);
tmp_keys_.clear(); tmp_keys_.clear();
keys_.clear(); keys_.clear();
@ -97,7 +97,7 @@ bool FilterBlockReader::KeyMayMatch(uint64_t block_offset, const Slice& key) {
if (index < num_) { if (index < num_) {
uint32_t start = DecodeFixed32(offset_ + index*4); uint32_t start = DecodeFixed32(offset_ + index*4);
uint32_t limit = DecodeFixed32(offset_ + index*4 + 4); uint32_t limit = DecodeFixed32(offset_ + index*4 + 4);
if (start <= limit && limit <= (offset_ - data_)) { if (start <= limit && limit <= static_cast<size_t>(offset_ - data_)) {
Slice filter = Slice(data_ + start, limit - start); Slice filter = Slice(data_ + start, limit - start);
return policy_->KeyMayMatch(key, filter); return policy_->KeyMayMatch(key, filter);
} else if (start == limit) { } else if (start == limit) {

View file

@ -30,15 +30,14 @@ Status BlockHandle::DecodeFrom(Slice* input) {
} }
void Footer::EncodeTo(std::string* dst) const { void Footer::EncodeTo(std::string* dst) const {
#ifndef NDEBUG
const size_t original_size = dst->size(); const size_t original_size = dst->size();
#endif
metaindex_handle_.EncodeTo(dst); metaindex_handle_.EncodeTo(dst);
index_handle_.EncodeTo(dst); index_handle_.EncodeTo(dst);
dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding dst->resize(2 * BlockHandle::kMaxEncodedLength); // Padding
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu)); PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber & 0xffffffffu));
PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32)); PutFixed32(dst, static_cast<uint32_t>(kTableMagicNumber >> 32));
assert(dst->size() == original_size + kEncodedLength); assert(dst->size() == original_size + kEncodedLength);
(void)original_size; // Disable unused variable warning.
} }
Status Footer::DecodeFrom(Slice* input) { Status Footer::DecodeFrom(Slice* input) {

View file

@ -5,6 +5,9 @@
#ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ #ifndef STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
#define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_ #define STORAGE_LEVELDB_TABLE_ITERATOR_WRAPPER_H_
#include "leveldb/iterator.h"
#include "leveldb/slice.h"
namespace leveldb { namespace leveldb {
// A internal wrapper class with an interface similar to Iterator that // A internal wrapper class with an interface similar to Iterator that

View file

@ -82,7 +82,7 @@ Status Table::Open(const Options& options,
*table = new Table(rep); *table = new Table(rep);
(*table)->ReadMeta(footer); (*table)->ReadMeta(footer);
} else { } else {
if (index_block) delete index_block; delete index_block;
} }
return s; return s;

View file

@ -853,12 +853,20 @@ TEST(TableTest, ApproximateOffsetOfCompressed) {
options.compression = kSnappyCompression; options.compression = kSnappyCompression;
c.Finish(options, &keys, &kvmap); c.Finish(options, &keys, &kvmap);
ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, 0)); // Expected upper and lower bounds of space used by compressible strings.
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, 0)); static const int kSlop = 1000; // Compressor effectiveness varies.
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, 0)); const int expected = 2500; // 10000 * compression ratio (0.25)
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), 2000, 3000)); const int min_z = expected - kSlop;
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), 2000, 3000)); const int max_z = expected + kSlop;
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 4000, 6000));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("abc"), 0, kSlop));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k01"), 0, kSlop));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k02"), 0, kSlop));
// Have now emitted a large compressible string, so adjust expected offset.
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k03"), min_z, max_z));
ASSERT_TRUE(Between(c.ApproximateOffsetOf("k04"), min_z, max_z));
// Have now emitted two large compressible strings, so adjust expected offset.
ASSERT_TRUE(Between(c.ApproximateOffsetOf("xyz"), 2 * min_z, 2 * max_z));
} }
} // namespace leveldb } // namespace leveldb

View file

@ -9,8 +9,7 @@ namespace leveldb {
static const int kBlockSize = 4096; static const int kBlockSize = 4096;
Arena::Arena() { Arena::Arena() : memory_usage_(0) {
blocks_memory_ = 0;
alloc_ptr_ = NULL; // First allocation will allocate a block alloc_ptr_ = NULL; // First allocation will allocate a block
alloc_bytes_remaining_ = 0; alloc_bytes_remaining_ = 0;
} }
@ -60,8 +59,9 @@ char* Arena::AllocateAligned(size_t bytes) {
char* Arena::AllocateNewBlock(size_t block_bytes) { char* Arena::AllocateNewBlock(size_t block_bytes) {
char* result = new char[block_bytes]; char* result = new char[block_bytes];
blocks_memory_ += block_bytes;
blocks_.push_back(result); blocks_.push_back(result);
memory_usage_.NoBarrier_Store(
reinterpret_cast<void*>(MemoryUsage() + block_bytes + sizeof(char*)));
return result; return result;
} }

View file

@ -9,6 +9,7 @@
#include <assert.h> #include <assert.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include "port/port.h"
namespace leveldb { namespace leveldb {
@ -24,10 +25,9 @@ class Arena {
char* AllocateAligned(size_t bytes); char* AllocateAligned(size_t bytes);
// Returns an estimate of the total memory usage of data allocated // Returns an estimate of the total memory usage of data allocated
// by the arena (including space allocated but not yet used for user // by the arena.
// allocations).
size_t MemoryUsage() const { size_t MemoryUsage() const {
return blocks_memory_ + blocks_.capacity() * sizeof(char*); return reinterpret_cast<uintptr_t>(memory_usage_.NoBarrier_Load());
} }
private: private:
@ -41,8 +41,8 @@ class Arena {
// Array of new[] allocated memory blocks // Array of new[] allocated memory blocks
std::vector<char*> blocks_; std::vector<char*> blocks_;
// Bytes of memory in blocks allocated so far // Total memory usage of the arena.
size_t blocks_memory_; port::AtomicPointer memory_usage_;
// No copying allowed // No copying allowed
Arena(const Arena&); Arena(const Arena&);

View file

@ -47,7 +47,7 @@ class BloomFilterPolicy : public FilterPolicy {
dst->resize(init_size + bytes, 0); dst->resize(init_size + bytes, 0);
dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter dst->push_back(static_cast<char>(k_)); // Remember # of probes in filter
char* array = &(*dst)[init_size]; char* array = &(*dst)[init_size];
for (size_t i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
// Use double-hashing to generate a sequence of hash values. // Use double-hashing to generate a sequence of hash values.
// See analysis in [Kirsch,Mitzenmacher 2006]. // See analysis in [Kirsch,Mitzenmacher 2006].
uint32_t h = BloomHash(keys[i]); uint32_t h = BloomHash(keys[i]);

View file

@ -46,7 +46,8 @@ class BloomTest {
key_slices.push_back(Slice(keys_[i])); key_slices.push_back(Slice(keys_[i]));
} }
filter_.clear(); filter_.clear();
policy_->CreateFilter(&key_slices[0], key_slices.size(), &filter_); policy_->CreateFilter(&key_slices[0], static_cast<int>(key_slices.size()),
&filter_);
keys_.clear(); keys_.clear();
if (kVerbose >= 2) DumpFilter(); if (kVerbose >= 2) DumpFilter();
} }

View file

@ -19,6 +19,23 @@ Cache::~Cache() {
namespace { namespace {
// LRU cache implementation // LRU cache implementation
//
// Cache entries have an "in_cache" boolean indicating whether the cache has a
// reference on the entry. The only ways that this can become false without the
// entry being passed to its "deleter" are via Erase(), via Insert() when
// an element with a duplicate key is inserted, or on destruction of the cache.
//
// The cache keeps two linked lists of items in the cache. All items in the
// cache are in one list or the other, and never both. Items still referenced
// by clients but erased from the cache are in neither list. The lists are:
// - in-use: contains the items currently referenced by clients, in no
// particular order. (This list is used for invariant checking. If we
// removed the check, elements that would otherwise be on this list could be
// left as disconnected singleton lists.)
// - LRU: contains the items not currently referenced by clients, in LRU order
// Elements are moved between these lists by the Ref() and Unref() methods,
// when they detect an element in the cache acquiring or losing its only
// external reference.
// An entry is a variable length heap-allocated structure. Entries // An entry is a variable length heap-allocated structure. Entries
// are kept in a circular doubly linked list ordered by access time. // are kept in a circular doubly linked list ordered by access time.
@ -30,7 +47,8 @@ struct LRUHandle {
LRUHandle* prev; LRUHandle* prev;
size_t charge; // TODO(opt): Only allow uint32_t? size_t charge; // TODO(opt): Only allow uint32_t?
size_t key_length; size_t key_length;
uint32_t refs; bool in_cache; // Whether entry is in the cache.
uint32_t refs; // References, including cache reference, if present.
uint32_t hash; // Hash of key(); used for fast sharding and comparisons uint32_t hash; // Hash of key(); used for fast sharding and comparisons
char key_data[1]; // Beginning of key char key_data[1]; // Beginning of key
@ -147,49 +165,77 @@ class LRUCache {
Cache::Handle* Lookup(const Slice& key, uint32_t hash); Cache::Handle* Lookup(const Slice& key, uint32_t hash);
void Release(Cache::Handle* handle); void Release(Cache::Handle* handle);
void Erase(const Slice& key, uint32_t hash); void Erase(const Slice& key, uint32_t hash);
void Prune();
size_t TotalCharge() const {
MutexLock l(&mutex_);
return usage_;
}
private: private:
void LRU_Remove(LRUHandle* e); void LRU_Remove(LRUHandle* e);
void LRU_Append(LRUHandle* e); void LRU_Append(LRUHandle*list, LRUHandle* e);
void Ref(LRUHandle* e);
void Unref(LRUHandle* e); void Unref(LRUHandle* e);
bool FinishErase(LRUHandle* e);
// Initialized before use. // Initialized before use.
size_t capacity_; size_t capacity_;
// mutex_ protects the following state. // mutex_ protects the following state.
port::Mutex mutex_; mutable port::Mutex mutex_;
size_t usage_; size_t usage_;
// Dummy head of LRU list. // Dummy head of LRU list.
// lru.prev is newest entry, lru.next is oldest entry. // lru.prev is newest entry, lru.next is oldest entry.
// Entries have refs==1 and in_cache==true.
LRUHandle lru_; LRUHandle lru_;
// Dummy head of in-use list.
// Entries are in use by clients, and have refs >= 2 and in_cache==true.
LRUHandle in_use_;
HandleTable table_; HandleTable table_;
}; };
LRUCache::LRUCache() LRUCache::LRUCache()
: usage_(0) { : usage_(0) {
// Make empty circular linked list // Make empty circular linked lists.
lru_.next = &lru_; lru_.next = &lru_;
lru_.prev = &lru_; lru_.prev = &lru_;
in_use_.next = &in_use_;
in_use_.prev = &in_use_;
} }
LRUCache::~LRUCache() { LRUCache::~LRUCache() {
assert(in_use_.next == &in_use_); // Error if caller has an unreleased handle
for (LRUHandle* e = lru_.next; e != &lru_; ) { for (LRUHandle* e = lru_.next; e != &lru_; ) {
LRUHandle* next = e->next; LRUHandle* next = e->next;
assert(e->refs == 1); // Error if caller has an unreleased handle assert(e->in_cache);
e->in_cache = false;
assert(e->refs == 1); // Invariant of lru_ list.
Unref(e); Unref(e);
e = next; e = next;
} }
} }
void LRUCache::Ref(LRUHandle* e) {
if (e->refs == 1 && e->in_cache) { // If on lru_ list, move to in_use_ list.
LRU_Remove(e);
LRU_Append(&in_use_, e);
}
e->refs++;
}
void LRUCache::Unref(LRUHandle* e) { void LRUCache::Unref(LRUHandle* e) {
assert(e->refs > 0); assert(e->refs > 0);
e->refs--; e->refs--;
if (e->refs <= 0) { if (e->refs == 0) { // Deallocate.
usage_ -= e->charge; assert(!e->in_cache);
(*e->deleter)(e->key(), e->value); (*e->deleter)(e->key(), e->value);
free(e); free(e);
} else if (e->in_cache && e->refs == 1) { // No longer in use; move to lru_ list.
LRU_Remove(e);
LRU_Append(&lru_, e);
} }
} }
@ -198,10 +244,10 @@ void LRUCache::LRU_Remove(LRUHandle* e) {
e->prev->next = e->next; e->prev->next = e->next;
} }
void LRUCache::LRU_Append(LRUHandle* e) { void LRUCache::LRU_Append(LRUHandle* list, LRUHandle* e) {
// Make "e" newest entry by inserting just before lru_ // Make "e" newest entry by inserting just before *list
e->next = &lru_; e->next = list;
e->prev = lru_.prev; e->prev = list->prev;
e->prev->next = e; e->prev->next = e;
e->next->prev = e; e->next->prev = e;
} }
@ -210,9 +256,7 @@ Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
MutexLock l(&mutex_); MutexLock l(&mutex_);
LRUHandle* e = table_.Lookup(key, hash); LRUHandle* e = table_.Lookup(key, hash);
if (e != NULL) { if (e != NULL) {
e->refs++; Ref(e);
LRU_Remove(e);
LRU_Append(e);
} }
return reinterpret_cast<Cache::Handle*>(e); return reinterpret_cast<Cache::Handle*>(e);
} }
@ -234,33 +278,57 @@ Cache::Handle* LRUCache::Insert(
e->charge = charge; e->charge = charge;
e->key_length = key.size(); e->key_length = key.size();
e->hash = hash; e->hash = hash;
e->refs = 2; // One from LRUCache, one for the returned handle e->in_cache = false;
e->refs = 1; // for the returned handle.
memcpy(e->key_data, key.data(), key.size()); memcpy(e->key_data, key.data(), key.size());
LRU_Append(e);
usage_ += charge;
LRUHandle* old = table_.Insert(e); if (capacity_ > 0) {
if (old != NULL) { e->refs++; // for the cache's reference.
LRU_Remove(old); e->in_cache = true;
Unref(old); LRU_Append(&in_use_, e);
} usage_ += charge;
FinishErase(table_.Insert(e));
} // else don't cache. (Tests use capacity_==0 to turn off caching.)
while (usage_ > capacity_ && lru_.next != &lru_) { while (usage_ > capacity_ && lru_.next != &lru_) {
LRUHandle* old = lru_.next; LRUHandle* old = lru_.next;
LRU_Remove(old); assert(old->refs == 1);
table_.Remove(old->key(), old->hash); bool erased = FinishErase(table_.Remove(old->key(), old->hash));
Unref(old); if (!erased) { // to avoid unused variable when compiled NDEBUG
assert(erased);
}
} }
return reinterpret_cast<Cache::Handle*>(e); return reinterpret_cast<Cache::Handle*>(e);
} }
// If e != NULL, finish removing *e from the cache; it has already been removed
// from the hash table. Return whether e != NULL. Requires mutex_ held.
bool LRUCache::FinishErase(LRUHandle* e) {
if (e != NULL) {
assert(e->in_cache);
LRU_Remove(e);
e->in_cache = false;
usage_ -= e->charge;
Unref(e);
}
return e != NULL;
}
void LRUCache::Erase(const Slice& key, uint32_t hash) { void LRUCache::Erase(const Slice& key, uint32_t hash) {
MutexLock l(&mutex_); MutexLock l(&mutex_);
LRUHandle* e = table_.Remove(key, hash); FinishErase(table_.Remove(key, hash));
if (e != NULL) { }
LRU_Remove(e);
Unref(e); void LRUCache::Prune() {
MutexLock l(&mutex_);
while (lru_.next != &lru_) {
LRUHandle* e = lru_.next;
assert(e->refs == 1);
bool erased = FinishErase(table_.Remove(e->key(), e->hash));
if (!erased) { // to avoid unused variable when compiled NDEBUG
assert(erased);
}
} }
} }
@ -314,6 +382,18 @@ class ShardedLRUCache : public Cache {
MutexLock l(&id_mutex_); MutexLock l(&id_mutex_);
return ++(last_id_); return ++(last_id_);
} }
virtual void Prune() {
for (int s = 0; s < kNumShards; s++) {
shard_[s].Prune();
}
}
virtual size_t TotalCharge() const {
size_t total = 0;
for (int s = 0; s < kNumShards; s++) {
total += shard_[s].TotalCharge();
}
return total;
}
}; };
} // end anonymous namespace } // end anonymous namespace

View file

@ -59,6 +59,11 @@ class CacheTest {
&CacheTest::Deleter)); &CacheTest::Deleter));
} }
Cache::Handle* InsertAndReturnHandle(int key, int value, int charge = 1) {
return cache_->Insert(EncodeKey(key), EncodeValue(value), charge,
&CacheTest::Deleter);
}
void Erase(int key) { void Erase(int key) {
cache_->Erase(EncodeKey(key)); cache_->Erase(EncodeKey(key));
} }
@ -135,8 +140,11 @@ TEST(CacheTest, EntriesArePinned) {
TEST(CacheTest, EvictionPolicy) { TEST(CacheTest, EvictionPolicy) {
Insert(100, 101); Insert(100, 101);
Insert(200, 201); Insert(200, 201);
Insert(300, 301);
Cache::Handle* h = cache_->Lookup(EncodeKey(300));
// Frequently used entry must be kept around // Frequently used entry must be kept around,
// as must things that are still in use.
for (int i = 0; i < kCacheSize + 100; i++) { for (int i = 0; i < kCacheSize + 100; i++) {
Insert(1000+i, 2000+i); Insert(1000+i, 2000+i);
ASSERT_EQ(2000+i, Lookup(1000+i)); ASSERT_EQ(2000+i, Lookup(1000+i));
@ -144,6 +152,25 @@ TEST(CacheTest, EvictionPolicy) {
} }
ASSERT_EQ(101, Lookup(100)); ASSERT_EQ(101, Lookup(100));
ASSERT_EQ(-1, Lookup(200)); ASSERT_EQ(-1, Lookup(200));
ASSERT_EQ(301, Lookup(300));
cache_->Release(h);
}
TEST(CacheTest, UseExceedsCacheSize) {
// Overfill the cache, keeping handles on all inserted entries.
std::vector<Cache::Handle*> h;
for (int i = 0; i < kCacheSize + 100; i++) {
h.push_back(InsertAndReturnHandle(1000+i, 2000+i));
}
// Check that all the entries can be found in the cache.
for (int i = 0; i < h.size(); i++) {
ASSERT_EQ(2000+i, Lookup(1000+i));
}
for (int i = 0; i < h.size(); i++) {
cache_->Release(h[i]);
}
} }
TEST(CacheTest, HeavyEntries) { TEST(CacheTest, HeavyEntries) {
@ -179,6 +206,19 @@ TEST(CacheTest, NewId) {
ASSERT_NE(a, b); ASSERT_NE(a, b);
} }
TEST(CacheTest, Prune) {
Insert(1, 100);
Insert(2, 200);
Cache::Handle* handle = cache_->Lookup(EncodeKey(1));
ASSERT_TRUE(handle);
cache_->Prune();
cache_->Release(handle);
ASSERT_EQ(100, Lookup(1));
ASSERT_EQ(-1, Lookup(2));
}
} // namespace leveldb } // namespace leveldb
int main(int argc, char** argv) { int main(int argc, char** argv) {

View file

@ -9,6 +9,10 @@ namespace leveldb {
Env::~Env() { Env::~Env() {
} }
Status Env::NewAppendableFile(const std::string& fname, WritableFile** result) {
return Status::NotSupported("NewAppendableFile", fname);
}
SequentialFile::~SequentialFile() { SequentialFile::~SequentialFile() {
} }

View file

@ -351,6 +351,19 @@ class PosixEnv : public Env {
return s; return s;
} }
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result) {
Status s;
FILE* f = fopen(fname.c_str(), "a");
if (f == NULL) {
*result = NULL;
s = IOError(fname, errno);
} else {
*result = new PosixWritableFile(fname, f);
}
return s;
}
virtual bool FileExists(const std::string& fname) { virtual bool FileExists(const std::string& fname) {
return access(fname.c_str(), F_OK) == 0; return access(fname.c_str(), F_OK) == 0;
} }

View file

@ -106,7 +106,7 @@ private:
class Win32WritableFile : public WritableFile class Win32WritableFile : public WritableFile
{ {
public: public:
Win32WritableFile(const std::string& fname); Win32WritableFile(const std::string& fname, bool append);
~Win32WritableFile(); ~Win32WritableFile();
virtual Status Append(const Slice& data); virtual Status Append(const Slice& data);
@ -158,6 +158,8 @@ public:
RandomAccessFile** result); RandomAccessFile** result);
virtual Status NewWritableFile(const std::string& fname, virtual Status NewWritableFile(const std::string& fname,
WritableFile** result); WritableFile** result);
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result);
virtual bool FileExists(const std::string& fname); virtual bool FileExists(const std::string& fname);
@ -423,17 +425,23 @@ void Win32RandomAccessFile::_CleanUp()
} }
} }
Win32WritableFile::Win32WritableFile(const std::string& fname) Win32WritableFile::Win32WritableFile(const std::string& fname, bool append)
: filename_(fname) : filename_(fname)
{ {
std::wstring path; std::wstring path;
ToWidePath(fname, path); ToWidePath(fname, path);
DWORD Flag = PathFileExistsW(path.c_str()) ? OPEN_EXISTING : CREATE_ALWAYS; // NewAppendableFile: append to an existing file, or create a new one
// if none exists - this is OPEN_ALWAYS behavior, with
// FILE_APPEND_DATA to avoid having to manually position the file
// pointer at the end of the file.
// NewWritableFile: create a new file, delete if it exists - this is
// CREATE_ALWAYS behavior. This file is used for writing only so
// use GENERIC_WRITE.
_hFile = CreateFileW(path.c_str(), _hFile = CreateFileW(path.c_str(),
GENERIC_READ | GENERIC_WRITE, append ? FILE_APPEND_DATA : GENERIC_WRITE,
FILE_SHARE_READ|FILE_SHARE_DELETE|FILE_SHARE_WRITE, FILE_SHARE_READ|FILE_SHARE_DELETE|FILE_SHARE_WRITE,
NULL, NULL,
Flag, append ? OPEN_ALWAYS : CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL, FILE_ATTRIBUTE_NORMAL,
NULL); NULL);
// CreateFileW returns INVALID_HANDLE_VALUE in case of error, always check isEnable() before use // CreateFileW returns INVALID_HANDLE_VALUE in case of error, always check isEnable() before use
@ -823,7 +831,9 @@ Status Win32Env::NewLogger( const std::string& fname, Logger** result )
{ {
Status sRet; Status sRet;
std::string path = fname; std::string path = fname;
Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path)); // Logs are opened with write semantics, not with append semantics
// (see PosixEnv::NewLogger)
Win32WritableFile* pMapFile = new Win32WritableFile(ModifyPath(path), false);
if(!pMapFile->isEnable()){ if(!pMapFile->isEnable()){
delete pMapFile; delete pMapFile;
*result = NULL; *result = NULL;
@ -837,7 +847,20 @@ Status Win32Env::NewWritableFile( const std::string& fname, WritableFile** resul
{ {
Status sRet; Status sRet;
std::string path = fname; std::string path = fname;
Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path)); Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), false);
if(!pFile->isEnable()){
*result = NULL;
sRet = Status::IOError(fname,Win32::GetLastErrSz());
}else
*result = pFile;
return sRet;
}
Status Win32Env::NewAppendableFile( const std::string& fname, WritableFile** result )
{
Status sRet;
std::string path = fname;
Win32WritableFile* pFile = new Win32WritableFile(ModifyPath(path), true);
if(!pFile->isEnable()){ if(!pFile->isEnable()){
*result = NULL; *result = NULL;
sRet = Status::IOError(fname,Win32::GetLastErrSz()); sRet = Status::IOError(fname,Win32::GetLastErrSz());

View file

@ -22,8 +22,8 @@ Options::Options()
block_size(4096), block_size(4096),
block_restart_interval(16), block_restart_interval(16),
compression(kSnappyCompression), compression(kSnappyCompression),
reuse_logs(false),
filter_policy(NULL) { filter_policy(NULL) {
} }
} // namespace leveldb } // namespace leveldb

View file

@ -45,6 +45,16 @@ class ErrorEnv : public EnvWrapper {
} }
return target()->NewWritableFile(fname, result); return target()->NewWritableFile(fname, result);
} }
virtual Status NewAppendableFile(const std::string& fname,
WritableFile** result) {
if (writable_file_error_) {
++num_writable_file_errors_;
*result = NULL;
return Status::IOError(fname, "fake error");
}
return target()->NewAppendableFile(fname, result);
}
}; };
} // namespace test } // namespace test