From ccedbafd73514e0e5b58c1a6251029e84dadaea8 Mon Sep 17 00:00:00 2001
From: Evan Klitzke <evan@eklitzke.org>
Date: Tue, 20 Feb 2018 17:25:24 -0500
Subject: [PATCH] Increase LevelDB max_open_files unless on 32-bit Unix.

This change significantly increases IBD performance by increasing the
amount of the UTXO index that can remain in memory. To ensure this
doesn't cause problems in the future, a static_assert on the LevelDB
version has been added, which must be updated by anyone upgrading
LevelDB.
---
 doc/developer-notes.md | 51 +++++++++++++++++++++++++++++++++++++++++-
 src/dbwrapper.cpp      | 27 +++++++++++++++++++++-
 2 files changed, 76 insertions(+), 2 deletions(-)

diff --git a/doc/developer-notes.md b/doc/developer-notes.md
index f10ad8e87..f4513dca1 100644
--- a/doc/developer-notes.md
+++ b/doc/developer-notes.md
@@ -543,7 +543,10 @@ its upstream repository.
 Current subtrees include:
 
 - src/leveldb
-  - Upstream at https://github.com/google/leveldb ; Maintained by Google, but open important PRs to Core to avoid delay
+  - Upstream at https://github.com/google/leveldb ; Maintained by Google, but
+    open important PRs to Core to avoid delay.
+  - **Note**: Follow the instructions in [Upgrading LevelDB](#upgrading-leveldb) when
+    merging upstream changes to the leveldb subtree.
 
 - src/libsecp256k1
   - Upstream at https://github.com/bitcoin-core/secp256k1/ ; actively maintaned by Core contributors.
@@ -554,6 +557,52 @@ Current subtrees include:
 - src/univalue
   - Upstream at https://github.com/jgarzik/univalue ; report important PRs to Core to avoid delay.
 
+Upgrading LevelDB
+---------------------
+
+Extra care must be taken when upgrading LevelDB. This section explains issues
+you must be aware of.
+
+### File Descriptor Counts
+
+In most configurations we use the default LevelDB value for `max_open_files`,
+which is 1000 at the time of this writing. If LevelDB actually uses this many
+file descriptors it will cause problems with Bitcoin's `select()` loop, because
+it may cause new sockets to be created where the fd value is >= 1024. For this
+reason, on 64-bit Unix systems we rely on an internal LevelDB optimization that
+uses `mmap()` + `close()` to open table files without actually retaining
+references to the table file descriptors. If you are upgrading LevelDB, you must
+sanity check the changes to make sure that this assumption remains valid.
+
+In addition to reviewing the upstream changes in `env_posix.cc`, you can use `lsof` to
+check this. For example, on Linux this command will show open `.ldb` file counts:
+
+```bash
+$ lsof -p $(pidof bitcoind) |\
+    awk 'BEGIN { fd=0; mem=0; } /ldb$/ { if ($4 == "mem") mem++; else fd++ } END { printf "mem = %s, fd = %s\n", mem, fd}'
+mem = 119, fd = 0
+```
+
+The `mem` value shows how many files are mmap'ed, and the `fd` value shows you
+many file descriptors these files are using. You should check that `fd` is a
+small number (usually 0 on 64-bit hosts).
+
+See the notes in the `SetMaxOpenFiles()` function in `dbwrapper.cc` for more
+details.
+
+### Consensus Compatibility
+
+It is possible for LevelDB changes to inadvertently change consensus
+compatibility between nodes. This happened in Bitcoin 0.8 (when LevelDB was
+first introduced). When upgrading LevelDB you should review the upstream changes
+to check for issues affecting consensus compatibility.
+
+For example, if LevelDB had a bug that accidentally prevented a key from being
+returned in an edge case, and that bug was fixed upstream, the bug "fix" would
+be an incompatible consensus change. In this situation the correct behavior
+would be to revert the upstream fix before applying the updates to Bitcoin's
+copy of LevelDB. In general you should be wary of any upstream changes affecting
+what data is returned from LevelDB queries.
 
 Git and GitHub tips
 ---------------------
diff --git a/src/dbwrapper.cpp b/src/dbwrapper.cpp
index fb0d4215a..fef8eedd1 100644
--- a/src/dbwrapper.cpp
+++ b/src/dbwrapper.cpp
@@ -71,6 +71,31 @@ public:
     }
 };
 
+static void SetMaxOpenFiles(leveldb::Options *options) {
+    // On most platforms the default setting of max_open_files (which is 1000)
+    // is optimal. On Windows using a large file count is OK because the handles
+    // do not interfere with select() loops. On 64-bit Unix hosts this value is
+    // also OK, because up to that amount LevelDB will use an mmap
+    // implementation that does not use extra file descriptors (the fds are
+    // closed after being mmaped).
+    //
+    // Increasing the value beyond the default is dangerous because LevelDB will
+    // fall back to a non-mmap implementation when the file count is too large.
+    // On 32-bit Unix host we should decrease the value because the handles use
+    // up real fds, and we want to avoid fd exhaustion issues.
+    //
+    // See PR #12495 for further discussion.
+
+    int default_open_files = options->max_open_files;
+#ifndef WIN32
+    if (sizeof(void*) < 8) {
+        options->max_open_files = 64;
+    }
+#endif
+    LogPrint(BCLog::LEVELDB, "LevelDB using max_open_files=%d (default=%d)\n",
+             options->max_open_files, default_open_files);
+}
+
 static leveldb::Options GetOptions(size_t nCacheSize)
 {
     leveldb::Options options;
@@ -78,13 +103,13 @@ static leveldb::Options GetOptions(size_t nCacheSize)
     options.write_buffer_size = nCacheSize / 4; // up to two write buffers may be held in memory simultaneously
     options.filter_policy = leveldb::NewBloomFilterPolicy(10);
     options.compression = leveldb::kNoCompression;
-    options.max_open_files = 64;
     options.info_log = new CBitcoinLevelDBLogger();
     if (leveldb::kMajorVersion > 1 || (leveldb::kMajorVersion == 1 && leveldb::kMinorVersion >= 16)) {
         // LevelDB versions before 1.16 consider short writes to be corruption. Only trigger error
         // on corruption in later versions.
         options.paranoid_checks = true;
     }
+    SetMaxOpenFiles(&options);
     return options;
 }