lbcd/database2/ffldb/reconcile.go

// Copyright (c) 2015-2016 The btcsuite developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.

package ffldb

import (
	"fmt"
	"hash/crc32"

	database "github.com/btcsuite/btcd/database2"
)

// The serialized write cursor location format is:
//
//  [0:4]  Block file (4 bytes)
//  [4:8]  File offset (4 bytes)
//  [8:12] Castagnoli CRC-32 checksum (4 bytes)

// serializeWriteRow encodes the current block file number and the offset
// within that file at which new data will be written into the 12-byte format
// stored in the metadata.  The first eight bytes hold the file number and the
// offset, and the final four bytes hold a Castagnoli CRC-32 checksum computed
// over those first eight bytes.
func serializeWriteRow(curBlockFileNum, curFileOffset uint32) []byte {
	row := make([]byte, 12)
	byteOrder.PutUint32(row[:4], curBlockFileNum)
	byteOrder.PutUint32(row[4:8], curFileOffset)

	// The checksum only covers the payload that precedes it.
	byteOrder.PutUint32(row[8:], crc32.Checksum(row[:8], castagnoli))
	return row
}

// deserializeWriteRow deserializes the write cursor location stored in the
// metadata.  Returns ErrCorruption if the checksum of the entry doesn't match.
func deserializeWriteRow(writeRow []byte) (uint32, uint32, error) {
	// Ensure the checksum matches.  The checksum is at the end.
	gotChecksum := crc32.Checksum(writeRow[:8], castagnoli)
	wantChecksumBytes := writeRow[8:12]
	wantChecksum := byteOrder.Uint32(wantChecksumBytes)
	if gotChecksum != wantChecksum {
		str := fmt.Sprintf("metadata for write cursor does not match "+
			"the expected checksum - got %d, want %d", gotChecksum,
			wantChecksum)
		return 0, 0, makeDbErr(database.ErrCorruption, str, nil)
	}

	fileNum := byteOrder.Uint32(writeRow[0:4])
	fileOffset := byteOrder.Uint32(writeRow[4:8])
	return fileNum, fileOffset, nil
}

// reconcileDB brings the metadata and the flat block files on disk back into
// agreement.  When the create flag is set, the underlying database is
// initialized first.
//
// The write cursor stored in the metadata is compared against the write cursor
// held by the block store.  If the block store is ahead of the metadata, the
// block store is rolled back to the metadata position.  If the block store is
// behind the metadata, ErrCorruption is returned.  Otherwise the database
// itself is returned.
func reconcileDB(pdb *db, create bool) (database.DB, error) {
	// A database that is being created needs its internal buckets and
	// values set up before the metadata can be consulted.
	if create {
		err := initDB(pdb.cache.ldb)
		if err != nil {
			return nil, err
		}
	}

	// Fetch the write cursor position recorded in the metadata.
	var metaFileNum, metaOffset uint32
	loadCursor := func(tx database.Tx) error {
		serialized := tx.Metadata().Get(writeLocKeyName)
		if serialized == nil {
			return makeDbErr(database.ErrCorruption,
				"write cursor does not exist", nil)
		}

		fileNum, offset, err := deserializeWriteRow(serialized)
		metaFileNum, metaOffset = fileNum, offset
		return err
	}
	if err := pdb.View(loadCursor); err != nil {
		return nil, err
	}

	// The block store position being AFTER the metadata position is a
	// fairly common result of an unclean shutdown that happened while block
	// data was in the middle of being written.  The metadata is only
	// updated once the block data has been written, so rolling the files
	// back to the metadata position simply restores the last known good
	// point.
	cursor := pdb.store.writeCursor
	diskAhead := cursor.curFileNum > metaFileNum
	if cursor.curFileNum == metaFileNum {
		diskAhead = cursor.curOffset > metaOffset
	}
	if diskAhead {
		log.Info("Detected unclean shutdown - Repairing...")
		log.Debugf("Metadata claims file %d, offset %d. Block data is "+
			"at file %d, offset %d", metaFileNum, metaOffset,
			cursor.curFileNum, cursor.curOffset)
		pdb.store.handleRollback(metaFileNum, metaOffset)
		log.Infof("Database sync complete")
	}

	// The block store position being BEFORE the metadata position is
	// treated as corruption.  Block data is synced before the metadata is
	// updated, so this should only be seen when block files are missing,
	// deleted, or truncated, which is generally not easily recoverable.
	// Rebuilding the metadata from the block files might become possible in
	// the future, but it would need coordination from a higher layer since
	// it could invalidate other metadata.
	//
	// This comparison is deliberately made after any rollback above so it
	// sees the cursor fields as they are at this point.
	diskBehind := cursor.curFileNum < metaFileNum
	if cursor.curFileNum == metaFileNum {
		diskBehind = cursor.curOffset < metaOffset
	}
	if diskBehind {
		desc := fmt.Sprintf("metadata claims file %d, offset %d, but "+
			"block data is at file %d, offset %d", metaFileNum,
			metaOffset, cursor.curFileNum, cursor.curOffset)
		_ = log.Warnf("***Database corruption detected***: %v", desc)
		return nil, makeDbErr(database.ErrCorruption, desc, nil)
	}

	return pdb, nil
}
database: Major redesign of database package. This commit contains a complete redesign and rewrite of the database package that approaches things in a vastly different manner than the previous version. This is the first part of several stages that will be needed to ultimately make use of this new package. Some of the reason for this were discussed in #255, however a quick summary is as follows: - The previous database could only contain blocks on the main chain and reorgs required deleting the blocks from the database. This made it impossible to store orphans and could make external RPC calls for information about blocks during the middle of a reorg fail. - The previous database interface forced a high level of bitcoin-specific intelligence such as spend tracking into each backend driver. - The aforementioned point led to making it difficult to implement new backend drivers due to the need to repeat a lot of non-trivial logic which is better handled at a higher layer, such as the blockchain package. - The old database stored all blocks in leveldb. This made it extremely inefficient to do things such as lookup headers and individual transactions since the entire block had to be loaded from leveldb (which entails it doing data copies) to get access. 
In order to address all of these concerns, and others not mentioned, the database interface has been redesigned as follows: - Two main categories of functionality are provided: block storage and metadata storage - All block storage and metadata storage are done via read-only and read-write MVCC transactions with both manual and managed modes - Support for multiple concurrent readers and a single writer - Readers use a snapshot and therefore are not blocked by the writer - Some key properties of the block storage and retrieval API: - It is generic and does NOT contain additional bitcoin logic such spend tracking and block linking - Provides access to the raw serialized bytes so deserialization is not forced for callers that don't need it - Support for fetching headers via independent functions which allows implementations to provide significant optimizations - Ability to efficiently retrieve arbitrary regions of blocks (transactions, scripts, etc) - A rich metadata storage API is provided: - Key/value with arbitrary data - Support for buckets and nested buckets - Bucket iteration through a couple of different mechanisms - Cursors for efficient and direct key seeking - Supports registration of backend database implementations - Comprehensive test coverage - Provides strong documentation with example usage This commit also contains an implementation of the previously discussed interface named ffldb (flat file plus leveldb metadata backend). 
Here is a quick overview: - Highly optimized for read performance with consistent write performance regardless of database size - All blocks are stored in flat files on the file system - Bulk block region fetching is optimized to perform linear reads which improves performance on spindle disks - Anti-corruption mechanisms: - Flat files contain full block checksums to quickly an easily detect database corruption without needing to do expensive merkle root calculations - Metadata checksums - Open reconciliation - Extensive test coverage: - Comprehensive blackbox interface testing - Whitebox testing which uses intimate knowledge to exercise uncommon failure paths such as deleting files out from under the database - Corruption tests (replacing random data in the files) In addition, this commit also contains a new tool under the new database directory named dbtool which provides a few basic commands for testing the database. It is designed around commands, so it could be useful to expand on in the future. Finally, this commit addresses the following issues: - Adds support for and therefore closes #255 - Fixes #199 - Fixes #201 - Implements and closes #256 - Obsoletes and closes #257 - Closes #247 once the required chain and btcd modifications are in place to make use of this new code 2016-02-03 18:42:04 +01:00			`// Copyright (c) 2015-2016 The btcsuite developers`
			`// Use of this source code is governed by an ISC`
			`// license that can be found in the LICENSE file.`

			`package ffldb`

			`import (`
			`"fmt"`
			`"hash/crc32"`

			`database "github.com/btcsuite/btcd/database2"`
			`)`

			`// The serialized write cursor location format is:`
			`//`
			`// [0:4] Block file (4 bytes)`
			`// [4:8] File offset (4 bytes)`
			`// [8:12] Castagnoli CRC-32 checksum (4 bytes)`

			`// serializeWriteRow serialize the current block file and offset where new`
			`// will be written into a format suitable for storage into the metadata.`
			`func serializeWriteRow(curBlockFileNum, curFileOffset uint32) []byte {`
			`var serializedRow [12]byte`
			`byteOrder.PutUint32(serializedRow[0:4], curBlockFileNum)`
			`byteOrder.PutUint32(serializedRow[4:8], curFileOffset)`
			`checksum := crc32.Checksum(serializedRow[:8], castagnoli)`
			`byteOrder.PutUint32(serializedRow[8:12], checksum)`
			`return serializedRow[:]`
			`}`

			`// deserializeWriteRow deserializes the write cursor location stored in the`
			`// metadata. Returns ErrCorruption if the checksum of the entry doesn't match.`
			`func deserializeWriteRow(writeRow []byte) (uint32, uint32, error) {`
			`// Ensure the checksum matches. The checksum is at the end.`
			`gotChecksum := crc32.Checksum(writeRow[:8], castagnoli)`
			`wantChecksumBytes := writeRow[8:12]`
			`wantChecksum := byteOrder.Uint32(wantChecksumBytes)`
			`if gotChecksum != wantChecksum {`
			`str := fmt.Sprintf("metadata for write cursor does not match "+`
			`"the expected checksum - got %d, want %d", gotChecksum,`
			`wantChecksum)`
			`return 0, 0, makeDbErr(database.ErrCorruption, str, nil)`
			`}`

			`fileNum := byteOrder.Uint32(writeRow[0:4])`
			`fileOffset := byteOrder.Uint32(writeRow[4:8])`
			`return fileNum, fileOffset, nil`
			`}`

			`// reconcileDB reconciles the metadata with the flat block files on disk. It`
			`// will also initialize the underlying database if the create flag is set.`
			`func reconcileDB(pdb *db, create bool) (database.DB, error) {`
			`// Perform initial internal bucket and value creation during database`
			`// creation.`
			`if create {`
database: Implement cache layer. This commit adds a database cache layer to the ffldb database backend so that callers can commit multiple transactions without having to incur the overhead of a disk sync on every new block. 2016-02-03 18:41:46 +01:00			`if err := initDB(pdb.cache.ldb); err != nil {`
database: Major redesign of database package. This commit contains a complete redesign and rewrite of the database package that approaches things in a vastly different manner than the previous version. This is the first part of several stages that will be needed to ultimately make use of this new package. Some of the reason for this were discussed in #255, however a quick summary is as follows: - The previous database could only contain blocks on the main chain and reorgs required deleting the blocks from the database. This made it impossible to store orphans and could make external RPC calls for information about blocks during the middle of a reorg fail. - The previous database interface forced a high level of bitcoin-specific intelligence such as spend tracking into each backend driver. - The aforementioned point led to making it difficult to implement new backend drivers due to the need to repeat a lot of non-trivial logic which is better handled at a higher layer, such as the blockchain package. - The old database stored all blocks in leveldb. This made it extremely inefficient to do things such as lookup headers and individual transactions since the entire block had to be loaded from leveldb (which entails it doing data copies) to get access. 
In order to address all of these concerns, and others not mentioned, the database interface has been redesigned as follows: - Two main categories of functionality are provided: block storage and metadata storage - All block storage and metadata storage are done via read-only and read-write MVCC transactions with both manual and managed modes - Support for multiple concurrent readers and a single writer - Readers use a snapshot and therefore are not blocked by the writer - Some key properties of the block storage and retrieval API: - It is generic and does NOT contain additional bitcoin logic such spend tracking and block linking - Provides access to the raw serialized bytes so deserialization is not forced for callers that don't need it - Support for fetching headers via independent functions which allows implementations to provide significant optimizations - Ability to efficiently retrieve arbitrary regions of blocks (transactions, scripts, etc) - A rich metadata storage API is provided: - Key/value with arbitrary data - Support for buckets and nested buckets - Bucket iteration through a couple of different mechanisms - Cursors for efficient and direct key seeking - Supports registration of backend database implementations - Comprehensive test coverage - Provides strong documentation with example usage This commit also contains an implementation of the previously discussed interface named ffldb (flat file plus leveldb metadata backend). 
Here is a quick overview: - Highly optimized for read performance with consistent write performance regardless of database size - All blocks are stored in flat files on the file system - Bulk block region fetching is optimized to perform linear reads which improves performance on spindle disks - Anti-corruption mechanisms: - Flat files contain full block checksums to quickly an easily detect database corruption without needing to do expensive merkle root calculations - Metadata checksums - Open reconciliation - Extensive test coverage: - Comprehensive blackbox interface testing - Whitebox testing which uses intimate knowledge to exercise uncommon failure paths such as deleting files out from under the database - Corruption tests (replacing random data in the files) In addition, this commit also contains a new tool under the new database directory named dbtool which provides a few basic commands for testing the database. It is designed around commands, so it could be useful to expand on in the future. Finally, this commit addresses the following issues: - Adds support for and therefore closes #255 - Fixes #199 - Fixes #201 - Implements and closes #256 - Obsoletes and closes #257 - Closes #247 once the required chain and btcd modifications are in place to make use of this new code 2016-02-03 18:42:04 +01:00			`return nil, err`
			`}`
			`}`

			`// Load the current write cursor position from the metadata.`
			`var curFileNum, curOffset uint32`
			`err := pdb.View(func(tx database.Tx) error {`
			`writeRow := tx.Metadata().Get(writeLocKeyName)`
			`if writeRow == nil {`
			`str := "write cursor does not exist"`
			`return makeDbErr(database.ErrCorruption, str, nil)`
			`}`

			`var err error`
			`curFileNum, curOffset, err = deserializeWriteRow(writeRow)`
			`return err`
			`})`
			`if err != nil {`
			`return nil, err`
			`}`

			`// When the write cursor position found by scanning the block files on`
			`// disk is AFTER the position the metadata believes to be true, truncate`
			`// the files on disk to match the metadata. This can be a fairly common`
			`// occurrence in unclean shutdown scenarios while the block files are in`
			`// the middle of being written. Since the metadata isn't updated until`
			`// after the block data is written, this is effectively just a rollback`
			`// to the known good point before the unclean shutdown.`
			`wc := pdb.store.writeCursor`
			`if wc.curFileNum > curFileNum \|\| (wc.curFileNum == curFileNum &&`
			`wc.curOffset > curOffset) {`

			`log.Info("Detected unclean shutdown - Repairing...")`
			`log.Debugf("Metadata claims file %d, offset %d. Block data is "+`
			`"at file %d, offset %d", curFileNum, curOffset,`
			`wc.curFileNum, wc.curOffset)`
			`pdb.store.handleRollback(curFileNum, curOffset)`
			`log.Infof("Database sync complete")`
			`}`

			`// When the write cursor position found by scanning the block files on`
			`// disk is BEFORE the position the metadata believes to be true, return`
			`// a corruption error. Since sync is called after each block is written`
			`// and before the metadata is updated, this should only happen in the`
			`// case of missing, deleted, or truncated block files, which generally`
			`// is not an easily recoverable scenario. In the future, it might be`
			`// possible to rescan and rebuild the metadata from the block files,`
			`// however, that would need to happen with coordination from a higher`
			`// layer since it could invalidate other metadata.`
			`if wc.curFileNum < curFileNum \|\| (wc.curFileNum == curFileNum &&`
			`wc.curOffset < curOffset) {`

			`str := fmt.Sprintf("metadata claims file %d, offset %d, but "+`
			`"block data is at file %d, offset %d", curFileNum,`
			`curOffset, wc.curFileNum, wc.curOffset)`
			`_ = log.Warnf("*Database corruption detected*: %v", str)`
			`return nil, makeDbErr(database.ErrCorruption, str, nil)`
			`}`

			`return pdb, nil`
			`}`