2019-05-13 22:46:52 -04:00
|
|
|
import time
|
2019-12-30 18:47:37 -05:00
|
|
|
import asyncio
|
|
|
|
from struct import pack, unpack
|
2020-02-25 14:15:40 -05:00
|
|
|
from concurrent.futures.thread import ThreadPoolExecutor
|
2020-03-31 10:14:20 -04:00
|
|
|
from typing import Optional
|
2020-04-23 21:17:44 -04:00
|
|
|
from prometheus_client import Gauge, Histogram
|
2019-12-31 14:52:57 -05:00
|
|
|
import lbry
|
|
|
|
from lbry.schema.claim import Claim
|
2021-01-19 04:37:31 -03:00
|
|
|
from lbry.wallet.server.db.elastic_search import SearchIndex
|
2019-12-31 14:52:57 -05:00
|
|
|
from lbry.wallet.server.db.writer import SQLDB
|
|
|
|
from lbry.wallet.server.daemon import DaemonError
|
|
|
|
from lbry.wallet.server.hash import hash_to_hex_str, HASHX_LEN
|
|
|
|
from lbry.wallet.server.util import chunks, class_logger
|
|
|
|
from lbry.wallet.server.leveldb import FlushData
|
2021-01-21 16:08:33 -05:00
|
|
|
from lbry.wallet.server.udp import StatusServer
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
|
|
|
|
class Prefetcher:
|
|
|
|
"""Prefetches blocks (in the forward direction only)."""
|
|
|
|
|
|
|
|
def __init__(self, daemon, coin, blocks_event):
|
|
|
|
self.logger = class_logger(__name__, self.__class__.__name__)
|
|
|
|
self.daemon = daemon
|
|
|
|
self.coin = coin
|
|
|
|
self.blocks_event = blocks_event
|
|
|
|
self.blocks = []
|
|
|
|
self.caught_up = False
|
|
|
|
# Access to fetched_height should be protected by the semaphore
|
|
|
|
self.fetched_height = None
|
|
|
|
self.semaphore = asyncio.Semaphore()
|
|
|
|
self.refill_event = asyncio.Event()
|
|
|
|
# The prefetched block cache size. The min cache size has
|
|
|
|
# little effect on sync time.
|
|
|
|
self.cache_size = 0
|
|
|
|
self.min_cache_size = 10 * 1024 * 1024
|
|
|
|
# This makes the first fetch be 10 blocks
|
|
|
|
self.ave_size = self.min_cache_size // 10
|
|
|
|
self.polling_delay = 5
|
|
|
|
|
|
|
|
async def main_loop(self, bp_height):
|
|
|
|
"""Loop forever polling for more blocks."""
|
|
|
|
await self.reset_height(bp_height)
|
|
|
|
while True:
|
|
|
|
try:
|
|
|
|
# Sleep a while if there is nothing to prefetch
|
|
|
|
await self.refill_event.wait()
|
|
|
|
if not await self._prefetch_blocks():
|
|
|
|
await asyncio.sleep(self.polling_delay)
|
|
|
|
except DaemonError as e:
|
|
|
|
self.logger.info(f'ignoring daemon error: {e}')
|
|
|
|
|
|
|
|
def get_prefetched_blocks(self):
|
|
|
|
"""Called by block processor when it is processing queued blocks."""
|
|
|
|
blocks = self.blocks
|
|
|
|
self.blocks = []
|
|
|
|
self.cache_size = 0
|
|
|
|
self.refill_event.set()
|
|
|
|
return blocks
|
|
|
|
|
|
|
|
async def reset_height(self, height):
|
|
|
|
"""Reset to prefetch blocks from the block processor's height.
|
|
|
|
|
|
|
|
Used in blockchain reorganisations. This coroutine can be
|
|
|
|
called asynchronously to the _prefetch_blocks coroutine so we
|
|
|
|
must synchronize with a semaphore.
|
|
|
|
"""
|
|
|
|
async with self.semaphore:
|
|
|
|
self.blocks.clear()
|
|
|
|
self.cache_size = 0
|
|
|
|
self.fetched_height = height
|
|
|
|
self.refill_event.set()
|
|
|
|
|
|
|
|
daemon_height = await self.daemon.height()
|
|
|
|
behind = daemon_height - height
|
|
|
|
if behind > 0:
|
|
|
|
self.logger.info(f'catching up to daemon height {daemon_height:,d} '
|
|
|
|
f'({behind:,d} blocks behind)')
|
|
|
|
else:
|
|
|
|
self.logger.info(f'caught up to daemon height {daemon_height:,d}')
|
|
|
|
|
|
|
|
async def _prefetch_blocks(self):
|
|
|
|
"""Prefetch some blocks and put them on the queue.
|
|
|
|
|
|
|
|
Repeats until the queue is full or caught up.
|
|
|
|
"""
|
|
|
|
daemon = self.daemon
|
|
|
|
daemon_height = await daemon.height()
|
|
|
|
async with self.semaphore:
|
|
|
|
while self.cache_size < self.min_cache_size:
|
|
|
|
# Try and catch up all blocks but limit to room in cache.
|
|
|
|
# Constrain fetch count to between 0 and 500 regardless;
|
|
|
|
# testnet can be lumpy.
|
|
|
|
cache_room = self.min_cache_size // self.ave_size
|
|
|
|
count = min(daemon_height - self.fetched_height, cache_room)
|
|
|
|
count = min(500, max(count, 0))
|
|
|
|
if not count:
|
|
|
|
self.caught_up = True
|
|
|
|
return False
|
|
|
|
|
|
|
|
first = self.fetched_height + 1
|
|
|
|
hex_hashes = await daemon.block_hex_hashes(first, count)
|
|
|
|
if self.caught_up:
|
|
|
|
self.logger.info('new block height {:,d} hash {}'
|
|
|
|
.format(first + count-1, hex_hashes[-1]))
|
|
|
|
blocks = await daemon.raw_blocks(hex_hashes)
|
|
|
|
|
|
|
|
assert count == len(blocks)
|
|
|
|
|
|
|
|
# Special handling for genesis block
|
|
|
|
if first == 0:
|
|
|
|
blocks[0] = self.coin.genesis_block(blocks[0])
|
|
|
|
self.logger.info(f'verified genesis block with hash {hex_hashes[0]}')
|
|
|
|
|
|
|
|
# Update our recent average block size estimate
|
|
|
|
size = sum(len(block) for block in blocks)
|
|
|
|
if count >= 10:
|
|
|
|
self.ave_size = size // count
|
|
|
|
else:
|
|
|
|
self.ave_size = (size + (10 - count) * self.ave_size) // 10
|
|
|
|
|
|
|
|
self.blocks.extend(blocks)
|
|
|
|
self.cache_size += size
|
|
|
|
self.fetched_height += count
|
|
|
|
self.blocks_event.set()
|
|
|
|
|
|
|
|
self.refill_event.clear()
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
class ChainError(Exception):
|
|
|
|
"""Raised on error processing blocks."""
|
|
|
|
|
|
|
|
|
2020-04-23 21:17:44 -04:00
|
|
|
NAMESPACE = "wallet_server"
|
2020-05-25 10:24:31 -04:00
|
|
|
HISTOGRAM_BUCKETS = (
|
|
|
|
.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 60.0, float('inf')
|
|
|
|
)
|
2020-04-23 21:17:44 -04:00
|
|
|
|
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
class BlockProcessor:
|
|
|
|
"""Process blocks and update the DB state to match.
|
|
|
|
|
|
|
|
Employ a prefetcher to prefetch blocks in batches for processing.
|
|
|
|
Coordinate backing up in case of chain reorganisations.
|
|
|
|
"""
|
|
|
|
|
2020-04-23 21:17:44 -04:00
|
|
|
block_count_metric = Gauge(
|
|
|
|
"block_count", "Number of processed blocks", namespace=NAMESPACE
|
|
|
|
)
|
2020-05-25 10:24:31 -04:00
|
|
|
block_update_time_metric = Histogram(
|
|
|
|
"block_time", "Block update times", namespace=NAMESPACE, buckets=HISTOGRAM_BUCKETS
|
|
|
|
)
|
2020-04-23 21:17:44 -04:00
|
|
|
reorg_count_metric = Gauge(
|
|
|
|
"reorg_count", "Number of reorgs", namespace=NAMESPACE
|
|
|
|
)
|
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
def __init__(self, env, db, daemon, notifications):
|
|
|
|
self.env = env
|
|
|
|
self.db = db
|
|
|
|
self.daemon = daemon
|
|
|
|
self.notifications = notifications
|
|
|
|
|
|
|
|
self.coin = env.coin
|
|
|
|
self.blocks_event = asyncio.Event()
|
|
|
|
self.prefetcher = Prefetcher(daemon, env.coin, self.blocks_event)
|
|
|
|
self.logger = class_logger(__name__, self.__class__.__name__)
|
2020-02-25 14:15:40 -05:00
|
|
|
self.executor = ThreadPoolExecutor(1)
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
# Meta
|
|
|
|
self.next_cache_check = 0
|
|
|
|
self.touched = set()
|
|
|
|
self.reorg_count = 0
|
|
|
|
|
|
|
|
# Caches of unflushed items.
|
|
|
|
self.headers = []
|
2020-11-09 15:34:42 -05:00
|
|
|
self.block_hashes = []
|
2020-11-09 13:09:00 -05:00
|
|
|
self.block_txs = []
|
2019-12-30 18:47:37 -05:00
|
|
|
self.undo_infos = []
|
|
|
|
|
|
|
|
# UTXO cache
|
|
|
|
self.utxo_cache = {}
|
|
|
|
self.db_deletes = []
|
|
|
|
|
|
|
|
# If the lock is successfully acquired, in-memory chain state
|
|
|
|
# is consistent with self.height
|
|
|
|
self.state_lock = asyncio.Lock()
|
|
|
|
|
2020-02-13 11:01:02 -05:00
|
|
|
self.search_cache = {}
|
2020-06-04 09:15:21 -04:00
|
|
|
self.history_cache = {}
|
2021-01-21 16:08:33 -05:00
|
|
|
self.status_server = StatusServer()
|
2020-02-25 14:15:40 -05:00
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
async def run_in_thread_with_lock(self, func, *args):
|
|
|
|
# Run in a thread to prevent blocking. Shielded so that
|
|
|
|
# cancellations from shutdown don't lose work - when the task
|
|
|
|
# completes the data will be flushed and then we shut down.
|
|
|
|
# Take the state lock to be certain in-memory state is
|
|
|
|
# consistent and not being updated elsewhere.
|
|
|
|
async def run_in_thread_locked():
|
|
|
|
async with self.state_lock:
|
2020-02-25 14:15:40 -05:00
|
|
|
return await asyncio.get_event_loop().run_in_executor(self.executor, func, *args)
|
2019-12-30 18:47:37 -05:00
|
|
|
return await asyncio.shield(run_in_thread_locked())
|
|
|
|
|
|
|
|
async def check_and_advance_blocks(self, raw_blocks):
|
|
|
|
"""Process the list of raw blocks passed. Detects and handles
|
|
|
|
reorgs.
|
|
|
|
"""
|
|
|
|
if not raw_blocks:
|
|
|
|
return
|
|
|
|
first = self.height + 1
|
|
|
|
blocks = [self.coin.block(raw_block, first + n)
|
|
|
|
for n, raw_block in enumerate(raw_blocks)]
|
|
|
|
headers = [block.header for block in blocks]
|
|
|
|
hprevs = [self.coin.header_prevhash(h) for h in headers]
|
|
|
|
chain = [self.tip] + [self.coin.header_hash(h) for h in headers[:-1]]
|
|
|
|
|
|
|
|
if hprevs == chain:
|
2020-02-04 10:26:22 -05:00
|
|
|
start = time.perf_counter()
|
2019-12-30 18:47:37 -05:00
|
|
|
await self.run_in_thread_with_lock(self.advance_blocks, blocks)
|
2021-01-19 04:37:31 -03:00
|
|
|
await self.db.search_index.sync_queue(self.sql.claim_queue)
|
2020-02-13 11:01:02 -05:00
|
|
|
for cache in self.search_cache.values():
|
|
|
|
cache.clear()
|
2020-06-04 09:15:21 -04:00
|
|
|
self.history_cache.clear()
|
2020-12-14 13:42:20 -05:00
|
|
|
self.notifications.notified_mempool_txs.clear()
|
2019-12-30 18:47:37 -05:00
|
|
|
await self._maybe_flush()
|
2020-02-04 10:26:22 -05:00
|
|
|
processed_time = time.perf_counter() - start
|
2020-04-23 21:17:44 -04:00
|
|
|
self.block_count_metric.set(self.height)
|
|
|
|
self.block_update_time_metric.observe(processed_time)
|
2021-01-21 16:08:33 -05:00
|
|
|
self.status_server.set_height(self.db.fs_height, self.db.db_tip)
|
2019-12-30 18:47:37 -05:00
|
|
|
if not self.db.first_sync:
|
|
|
|
s = '' if len(blocks) == 1 else 's'
|
2020-02-04 10:26:22 -05:00
|
|
|
self.logger.info('processed {:,d} block{} in {:.1f}s'.format(len(blocks), s, processed_time))
|
2019-12-30 18:47:37 -05:00
|
|
|
if self._caught_up_event.is_set():
|
|
|
|
await self.notifications.on_block(self.touched, self.height)
|
|
|
|
self.touched = set()
|
|
|
|
elif hprevs[0] != chain[0]:
|
|
|
|
await self.reorg_chain()
|
|
|
|
else:
|
|
|
|
# It is probably possible but extremely rare that what
|
|
|
|
# bitcoind returns doesn't form a chain because it
|
|
|
|
# reorg-ed the chain as it was processing the batched
|
|
|
|
# block hash requests. Should this happen it's simplest
|
|
|
|
# just to reset the prefetcher and try again.
|
|
|
|
self.logger.warning('daemon blocks do not form a chain; '
|
|
|
|
'resetting the prefetcher')
|
|
|
|
await self.prefetcher.reset_height(self.height)
|
|
|
|
|
2020-03-31 10:14:20 -04:00
|
|
|
async def reorg_chain(self, count: Optional[int] = None):
|
2019-12-30 18:47:37 -05:00
|
|
|
"""Handle a chain reorganisation.
|
|
|
|
|
|
|
|
Count is the number of blocks to simulate a reorg, or None for
|
|
|
|
a real reorg."""
|
|
|
|
if count is None:
|
|
|
|
self.logger.info('chain reorg detected')
|
|
|
|
else:
|
|
|
|
self.logger.info(f'faking a reorg of {count:,d} blocks')
|
2020-12-16 01:26:19 -05:00
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
async def get_raw_blocks(last_height, hex_hashes):
|
|
|
|
heights = range(last_height, last_height - len(hex_hashes), -1)
|
|
|
|
try:
|
2020-06-09 16:04:14 -04:00
|
|
|
blocks = [await self.db.read_raw_block(height) for height in heights]
|
2019-12-30 18:47:37 -05:00
|
|
|
self.logger.info(f'read {len(blocks)} blocks from disk')
|
|
|
|
return blocks
|
|
|
|
except FileNotFoundError:
|
|
|
|
return await self.daemon.raw_blocks(hex_hashes)
|
|
|
|
|
|
|
|
def flush_backup():
|
|
|
|
# self.touched can include other addresses which is
|
|
|
|
# harmless, but remove None.
|
|
|
|
self.touched.discard(None)
|
|
|
|
self.db.flush_backup(self.flush_data(), self.touched)
|
|
|
|
|
2020-12-16 01:26:19 -05:00
|
|
|
try:
|
|
|
|
await self.flush(True)
|
|
|
|
|
|
|
|
start, last, hashes = await self.reorg_hashes(count)
|
|
|
|
# Reverse and convert to hex strings.
|
|
|
|
hashes = [hash_to_hex_str(hash) for hash in reversed(hashes)]
|
|
|
|
self.logger.info("reorg %i block hashes", len(hashes))
|
|
|
|
for hex_hashes in chunks(hashes, 50):
|
|
|
|
raw_blocks = await get_raw_blocks(last, hex_hashes)
|
|
|
|
self.logger.info("got %i raw blocks", len(raw_blocks))
|
|
|
|
await self.run_in_thread_with_lock(self.backup_blocks, raw_blocks)
|
|
|
|
await self.run_in_thread_with_lock(flush_backup)
|
|
|
|
last -= len(raw_blocks)
|
|
|
|
|
|
|
|
await self.run_in_thread_with_lock(self.db.sql.delete_claims_above_height, self.height)
|
|
|
|
await self.prefetcher.reset_height(self.height)
|
|
|
|
self.reorg_count_metric.inc()
|
|
|
|
except:
|
|
|
|
self.logger.exception("boom")
|
|
|
|
raise
|
|
|
|
finally:
|
|
|
|
self.logger.info("done with reorg")
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
async def reorg_hashes(self, count):
|
|
|
|
"""Return a pair (start, last, hashes) of blocks to back up during a
|
|
|
|
reorg.
|
|
|
|
|
|
|
|
The hashes are returned in order of increasing height. Start
|
|
|
|
is the height of the first hash, last of the last.
|
|
|
|
"""
|
|
|
|
start, count = await self.calc_reorg_range(count)
|
|
|
|
last = start + count - 1
|
|
|
|
s = '' if count == 1 else 's'
|
|
|
|
self.logger.info(f'chain was reorganised replacing {count:,d} '
|
|
|
|
f'block{s} at heights {start:,d}-{last:,d}')
|
|
|
|
|
|
|
|
return start, last, await self.db.fs_block_hashes(start, count)
|
|
|
|
|
2020-03-31 10:14:20 -04:00
|
|
|
async def calc_reorg_range(self, count: Optional[int]):
|
2019-12-30 18:47:37 -05:00
|
|
|
"""Calculate the reorg range"""
|
|
|
|
|
|
|
|
def diff_pos(hashes1, hashes2):
|
|
|
|
"""Returns the index of the first difference in the hash lists.
|
|
|
|
If both lists match returns their length."""
|
|
|
|
for n, (hash1, hash2) in enumerate(zip(hashes1, hashes2)):
|
|
|
|
if hash1 != hash2:
|
|
|
|
return n
|
|
|
|
return len(hashes)
|
|
|
|
|
|
|
|
if count is None:
|
|
|
|
# A real reorg
|
|
|
|
start = self.height - 1
|
|
|
|
count = 1
|
|
|
|
while start > 0:
|
|
|
|
hashes = await self.db.fs_block_hashes(start, count)
|
|
|
|
hex_hashes = [hash_to_hex_str(hash) for hash in hashes]
|
|
|
|
d_hex_hashes = await self.daemon.block_hex_hashes(start, count)
|
|
|
|
n = diff_pos(hex_hashes, d_hex_hashes)
|
|
|
|
if n > 0:
|
|
|
|
start += n
|
|
|
|
break
|
|
|
|
count = min(count * 2, start)
|
|
|
|
start -= count
|
|
|
|
|
|
|
|
count = (self.height - start) + 1
|
|
|
|
else:
|
|
|
|
start = (self.height - count) + 1
|
|
|
|
|
|
|
|
return start, count
|
|
|
|
|
|
|
|
def estimate_txs_remaining(self):
|
|
|
|
# Try to estimate how many txs there are to go
|
|
|
|
daemon_height = self.daemon.cached_height()
|
|
|
|
coin = self.coin
|
|
|
|
tail_count = daemon_height - max(self.height, coin.TX_COUNT_HEIGHT)
|
|
|
|
# Damp the initial enthusiasm
|
|
|
|
realism = max(2.0 - 0.9 * self.height / coin.TX_COUNT_HEIGHT, 1.0)
|
|
|
|
return (tail_count * coin.TX_PER_BLOCK +
|
|
|
|
max(coin.TX_COUNT - self.tx_count, 0)) * realism
|
|
|
|
|
|
|
|
# - Flushing
|
|
|
|
def flush_data(self):
|
|
|
|
"""The data for a flush. The lock must be taken."""
|
|
|
|
assert self.state_lock.locked()
|
2020-11-09 15:34:42 -05:00
|
|
|
return FlushData(self.height, self.tx_count, self.headers, self.block_hashes,
|
2020-11-09 13:09:00 -05:00
|
|
|
self.block_txs, self.undo_infos, self.utxo_cache,
|
2019-12-30 18:47:37 -05:00
|
|
|
self.db_deletes, self.tip)
|
|
|
|
|
|
|
|
async def flush(self, flush_utxos):
|
|
|
|
def flush():
|
|
|
|
self.db.flush_dbs(self.flush_data(), flush_utxos,
|
|
|
|
self.estimate_txs_remaining)
|
|
|
|
await self.run_in_thread_with_lock(flush)
|
|
|
|
|
|
|
|
async def _maybe_flush(self):
|
|
|
|
# If caught up, flush everything as client queries are
|
|
|
|
# performed on the DB.
|
|
|
|
if self._caught_up_event.is_set():
|
|
|
|
await self.flush(True)
|
2020-06-12 00:51:02 -04:00
|
|
|
elif time.perf_counter() > self.next_cache_check:
|
|
|
|
await self.flush(True)
|
|
|
|
self.next_cache_check = time.perf_counter() + 30
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
def check_cache_size(self):
|
|
|
|
"""Flush a cache if it gets too big."""
|
|
|
|
# Good average estimates based on traversal of subobjects and
|
|
|
|
# requesting size from Python (see deep_getsizeof).
|
|
|
|
one_MB = 1000*1000
|
|
|
|
utxo_cache_size = len(self.utxo_cache) * 205
|
|
|
|
db_deletes_size = len(self.db_deletes) * 57
|
|
|
|
hist_cache_size = self.db.history.unflushed_memsize()
|
|
|
|
# Roughly ntxs * 32 + nblocks * 42
|
|
|
|
tx_hash_size = ((self.tx_count - self.db.fs_tx_count) * 32
|
|
|
|
+ (self.height - self.db.fs_height) * 42)
|
|
|
|
utxo_MB = (db_deletes_size + utxo_cache_size) // one_MB
|
|
|
|
hist_MB = (hist_cache_size + tx_hash_size) // one_MB
|
|
|
|
|
|
|
|
self.logger.info('our height: {:,d} daemon: {:,d} '
|
|
|
|
'UTXOs {:,d}MB hist {:,d}MB'
|
|
|
|
.format(self.height, self.daemon.cached_height(),
|
|
|
|
utxo_MB, hist_MB))
|
|
|
|
|
|
|
|
# Flush history if it takes up over 20% of cache memory.
|
|
|
|
# Flush UTXOs once they take up 80% of cache memory.
|
|
|
|
cache_MB = self.env.cache_MB
|
|
|
|
if utxo_MB + hist_MB >= cache_MB or hist_MB >= cache_MB // 5:
|
|
|
|
return utxo_MB >= cache_MB * 4 // 5
|
|
|
|
return None
|
|
|
|
|
|
|
|
def advance_blocks(self, blocks):
|
|
|
|
"""Synchronously advance the blocks.
|
|
|
|
|
|
|
|
It is already verified they correctly connect onto our tip.
|
|
|
|
"""
|
|
|
|
min_height = self.db.min_undo_height(self.daemon.cached_height())
|
|
|
|
height = self.height
|
|
|
|
|
|
|
|
for block in blocks:
|
|
|
|
height += 1
|
|
|
|
undo_info = self.advance_txs(
|
2020-11-09 15:34:42 -05:00
|
|
|
height, block.transactions, self.coin.electrum_header(block.header, height),
|
|
|
|
self.coin.header_hash(block.header)
|
2019-12-30 18:47:37 -05:00
|
|
|
)
|
|
|
|
if height >= min_height:
|
|
|
|
self.undo_infos.append((undo_info, height))
|
|
|
|
self.db.write_raw_block(block.raw, height)
|
|
|
|
|
|
|
|
headers = [block.header for block in blocks]
|
|
|
|
self.height = height
|
|
|
|
self.headers.extend(headers)
|
|
|
|
self.tip = self.coin.header_hash(headers[-1])
|
|
|
|
|
2020-11-09 15:34:42 -05:00
|
|
|
def advance_txs(self, height, txs, header, block_hash):
|
|
|
|
self.block_hashes.append(block_hash)
|
2020-11-09 13:56:46 -05:00
|
|
|
self.block_txs.append((b''.join(tx_hash for tx, tx_hash in txs), [tx.raw for tx, _ in txs]))
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
undo_info = []
|
|
|
|
tx_num = self.tx_count
|
|
|
|
hashXs_by_tx = []
|
|
|
|
|
2020-11-09 15:34:42 -05:00
|
|
|
# Use local vars for speed in the loops
|
|
|
|
put_utxo = self.utxo_cache.__setitem__
|
|
|
|
spend_utxo = self.spend_utxo
|
|
|
|
undo_info_append = undo_info.append
|
|
|
|
update_touched = self.touched.update
|
|
|
|
append_hashX_by_tx = hashXs_by_tx.append
|
|
|
|
hashX_from_script = self.coin.hashX_from_script
|
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
for tx, tx_hash in txs:
|
|
|
|
hashXs = []
|
|
|
|
append_hashX = hashXs.append
|
2020-11-09 13:56:46 -05:00
|
|
|
tx_numb = pack('<I', tx_num)
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
# Spend the inputs
|
|
|
|
for txin in tx.inputs:
|
|
|
|
if txin.is_generation():
|
|
|
|
continue
|
2020-11-09 15:34:42 -05:00
|
|
|
cache_value = spend_utxo(txin.prev_hash, txin.prev_idx)
|
|
|
|
undo_info_append(cache_value)
|
2019-12-30 18:47:37 -05:00
|
|
|
append_hashX(cache_value[:-12])
|
|
|
|
|
|
|
|
# Add the new UTXOs
|
|
|
|
for idx, txout in enumerate(tx.outputs):
|
|
|
|
# Get the hashX. Ignore unspendable outputs
|
2020-11-09 15:34:42 -05:00
|
|
|
hashX = hashX_from_script(txout.pk_script)
|
2019-12-30 18:47:37 -05:00
|
|
|
if hashX:
|
|
|
|
append_hashX(hashX)
|
2020-11-09 15:34:42 -05:00
|
|
|
put_utxo(tx_hash + pack('<H', idx), hashX + tx_numb + pack('<Q', txout.value))
|
2019-12-30 18:47:37 -05:00
|
|
|
|
2020-11-09 15:34:42 -05:00
|
|
|
append_hashX_by_tx(hashXs)
|
|
|
|
update_touched(hashXs)
|
2020-11-25 16:08:04 -05:00
|
|
|
self.db.total_transactions.append(tx_hash)
|
2019-12-30 18:47:37 -05:00
|
|
|
tx_num += 1
|
|
|
|
|
|
|
|
self.db.history.add_unflushed(hashXs_by_tx, self.tx_count)
|
|
|
|
self.tx_count = tx_num
|
|
|
|
self.db.tx_counts.append(tx_num)
|
|
|
|
|
|
|
|
return undo_info
|
|
|
|
|
|
|
|
def backup_blocks(self, raw_blocks):
|
|
|
|
"""Backup the raw blocks and flush.
|
|
|
|
|
|
|
|
The blocks should be in order of decreasing height, starting at.
|
|
|
|
self.height. A flush is performed once the blocks are backed up.
|
|
|
|
"""
|
|
|
|
self.db.assert_flushed(self.flush_data())
|
|
|
|
assert self.height >= len(raw_blocks)
|
|
|
|
|
|
|
|
coin = self.coin
|
|
|
|
for raw_block in raw_blocks:
|
2020-12-16 01:26:19 -05:00
|
|
|
self.logger.info("backup block %i", self.height)
|
2019-12-30 18:47:37 -05:00
|
|
|
# Check and update self.tip
|
|
|
|
block = coin.block(raw_block, self.height)
|
|
|
|
header_hash = coin.header_hash(block.header)
|
|
|
|
if header_hash != self.tip:
|
|
|
|
raise ChainError('backup block {} not tip {} at height {:,d}'
|
|
|
|
.format(hash_to_hex_str(header_hash),
|
|
|
|
hash_to_hex_str(self.tip),
|
|
|
|
self.height))
|
|
|
|
self.tip = coin.header_prevhash(block.header)
|
|
|
|
self.backup_txs(block.transactions)
|
|
|
|
self.height -= 1
|
|
|
|
self.db.tx_counts.pop()
|
|
|
|
|
|
|
|
self.logger.info(f'backed up to height {self.height:,d}')
|
|
|
|
|
|
|
|
def backup_txs(self, txs):
|
|
|
|
# Prevout values, in order down the block (coinbase first if present)
|
|
|
|
# undo_info is in reverse block order
|
|
|
|
undo_info = self.db.read_undo_info(self.height)
|
|
|
|
if undo_info is None:
|
|
|
|
raise ChainError(f'no undo information found for height {self.height:,d}')
|
|
|
|
n = len(undo_info)
|
|
|
|
|
|
|
|
# Use local vars for speed in the loops
|
|
|
|
s_pack = pack
|
|
|
|
undo_entry_len = 12 + HASHX_LEN
|
|
|
|
|
|
|
|
for tx, tx_hash in reversed(txs):
|
|
|
|
for idx, txout in enumerate(tx.outputs):
|
|
|
|
# Spend the TX outputs. Be careful with unspendable
|
|
|
|
# outputs - we didn't save those in the first place.
|
2020-11-09 13:56:46 -05:00
|
|
|
hashX = self.coin.hashX_from_script(txout.pk_script)
|
2019-12-30 18:47:37 -05:00
|
|
|
if hashX:
|
2020-11-09 13:56:46 -05:00
|
|
|
cache_value = self.spend_utxo(tx_hash, idx)
|
|
|
|
self.touched.add(cache_value[:-12])
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
# Restore the inputs
|
|
|
|
for txin in reversed(tx.inputs):
|
|
|
|
if txin.is_generation():
|
|
|
|
continue
|
|
|
|
n -= undo_entry_len
|
|
|
|
undo_item = undo_info[n:n + undo_entry_len]
|
2020-11-09 13:56:46 -05:00
|
|
|
self.utxo_cache[txin.prev_hash + s_pack('<H', txin.prev_idx)] = undo_item
|
|
|
|
self.touched.add(undo_item[:-12])
|
2019-12-30 18:47:37 -05:00
|
|
|
|
2020-12-16 01:27:03 -05:00
|
|
|
self.db.total_transactions.pop()
|
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
assert n == 0
|
|
|
|
self.tx_count -= len(txs)
|
|
|
|
|
|
|
|
"""An in-memory UTXO cache, representing all changes to UTXO state
|
|
|
|
since the last DB flush.
|
|
|
|
|
|
|
|
We want to store millions of these in memory for optimal
|
|
|
|
performance during initial sync, because then it is possible to
|
|
|
|
spend UTXOs without ever going to the database (other than as an
|
|
|
|
entry in the address history, and there is only one such entry per
|
|
|
|
TX not per UTXO). So store them in a Python dictionary with
|
|
|
|
binary keys and values.
|
|
|
|
|
|
|
|
Key: TX_HASH + TX_IDX (32 + 2 = 34 bytes)
|
|
|
|
Value: HASHX + TX_NUM + VALUE (11 + 4 + 8 = 23 bytes)
|
|
|
|
|
|
|
|
That's 57 bytes of raw data in-memory. Python dictionary overhead
|
|
|
|
means each entry actually uses about 205 bytes of memory. So
|
|
|
|
almost 5 million UTXOs can fit in 1GB of RAM. There are
|
|
|
|
approximately 42 million UTXOs on bitcoin mainnet at height
|
|
|
|
433,000.
|
|
|
|
|
|
|
|
Semantics:
|
|
|
|
|
|
|
|
add: Add it to the cache dictionary.
|
|
|
|
|
|
|
|
spend: Remove it if in the cache dictionary. Otherwise it's
|
|
|
|
been flushed to the DB. Each UTXO is responsible for two
|
|
|
|
entries in the DB. Mark them for deletion in the next
|
|
|
|
cache flush.
|
|
|
|
|
|
|
|
The UTXO database format has to be able to do two things efficiently:
|
|
|
|
|
|
|
|
1. Given an address be able to list its UTXOs and their values
|
|
|
|
so its balance can be efficiently computed.
|
|
|
|
|
|
|
|
2. When processing transactions, for each prevout spent - a (tx_hash,
|
|
|
|
idx) pair - we have to be able to remove it from the DB. To send
|
|
|
|
notifications to clients we also need to know any address it paid
|
|
|
|
to.
|
|
|
|
|
|
|
|
To this end we maintain two "tables", one for each point above:
|
|
|
|
|
|
|
|
1. Key: b'u' + address_hashX + tx_idx + tx_num
|
|
|
|
Value: the UTXO value as a 64-bit unsigned integer
|
|
|
|
|
|
|
|
2. Key: b'h' + compressed_tx_hash + tx_idx + tx_num
|
|
|
|
Value: hashX
|
|
|
|
|
|
|
|
The compressed tx hash is just the first few bytes of the hash of
|
|
|
|
the tx in which the UTXO was created. As this is not unique there
|
|
|
|
will be potential collisions so tx_num is also in the key. When
|
|
|
|
looking up a UTXO the prefix space of the compressed hash needs to
|
|
|
|
be searched and resolved if necessary with the tx_num. The
|
|
|
|
collision rate is low (<0.1%).
|
|
|
|
"""
|
|
|
|
|
|
|
|
def spend_utxo(self, tx_hash, tx_idx):
|
|
|
|
"""Spend a UTXO and return the 33-byte value.
|
|
|
|
|
|
|
|
If the UTXO is not in the cache it must be on disk. We store
|
|
|
|
all UTXOs so not finding one indicates a logic error or DB
|
|
|
|
corruption.
|
|
|
|
"""
|
|
|
|
# Fast track is it being in the cache
|
|
|
|
idx_packed = pack('<H', tx_idx)
|
|
|
|
cache_value = self.utxo_cache.pop(tx_hash + idx_packed, None)
|
|
|
|
if cache_value:
|
|
|
|
return cache_value
|
|
|
|
|
|
|
|
# Spend it from the DB.
|
|
|
|
|
|
|
|
# Key: b'h' + compressed_tx_hash + tx_idx + tx_num
|
|
|
|
# Value: hashX
|
|
|
|
prefix = b'h' + tx_hash[:4] + idx_packed
|
|
|
|
candidates = {db_key: hashX for db_key, hashX
|
|
|
|
in self.db.utxo_db.iterator(prefix=prefix)}
|
|
|
|
for hdb_key, hashX in candidates.items():
|
|
|
|
tx_num_packed = hdb_key[-4:]
|
|
|
|
if len(candidates) > 1:
|
2020-12-16 01:28:30 -05:00
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
tx_num, = unpack('<I', tx_num_packed)
|
2020-12-16 01:28:30 -05:00
|
|
|
try:
|
|
|
|
hash, height = self.db.fs_tx_hash(tx_num)
|
|
|
|
except IndexError:
|
|
|
|
self.logger.error("data integrity error for hashx history: %s missing tx #%s (%s:%s)",
|
|
|
|
hashX.hex(), tx_num, hash_to_hex_str(tx_hash), tx_idx)
|
|
|
|
continue
|
2019-12-30 18:47:37 -05:00
|
|
|
if hash != tx_hash:
|
|
|
|
assert hash is not None # Should always be found
|
|
|
|
continue
|
|
|
|
|
|
|
|
# Key: b'u' + address_hashX + tx_idx + tx_num
|
|
|
|
# Value: the UTXO value as a 64-bit unsigned integer
|
|
|
|
udb_key = b'u' + hashX + hdb_key[-6:]
|
|
|
|
utxo_value_packed = self.db.utxo_db.get(udb_key)
|
2020-12-16 01:26:19 -05:00
|
|
|
if utxo_value_packed is None:
|
|
|
|
self.logger.warning(
|
|
|
|
"%s:%s is not found in UTXO db for %s", hash_to_hex_str(tx_hash), tx_idx, hash_to_hex_str(hashX)
|
|
|
|
)
|
|
|
|
raise ChainError(f"{hash_to_hex_str(tx_hash)}:{tx_idx} is not found in UTXO db for {hash_to_hex_str(hashX)}")
|
|
|
|
# Remove both entries for this UTXO
|
|
|
|
self.db_deletes.append(hdb_key)
|
|
|
|
self.db_deletes.append(udb_key)
|
|
|
|
return hashX + tx_num_packed + utxo_value_packed
|
|
|
|
|
|
|
|
self.logger.error('UTXO {hash_to_hex_str(tx_hash)} / {tx_idx} not found in "h" table')
|
2019-12-30 18:47:37 -05:00
|
|
|
raise ChainError('UTXO {} / {:,d} not found in "h" table'
|
|
|
|
.format(hash_to_hex_str(tx_hash), tx_idx))
|
|
|
|
|
|
|
|
async def _process_prefetched_blocks(self):
|
|
|
|
"""Loop forever processing blocks as they arrive."""
|
|
|
|
while True:
|
|
|
|
if self.height == self.daemon.cached_height():
|
|
|
|
if not self._caught_up_event.is_set():
|
|
|
|
await self._first_caught_up()
|
|
|
|
self._caught_up_event.set()
|
|
|
|
await self.blocks_event.wait()
|
|
|
|
self.blocks_event.clear()
|
2020-12-16 01:28:30 -05:00
|
|
|
if self.reorg_count: # this could only happen by calling the reorg rpc
|
2019-12-30 18:47:37 -05:00
|
|
|
await self.reorg_chain(self.reorg_count)
|
|
|
|
self.reorg_count = 0
|
|
|
|
else:
|
|
|
|
blocks = self.prefetcher.get_prefetched_blocks()
|
2021-01-19 04:37:31 -03:00
|
|
|
try:
|
|
|
|
await self.check_and_advance_blocks(blocks)
|
|
|
|
except Exception:
|
|
|
|
self.logger.exception("error while processing txs")
|
|
|
|
raise
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
async def _first_caught_up(self):
|
|
|
|
self.logger.info(f'caught up to height {self.height}')
|
|
|
|
# Flush everything but with first_sync->False state.
|
|
|
|
first_sync = self.db.first_sync
|
|
|
|
self.db.first_sync = False
|
|
|
|
await self.flush(True)
|
|
|
|
if first_sync:
|
2019-12-31 14:52:57 -05:00
|
|
|
self.logger.info(f'{lbry.__version__} synced to '
|
2019-12-30 18:47:37 -05:00
|
|
|
f'height {self.height:,d}')
|
|
|
|
# Reopen for serving
|
|
|
|
await self.db.open_for_serving()
|
|
|
|
|
|
|
|
async def _first_open_dbs(self):
|
|
|
|
await self.db.open_for_sync()
|
|
|
|
self.height = self.db.db_height
|
|
|
|
self.tip = self.db.db_tip
|
|
|
|
self.tx_count = self.db.db_tx_count
|
|
|
|
|
|
|
|
# --- External API
|
|
|
|
|
|
|
|
async def fetch_and_process_blocks(self, caught_up_event):
|
|
|
|
"""Fetch, process and index blocks from the daemon.
|
|
|
|
|
|
|
|
Sets caught_up_event when first caught up. Flushes to disk
|
|
|
|
and shuts down cleanly if cancelled.
|
|
|
|
|
|
|
|
This is mainly because if, during initial sync ElectrumX is
|
|
|
|
asked to shut down when a large number of blocks have been
|
|
|
|
processed but not written to disk, it should write those to
|
|
|
|
disk before exiting, as otherwise a significant amount of work
|
|
|
|
could be lost.
|
|
|
|
"""
|
2021-01-21 16:08:33 -05:00
|
|
|
|
2019-12-30 18:47:37 -05:00
|
|
|
self._caught_up_event = caught_up_event
|
|
|
|
try:
|
|
|
|
await self._first_open_dbs()
|
2021-01-21 16:08:33 -05:00
|
|
|
self.status_server.set_height(self.db.fs_height, self.db.db_tip)
|
2019-12-30 18:47:37 -05:00
|
|
|
await asyncio.wait([
|
|
|
|
self.prefetcher.main_loop(self.height),
|
|
|
|
self._process_prefetched_blocks()
|
|
|
|
])
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
raise
|
|
|
|
except:
|
|
|
|
self.logger.exception("Block processing failed!")
|
|
|
|
raise
|
|
|
|
finally:
|
2021-01-21 16:08:33 -05:00
|
|
|
self.status_server.stop()
|
2019-12-30 18:47:37 -05:00
|
|
|
# Shut down block processing
|
|
|
|
self.logger.info('flushing to DB for a clean shutdown...')
|
|
|
|
await self.flush(True)
|
|
|
|
self.db.close()
|
2020-02-25 14:15:40 -05:00
|
|
|
self.executor.shutdown(wait=True)
|
2019-12-30 18:47:37 -05:00
|
|
|
|
|
|
|
def force_chain_reorg(self, count):
|
|
|
|
"""Force a reorg of the given number of blocks.
|
|
|
|
|
|
|
|
Returns True if a reorg is queued, false if not caught up.
|
|
|
|
"""
|
|
|
|
if self._caught_up_event.is_set():
|
|
|
|
self.reorg_count = count
|
|
|
|
self.blocks_event.set()
|
|
|
|
return True
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
2019-05-13 22:46:52 -04:00
|
|
|
class Timer:
|
|
|
|
def __init__(self, name):
|
|
|
|
self.name = name
|
|
|
|
self.total = 0
|
|
|
|
self.count = 0
|
|
|
|
self.sub_timers = {}
|
|
|
|
self._last_start = None
|
|
|
|
|
|
|
|
def add_timer(self, name):
|
|
|
|
if name not in self.sub_timers:
|
|
|
|
self.sub_timers[name] = Timer(name)
|
|
|
|
return self.sub_timers[name]
|
|
|
|
|
|
|
|
def run(self, func, *args, forward_timer=False, timer_name=None, **kwargs):
|
|
|
|
t = self.add_timer(timer_name or func.__name__)
|
|
|
|
t.start()
|
|
|
|
try:
|
|
|
|
if forward_timer:
|
|
|
|
return func(*args, **kwargs, timer=t)
|
|
|
|
else:
|
|
|
|
return func(*args, **kwargs)
|
|
|
|
finally:
|
|
|
|
t.stop()
|
|
|
|
|
|
|
|
def start(self):
|
|
|
|
self._last_start = time.time()
|
|
|
|
return self
|
|
|
|
|
|
|
|
def stop(self):
|
|
|
|
self.total += (time.time() - self._last_start)
|
|
|
|
self.count += 1
|
|
|
|
self._last_start = None
|
|
|
|
return self
|
|
|
|
|
|
|
|
def show(self, depth=0, height=None):
|
|
|
|
if depth == 0:
|
|
|
|
print('='*100)
|
|
|
|
if height is not None:
|
|
|
|
print(f'STATISTICS AT HEIGHT {height}')
|
|
|
|
print('='*100)
|
|
|
|
else:
|
|
|
|
print(
|
2019-05-14 23:59:21 -04:00
|
|
|
f"{' '*depth} {self.total/60:4.2f}mins {self.name}"
|
2019-05-13 22:46:52 -04:00
|
|
|
# f"{self.total/self.count:.5f}sec/call, "
|
|
|
|
)
|
|
|
|
for sub_timer in self.sub_timers.values():
|
|
|
|
sub_timer.show(depth+1)
|
|
|
|
if depth == 0:
|
|
|
|
print('='*100)
|
|
|
|
|
|
|
|
|
2018-11-04 04:42:47 -05:00
|
|
|
class LBRYBlockProcessor(BlockProcessor):
|
|
|
|
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
|
super().__init__(*args, **kwargs)
|
|
|
|
if self.env.coin.NET == "regtest":
|
|
|
|
self.prefetcher.polling_delay = 0.5
|
|
|
|
self.should_validate_signatures = self.env.boolean('VALIDATE_CLAIM_SIGNATURES', False)
|
2019-03-30 19:40:01 -04:00
|
|
|
self.logger.info(f"LbryumX Block Processor - Validating signatures: {self.should_validate_signatures}")
|
|
|
|
self.sql: SQLDB = self.db.sql
|
2019-05-13 22:46:52 -04:00
|
|
|
self.timer = Timer('BlockProcessor')
|
2018-11-04 04:42:47 -05:00
|
|
|
|
|
|
|
def advance_blocks(self, blocks):
|
2019-03-30 19:40:01 -04:00
|
|
|
self.sql.begin()
|
|
|
|
try:
|
2019-05-13 22:46:52 -04:00
|
|
|
self.timer.run(super().advance_blocks, blocks)
|
2019-03-30 19:40:01 -04:00
|
|
|
except:
|
|
|
|
self.logger.exception(f'Error while advancing transaction in new block.')
|
|
|
|
raise
|
|
|
|
finally:
|
|
|
|
self.sql.commit()
|
2019-07-20 13:45:38 -04:00
|
|
|
if self.db.first_sync and self.height == self.daemon.cached_height():
|
2019-12-07 18:13:13 -05:00
|
|
|
self.timer.run(self.sql.execute, self.sql.SEARCH_INDEXES, timer_name='executing SEARCH_INDEXES')
|
2019-07-20 10:03:24 -04:00
|
|
|
if self.env.individual_tag_indexes:
|
2019-12-07 18:13:13 -05:00
|
|
|
self.timer.run(self.sql.execute, self.sql.TAG_INDEXES, timer_name='executing TAG_INDEXES')
|
2020-09-24 13:00:18 -04:00
|
|
|
self.timer.run(self.sql.execute, self.sql.LANGUAGE_INDEXES, timer_name='executing LANGUAGE_INDEXES')
|
2019-03-30 19:40:01 -04:00
|
|
|
|
2020-11-09 15:34:42 -05:00
|
|
|
def advance_txs(self, height, txs, header, block_hash):
|
2019-05-13 22:46:52 -04:00
|
|
|
timer = self.timer.sub_timers['advance_blocks']
|
2020-11-09 15:34:42 -05:00
|
|
|
undo = timer.run(super().advance_txs, height, txs, header, block_hash, timer_name='super().advance_txs')
|
2019-05-19 20:22:25 -04:00
|
|
|
timer.run(self.sql.advance_txs, height, txs, header, self.daemon.cached_height(), forward_timer=True)
|
2020-01-13 15:52:31 -05:00
|
|
|
if (height % 10000 == 0 or not self.db.first_sync) and self.logger.isEnabledFor(10):
|
2019-05-13 22:46:52 -04:00
|
|
|
self.timer.show(height=height)
|
2019-03-30 19:40:01 -04:00
|
|
|
return undo
|