Merge pull request #3128 from lbryio/sync_smaller_batches

Improve memory usage during wallet sync
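Before this change, the sync path accumulated every fetched transaction for an address in a `synced_txs` list before saving and firing events. After it, `request_transactions` yields one `dict` of transactions per network batch, and `request_synced_transactions` syncs and persists each batch as it arrives, so roughly one batch is held in memory at a time. Below is a minimal, self-contained sketch of that streaming pattern; `fetch_batch` and `BATCH_SIZE` are illustrative stand-ins, not the actual `Ledger` API.

    import asyncio
    from typing import AsyncIterator, Dict, List

    BATCH_SIZE = 100  # assumption: illustrative constant, not taken from the diff

    async def fetch_batch(txids: List[str]) -> Dict[str, str]:
        # Stand-in for a network round trip returning raw transactions.
        await asyncio.sleep(0)
        return {txid: f"raw-{txid}" for txid in txids}

    async def request_transactions(to_request: List[str]) -> AsyncIterator[Dict[str, str]]:
        # Yield one dict per batch instead of accumulating a full list,
        # mirroring the shape of the reworked Ledger.request_transactions.
        for i in range(0, len(to_request), BATCH_SIZE):
            yield await fetch_batch(to_request[i:i + BATCH_SIZE])

    async def main():
        txids = [f"tx{i}" for i in range(250)]
        async for batch in request_transactions(txids):
            # Persist each batch immediately; only ~BATCH_SIZE entries are live.
            print(f"saved {len(batch)} transactions")

    asyncio.run(main())

Yielding dicts keyed by txid also lets consumers collect results with `txs.extend(tx.values())`, as the last hunk shows.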
commit 7551b51e7d
Jack Robison, 2020-12-23 20:45:03 -05:00, committed by GitHub


@@ -514,7 +514,6 @@ class Ledger(metaclass=LedgerRegistry):
                 )
                 return True
-            synced_txs = []
             to_request = {}
             pending_synced_history = {}
             already_synced = set()
@@ -536,21 +535,14 @@ class Ledger(metaclass=LedgerRegistry):
                 to_request[i] = (txid, remote_height)
             log.debug(
-                "request %i transactions, %i/%i for %s are already synced", len(to_request), len(synced_txs),
+                "request %i transactions, %i/%i for %s are already synced", len(to_request), len(already_synced),
                 len(remote_history), address
             )
             remote_history_txids = set(txid for txid, _ in remote_history)
             async for tx in self.request_synced_transactions(to_request, remote_history_txids, address):
                 pending_synced_history[tx_indexes[tx.id]] = f"{tx.id}:{tx.height}:"
-                synced_txs.append(tx)
-                if len(synced_txs) >= 100:
+                if len(pending_synced_history) % 100 == 0:
                     log.info("Syncing address %s: %d/%d", address, len(pending_synced_history), len(to_request))
-                    await self.db.save_transaction_io_batch(
-                        synced_txs, address, self.address_to_hash160(address), ""
-                    )
-                    while synced_txs:
-                        tx = synced_txs.pop()
-                        self._on_transaction_controller.add(TransactionEvent(address, tx))
             log.info("Sync finished for address %s: %d/%d", address, len(pending_synced_history), len(to_request))
             assert len(pending_synced_history) == len(remote_history), \
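With the buffer gone, the per-address loop only records `txid:height:` history entries and logs every 100th synced transaction; persistence now happens inside `request_synced_transactions` via the new `_sync_and_save_batch` (see below). A hedged sketch of that constant-memory progress logging, with made-up names:

    import asyncio
    from typing import AsyncIterator

    async def synced_txids() -> AsyncIterator[str]:
        # Hypothetical stand-in for request_synced_transactions.
        for i in range(250):
            await asyncio.sleep(0)
            yield f"tx{i}"

    async def main():
        pending = {}
        async for txid in synced_txids():
            pending[txid] = f"{txid}:0:"  # history entry, as in the diff
            if len(pending) % 100 == 0:   # log every 100th; no buffer to flush
                print(f"synced {len(pending)} so far")

    asyncio.run(main())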
@@ -562,11 +554,7 @@ class Ledger(metaclass=LedgerRegistry):
                 if f"{txid}:{height}:" != pending_synced_history[i]:
                     log.warning("history mismatch: %s vs %s", remote_history[remote_i], pending_synced_history[i])
                 synced_history += pending_synced_history[i]
-            await self.db.save_transaction_io_batch(
-                synced_txs, address, self.address_to_hash160(address), synced_history
-            )
-            while synced_txs:
-                self._on_transaction_controller.add(TransactionEvent(address, synced_txs.pop()))
+            await self.db.set_address_history(address, synced_history)

             if address_manager is None:
                 address_manager = await self.get_address_manager_for_address(address)
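Since transaction io rows are now written per batch, the only end-of-sync write left is the address history string itself, which `set_address_history` stores after the assertion checks it against the remote history. The encoding is just the concatenation of the `txid:height:` entries built during the loop; a minimal reconstruction, inferred from the `f"{txid}:{height}:"` format in the diff:

    from typing import Iterable, Tuple

    def encode_history(history: Iterable[Tuple[str, int]]) -> str:
        # Concatenate txid:height: entries, matching f"{txid}:{height}:" above.
        return "".join(f"{txid}:{height}:" for txid, height in history)

    print(encode_history([("deadbeef", 100), ("cafebabe", 101)]))
    # deadbeef:100:cafebabe:101: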
@@ -617,16 +605,18 @@ class Ledger(metaclass=LedgerRegistry):
             header = await self.headers.get(remote_height)
             tx.position = merkle['pos']
             tx.is_verified = merkle_root == header['merkle_root']
+        return tx

     async def request_transactions(self, to_request: Tuple[Tuple[str, int], ...], cached=False):
         batches = [[]]
         remote_heights = {}
+        cache_hits = set()

         for txid, height in sorted(to_request, key=lambda x: x[1]):
             if cached:
                 if txid in self._tx_cache:
                     if self._tx_cache[txid].tx is not None and self._tx_cache[txid].tx.is_verified:
-                        yield self._tx_cache[txid].tx
+                        cache_hits.add(txid)
                         continue
                 else:
                     self._tx_cache[txid] = TransactionCacheItem()
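Because the generator now yields dicts rather than single transactions, verified cache hits can no longer be yielded inline while the batches are being built; they are collected into `cache_hits` and emitted as one batch before any network fetch (the `if cached and cache_hits:` block in the next hunk). A self-contained sketch of that collect-then-yield shape, with a stand-in cache and fetch:

    import asyncio
    from typing import AsyncIterator, Dict, List

    CACHE: Dict[str, str] = {"tx1": "cached-raw-1"}  # assumed in-memory cache

    async def fetch(txids: List[str]) -> Dict[str, str]:
        await asyncio.sleep(0)  # stand-in for a network batch call
        return {t: f"raw-{t}" for t in txids}

    async def request(txids: List[str]) -> AsyncIterator[Dict[str, str]]:
        hits = {t for t in txids if t in CACHE}
        if hits:
            yield {t: CACHE[t] for t in hits}  # one batch of cache hits up front
        misses = [t for t in txids if t not in hits]
        if misses:
            yield await fetch(misses)          # then the fetched batch(es)

    async def main():
        async for batch in request(["tx1", "tx2", "tx3"]):
            print(batch)

    asyncio.run(main())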
@@ -636,29 +626,41 @@ class Ledger(metaclass=LedgerRegistry):
                 batches[-1].append(txid)
         if not batches[-1]:
             batches.pop()
+        if cached and cache_hits:
+            yield {txid: self._tx_cache[txid].tx for txid in cache_hits}

         for batch in batches:
-            async for tx in self._single_batch(batch, remote_heights):
-                if cached:
-                    self._tx_cache[tx.id].tx = tx
-                yield tx
+            txs = await self._single_batch(batch, remote_heights)
+            if cached:
+                for txid, tx in txs.items():
+                    self._tx_cache[txid].tx = tx
+            yield txs

     async def request_synced_transactions(self, to_request, remote_history, address):
-        pending_sync = {}
-        async for tx in self.request_transactions(((txid, height) for txid, height in to_request.values())):
-            pending_sync[tx.id] = tx
-        for f in asyncio.as_completed([self._sync(tx, remote_history, pending_sync) for tx in pending_sync.values()]):
-            yield await f
+        async for txs in self.request_transactions(((txid, height) for txid, height in to_request.values())):
+            for tx in txs.values():
+                yield tx
+            await self._sync_and_save_batch(address, remote_history, txs)

     async def _single_batch(self, batch, remote_heights):
         heights = {remote_heights[txid] for txid in batch}
         unrestriced = 0 < min(heights) < max(heights) < max(self.headers.checkpoints or [0])
         batch_result = await self.network.retriable_call(self.network.get_transaction_batch, batch, not unrestriced)
+        txs = {}
         for txid, (raw, merkle) in batch_result.items():
             remote_height = remote_heights[txid]
             tx = Transaction(unhexlify(raw), height=remote_height)
+            txs[tx.id] = tx
             await self.maybe_verify_transaction(tx, remote_height, merkle)
-            yield tx
+        return txs
+
+    async def _sync_and_save_batch(self, address, remote_history, pending_txs):
+        await asyncio.gather(*(self._sync(tx, remote_history, pending_txs) for tx in pending_txs.values()))
+        await self.db.save_transaction_io_batch(
+            pending_txs.values(), address, self.address_to_hash160(address), ""
+        )
+        while pending_txs:
+            self._on_transaction_controller.add(TransactionEvent(address, pending_txs.popitem()[1]))

     async def _sync(self, tx, remote_history, pending_txs):
         check_db_for_txos = {}
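The new `_sync_and_save_batch` is where each batch is retired: every transaction in the batch is synced concurrently with `asyncio.gather`, written in a single `save_transaction_io_batch` call, and then the dict is drained with `popitem()` so each transaction is released as soon as its `TransactionEvent` is queued. A minimal sketch of that gather-save-drain shape; `sync_one` and `save_batch` are placeholder coroutines, not the actual `Ledger` methods:

    import asyncio
    from typing import Dict

    async def sync_one(tx: str) -> None:
        await asyncio.sleep(0)  # stand-in for Ledger._sync

    async def save_batch(txs) -> None:
        await asyncio.sleep(0)  # stand-in for db.save_transaction_io_batch

    async def sync_and_save_batch(pending: Dict[str, str]) -> None:
        # Sync every tx in the batch concurrently, then persist once.
        await asyncio.gather(*(sync_one(tx) for tx in pending.values()))
        await save_batch(pending.values())
        # Drain the dict so each entry is dropped right after its event fires.
        while pending:
            txid, tx = pending.popitem()
            print(f"event for {txid}")

    asyncio.run(sync_and_save_batch({f"tx{i}": f"raw{i}" for i in range(3)}))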
@@ -759,7 +761,7 @@ class Ledger(metaclass=LedgerRegistry):
         txs: List[Transaction] = []
         if len(outputs.txs) > 0:
             async for tx in self.request_transactions(tuple(outputs.txs), cached=True):
-                txs.append(tx)
+                txs.extend(tx.values())
         _txos, blocked = outputs.inflate(txs)