Merge pull request #3128 from lbryio/sync_smaller_batches

Improve memory usage during wallet sync
This commit is contained in:
Jack Robison 2020-12-23 20:45:03 -05:00 committed by GitHub
commit 7551b51e7d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -514,7 +514,6 @@ class Ledger(metaclass=LedgerRegistry):
)
return True
synced_txs = []
to_request = {}
pending_synced_history = {}
already_synced = set()
@ -536,21 +535,14 @@ class Ledger(metaclass=LedgerRegistry):
to_request[i] = (txid, remote_height)
log.debug(
"request %i transactions, %i/%i for %s are already synced", len(to_request), len(synced_txs),
"request %i transactions, %i/%i for %s are already synced", len(to_request), len(already_synced),
len(remote_history), address
)
remote_history_txids = set(txid for txid, _ in remote_history)
async for tx in self.request_synced_transactions(to_request, remote_history_txids, address):
pending_synced_history[tx_indexes[tx.id]] = f"{tx.id}:{tx.height}:"
synced_txs.append(tx)
if len(synced_txs) >= 100:
if len(pending_synced_history) % 100 == 0:
log.info("Syncing address %s: %d/%d", address, len(pending_synced_history), len(to_request))
await self.db.save_transaction_io_batch(
synced_txs, address, self.address_to_hash160(address), ""
)
while synced_txs:
tx = synced_txs.pop()
self._on_transaction_controller.add(TransactionEvent(address, tx))
log.info("Sync finished for address %s: %d/%d", address, len(pending_synced_history), len(to_request))
assert len(pending_synced_history) == len(remote_history), \
@ -562,11 +554,7 @@ class Ledger(metaclass=LedgerRegistry):
if f"{txid}:{height}:" != pending_synced_history[i]:
log.warning("history mismatch: %s vs %s", remote_history[remote_i], pending_synced_history[i])
synced_history += pending_synced_history[i]
await self.db.save_transaction_io_batch(
synced_txs, address, self.address_to_hash160(address), synced_history
)
while synced_txs:
self._on_transaction_controller.add(TransactionEvent(address, synced_txs.pop()))
await self.db.set_address_history(address, synced_history)
if address_manager is None:
address_manager = await self.get_address_manager_for_address(address)
@ -617,16 +605,18 @@ class Ledger(metaclass=LedgerRegistry):
header = await self.headers.get(remote_height)
tx.position = merkle['pos']
tx.is_verified = merkle_root == header['merkle_root']
return tx
async def request_transactions(self, to_request: Tuple[Tuple[str, int], ...], cached=False):
batches = [[]]
remote_heights = {}
cache_hits = set()
for txid, height in sorted(to_request, key=lambda x: x[1]):
if cached:
if txid in self._tx_cache:
if self._tx_cache[txid].tx is not None and self._tx_cache[txid].tx.is_verified:
yield self._tx_cache[txid].tx
cache_hits.add(txid)
continue
else:
self._tx_cache[txid] = TransactionCacheItem()
@ -636,29 +626,41 @@ class Ledger(metaclass=LedgerRegistry):
batches[-1].append(txid)
if not batches[-1]:
batches.pop()
if cached and cache_hits:
yield {txid: self._tx_cache[txid].tx for txid in cache_hits}
for batch in batches:
async for tx in self._single_batch(batch, remote_heights):
if cached:
self._tx_cache[tx.id].tx = tx
yield tx
txs = await self._single_batch(batch, remote_heights)
if cached:
for txid, tx in txs.items():
self._tx_cache[txid].tx = tx
yield txs
async def request_synced_transactions(self, to_request, remote_history, address):
pending_sync = {}
async for tx in self.request_transactions(((txid, height) for txid, height in to_request.values())):
pending_sync[tx.id] = tx
for f in asyncio.as_completed([self._sync(tx, remote_history, pending_sync) for tx in pending_sync.values()]):
yield await f
async for txs in self.request_transactions(((txid, height) for txid, height in to_request.values())):
for tx in txs.values():
yield tx
await self._sync_and_save_batch(address, remote_history, txs)
async def _single_batch(self, batch, remote_heights):
heights = {remote_heights[txid] for txid in batch}
unrestriced = 0 < min(heights) < max(heights) < max(self.headers.checkpoints or [0])
batch_result = await self.network.retriable_call(self.network.get_transaction_batch, batch, not unrestriced)
txs = {}
for txid, (raw, merkle) in batch_result.items():
remote_height = remote_heights[txid]
tx = Transaction(unhexlify(raw), height=remote_height)
txs[tx.id] = tx
await self.maybe_verify_transaction(tx, remote_height, merkle)
yield tx
return txs
async def _sync_and_save_batch(self, address, remote_history, pending_txs):
await asyncio.gather(*(self._sync(tx, remote_history, pending_txs) for tx in pending_txs.values()))
await self.db.save_transaction_io_batch(
pending_txs.values(), address, self.address_to_hash160(address), ""
)
while pending_txs:
self._on_transaction_controller.add(TransactionEvent(address, pending_txs.popitem()[1]))
async def _sync(self, tx, remote_history, pending_txs):
check_db_for_txos = {}
@ -759,7 +761,7 @@ class Ledger(metaclass=LedgerRegistry):
txs: List[Transaction] = []
if len(outputs.txs) > 0:
async for tx in self.request_transactions(tuple(outputs.txs), cached=True):
txs.append(tx)
txs.extend(tx.values())
_txos, blocked = outputs.inflate(txs)