From ea1285cd9f95451af7109da404711c0244eb2fa3 Mon Sep 17 00:00:00 2001 From: Jack Robison Date: Fri, 16 Jul 2021 14:46:46 -0400 Subject: [PATCH] reorg claims in the search index --- lbry/wallet/server/block_processor.py | 100 +++++++++++++----- lbry/wallet/server/db/__init__.py | 1 + lbry/wallet/server/db/prefixes.py | 48 +++++++++ lbry/wallet/server/leveldb.py | 10 +- .../blockchain/test_claim_commands.py | 9 +- .../blockchain/test_resolve_command.py | 12 +++ 6 files changed, 150 insertions(+), 30 deletions(-) diff --git a/lbry/wallet/server/block_processor.py b/lbry/wallet/server/block_processor.py index 637a4a0f7..c33bd19f5 100644 --- a/lbry/wallet/server/block_processor.py +++ b/lbry/wallet/server/block_processor.py @@ -249,8 +249,12 @@ class BlockProcessor: self.possible_future_claim_amount_by_name_and_hash: Dict[Tuple[str, bytes], int] = {} self.possible_future_support_txos_by_claim_hash: DefaultDict[bytes, List[Tuple[int, int]]] = defaultdict(list) - self.removed_claims_to_send_es = set() + self.removed_claims_to_send_es = set() # cumulative changes across blocks to send ES self.touched_claims_to_send_es = set() + + self.removed_claim_hashes: Set[bytes] = set() # per block changes + self.touched_claim_hashes: Set[bytes] = set() + self.signatures_changed = set() self.pending_reposted = set() @@ -268,16 +272,9 @@ class BlockProcessor: if self.db.db_height <= 1: return - to_send_es = set(self.touched_claims_to_send_es) - to_send_es.update(self.pending_reposted.difference(self.removed_claims_to_send_es)) - to_send_es.update( - {k for k, v in self.pending_channel_counts.items() if v != 0}.difference( - self.removed_claims_to_send_es) - ) - for claim_hash in self.removed_claims_to_send_es: yield 'delete', claim_hash.hex() - for claim in self.db.claims_producer(to_send_es): + for claim in self.db.claims_producer(self.touched_claims_to_send_es): yield 'update', claim async def run_in_thread_with_lock(self, func, *args): @@ -313,14 +310,12 @@ class BlockProcessor: await self.run_in_thread_with_lock(self.advance_block, block) self.logger.info("advanced to %i in %0.3fs", self.height, time.perf_counter() - start) # TODO: we shouldnt wait on the search index updating before advancing to the next block - if not self.db.first_sync: - await self.db.search_index.claim_consumer(self.claim_producer()) - self.db.search_index.clear_caches() - self.touched_claims_to_send_es.clear() - self.removed_claims_to_send_es.clear() - self.pending_reposted.clear() - self.pending_channel_counts.clear() - # print("******************\n") + if not self.db.first_sync: + await self.db.search_index.claim_consumer(self.claim_producer()) + self.db.search_index.clear_caches() + self.touched_claims_to_send_es.clear() + self.removed_claims_to_send_es.clear() + # print("******************\n") except: self.logger.exception("advance blocks failed") raise @@ -351,6 +346,14 @@ class BlockProcessor: assert count > 0, count for _ in range(count): await self.run_in_thread_with_lock(self.backup_block) + for touched in self.touched_claims_to_send_es: + if not self.db.get_claim_txo(touched): + self.removed_claims_to_send_es.add(touched) + self.touched_claims_to_send_es.difference_update(self.removed_claims_to_send_es) + await self.db.search_index.claim_consumer(self.claim_producer()) + self.db.search_index.clear_caches() + self.touched_claims_to_send_es.clear() + self.removed_claims_to_send_es.clear() await self.prefetcher.reset_height(self.height) self.reorg_count_metric.inc() except: @@ -995,6 +998,9 @@ class BlockProcessor: ).get_activate_ops() ) self.db_op_stack.extend(get_takeover_name_ops(name, winning_including_future_activations, height, controlling)) + self.touched_claim_hashes.add(winning_including_future_activations) + if controlling and controlling.claim_hash not in self.abandoned_claims: + self.touched_claim_hashes.add(controlling.claim_hash) elif not controlling or (winning_claim_hash != controlling.claim_hash and name in names_with_abandoned_controlling_claims) or \ ((winning_claim_hash != controlling.claim_hash) and (amounts[winning_claim_hash] > amounts[controlling.claim_hash])): @@ -1025,6 +1031,9 @@ class BlockProcessor: ).get_activate_ops() ) self.db_op_stack.extend(get_takeover_name_ops(name, winning_claim_hash, height, controlling)) + if controlling and controlling.claim_hash not in self.abandoned_claims: + self.touched_claim_hashes.add(controlling.claim_hash) + self.touched_claim_hashes.add(winning_claim_hash) elif winning_claim_hash == controlling.claim_hash: # print("\tstill winning") pass @@ -1048,19 +1057,23 @@ class BlockProcessor: if (controlling and winning != controlling.claim_hash) or (not controlling and winning): # print(f"\ttakeover from abandoned support {controlling.claim_hash.hex()} -> {winning.hex()}") self.db_op_stack.extend(get_takeover_name_ops(name, winning, height, controlling)) + if controlling: + self.touched_claim_hashes.add(controlling.claim_hash) + self.touched_claim_hashes.add(winning) + def _get_cumulative_update_ops(self): # gather cumulative removed/touched sets to update the search index - self.removed_claims_to_send_es.update(set(self.abandoned_claims.keys())) - self.touched_claims_to_send_es.update( + self.removed_claim_hashes.update(set(self.abandoned_claims.keys())) + self.touched_claim_hashes.update( set(self.activated_support_amount_by_claim.keys()).union( set(claim_hash for (_, claim_hash) in self.activated_claim_amount_by_name_and_hash.keys()) ).union(self.signatures_changed).union( set(self.removed_active_support_amount_by_claim.keys()) - ).difference(self.removed_claims_to_send_es) + ).difference(self.removed_claim_hashes) ) # use the cumulative changes to update bid ordered resolve - for removed in self.removed_claims_to_send_es: + for removed in self.removed_claim_hashes: removed_claim = self.db.get_claim_txo(removed) if removed_claim: amt = self.db.get_url_effective_amount( @@ -1071,7 +1084,7 @@ class BlockProcessor: removed_claim.name, amt.effective_amount, amt.tx_num, amt.position, removed )) - for touched in self.touched_claims_to_send_es: + for touched in self.touched_claim_hashes: if touched in self.claim_hash_to_txo: pending = self.txo_to_claim[self.claim_hash_to_txo[touched]] name, tx_num, position = pending.name, pending.tx_num, pending.position @@ -1098,6 +1111,16 @@ class BlockProcessor: tx_num, position, touched) ) + self.touched_claim_hashes.update( + {k for k in self.pending_reposted if k not in self.removed_claim_hashes} + ) + self.touched_claim_hashes.update( + {k for k, v in self.pending_channel_counts.items() if v != 0 and k not in self.removed_claim_hashes} + ) + self.touched_claims_to_send_es.difference_update(self.removed_claim_hashes) + self.touched_claims_to_send_es.update(self.touched_claim_hashes) + self.removed_claims_to_send_es.update(self.removed_claim_hashes) + def advance_block(self, block): height = self.height + 1 # print("advance ", height) @@ -1168,6 +1191,9 @@ class BlockProcessor: # activate claims and process takeovers self._get_takeover_ops(height) + # update effective amount and update sets of touched and deleted claims + self._get_cumulative_update_ops() + self.db_op_stack.append(RevertablePut(*Prefixes.header.pack_item(height, block.header))) self.db_op_stack.append(RevertablePut(*Prefixes.tx_count.pack_item(height, tx_count))) @@ -1185,8 +1211,20 @@ class BlockProcessor: self.tx_count = tx_count self.db.tx_counts.append(self.tx_count) - if height >= self.daemon.cached_height() - self.env.reorg_limit: - self.db_op_stack.append(RevertablePut(*Prefixes.undo.pack_item(height, self.db_op_stack.get_undo_ops()))) + cached_max_reorg_depth = self.daemon.cached_height() - self.env.reorg_limit + if height >= cached_max_reorg_depth: + self.db_op_stack.append( + RevertablePut( + *Prefixes.touched_or_deleted.pack_item( + height, self.touched_claim_hashes, self.removed_claim_hashes + ) + ) + ) + self.db_op_stack.append( + RevertablePut( + *Prefixes.undo.pack_item(height, self.db_op_stack.get_undo_ops()) + ) + ) self.height = height self.db.headers.append(block.header) @@ -1220,16 +1258,26 @@ class BlockProcessor: self.hashXs_by_tx.clear() self.history_cache.clear() self.notifications.notified_mempool_txs.clear() + self.removed_claim_hashes.clear() + self.touched_claim_hashes.clear() + self.pending_reposted.clear() + self.pending_channel_counts.clear() def backup_block(self): self.db.assert_flushed(self.flush_data()) self.logger.info("backup block %i", self.height) # Check and update self.tip - undo_ops = self.db.read_undo_info(self.height) + undo_ops, touched_and_deleted_bytes = self.db.read_undo_info(self.height) if undo_ops is None: raise ChainError(f'no undo information found for height {self.height:,d}') - self.db_op_stack.apply_packed_undo_ops(undo_ops) self.db_op_stack.append(RevertableDelete(Prefixes.undo.pack_key(self.height), undo_ops)) + self.db_op_stack.apply_packed_undo_ops(undo_ops) + + touched_and_deleted = Prefixes.touched_or_deleted.unpack_value(touched_and_deleted_bytes) + self.touched_claims_to_send_es.update(touched_and_deleted.touched_claims) + self.removed_claims_to_send_es.difference_update(touched_and_deleted.touched_claims) + self.removed_claims_to_send_es.update(touched_and_deleted.deleted_claims) + self.db.headers.pop() self.block_hashes.pop() self.db.tx_counts.pop() diff --git a/lbry/wallet/server/db/__init__.py b/lbry/wallet/server/db/__init__.py index c6723afbf..ec33b6ead 100644 --- a/lbry/wallet/server/db/__init__.py +++ b/lbry/wallet/server/db/__init__.py @@ -25,6 +25,7 @@ class DB_PREFIXES(enum.Enum): reposted_claim = b'W' undo = b'M' + claim_diff = b'Y' tx = b'B' block_hash = b'C' diff --git a/lbry/wallet/server/db/prefixes.py b/lbry/wallet/server/db/prefixes.py index d1cc21b99..86254f394 100644 --- a/lbry/wallet/server/db/prefixes.py +++ b/lbry/wallet/server/db/prefixes.py @@ -387,6 +387,20 @@ class RepostedValue(typing.NamedTuple): return f"{self.__class__.__name__}(claim_hash={self.claim_hash.hex()})" +class TouchedOrDeletedClaimKey(typing.NamedTuple): + height: int + + +class TouchedOrDeletedClaimValue(typing.NamedTuple): + touched_claims: typing.Set[bytes] + deleted_claims: typing.Set[bytes] + + def __str__(self): + return f"{self.__class__.__name__}(" \ + f"touched_claims={','.join(map(lambda x: x.hex(), self.touched_claims))}," \ + f"deleted_claims={','.join(map(lambda x: x.hex(), self.deleted_claims))})" + + class ActiveAmountPrefixRow(PrefixRow): prefix = DB_PREFIXES.active_amount.value key_struct = struct.Struct(b'>20sBLLH') @@ -1219,6 +1233,38 @@ class HashXHistoryPrefixRow(PrefixRow): return cls.pack_key(hashX, height), cls.pack_value(history) +class TouchedOrDeletedPrefixRow(PrefixRow): + prefix = DB_PREFIXES.claim_diff.value + key_struct = struct.Struct(b'>L') + value_struct = struct.Struct(b'>LL') + + @classmethod + def pack_key(cls, height: int): + return super().pack_key(height) + + @classmethod + def unpack_key(cls, key: bytes) -> TouchedOrDeletedClaimKey: + return TouchedOrDeletedClaimKey(*super().unpack_key(key)) + + @classmethod + def pack_value(cls, touched, deleted) -> bytes: + return cls.value_struct.pack(len(touched), len(deleted)) + b''.join(touched) + b''.join(deleted) + + @classmethod + def unpack_value(cls, data: bytes) -> TouchedOrDeletedClaimValue: + touched_len, deleted_len = cls.value_struct.unpack(data[:8]) + assert len(data) == 20 * (touched_len + deleted_len) + 8 + touched_bytes, deleted_bytes = data[8:touched_len*20+8], data[touched_len*20+8:touched_len*20+deleted_len*20+8] + return TouchedOrDeletedClaimValue( + {touched_bytes[8+20*i:8+20*(i+1)] for i in range(touched_len)}, + {deleted_bytes[8+20*i:8+20*(i+1)] for i in range(deleted_len)} + ) + + @classmethod + def pack_item(cls, height, touched, deleted): + return cls.pack_key(height), cls.pack_value(touched, deleted) + + class Prefixes: claim_to_support = ClaimToSupportPrefixRow support_to_claim = SupportToClaimPrefixRow @@ -1252,6 +1298,8 @@ class Prefixes: tx_num = TXNumPrefixRow tx = TXPrefixRow header = BlockHeaderPrefixRow + touched_or_deleted = TouchedOrDeletedPrefixRow + ROW_TYPES = { diff --git a/lbry/wallet/server/leveldb.py b/lbry/wallet/server/leveldb.py index 4132e8c33..61dec5697 100644 --- a/lbry/wallet/server/leveldb.py +++ b/lbry/wallet/server/leveldb.py @@ -784,12 +784,18 @@ class LevelDB: min_height = self.min_undo_height(self.db_height) delete_undo_keys = [] - if min_height > 0: + if min_height > 0: # delete undos for blocks deep enough they can't be reorged delete_undo_keys.extend( self.db.iterator( start=Prefixes.undo.pack_key(0), stop=Prefixes.undo.pack_key(min_height), include_value=False ) ) + delete_undo_keys.extend( + self.db.iterator( + start=Prefixes.touched_or_deleted.pack_key(0), + stop=Prefixes.touched_or_deleted.pack_key(min_height), include_value=False + ) + ) with self.db.write_batch() as batch: batch_put = batch.put @@ -1006,7 +1012,7 @@ class LevelDB: return Prefixes.undo.pack_key(height) def read_undo_info(self, height: int): - return self.db.get(Prefixes.undo.pack_key(height)) + return self.db.get(Prefixes.undo.pack_key(height)), self.db.get(Prefixes.touched_or_deleted.pack_key(height)) # -- UTXO database diff --git a/tests/integration/blockchain/test_claim_commands.py b/tests/integration/blockchain/test_claim_commands.py index 541b6e72c..2e7b36cea 100644 --- a/tests/integration/blockchain/test_claim_commands.py +++ b/tests/integration/blockchain/test_claim_commands.py @@ -813,10 +813,15 @@ class TransactionOutputCommands(ClaimTestCase): stream_id = self.get_claim_id(await self.stream_create()) await self.support_create(stream_id, '0.3') await self.support_create(stream_id, '0.2') - await self.generate(day_blocks) + await self.generate(day_blocks // 2) + await self.stream_update(stream_id) + await self.generate(day_blocks // 2) await self.support_create(stream_id, '0.4') await self.support_create(stream_id, '0.5') - await self.generate(day_blocks) + await self.stream_update(stream_id) + await self.generate(day_blocks // 2) + await self.stream_update(stream_id) + await self.generate(day_blocks // 2) await self.support_create(stream_id, '0.6') plot = await self.txo_plot(type='support') diff --git a/tests/integration/blockchain/test_resolve_command.py b/tests/integration/blockchain/test_resolve_command.py index 5f230f53f..55b710323 100644 --- a/tests/integration/blockchain/test_resolve_command.py +++ b/tests/integration/blockchain/test_resolve_command.py @@ -17,8 +17,13 @@ class BaseResolveTestCase(CommandTestCase): if claim_id is None: self.assertIn('error', other) self.assertEqual(other['error']['name'], 'NOT_FOUND') + claims_from_es = (await self.conductor.spv_node.server.bp.db.search_index.search(name=name))[0] + claims_from_es = [c['claim_hash'][::-1].hex() for c in claims_from_es] + self.assertNotIn(claim_id, claims_from_es) else: + claim_from_es = await self.conductor.spv_node.server.bp.db.search_index.search(claim_id=claim_id) self.assertEqual(claim_id, other['claim_id']) + self.assertEqual(claim_id, claim_from_es[0][0]['claim_hash'][::-1].hex()) async def assertNoClaimForName(self, name: str): lbrycrd_winning = json.loads(await self.blockchain._cli_cmnd('getvalueforname', name)) @@ -28,11 +33,18 @@ class BaseResolveTestCase(CommandTestCase): self.assertIsInstance(stream, LookupError) else: self.assertIsInstance(channel, LookupError) + claim_from_es = await self.conductor.spv_node.server.bp.db.search_index.search(name=name) + self.assertListEqual([], claim_from_es[0]) async def assertMatchWinningClaim(self, name): expected = json.loads(await self.blockchain._cli_cmnd('getvalueforname', name)) stream, channel = await self.conductor.spv_node.server.bp.db.fs_resolve(name) claim = stream if stream else channel + claim_from_es = await self.conductor.spv_node.server.bp.db.search_index.search( + claim_id=claim.claim_hash.hex() + ) + self.assertEqual(len(claim_from_es[0]), 1) + self.assertEqual(claim_from_es[0][0]['claim_hash'][::-1].hex(), claim.claim_hash.hex()) self.assertEqual(expected['claimId'], claim.claim_hash.hex()) self.assertEqual(expected['validAtHeight'], claim.activation_height) self.assertEqual(expected['lastTakeoverHeight'], claim.last_takeover_height)