fix merge conflicts and simplify extract_doc

2021-08-06 14:11:28 -04:00 · 2021-08-06 14:11:28 -04:00 · b4853c5f67
commit b4853c5f67
parent c68334b421
5 changed files with 47 additions and 62 deletions
--- a/lbry/error/init.py
+++ b/lbry/error/init.py
@ -252,9 +252,10 @@ class ResolveTimeoutError(WalletError):

 class ResolveCensoredError(WalletError):

-    def __init__(self, url, censor_id):
+    def __init__(self, url, censor_id, censor_row):
        self.url = url
        self.censor_id = censor_id
+        self.censor_row = censor_row
        super().__init__(f"Resolve of '{url}' was censored by channel with claim id '{censor_id}'.")


--- a/lbry/schema/result.py
+++ b/lbry/schema/result.py
@ -44,7 +44,7 @@ class Censor:

    def censor(self, row) -> Optional[bytes]:
        if self.is_censored(row):
-            censoring_channel_hash = row['censoring_channel_hash']
+            censoring_channel_hash = bytes.fromhex(row['censoring_channel_id'])[::-1]
            self.censored.setdefault(censoring_channel_hash, set())
            self.censored[censoring_channel_hash].add(row['tx_hash'])
            return censoring_channel_hash
@ -192,7 +192,7 @@ class Outputs:
                if row.reposted_claim_hash:
                    set_reference(txo_message.claim.repost, row.reposted_claim_hash, extra_txo_rows)
            elif isinstance(row, ResolveCensoredError):
-                set_reference(txo_message.error.blocked.channel, row.censor_hash, extra_txo_rows)
+                set_reference(txo_message.error.blocked.channel, row.censor_id, extra_txo_rows)
        return page.SerializeToString()

    @classmethod
--- a/lbry/wallet/server/db/elasticsearch/search.py
+++ b/lbry/wallet/server/db/elasticsearch/search.py
@ -106,9 +106,19 @@ class SearchIndex:
        count = 0
        async for op, doc in claim_producer:
            if op == 'delete':
-                yield {'_index': self.index, '_op_type': 'delete', '_id': doc}
+                yield {
+                    '_index': self.index,
+                    '_op_type': 'delete',
+                    '_id': doc
+                }
            else:
-                yield extract_doc(doc, self.index)
+                yield {
+                    'doc': {key: value for key, value in doc.items() if key in ALL_FIELDS},
+                    '_id': doc['claim_id'],
+                    '_index': self.index,
+                    '_op_type': 'update',
+                    'doc_as_upsert': True
+                }
            count += 1
            if count % 100 == 0:
                self.logger.debug("Indexing in progress, %d claims.", count)
@ -474,34 +484,6 @@ class SearchIndex:
        return referenced_txos


-def extract_doc(doc, index):
-    doc['claim_id'] = doc.pop('claim_hash')[::-1].hex()
-    if doc['reposted_claim_hash'] is not None:
-        doc['reposted_claim_id'] = doc.pop('reposted_claim_hash').hex()
-    else:
-        doc['reposted_claim_id'] = None
-    channel_hash = doc.pop('channel_hash')
-    doc['channel_id'] = channel_hash[::-1].hex() if channel_hash else channel_hash
-    channel_hash = doc.pop('censoring_channel_hash')
-    doc['censoring_channel_hash'] = channel_hash.hex() if channel_hash else channel_hash
-    # txo_hash = doc.pop('txo_hash')
-    # doc['tx_id'] = txo_hash[:32][::-1].hex()
-    # doc['tx_nout'] = struct.unpack('<I', txo_hash[32:])[0]
-    doc['repost_count'] = doc.pop('reposted')
-    doc['is_controlling'] = bool(doc['is_controlling'])
-    doc['signature'] = (doc.pop('signature') or b'').hex() or None
-    doc['signature_digest'] = doc['signature']
-    doc['public_key_bytes'] = (doc.pop('public_key_bytes') or b'').hex() or None
-    doc['public_key_id'] = (doc.pop('public_key_hash') or b'').hex() or None
-    doc['is_signature_valid'] = bool(doc['signature_valid'])
-    doc['claim_type'] = doc.get('claim_type', 0) or 0
-    doc['stream_type'] = int(doc.get('stream_type', 0) or 0)
-    doc['has_source'] = bool(doc['has_source'])
-    doc['normalized_name'] = doc.pop('normalized')
-    doc = {key: value for key, value in doc.items() if key in ALL_FIELDS}
-    return {'doc': doc, '_id': doc['claim_id'], '_index': index, '_op_type': 'update', 'doc_as_upsert': True}
-
-
 def expand_query(**kwargs):
    if "amount_order" in kwargs:
        kwargs["limit"] = 1
@ -533,8 +515,8 @@ def expand_query(**kwargs):
                    value = CLAIM_TYPES[value]
                else:
                    value = [CLAIM_TYPES[claim_type] for claim_type in value]
-            elif key == 'stream_type':
-                value = STREAM_TYPES[value] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
+            # elif key == 'stream_type':
+            #     value = STREAM_TYPES[value] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
            if key == '_id':
                if isinstance(value, Iterable):
                    value = [item[::-1].hex() for item in value]
@ -590,13 +572,13 @@ def expand_query(**kwargs):
        elif key == 'limit_claims_per_channel':
            collapse = ('channel_id.keyword', value)
    if kwargs.get('has_channel_signature'):
-        query['must'].append({"exists": {"field": "signature_digest"}})
+        query['must'].append({"exists": {"field": "signature"}})
        if 'signature_valid' in kwargs:
            query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    elif 'signature_valid' in kwargs:
        query.setdefault('should', [])
        query["minimum_should_match"] = 1
-        query['should'].append({"bool": {"must_not": {"exists": {"field": "signature_digest"}}}})
+        query['should'].append({"bool": {"must_not": {"exists": {"field": "signature"}}}})
        query['should'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
    if 'has_source' in kwargs:
        query.setdefault('should', [])
--- a/lbry/wallet/server/db/elasticsearch/sync.py
+++ b/lbry/wallet/server/db/elasticsearch/sync.py
@ -6,7 +6,8 @@ from elasticsearch.helpers import async_bulk
 from lbry.wallet.server.env import Env
 from lbry.wallet.server.coin import LBC
 from lbry.wallet.server.leveldb import LevelDB
-from lbry.wallet.server.db.elasticsearch.search import extract_doc, SearchIndex, IndexVersionMismatch
+from lbry.wallet.server.db.elasticsearch.search import SearchIndex, IndexVersionMismatch
+from lbry.wallet.server.db.elasticsearch.constants import ALL_FIELDS


 async def get_all_claims(index_name='claims', db=None):
@ -18,7 +19,13 @@ async def get_all_claims(index_name='claims', db=None):
    try:
        cnt = 0
        async for claim in db.all_claims_producer():
-            yield extract_doc(claim, index_name)
+            yield {
+                'doc': {key: value for key, value in claim.items() if key in ALL_FIELDS},
+                '_id': claim['claim_id'],
+                '_index': index_name,
+                '_op_type': 'update',
+                'doc_as_upsert': True
+            }
            cnt += 1
            if cnt % 10000 == 0:
                print(f"{cnt} claims sent")
--- a/lbry/wallet/server/leveldb.py
+++ b/lbry/wallet/server/leveldb.py
@ -65,7 +65,7 @@ class FlushData:
    tip = attr.ib()


-OptionalResolveResultOrError = Optional[typing.Union[ResolveResult, LookupError, ValueError]]
+OptionalResolveResultOrError = Optional[typing.Union[ResolveResult, ResolveCensoredError, LookupError, ValueError]]

 DB_STATE_STRUCT = struct.Struct(b'>32sLL32sLLBBlll')
 DB_STATE_STRUCT_SIZE = 94
@ -527,7 +527,7 @@ class LevelDB:
            output = self.coin.transaction(raw).outputs[nout]
            script = OutputScript(output.pk_script)
            script.parse()
-            return Claim.from_bytes(script.values['claim'])
+            return Claim.from_bytes(script.values['claim']), ''.join(chr(c) for c in script.values['claim_name'])
        except:
            self.logger.error(
                "tx parsing for ES went boom %s %s", tx_hash[::-1].hex(),
@ -546,6 +546,7 @@ class LevelDB:
        metadata = self.get_claim_metadata(claim.tx_hash, claim.position)
        if not metadata:
            return
+        metadata, non_normalized_name = metadata
        if not metadata.is_stream or not metadata.stream.has_fee:
            fee_amount = 0
        else:
@ -564,6 +565,7 @@ class LevelDB:
            )
            if not reposted_metadata:
                return
+            reposted_metadata, _ = reposted_metadata
        reposted_tags = []
        reposted_languages = []
        reposted_has_source = None
@ -632,10 +634,9 @@ class LevelDB:
            reposted_claim_hash) or self.filtered_channels.get(claim_hash) or self.filtered_channels.get(
            reposted_claim_hash) or self.filtered_channels.get(claim.channel_hash)
        value = {
-            'claim_hash': claim_hash[::-1],
-            # 'claim_id': claim_hash.hex(),
-            'claim_name': claim.name,
-            'normalized': claim.name,
+            'claim_id': claim_hash.hex(),
+            'claim_name': non_normalized_name,
+            'normalized_name': claim.name,
            'tx_id': claim.tx_hash[::-1].hex(),
            'tx_num': claim.tx_num,
            'tx_nout': claim.position,
@ -648,7 +649,7 @@ class LevelDB:
            'expiration_height': claim.expiration_height,
            'effective_amount': claim.effective_amount,
            'support_amount': claim.support_amount,
-            'is_controlling': claim.is_controlling,
+            'is_controlling': bool(claim.is_controlling),
            'last_take_over_height': claim.last_takeover_height,
            'short_url': claim.short_url,
            'canonical_url': claim.canonical_url,
@ -658,30 +659,26 @@ class LevelDB:
            'claim_type': CLAIM_TYPES[metadata.claim_type],
            'has_source': reposted_has_source if reposted_has_source is not None else (
                False if not metadata.is_stream else metadata.stream.has_source),
-            'stream_type': None if not metadata.is_stream else STREAM_TYPES[
+            'stream_type': 0 if not metadata.is_stream else STREAM_TYPES[
                guess_stream_type(metadata.stream.source.media_type)],
            'media_type': None if not metadata.is_stream else metadata.stream.source.media_type,
            'fee_amount': fee_amount,
            'fee_currency': None if not metadata.is_stream else metadata.stream.fee.currency,

-            'reposted': self.get_reposted_count(claim_hash),
-            'reposted_claim_hash': reposted_claim_hash,
+            'repost_count': self.get_reposted_count(claim_hash),
+            'reposted_claim_id': None if not reposted_claim_hash else reposted_claim_hash.hex(),
            'reposted_claim_type': reposted_claim_type,
            'reposted_has_source': reposted_has_source,
-
-            'channel_hash': metadata.signing_channel_hash,
-
-            'public_key_bytes': None if not metadata.is_channel else metadata.channel.public_key_bytes,
-            'public_key_hash': None if not metadata.is_channel else self.ledger.address_to_hash160(
-                self.ledger.public_key_to_address(metadata.channel.public_key_bytes)
-            ),
-            'signature': metadata.signature,
-            'signature_digest': None,  # TODO: fix
-            'signature_valid': claim.signature_valid,
+            'channel_id': None if not metadata.is_signed else metadata.signing_channel_hash[::-1].hex(),
+            'public_key_id': None if not metadata.is_channel else
+            self.ledger.public_key_to_address(metadata.channel.public_key_bytes),
+            'signature': (metadata.signature or b'').hex() or None,
+            # 'signature_digest': metadata.signature,
+            'is_signature_valid': bool(claim.signature_valid),
            'tags': tags,
            'languages': languages,
            'censor_type': Censor.RESOLVE if blocked_hash else Censor.SEARCH if filtered_hash else Censor.NOT_CENSORED,
-            'censoring_channel_hash': blocked_hash or filtered_hash or None,
+            'censoring_channel_id': (blocked_hash or filtered_hash or b'').hex() or None,
            'claims_in_channel': None if not metadata.is_channel else self.get_claims_in_channel_count(claim_hash)
            # 'trending_group': 0,
            # 'trending_mixed': 0,
@ -695,9 +692,7 @@ class LevelDB:
        return value

    async def all_claims_producer(self, batch_size=500_000):
-        loop = asyncio.get_event_loop()
        batch = []
-        tasks = []
        for claim_hash, v in self.db.iterator(prefix=Prefixes.claim_to_txo.prefix):
            # TODO: fix the couple of claim txos that dont have controlling names
            if not self.db.get(Prefixes.claim_takeover.pack_key(Prefixes.claim_to_txo.unpack_value(v).name)):