move search.py to herald

2022-05-18 13:34:36 -04:00 · 2022-05-18 13:34:36 -04:00 · a919a3a519
commit a919a3a519
parent 37fc334c46
7 changed files with 366 additions and 260 deletions
--- a/hub/common.py
+++ b/hub/common.py
@ -1,3 +1,4 @@
+import struct
 import hashlib
 import hmac
 import ipaddress
@ -5,8 +6,13 @@ import logging
 import logging.handlers
 import typing
 import collections
+from decimal import Decimal
+from typing import Iterable
 from asyncio import get_event_loop, Event
 from prometheus_client import Counter
+from hub.schema.tags import clean_tags
+from hub.schema.url import normalize_name
+from hub.error import TooManyClaimSearchParametersError

 log = logging.getLogger(__name__)

@ -23,6 +29,22 @@ HISTOGRAM_BUCKETS = (
    .005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, 15.0, 20.0, 30.0, 60.0, float('inf')
 )

+CLAIM_TYPES = {
+    'stream': 1,
+    'channel': 2,
+    'repost': 3,
+    'collection': 4,
+}
+
+STREAM_TYPES = {
+    'video': 1,
+    'audio': 2,
+    'image': 3,
+    'document': 4,
+    'binary': 5,
+    'model': 6,
+}
+

 def setup_logging(log_path: str):
    log = logging.getLogger('scribe')
@ -404,3 +426,340 @@ class IndexVersionMismatch(Exception):
    def __init__(self, got_version, expected_version):
        self.got_version = got_version
        self.expected_version = expected_version
+
+
+# Elasticsearch constants
+
+INDEX_DEFAULT_SETTINGS = {
+    "settings":
+        {"analysis":
+            {"analyzer": {
+                "default": {"tokenizer": "whitespace", "filter": ["lowercase", "porter_stem"]}}},
+            "index":
+                {"refresh_interval": -1,
+                 "number_of_shards": 1,
+                 "number_of_replicas": 0,
+                 "sort": {
+                     "field": ["trending_score", "release_time"],
+                     "order": ["desc", "desc"]
+                 }}
+        },
+    "mappings": {
+        "properties": {
+            "claim_id": {
+                "fields": {
+                    "keyword": {
+                        "ignore_above": 256,
+                        "type": "keyword"
+                    }
+                },
+                "type": "text",
+                "index_prefixes": {
+                    "min_chars": 1,
+                    "max_chars": 10
+                }
+            },
+            "sd_hash": {
+                "fields": {
+                    "keyword": {
+                        "ignore_above": 96,
+                        "type": "keyword"
+                    }
+                },
+                "type": "text",
+                "index_prefixes": {
+                    "min_chars": 1,
+                    "max_chars": 4
+                }
+            },
+            "height": {"type": "integer"},
+            "claim_type": {"type": "byte"},
+            "censor_type": {"type": "byte"},
+            "trending_score": {"type": "double"},
+            "release_time": {"type": "long"}
+        }
+    }
+}
+
+FIELDS = {
+    '_id',
+    'claim_id', 'claim_type', 'claim_name', 'normalized_name',
+    'tx_id', 'tx_nout', 'tx_position',
+    'short_url', 'canonical_url',
+    'is_controlling', 'last_take_over_height',
+    'public_key_bytes', 'public_key_id', 'claims_in_channel',
+    'channel_id', 'signature', 'signature_digest', 'is_signature_valid',
+    'amount', 'effective_amount', 'support_amount',
+    'fee_amount', 'fee_currency',
+    'height', 'creation_height', 'activation_height', 'expiration_height',
+    'stream_type', 'media_type', 'censor_type',
+    'title', 'author', 'description',
+    'timestamp', 'creation_timestamp',
+    'duration', 'release_time',
+    'tags', 'languages', 'has_source', 'reposted_claim_type',
+    'reposted_claim_id', 'repost_count', 'sd_hash',
+    'trending_score', 'tx_num',
+    'channel_tx_id', 'channel_tx_position', 'channel_height',  'reposted_tx_id',
+    'reposted_tx_position', 'reposted_height',
+}
+
+TEXT_FIELDS = {
+    'author', 'canonical_url', 'channel_id', 'description', 'claim_id', 'censoring_channel_id',
+    'media_type', 'normalized_name', 'public_key_bytes', 'public_key_id', 'short_url', 'signature',
+    'claim_name', 'signature_digest', 'title', 'tx_id', 'fee_currency', 'reposted_claim_id',
+    'tags', 'sd_hash', 'channel_tx_id', 'reposted_tx_id',
+}
+
+RANGE_FIELDS = {
+    'height', 'creation_height', 'activation_height', 'expiration_height',
+    'timestamp', 'creation_timestamp', 'duration', 'release_time', 'fee_amount',
+    'tx_position', 'repost_count', 'limit_claims_per_channel',
+    'amount', 'effective_amount', 'support_amount',
+    'trending_score', 'censor_type', 'tx_num', 'reposted_tx_position', 'reposted_height',
+    'channel_tx_position', 'channel_height',
+}
+
+ALL_FIELDS = RANGE_FIELDS | TEXT_FIELDS | FIELDS
+
+REPLACEMENTS = {
+    'claim_name': 'normalized_name',
+    'name': 'normalized_name',
+    'txid': 'tx_id',
+    'nout': 'tx_nout',
+    'trending_group': 'trending_score',
+    'trending_mixed': 'trending_score',
+    'trending_global': 'trending_score',
+    'trending_local': 'trending_score',
+    'reposted': 'repost_count',
+    'stream_types': 'stream_type',
+    'media_types': 'media_type',
+    'valid_channel_signature': 'is_signature_valid'
+}
+
+
+def expand_query(**kwargs):
+    if "amount_order" in kwargs:
+        kwargs["limit"] = 1
+        kwargs["order_by"] = "effective_amount"
+        kwargs["offset"] = int(kwargs["amount_order"]) - 1
+    if 'name' in kwargs:
+        kwargs['name'] = normalize_name(kwargs.pop('name'))
+    if kwargs.get('is_controlling') is False:
+        kwargs.pop('is_controlling')
+    query = {'must': [], 'must_not': []}
+    collapse = None
+    if 'fee_currency' in kwargs and kwargs['fee_currency'] is not None:
+        kwargs['fee_currency'] = kwargs['fee_currency'].upper()
+    for key, value in kwargs.items():
+        key = key.replace('claim.', '')
+        many = key.endswith('__in') or isinstance(value, list)
+        if many and len(value) > 2048:
+            raise TooManyClaimSearchParametersError(key, 2048)
+        if many:
+            key = key.replace('__in', '')
+            value = list(filter(None, value))
+        if value is None or isinstance(value, list) and len(value) == 0:
+            continue
+        key = REPLACEMENTS.get(key, key)
+        if key in FIELDS:
+            partial_id = False
+            if key == 'claim_type':
+                if isinstance(value, str):
+                    value = CLAIM_TYPES[value]
+                else:
+                    value = [CLAIM_TYPES[claim_type] for claim_type in value]
+            elif key == 'stream_type':
+                value = [STREAM_TYPES[value]] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
+            if key == '_id':
+                if isinstance(value, Iterable):
+                    value = [item[::-1].hex() for item in value]
+                else:
+                    value = value[::-1].hex()
+            if not many and key in ('_id', 'claim_id', 'sd_hash') and len(value) < 20:
+                partial_id = True
+            if key in ('signature_valid', 'has_source'):
+                continue  # handled later
+            if key in TEXT_FIELDS:
+                key += '.keyword'
+            ops = {'<=': 'lte', '>=': 'gte', '<': 'lt', '>': 'gt'}
+            if partial_id:
+                query['must'].append({"prefix": {key: value}})
+            elif key in RANGE_FIELDS and isinstance(value, str) and value[0] in ops:
+                operator_length = 2 if value[:2] in ops else 1
+                operator, value = value[:operator_length], value[operator_length:]
+                if key == 'fee_amount':
+                    value = str(Decimal(value)*1000)
+                query['must'].append({"range": {key: {ops[operator]: value}}})
+            elif key in RANGE_FIELDS and isinstance(value, list) and all(v[0] in ops for v in value):
+                range_constraints = []
+                release_times = []
+                for v in value:
+                    operator_length = 2 if v[:2] in ops else 1
+                    operator, stripped_op_v = v[:operator_length], v[operator_length:]
+                    if key == 'fee_amount':
+                        stripped_op_v = str(Decimal(stripped_op_v)*1000)
+                    if key == 'release_time':
+                        release_times.append((operator, stripped_op_v))
+                    else:
+                        range_constraints.append((operator, stripped_op_v))
+                if key != 'release_time':
+                    query['must'].append({"range": {key: {ops[operator]: v for operator, v in range_constraints}}})
+                else:
+                    query['must'].append(
+                        {"bool":
+                            {"should": [
+                                {"bool": {
+                                    "must_not": {
+                                        "exists": {
+                                            "field": "release_time"
+                                        }
+                                    }
+                                }},
+                                {"bool": {
+                                    "must": [
+                                        {"exists": {"field": "release_time"}},
+                                        {'range': {key: {ops[operator]: v for operator, v in release_times}}},
+                                ]}},
+                            ]}
+                        }
+                    )
+            elif many:
+                query['must'].append({"terms": {key: value}})
+            else:
+                if key == 'fee_amount':
+                    value = str(Decimal(value)*1000)
+                query['must'].append({"term": {key: {"value": value}}})
+        elif key == 'not_channel_ids':
+            for channel_id in value:
+                query['must_not'].append({"term": {'channel_id.keyword': channel_id}})
+                query['must_not'].append({"term": {'_id': channel_id}})
+        elif key == 'channel_ids':
+            query['must'].append({"terms": {'channel_id.keyword': value}})
+        elif key == 'claim_ids':
+            query['must'].append({"terms": {'claim_id.keyword': value}})
+        elif key == 'media_types':
+            query['must'].append({"terms": {'media_type.keyword': value}})
+        elif key == 'any_languages':
+            query['must'].append({"terms": {'languages': clean_tags(value)}})
+        elif key == 'any_languages':
+            query['must'].append({"terms": {'languages': value}})
+        elif key == 'all_languages':
+            query['must'].extend([{"term": {'languages': tag}} for tag in value])
+        elif key == 'any_tags':
+            query['must'].append({"terms": {'tags.keyword': clean_tags(value)}})
+        elif key == 'all_tags':
+            query['must'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
+        elif key == 'not_tags':
+            query['must_not'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
+        elif key == 'not_claim_id':
+            query['must_not'].extend([{"term": {'claim_id.keyword': cid}} for cid in value])
+        elif key == 'limit_claims_per_channel':
+            collapse = ('channel_id.keyword', value)
+    if kwargs.get('has_channel_signature'):
+        query['must'].append({"exists": {"field": "signature"}})
+        if 'signature_valid' in kwargs:
+            query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
+    elif 'signature_valid' in kwargs:
+        query['must'].append(
+            {"bool":
+                {"should": [
+                    {"bool": {"must_not": {"exists": {"field": "signature"}}}},
+                    {"bool" : {"must" : {"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}}}
+                ]}
+             }
+        )
+    if 'has_source' in kwargs:
+        is_stream_or_repost_terms = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}}
+        query['must'].append(
+            {"bool":
+                {"should": [
+                    {"bool": # when is_stream_or_repost AND has_source
+                        {"must": [
+                            {"match": {"has_source": kwargs['has_source']}},
+                            is_stream_or_repost_terms,
+                        ]
+                        },
+                     },
+                    {"bool": # when not is_stream_or_repost
+                        {"must_not": is_stream_or_repost_terms}
+                     },
+                    {"bool": # when reposted_claim_type wouldn't have source
+                        {"must_not":
+                            [
+                                {"term": {"reposted_claim_type": CLAIM_TYPES['stream']}}
+                            ],
+                        "must":
+                            [
+                                {"term": {"claim_type": CLAIM_TYPES['repost']}}
+                            ]
+                        }
+                     }
+                ]}
+             }
+        )
+    if kwargs.get('text'):
+        query['must'].append(
+                    {"simple_query_string":
+                         {"query": kwargs["text"], "fields": [
+                             "claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
+                         ]}})
+    query = {
+        "_source": {"excludes": ["description", "title"]},
+        'query': {'bool': query},
+        "sort": [],
+    }
+    if "limit" in kwargs:
+        query["size"] = kwargs["limit"]
+    if 'offset' in kwargs:
+        query["from"] = kwargs["offset"]
+    if 'order_by' in kwargs:
+        if isinstance(kwargs["order_by"], str):
+            kwargs["order_by"] = [kwargs["order_by"]]
+        for value in kwargs['order_by']:
+            if 'trending_group' in value:
+                # fixme: trending_mixed is 0 for all records on variable decay, making sort slow.
+                continue
+            is_asc = value.startswith('^')
+            value = value[1:] if is_asc else value
+            value = REPLACEMENTS.get(value, value)
+            if value in TEXT_FIELDS:
+                value += '.keyword'
+            query['sort'].append({value: "asc" if is_asc else "desc"})
+    if collapse:
+        query["collapse"] = {
+            "field": collapse[0],
+            "inner_hits": {
+                "name": collapse[0],
+                "size": collapse[1],
+                "sort": query["sort"]
+            }
+        }
+    return query
+
+
+def expand_result(results):
+    inner_hits = []
+    expanded = []
+    for result in results:
+        if result.get("inner_hits"):
+            for _, inner_hit in result["inner_hits"].items():
+                inner_hits.extend(inner_hit["hits"]["hits"])
+            continue
+        result = result['_source']
+        result['claim_hash'] = bytes.fromhex(result['claim_id'])[::-1]
+        if result['reposted_claim_id']:
+            result['reposted_claim_hash'] = bytes.fromhex(result['reposted_claim_id'])[::-1]
+        else:
+            result['reposted_claim_hash'] = None
+        result['channel_hash'] = bytes.fromhex(result['channel_id'])[::-1] if result['channel_id'] else None
+        result['txo_hash'] = bytes.fromhex(result['tx_id'])[::-1] + struct.pack('<I', result['tx_nout'])
+        result['tx_hash'] = bytes.fromhex(result['tx_id'])[::-1]
+        result['reposted'] = result.pop('repost_count')
+        result['signature_valid'] = result.pop('is_signature_valid')
+        # result['normalized'] = result.pop('normalized_name')
+        # if result['censoring_channel_hash']:
+        #     result['censoring_channel_hash'] = unhexlify(result['censoring_channel_hash'])[::-1]
+        expanded.append(result)
+    if inner_hits:
+        return expand_result(inner_hits)
+    return expanded
--- a/hub/elastic_sync/constants.py
+++ b/hub/elastic_sync/constants.py
--- a/hub/db/common.py
+++ b/hub/db/common.py
@ -53,22 +53,6 @@ class DB_PREFIXES(enum.Enum):
 COLUMN_SETTINGS = {}  # this is updated by the PrefixRow metaclass


-CLAIM_TYPES = {
-    'stream': 1,
-    'channel': 2,
-    'repost': 3,
-    'collection': 4,
-}
-
-STREAM_TYPES = {
-    'video': 1,
-    'audio': 2,
-    'image': 3,
-    'document': 4,
-    'binary': 5,
-    'model': 6,
-}
-
 # 9/21/2020
 MOST_USED_TAGS = {
    "gaming",
--- a/hub/db/db.py
+++ b/hub/db/db.py
@ -18,9 +18,9 @@ from hub.schema.url import URL, normalize_name
 from hub.schema.claim import guess_stream_type
 from hub.schema.result import Censor
 from hub.scribe.transaction import TxInput
-from hub.common import hash_to_hex_str, hash160, LRUCacheWithMetrics, sha256
+from hub.common import hash_to_hex_str, hash160, LRUCacheWithMetrics, sha256, STREAM_TYPES, CLAIM_TYPES
 from hub.db.merkle import Merkle, MerkleCache, FastMerkleCacheItem
-from hub.db.common import ResolveResult, STREAM_TYPES, CLAIM_TYPES, ExpandedResolveResult, DBError, UTXO
+from hub.db.common import ResolveResult,ExpandedResolveResult, DBError, UTXO
 from hub.db.prefixes import PendingActivationValue, ClaimTakeoverValue, ClaimToTXOValue, PrefixDB
 from hub.db.prefixes import ACTIVATED_CLAIM_TXO_TYPE, ACTIVATED_SUPPORT_TXO_TYPE, EffectiveAmountKey
 from hub.db.prefixes import PendingActivationKey, TXOToClaimValue, DBStatePrefixRow, MempoolTXPrefixRow
--- a/hub/elastic_sync/service.py
+++ b/hub/elastic_sync/service.py
@ -7,12 +7,10 @@ from elasticsearch import AsyncElasticsearch, NotFoundError
 from elasticsearch.helpers import async_streaming_bulk
 from hub.schema.result import Censor
 from hub.service import BlockchainReaderService
-from hub.common import IndexVersionMismatch
+from hub.common import IndexVersionMismatch, ALL_FIELDS, INDEX_DEFAULT_SETTINGS, expand_query
 from hub.db.revertable import RevertableOp
 from hub.db.common import TrendingNotification, DB_PREFIXES
 from hub.notifier_protocol import ElasticNotifierProtocol
-from hub.elastic_sync.search import expand_query
-from hub.elastic_sync.constants import ALL_FIELDS, INDEX_DEFAULT_SETTINGS
 from hub.elastic_sync.fast_ar_trending import FAST_AR_TRENDING_SCRIPT
 if typing.TYPE_CHECKING:
    from hub.elastic_sync.env import ElasticEnv
--- a/hub/elastic_sync/search.py
+++ b/hub/elastic_sync/search.py
@ -1,20 +1,13 @@
 import logging
 import asyncio
-import struct
 from bisect import bisect_right
 from collections import Counter, deque
-from decimal import Decimal
 from operator import itemgetter
-from typing import Optional, List, Iterable, TYPE_CHECKING
+from typing import Optional, List, TYPE_CHECKING

 from elasticsearch import AsyncElasticsearch, NotFoundError, ConnectionError
 from hub.schema.result import Censor, Outputs
-from hub.schema.tags import clean_tags
-from hub.schema.url import normalize_name
-from hub.error import TooManyClaimSearchParametersError
-from hub.common import LRUCache, IndexVersionMismatch
-from hub.db.common import CLAIM_TYPES, STREAM_TYPES
-from hub.elastic_sync.constants import INDEX_DEFAULT_SETTINGS, REPLACEMENTS, FIELDS, TEXT_FIELDS,  RANGE_FIELDS
+from hub.common import LRUCache, IndexVersionMismatch, INDEX_DEFAULT_SETTINGS, expand_query, expand_result
 from hub.db.common import ResolveResult
 if TYPE_CHECKING:
    from hub.db import HubDB
@ -85,7 +78,7 @@ class SearchIndex:
            self.logger.error("es search index has an incompatible version: %s vs %s", index_version, self.VERSION)
            raise IndexVersionMismatch(index_version, self.VERSION)
        await self.sync_client.indices.refresh(self.index)
-        return acked
+        return True

    async def stop(self):
        clients = [c for c in (self.sync_client, self.search_client) if c is not None]
@ -291,234 +284,6 @@ class SearchIndex:
        return referenced_txos


-def expand_query(**kwargs):
-    if "amount_order" in kwargs:
-        kwargs["limit"] = 1
-        kwargs["order_by"] = "effective_amount"
-        kwargs["offset"] = int(kwargs["amount_order"]) - 1
-    if 'name' in kwargs:
-        kwargs['name'] = normalize_name(kwargs.pop('name'))
-    if kwargs.get('is_controlling') is False:
-        kwargs.pop('is_controlling')
-    query = {'must': [], 'must_not': []}
-    collapse = None
-    if 'fee_currency' in kwargs and kwargs['fee_currency'] is not None:
-        kwargs['fee_currency'] = kwargs['fee_currency'].upper()
-    for key, value in kwargs.items():
-        key = key.replace('claim.', '')
-        many = key.endswith('__in') or isinstance(value, list)
-        if many and len(value) > 2048:
-            raise TooManyClaimSearchParametersError(key, 2048)
-        if many:
-            key = key.replace('__in', '')
-            value = list(filter(None, value))
-        if value is None or isinstance(value, list) and len(value) == 0:
-            continue
-        key = REPLACEMENTS.get(key, key)
-        if key in FIELDS:
-            partial_id = False
-            if key == 'claim_type':
-                if isinstance(value, str):
-                    value = CLAIM_TYPES[value]
-                else:
-                    value = [CLAIM_TYPES[claim_type] for claim_type in value]
-            elif key == 'stream_type':
-                value = [STREAM_TYPES[value]] if isinstance(value, str) else list(map(STREAM_TYPES.get, value))
-            if key == '_id':
-                if isinstance(value, Iterable):
-                    value = [item[::-1].hex() for item in value]
-                else:
-                    value = value[::-1].hex()
-            if not many and key in ('_id', 'claim_id', 'sd_hash') and len(value) < 20:
-                partial_id = True
-            if key in ('signature_valid', 'has_source'):
-                continue  # handled later
-            if key in TEXT_FIELDS:
-                key += '.keyword'
-            ops = {'<=': 'lte', '>=': 'gte', '<': 'lt', '>': 'gt'}
-            if partial_id:
-                query['must'].append({"prefix": {key: value}})
-            elif key in RANGE_FIELDS and isinstance(value, str) and value[0] in ops:
-                operator_length = 2 if value[:2] in ops else 1
-                operator, value = value[:operator_length], value[operator_length:]
-                if key == 'fee_amount':
-                    value = str(Decimal(value)*1000)
-                query['must'].append({"range": {key: {ops[operator]: value}}})
-            elif key in RANGE_FIELDS and isinstance(value, list) and all(v[0] in ops for v in value):
-                range_constraints = []
-                release_times = []
-                for v in value:
-                    operator_length = 2 if v[:2] in ops else 1
-                    operator, stripped_op_v = v[:operator_length], v[operator_length:]
-                    if key == 'fee_amount':
-                        stripped_op_v = str(Decimal(stripped_op_v)*1000)
-                    if key == 'release_time':
-                        release_times.append((operator, stripped_op_v))
-                    else:
-                        range_constraints.append((operator, stripped_op_v))
-                if key != 'release_time':
-                    query['must'].append({"range": {key: {ops[operator]: v for operator, v in range_constraints}}})
-                else:
-                    query['must'].append(
-                        {"bool":
-                            {"should": [
-                                {"bool": {
-                                    "must_not": {
-                                        "exists": {
-                                            "field": "release_time"
-                                        }
-                                    }
-                                }},
-                                {"bool": {
-                                    "must": [
-                                        {"exists": {"field": "release_time"}},
-                                        {'range': {key: {ops[operator]: v for operator, v in release_times}}},
-                                ]}},
-                            ]}
-                        }
-                    )
-            elif many:
-                query['must'].append({"terms": {key: value}})
-            else:
-                if key == 'fee_amount':
-                    value = str(Decimal(value)*1000)
-                query['must'].append({"term": {key: {"value": value}}})
-        elif key == 'not_channel_ids':
-            for channel_id in value:
-                query['must_not'].append({"term": {'channel_id.keyword': channel_id}})
-                query['must_not'].append({"term": {'_id': channel_id}})
-        elif key == 'channel_ids':
-            query['must'].append({"terms": {'channel_id.keyword': value}})
-        elif key == 'claim_ids':
-            query['must'].append({"terms": {'claim_id.keyword': value}})
-        elif key == 'media_types':
-            query['must'].append({"terms": {'media_type.keyword': value}})
-        elif key == 'any_languages':
-            query['must'].append({"terms": {'languages': clean_tags(value)}})
-        elif key == 'any_languages':
-            query['must'].append({"terms": {'languages': value}})
-        elif key == 'all_languages':
-            query['must'].extend([{"term": {'languages': tag}} for tag in value])
-        elif key == 'any_tags':
-            query['must'].append({"terms": {'tags.keyword': clean_tags(value)}})
-        elif key == 'all_tags':
-            query['must'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
-        elif key == 'not_tags':
-            query['must_not'].extend([{"term": {'tags.keyword': tag}} for tag in clean_tags(value)])
-        elif key == 'not_claim_id':
-            query['must_not'].extend([{"term": {'claim_id.keyword': cid}} for cid in value])
-        elif key == 'limit_claims_per_channel':
-            collapse = ('channel_id.keyword', value)
-    if kwargs.get('has_channel_signature'):
-        query['must'].append({"exists": {"field": "signature"}})
-        if 'signature_valid' in kwargs:
-            query['must'].append({"term": {"is_signature_valid": bool(kwargs["signature_valid"])}})
-    elif 'signature_valid' in kwargs:
-        query['must'].append(
-            {"bool":
-                {"should": [
-                    {"bool": {"must_not": {"exists": {"field": "signature"}}}},
-                    {"bool" : {"must" : {"term": {"is_signature_valid": bool(kwargs["signature_valid"])}}}}
-                ]}
-             }
-        )
-    if 'has_source' in kwargs:
-        is_stream_or_repost_terms = {"terms": {"claim_type": [CLAIM_TYPES['stream'], CLAIM_TYPES['repost']]}}
-        query['must'].append(
-            {"bool":
-                {"should": [
-                    {"bool": # when is_stream_or_repost AND has_source
-                        {"must": [
-                            {"match": {"has_source": kwargs['has_source']}},
-                            is_stream_or_repost_terms,
-                        ]
-                        },
-                     },
-                    {"bool": # when not is_stream_or_repost
-                        {"must_not": is_stream_or_repost_terms}
-                     },
-                    {"bool": # when reposted_claim_type wouldn't have source
-                        {"must_not":
-                            [
-                                {"term": {"reposted_claim_type": CLAIM_TYPES['stream']}}
-                            ],
-                        "must":
-                            [
-                                {"term": {"claim_type": CLAIM_TYPES['repost']}}
-                            ]
-                        }
-                     }
-                ]}
-             }
-        )
-    if kwargs.get('text'):
-        query['must'].append(
-                    {"simple_query_string":
-                         {"query": kwargs["text"], "fields": [
-                             "claim_name^4", "channel_name^8", "title^1", "description^.5", "author^1", "tags^.5"
-                         ]}})
-    query = {
-        "_source": {"excludes": ["description", "title"]},
-        'query': {'bool': query},
-        "sort": [],
-    }
-    if "limit" in kwargs:
-        query["size"] = kwargs["limit"]
-    if 'offset' in kwargs:
-        query["from"] = kwargs["offset"]
-    if 'order_by' in kwargs:
-        if isinstance(kwargs["order_by"], str):
-            kwargs["order_by"] = [kwargs["order_by"]]
-        for value in kwargs['order_by']:
-            if 'trending_group' in value:
-                # fixme: trending_mixed is 0 for all records on variable decay, making sort slow.
-                continue
-            is_asc = value.startswith('^')
-            value = value[1:] if is_asc else value
-            value = REPLACEMENTS.get(value, value)
-            if value in TEXT_FIELDS:
-                value += '.keyword'
-            query['sort'].append({value: "asc" if is_asc else "desc"})
-    if collapse:
-        query["collapse"] = {
-            "field": collapse[0],
-            "inner_hits": {
-                "name": collapse[0],
-                "size": collapse[1],
-                "sort": query["sort"]
-            }
-        }
-    return query
-
-
-def expand_result(results):
-    inner_hits = []
-    expanded = []
-    for result in results:
-        if result.get("inner_hits"):
-            for _, inner_hit in result["inner_hits"].items():
-                inner_hits.extend(inner_hit["hits"]["hits"])
-            continue
-        result = result['_source']
-        result['claim_hash'] = bytes.fromhex(result['claim_id'])[::-1]
-        if result['reposted_claim_id']:
-            result['reposted_claim_hash'] = bytes.fromhex(result['reposted_claim_id'])[::-1]
-        else:
-            result['reposted_claim_hash'] = None
-        result['channel_hash'] = bytes.fromhex(result['channel_id'])[::-1] if result['channel_id'] else None
-        result['txo_hash'] = bytes.fromhex(result['tx_id'])[::-1] + struct.pack('<I', result['tx_nout'])
-        result['tx_hash'] = bytes.fromhex(result['tx_id'])[::-1]
-        result['reposted'] = result.pop('repost_count')
-        result['signature_valid'] = result.pop('is_signature_valid')
-        # result['normalized'] = result.pop('normalized_name')
-        # if result['censoring_channel_hash']:
-        #     result['censoring_channel_hash'] = unhexlify(result['censoring_channel_hash'])[::-1]
-        expanded.append(result)
-    if inner_hits:
-        return expand_result(inner_hits)
-    return expanded
-
-
 class ResultCacheItem:
    __slots__ = '_result', 'lock', 'has_result'

--- a/hub/herald/session.py
+++ b/hub/herald/session.py
@ -20,7 +20,7 @@ from hub.error import ResolveCensoredError, TooManyClaimSearchParametersError
 from hub import __version__, PROMETHEUS_NAMESPACE
 from hub.herald import PROTOCOL_MIN, PROTOCOL_MAX, HUB_PROTOCOL_VERSION
 from hub.build_info import BUILD, COMMIT_HASH, DOCKER_TAG
-from hub.elastic_sync.search import SearchIndex
+from hub.herald.search import SearchIndex
 from hub.common import sha256, hash_to_hex_str, hex_str_to_hash, HASHX_LEN, version_string, formatted_time
 from hub.common import protocol_version, RPCError, DaemonError, TaskGroup, HISTOGRAM_BUCKETS
 from hub.herald.jsonrpc import JSONRPCAutoDetect, JSONRPCConnection, JSONRPCv2, JSONRPC