2019-07-12 01:38:03 -04:00
|
|
|
import time
|
2019-07-11 13:29:26 -04:00
|
|
|
import struct
|
2019-12-07 18:13:13 -05:00
|
|
|
import apsw
|
2019-07-16 12:26:28 -04:00
|
|
|
import logging
|
2019-09-24 12:53:23 -03:00
|
|
|
from operator import itemgetter
|
2019-07-13 00:34:40 -04:00
|
|
|
from typing import Tuple, List, Dict, Union, Type, Optional
|
2020-02-07 18:50:29 -05:00
|
|
|
from binascii import unhexlify
|
2019-07-11 13:29:26 -04:00
|
|
|
from decimal import Decimal
|
|
|
|
from contextvars import ContextVar
|
2019-07-13 00:34:40 -04:00
|
|
|
from functools import wraps
|
2020-02-07 18:50:29 -05:00
|
|
|
from itertools import chain
|
2019-07-13 00:34:40 -04:00
|
|
|
from dataclasses import dataclass
|
2019-07-11 13:29:26 -04:00
|
|
|
|
2020-01-02 22:18:49 -05:00
|
|
|
from lbry.wallet.database import query, interpolate
|
2020-02-01 12:53:39 -05:00
|
|
|
from lbry.error import ResolveCensoredError
|
2019-07-11 13:29:26 -04:00
|
|
|
from lbry.schema.url import URL, normalize_name
|
|
|
|
from lbry.schema.tags import clean_tags
|
2020-01-10 10:47:57 -05:00
|
|
|
from lbry.schema.result import Outputs, Censor
|
2020-01-02 22:18:49 -05:00
|
|
|
from lbry.wallet import Ledger, RegTestLedger
|
2019-07-11 13:29:26 -04:00
|
|
|
|
2020-09-24 17:21:27 -04:00
|
|
|
from .common import CLAIM_TYPES, STREAM_TYPES, COMMON_TAGS, INDEXED_LANGUAGES
|
2019-11-14 14:31:49 -05:00
|
|
|
from .full_text_search import FTS_ORDER_BY
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2019-12-07 18:13:13 -05:00
|
|
|
class SQLiteOperationalError(apsw.Error):
    """A SQLite query failed; the ``metrics`` passed by the raiser ride along."""

    def __init__(self, metrics):
        message = 'sqlite query errored'
        super().__init__(message)
        # Kept on the instance so whoever catches the error can still read it.
        self.metrics = metrics
|
|
|
|
|
|
|
|
|
2019-12-07 18:13:13 -05:00
|
|
|
class SQLiteInterruptedError(apsw.InterruptError):
    """A SQLite query was interrupted; the ``metrics`` passed by the raiser ride along."""

    def __init__(self, metrics):
        message = 'sqlite query interrupted'
        super().__init__(message)
        # Kept on the instance so whoever catches the error can still read it.
        self.metrics = metrics
|
|
|
|
|
|
|
|
|
2019-07-11 13:29:26 -04:00
|
|
|
# Cap applied to each any_/all_/not_ attribute list before it is used in a query.
ATTRIBUTE_ARRAY_MAX_LENGTH = 100

# Search parameters whose values are coerced with int() and may carry a
# comparison prefix (<, <=, >, >=) when given as strings.
INTEGER_PARAMS = {
    # block heights
    'height',
    'creation_height',
    'activation_height',
    'expiration_height',
    # times and fees
    'timestamp',
    'creation_timestamp',
    'duration',
    'release_time',
    'fee_amount',
    # position / channel related
    'tx_position',
    'channel_join',
    'reposted',
    'limit_claims_per_channel',
    # amounts
    'amount',
    'effective_amount',
    'support_amount',
    # trending
    'trending_group',
    'trending_mixed',
    'trending_local',
    'trending_global',
}

# Every keyword a search request is allowed to contain.
SEARCH_PARAMS = {
    'name', 'text', 'claim_id', 'claim_ids', 'txid', 'nout',
    'channel', 'channel_ids', 'not_channel_ids',
    'public_key_id', 'claim_type', 'stream_types', 'media_types', 'fee_currency',
    'has_channel_signature', 'signature_valid',
    'any_tags', 'all_tags', 'not_tags', 'reposted_claim_id',
    'any_locations', 'all_locations', 'not_locations',
    'any_languages', 'all_languages', 'not_languages',
    'is_controlling', 'limit', 'offset', 'order_by',
    'no_totals', 'has_source',
    *INTEGER_PARAMS,
}

# Fields accepted in an order_by entry.
ORDER_FIELDS = {'name', 'claim_hash', *INTEGER_PARAMS}
|
|
|
|
|
|
|
|
|
2019-07-13 00:34:40 -04:00
|
|
|
@dataclass
class ReaderState:
    """State stored in ``ctx``: the database connection plus metrics bookkeeping
    and the block/filter tables handed to ``Censor``."""

    db: apsw.Connection
    stack: List[List]            # one list of nested timings per active measured call
    metrics: Dict                # function name -> list of metric dicts
    is_tracking_metrics: bool
    ledger: Type[Ledger]
    query_timeout: float         # added to time.perf_counter() to form a deadline
    log: logging.Logger
    blocked_streams: Dict
    blocked_channels: Dict
    filtered_streams: Dict
    filtered_channels: Dict

    def close(self):
        """Close the underlying database connection."""
        self.db.close()

    def reset_metrics(self):
        """Discard collected metrics and the timing stack."""
        self.stack, self.metrics = [], {}

    def set_query_timeout(self):
        """Install a progress handler that interrupts the connection once
        ``query_timeout`` has elapsed from now."""
        deadline = time.perf_counter() + self.query_timeout

        def interrupt_when_overdue():
            if time.perf_counter() < deadline:
                return
            self.db.interrupt()

        # 100 is the step count passed to apsw between handler invocations.
        self.db.setprogresshandler(interrupt_when_overdue, 100)

    def get_resolve_censor(self) -> Censor:
        """Build a Censor from the blocked streams and channels."""
        streams, channels = self.blocked_streams, self.blocked_channels
        return Censor(streams, channels)

    def get_search_censor(self, limit_claims_per_channel: int) -> Censor:
        """Build a Censor from the filtered streams and channels, forwarding
        ``limit_claims_per_channel``."""
        streams, channels = self.filtered_streams, self.filtered_channels
        return Censor(streams, channels, limit_claims_per_channel)
|
2020-01-22 01:55:37 -05:00
|
|
|
|
2019-07-11 13:29:26 -04:00
|
|
|
|
2019-07-13 00:34:40 -04:00
|
|
|
# Holds the ReaderState installed by initializer(); cleanup() sets it back to None.
# It has no default, so ctx.get() raises LookupError until initializer() has run.
ctx: ContextVar[Optional[ReaderState]] = ContextVar('ctx')
|
|
|
|
|
|
|
|
|
2020-01-10 10:47:57 -05:00
|
|
|
def row_factory(cursor, row):
    """Turn a raw result row into a dict keyed by column name.

    Column names come from ``cursor.getdescription()``. The value of a column
    named ``tags`` is split on commas into a set; all other values pass through
    unchanged.
    """
    record = {}
    for position, description in enumerate(cursor.getdescription()):
        column = description[0]
        value = row[position]
        if column == 'tags':
            value = set(value.split(','))
        record[column] = value
    return record
|
|
|
|
|
|
|
|
|
2020-01-22 01:55:37 -05:00
|
|
|
def initializer(log, _path, _ledger_name, query_timeout, _measure=False, block_and_filter=None):
    """Open the database read-only and publish a fresh ReaderState in ``ctx``.

    :param log: logger stored on the state.
    :param _path: database path/URI handed to ``apsw.Connection``.
    :param _ledger_name: ``'mainnet'`` selects ``Ledger``; anything else selects
        ``RegTestLedger``.
    :param query_timeout: stored on the state as ``query_timeout``.
    :param _measure: stored on the state as ``is_tracking_metrics``.
    :param block_and_filter: optional 4-item iterable unpacked as
        ``(blocked_streams, blocked_channels, filtered_streams, filtered_channels)``.
    """
    db = apsw.Connection(_path, flags=apsw.SQLITE_OPEN_READONLY | apsw.SQLITE_OPEN_URI)
    db.setrowtrace(row_factory)
    if block_and_filter:
        blocked_streams, blocked_channels, filtered_streams, filtered_channels = block_and_filter
    else:
        # Four distinct dicts: a chained `a = b = c = d = {}` would make every
        # field alias one object, so a write to one table would show up in all.
        blocked_streams, blocked_channels, filtered_streams, filtered_channels = {}, {}, {}, {}
    ctx.set(
        ReaderState(
            db=db, stack=[], metrics={}, is_tracking_metrics=_measure,
            ledger=Ledger if _ledger_name == 'mainnet' else RegTestLedger,
            query_timeout=query_timeout, log=log,
            blocked_streams=blocked_streams, blocked_channels=blocked_channels,
            filtered_streams=filtered_streams, filtered_channels=filtered_channels,
        )
    )
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2019-07-13 00:34:40 -04:00
|
|
|
def cleanup():
    """Close the current ReaderState, if any, and reset ``ctx`` to None.

    Safe to call when no state was ever installed or when cleanup already ran:
    ``ctx.get(None)`` avoids the LookupError of an unset context variable, and
    the None check avoids calling ``close()`` on an already-cleared context.
    """
    state = ctx.get(None)
    if state is not None:
        state.close()
    ctx.set(None)
|
|
|
|
|
|
|
|
|
|
|
|
def measure(func):
    """Decorator recording how long ``func`` takes, in whole milliseconds.

    When the current state tracks metrics, each call appends a dict to
    ``state.metrics[func.__name__]`` holding ``total`` (elapsed time) and
    ``isolated`` (elapsed time minus the totals reported by nested measured
    calls). When metrics are off the function is simply called.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        state = ctx.get()
        if state.is_tracking_metrics:
            timings = {}
            state.metrics.setdefault(func.__name__, []).append(timings)
            # New frame that nested measured calls push their totals into.
            state.stack.append([])
            began = time.perf_counter()
            try:
                return func(*args, **kwargs)
            finally:
                elapsed_ms = int((time.perf_counter() - began) * 1000)
                timings['total'] = elapsed_ms
                timings['isolated'] = elapsed_ms - sum(state.stack.pop())
                # Report this call's total to the enclosing measured call, if any.
                if state.stack:
                    state.stack[-1].append(elapsed_ms)
        return func(*args, **kwargs)
    return wrapper
|
|
|
|
|
|
|
|
|
|
|
|
def reports_metrics(func):
    """Decorator that returns ``(result, metrics)`` when metrics are tracked.

    With tracking on, the state's metrics are reset before ``func`` runs and
    ``state.metrics`` is returned alongside its result. With tracking off the
    bare result of ``func`` is returned.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        state = ctx.get()
        if state.is_tracking_metrics:
            state.reset_metrics()
            outcome = func(*args, **kwargs)
            return outcome, state.metrics
        return func(*args, **kwargs)
    return wrapper
|
|
|
|
|
|
|
|
|
|
|
|
@reports_metrics
def search_to_bytes(constraints) -> Union[bytes, Tuple[bytes, Dict]]:
    """Run ``search`` with the given constraints and encode its result."""
    found = search(constraints)
    return encode_result(found)
|
|
|
|
|
|
|
|
|
|
|
|
@reports_metrics
def resolve_to_bytes(urls) -> Union[bytes, Tuple[bytes, Dict]]:
    """Run ``resolve`` over the given URLs and encode its result."""
    resolved = resolve(urls)
    return encode_result(resolved)
|
|
|
|
|
|
|
|
|
|
|
|
def encode_result(result):
    """Spread ``result`` as positional arguments into ``Outputs.to_bytes``."""
    to_bytes = Outputs.to_bytes
    return to_bytes(*result)
|
|
|
|
|
|
|
|
|
|
|
|
@measure
def execute_query(sql, values, row_offset: int, row_limit: int, censor: Censor) -> List:
    """Run ``sql`` and return at most ``row_limit`` uncensored rows.

    Censoring and the offset are applied in Python via a cursor row trace, so
    ``row_offset`` counts rows that survived censoring.

    :param sql: statement text.
    :param values: bindings passed to ``cursor.execute``.
    :param row_offset: number of accepted rows to skip before collecting.
    :param row_limit: maximum number of rows to return; a value of zero or
        less returns an empty list without touching the database.
    :param censor: object whose ``censor(row)`` returning true drops the row.
    :raises SQLiteInterruptedError: the query was interrupted (timeout).
    :raises SQLiteOperationalError: any other ``apsw.Error``.
    """
    if row_limit <= 0:
        # The collect-then-check loop used to hand back one row for limit 0.
        return []
    context = ctx.get()
    context.set_query_timeout()
    try:
        c = context.db.cursor()

        def row_filter(cursor, row):
            # Returning None from a row trace makes apsw skip the row.
            nonlocal row_offset
            row = row_factory(cursor, row)
            # Single-column rows (e.g. counts) are never censored.
            if len(row) > 1 and censor.censor(row):
                return
            if row_offset:
                row_offset -= 1
                return
            return row

        c.setrowtrace(row_filter)
        rows = []
        for row in c.execute(sql, values):
            rows.append(row)
            if len(rows) >= row_limit:
                break
        return rows
    except apsw.Error as err:
        plain_sql = interpolate(sql, values)
        if context.is_tracking_metrics:
            # @measure has already appended this call's metric dict.
            context.metrics['execute_query'][-1]['sql'] = plain_sql
        if isinstance(err, apsw.InterruptError):
            context.log.warning("interrupted slow sqlite query:\n%s", plain_sql)
            raise SQLiteInterruptedError(context.metrics) from err
        context.log.exception('failed running query', exc_info=err)
        raise SQLiteOperationalError(context.metrics) from err
|
2019-07-13 00:34:40 -04:00
|
|
|
|
|
|
|
|
2020-01-22 01:55:37 -05:00
|
|
|
def claims_query(cols, for_count=False, **constraints) -> Tuple[str, Dict]:
    """Translate search constraints into a SELECT over the ``claim`` table.

    User-facing keys are rewritten into ``claim.<column>[__op]`` keys and the
    final statement is produced by ``query(select, **constraints)``.

    :param cols: column list placed after ``SELECT``.
    :param for_count: when true, the ``claimtrie`` join is left out (unless
        ``is_controlling`` forces it) and array-attribute filters use their
        counting variants.
    :param constraints: search parameters; consumed keys are popped.
    :raises NameError: an ``order_by`` entry names a field not in ORDER_FIELDS.
    :return: whatever ``query`` returns for the built statement.
    """
    if 'order_by' in constraints:
        order_by_parts = constraints['order_by']
        if isinstance(order_by_parts, str):
            order_by_parts = [order_by_parts]
        sql_order_by = []
        for order_by in order_by_parts:
            # A leading '^' requests ascending order; the default is descending.
            is_asc = order_by.startswith('^')
            column = order_by[1:] if is_asc else order_by
            if column not in ORDER_FIELDS:
                raise NameError(f'{column} is not a valid order_by field')
            if column == 'name':
                column = 'normalized'
            sql_order_by.append(
                f"claim.{column} ASC" if is_asc else f"claim.{column} DESC"
            )
        constraints['order_by'] = sql_order_by

    # String values may start with a comparison operator; two-character
    # operators are tested first so '<=' is not read as '<'.
    ops = {'<=': '__lte', '>=': '__gte', '<': '__lt', '>': '__gt'}
    for constraint in INTEGER_PARAMS:
        if constraint in constraints:
            value = constraints.pop(constraint)
            postfix = ''
            if isinstance(value, str):
                if len(value) >= 2 and value[:2] in ops:
                    postfix, value = ops[value[:2]], value[2:]
                elif len(value) >= 1 and value[0] in ops:
                    postfix, value = ops[value[0]], value[1:]
            if constraint == 'fee_amount':
                value = Decimal(value)*1000
            constraints[f'claim.{constraint}{postfix}'] = int(value)

    if constraints.pop('is_controlling', False):
        if {'sequence', 'amount_order'}.isdisjoint(constraints):
            # The constraint below references claimtrie, so the join is required.
            for_count = False
            constraints['claimtrie.claim_hash__is_not_null'] = ''
    if 'sequence' in constraints:
        constraints['order_by'] = 'claim.activation_height ASC'
        constraints['offset'] = int(constraints.pop('sequence')) - 1
        constraints['limit'] = 1
    if 'amount_order' in constraints:
        constraints['order_by'] = 'claim.effective_amount DESC'
        constraints['offset'] = int(constraints.pop('amount_order')) - 1
        constraints['limit'] = 1

    if 'claim_id' in constraints:
        claim_id = constraints.pop('claim_id')
        if len(claim_id) == 40:
            constraints['claim.claim_id'] = claim_id
        else:
            # Anything that is not exactly 40 characters is matched as a prefix.
            constraints['claim.claim_id__like'] = f'{claim_id[:40]}%'
    elif 'claim_ids' in constraints:
        constraints['claim.claim_id__in'] = set(constraints.pop('claim_ids'))

    if 'reposted_claim_id' in constraints:
        # Hex ids are unhexlified and byte-reversed to form the stored hash.
        constraints['claim.reposted_claim_hash'] = unhexlify(constraints.pop('reposted_claim_id'))[::-1]

    if 'name' in constraints:
        constraints['claim.normalized'] = normalize_name(constraints.pop('name'))

    if 'public_key_id' in constraints:
        constraints['claim.public_key_hash'] = (
            ctx.get().ledger.address_to_hash160(constraints.pop('public_key_id')))
    if 'channel_hash' in constraints:
        constraints['claim.channel_hash'] = constraints.pop('channel_hash')
    if 'channel_ids' in constraints:
        channel_ids = constraints.pop('channel_ids')
        if channel_ids:
            constraints['claim.channel_hash__in'] = {
                unhexlify(cid)[::-1] for cid in channel_ids if cid
            }
    if 'not_channel_ids' in constraints:
        not_channel_ids = constraints.pop('not_channel_ids')
        if not_channel_ids:
            # Skip falsy ids exactly as channel_ids does; unhexlify(None) raises.
            not_channel_ids_binary = {
                unhexlify(ncid)[::-1] for ncid in not_channel_ids if ncid
            }
            # Nothing usable left after filtering: add no exclusion at all.
            if not_channel_ids_binary:
                constraints['claim.claim_hash__not_in#not_channel_ids'] = not_channel_ids_binary
                if constraints.get('has_channel_signature', False):
                    constraints['claim.channel_hash__not_in'] = not_channel_ids_binary
                else:
                    constraints['null_or_not_channel__or'] = {
                        'claim.signature_valid__is_null': True,
                        'claim.channel_hash__not_in': not_channel_ids_binary
                    }
    if 'signature_valid' in constraints:
        has_channel_signature = constraints.pop('has_channel_signature', False)
        if has_channel_signature:
            constraints['claim.signature_valid'] = constraints.pop('signature_valid')
        else:
            constraints['null_or_signature__or'] = {
                'claim.signature_valid__is_null': True,
                'claim.signature_valid': constraints.pop('signature_valid')
            }
    elif constraints.pop('has_channel_signature', False):
        constraints['claim.signature_valid__is_not_null'] = True

    if 'txid' in constraints:
        tx_hash = unhexlify(constraints.pop('txid'))[::-1]
        nout = constraints.pop('nout', 0)
        # txo_hash is the reversed tx hash followed by nout as little-endian uint32.
        constraints['claim.txo_hash'] = tx_hash + struct.pack('<I', nout)

    if 'claim_type' in constraints:
        claim_types = constraints.pop('claim_type')
        if isinstance(claim_types, str):
            claim_types = [claim_types]
        if claim_types:
            constraints['claim.claim_type__in'] = {
                CLAIM_TYPES[claim_type] for claim_type in claim_types
            }
    if 'stream_types' in constraints:
        stream_types = constraints.pop('stream_types')
        if stream_types:
            constraints['claim.stream_type__in'] = {
                STREAM_TYPES[stream_type] for stream_type in stream_types
            }
    if 'media_types' in constraints:
        media_types = constraints.pop('media_types')
        if media_types:
            constraints['claim.media_type__in'] = set(media_types)

    if 'fee_currency' in constraints:
        constraints['claim.fee_currency'] = constraints.pop('fee_currency').lower()

    _apply_constraints_for_array_attributes(constraints, 'tag', clean_tags, for_count)
    _apply_constraints_for_array_attributes(constraints, 'language', lambda _: _, for_count)
    _apply_constraints_for_array_attributes(constraints, 'location', lambda _: _, for_count)

    if 'text' in constraints:
        # Full-text search replaces any requested ordering with FTS_ORDER_BY.
        constraints["search"] = constraints.pop("text")
        constraints["order_by"] = FTS_ORDER_BY
        select = f"SELECT {cols} FROM search JOIN claim ON (search.rowid=claim.rowid)"
    else:
        select = f"SELECT {cols} FROM claim"
    if not for_count:
        select += " LEFT JOIN claimtrie USING (claim_hash)"
    return query(select, **constraints)
|
2019-07-13 00:34:40 -04:00
|
|
|
|
2019-07-19 14:15:15 -04:00
|
|
|
|
2020-01-22 01:55:37 -05:00
|
|
|
def select_claims(censor: Censor, cols: str, for_count=False, **constraints) -> List:
    """Build and execute a claims query, resolving a ``channel`` URL first.

    A ``channel`` constraint is resolved with ``resolve_url``; when that does
    not yield a row dict, an empty result is returned (a zero ``row_count`` row
    for the count query). ``offset`` (default 0) and ``limit`` (default 20) are
    removed from the constraints and applied by ``execute_query``.
    """
    if 'channel' in constraints:
        resolved = resolve_url(constraints.pop('channel'))
        if not isinstance(resolved, dict):
            if cols == 'count(*) as row_count':
                return [{'row_count': 0}]
            return []
        constraints['channel_hash'] = resolved['claim_hash']
    skip = constraints.pop('offset', 0)
    take = constraints.pop('limit', 20)
    statement, bindings = claims_query(cols, for_count, **constraints)
    return execute_query(statement, bindings, skip, take, censor)
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2019-07-13 00:34:40 -04:00
|
|
|
@measure
def count_claims(**constraints) -> int:
    """Count the claims matching ``constraints``.

    Paging and ordering keys are discarded, and an empty ``Censor()`` is used
    for the count query.
    """
    for irrelevant in ('offset', 'limit', 'order_by'):
        constraints.pop(irrelevant, None)
    rows = select_claims(Censor(), 'count(*) as row_count', for_count=True, **constraints)
    first = rows[0]
    return first['row_count']
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2020-01-22 01:55:37 -05:00
|
|
|
def search_claims(censor: Censor, **constraints) -> List:
    """Select the standard set of claim columns for rows matching ``constraints``."""
    columns = """
        claimtrie.claim_hash as is_controlling,
        claimtrie.last_take_over_height,
        claim.claim_hash, claim.txo_hash,
        claim.claims_in_channel, claim.reposted,
        claim.height, claim.creation_height,
        claim.activation_height, claim.expiration_height,
        claim.effective_amount, claim.support_amount,
        claim.trending_group, claim.trending_mixed,
        claim.trending_local, claim.trending_global,
        claim.short_url, claim.canonical_url,
        claim.channel_hash, claim.reposted_claim_hash,
        claim.signature_valid
        """
    return select_claims(censor, columns, **constraints)
|
|
|
|
|
|
|
|
|
2020-02-07 18:50:29 -05:00
|
|
|
def _get_referenced_rows(txo_rows: List[dict], censor_channels: List[bytes]):
    """Fetch the claims that ``txo_rows`` refer to: reposted claims and channels.

    :param txo_rows: row dicts with ``reposted_claim_hash`` and ``channel_hash``.
    :param censor_channels: extra channel hashes to fetch as well.
    :return: channel rows followed by reposted rows.
    """
    censor = ctx.get().get_resolve_censor()
    repost_hashes = set(filter(None, map(itemgetter('reposted_claim_hash'), txo_rows)))
    channel_hashes = set(chain(
        filter(None, map(itemgetter('channel_hash'), txo_rows)),
        censor_channels
    ))

    # select_claims defaults to limit=20; pass the number of requested hashes
    # so that lookups of more than 20 references are not silently truncated.
    reposted_txos = []
    if repost_hashes:
        reposted_txos = search_claims(
            censor, limit=len(repost_hashes), **{'claim.claim_hash__in': repost_hashes}
        )
        # Reposted claims can belong to channels that must be fetched too.
        channel_hashes |= set(filter(None, map(itemgetter('channel_hash'), reposted_txos)))

    channel_txos = []
    if channel_hashes:
        channel_txos = search_claims(
            censor, limit=len(channel_hashes), **{'claim.claim_hash__in': channel_hashes}
        )

    # channels must come first for client side inflation to work properly
    return channel_txos + reposted_txos
|
|
|
|
|
|
|
|
@measure
def search(constraints) -> Tuple[List, List, int, int, Censor]:
    """Run a claim search.

    :param constraints: mapping of search parameters; every key must be in
        SEARCH_PARAMS. The mapping is copied, so the caller's object is left
        untouched.
    :raises AssertionError: unknown parameters were supplied.
    :return: ``(rows, referenced rows, offset, total, censor)`` where ``total``
        is None when ``no_totals`` was requested.
    """
    # Explicit check rather than `assert`, which is stripped under `python -O`;
    # AssertionError is kept so existing handlers continue to work.
    invalid = set(constraints).difference(SEARCH_PARAMS)
    if invalid:
        raise AssertionError(f"Search query contains invalid arguments: {invalid}")
    constraints = dict(constraints)
    total = None
    limit_claims_per_channel = constraints.pop('limit_claims_per_channel', None)
    if not constraints.pop('no_totals', False):
        total = count_claims(**constraints)
    constraints['offset'] = abs(constraints.get('offset', 0))
    # Page size defaults to 10 and is capped at 50.
    constraints['limit'] = min(abs(constraints.get('limit', 10)), 50)
    context = ctx.get()
    search_censor = context.get_search_censor(limit_claims_per_channel)
    txo_rows = search_claims(search_censor, **constraints)
    extra_txo_rows = _get_referenced_rows(txo_rows, search_censor.censored.keys())
    return txo_rows, extra_txo_rows, constraints['offset'], total, search_censor
|
2019-11-18 15:48:52 -05:00
|
|
|
|
|
|
|
|
|
|
|
@measure
def resolve(urls) -> Tuple[List, List]:
    """Resolve every URL and gather the rows those results refer to.

    Each entry of the first returned list is whatever ``resolve_url`` gave for
    the matching URL (a row dict or an error object). Row dicts, together with
    the ``censor_hash`` of any ResolveCensoredError, drive the referenced-row
    lookup that forms the second list.
    """
    outcomes = []
    resolved_rows = []
    censoring_hashes = []
    for raw_url in urls:
        outcome = resolve_url(raw_url)
        outcomes.append(outcome)
        if isinstance(outcome, dict):
            resolved_rows.append(outcome)
        if isinstance(outcome, ResolveCensoredError):
            censoring_hashes.append(outcome.censor_hash)
    referenced = _get_referenced_rows(resolved_rows, censoring_hashes)
    return outcomes, referenced
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2019-07-13 00:34:40 -04:00
|
|
|
@measure
def resolve_url(raw_url):
    """Resolve one URL to a claim row, returning (not raising) errors.

    :return: the matching row dict; or the ``ValueError`` from URL parsing; or
        a ``ResolveCensoredError`` when nothing matched and the censor recorded
        something; or a ``LookupError`` when nothing matched at all.
    """
    censor = ctx.get().get_resolve_censor()

    try:
        url = URL.parse(raw_url)
    except ValueError as parse_error:
        return parse_error

    channel = None

    if url.has_channel:
        channel_lookup = url.channel.to_dict()
        if set(channel_lookup) == {'name'}:
            channel_lookup['is_controlling'] = True
        else:
            channel_lookup['order_by'] = ['^creation_height']
        found = search_claims(censor, **channel_lookup, limit=1)
        if not found:
            if censor.censored:
                return ResolveCensoredError(raw_url, next(iter(censor.censored)))
            return LookupError(f'Could not find channel in "{raw_url}".')
        channel = found[0]

    if not url.has_stream:
        return channel

    stream_lookup = url.stream.to_dict()
    name_only = set(stream_lookup) == {'name'}
    if channel is not None:
        if name_only:
            # temporarily emulate is_controlling for claims in channel
            stream_lookup['order_by'] = ['effective_amount', '^height']
        else:
            stream_lookup['order_by'] = ['^channel_join']
        stream_lookup['channel_hash'] = channel['claim_hash']
        stream_lookup['signature_valid'] = 1
    elif name_only:
        stream_lookup['is_controlling'] = 1

    found = search_claims(censor, **stream_lookup, limit=1)
    if found:
        return found[0]
    if censor.censored:
        return ResolveCensoredError(raw_url, next(iter(censor.censored)))
    return LookupError(f'Could not find claim at "{raw_url}".')
|
2019-07-11 13:29:26 -04:00
|
|
|
|
|
|
|
|
2020-02-22 12:23:11 -05:00
|
|
|
CLAIM_HASH_OR_REPOST_HASH_SQL = f"""
|
|
|
|
CASE WHEN claim.claim_type = {CLAIM_TYPES['repost']}
|
|
|
|
THEN claim.reposted_claim_hash
|
|
|
|
ELSE claim.claim_hash
|
|
|
|
END
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
2019-07-11 13:29:26 -04:00
|
|
|
def _apply_constraints_for_array_attributes(constraints, attr, cleaner, for_count=False):
    """Replace ``any_/all_/not_<attr>s`` constraints with SQL filter fragments.

    The three lists are popped from ``constraints``, cleaned, capped at
    ATTRIBUTE_ARRAY_MAX_LENGTH entries and de-duplicated; items also present in
    the ``not_`` list are removed from the other two. Values are bound through
    ``$``-prefixed keys and fragments are stored under ``#``-prefixed keys in
    ``constraints``, which is modified in place.

    :param attr: singular attribute name, also used as table and column name.
    :param cleaner: callable applied to each raw list before slicing.
    :param for_count: selects the ``IN (subquery)`` variants of the fragments.
    """
    def pop_items(kind):
        raw = constraints.pop(f'{kind}_{attr}s', [])
        return set(cleaner(raw)[:ATTRIBUTE_ARRAY_MAX_LENGTH])

    def bind(prefix, items):
        # Store each item under '$<prefix><i>' and return the matching
        # ':$<prefix><i>' placeholders joined for use inside IN (...).
        constraints.update({
            f'${prefix}{i}': item for i, item in enumerate(items)
        })
        return ', '.join(f':${prefix}{i}' for i in range(len(items)))

    any_items = pop_items('any')
    all_items = pop_items('all')
    not_items = pop_items('not')

    # An excluded item can never be required or accepted.
    all_items = all_items - not_items
    any_items = any_items - not_items

    repost_type = CLAIM_TYPES['repost']
    any_queries = {}

    if attr == 'tag':
        common_tags = any_items & COMMON_TAGS.keys()
        if common_tags:
            any_items -= common_tags
        if len(common_tags) < 5:
            # Few common tags: one EXISTS per tag, each on its dedicated index.
            for item in common_tags:
                index_name = COMMON_TAGS[item]
                any_queries[f'#_common_tag_{index_name}'] = f"""
                EXISTS(
                    SELECT 1 FROM tag INDEXED BY tag_{index_name}_idx
                    WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash
                    AND tag = '{item}'
                )
                """
        else:
            values = bind('any_common_tag', common_tags)
            any_queries['#_any_common_tags'] = f"""
            EXISTS(
                SELECT 1 FROM tag WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash
                AND tag IN ({values})
            )
            """
    elif attr == 'language':
        indexed_languages = any_items & set(INDEXED_LANGUAGES)
        if indexed_languages:
            any_items -= indexed_languages
        for language in indexed_languages:
            any_queries[f'#_any_common_languages_{language}'] = f"""
            EXISTS(
                SELECT 1 FROM language INDEXED BY language_{language}_idx
                WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=language.claim_hash
                AND language = '{language}'
            )
            """

    if any_items:
        values = bind(f'any_{attr}', any_items)
        if attr == 'tag':
            fragment = f"""
            ((claim.claim_type != {repost_type}
            AND claim.claim_hash IN (SELECT claim_hash FROM tag WHERE tag IN ({values}))) OR
            (claim.claim_type == {repost_type} AND
            claim.reposted_claim_hash IN (SELECT claim_hash FROM tag WHERE tag IN ({values}))))
            """
        elif for_count:
            fragment = f"""
            {CLAIM_HASH_OR_REPOST_HASH_SQL} IN (
                SELECT claim_hash FROM {attr} WHERE {attr} IN ({values})
            )
            """
        else:
            fragment = f"""
            EXISTS(
                SELECT 1 FROM {attr} WHERE
                    {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash
                AND {attr} IN ({values})
            )
            """
        any_queries[f'#_any_{attr}'] = fragment

    # One fragment goes in directly; several are grouped under one '__any' key.
    if len(any_queries) == 1:
        constraints.update(any_queries)
    elif len(any_queries) > 1:
        constraints[f'ORed_{attr}_queries__any'] = any_queries

    if all_items:
        constraints[f'$all_{attr}_count'] = len(all_items)
        values = bind(f'all_{attr}', all_items)
        if for_count:
            fragment = f"""
            {CLAIM_HASH_OR_REPOST_HASH_SQL} IN (
                SELECT claim_hash FROM {attr} WHERE {attr} IN ({values})
                GROUP BY claim_hash HAVING COUNT({attr}) = :$all_{attr}_count
            )
            """
        else:
            fragment = f"""
            {len(all_items)}=(
                SELECT count(*) FROM {attr} WHERE
                    {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash
                AND {attr} IN ({values})
            )
            """
        constraints[f'#_all_{attr}'] = fragment

    if not_items:
        values = bind(f'not_{attr}', not_items)
        if not for_count:
            fragment = f"""
            NOT EXISTS(
                SELECT 1 FROM {attr} WHERE
                    {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash
                AND {attr} IN ({values})
            )
            """
        elif attr == 'tag':
            fragment = f"""
            ((claim.claim_type != {repost_type}
            AND claim.claim_hash NOT IN (SELECT claim_hash FROM tag WHERE tag IN ({values}))) OR
            (claim.claim_type == {repost_type} AND
            claim.reposted_claim_hash NOT IN (SELECT claim_hash FROM tag WHERE tag IN ({values}))))
            """
        else:
            fragment = f"""
            {CLAIM_HASH_OR_REPOST_HASH_SQL} NOT IN (
                SELECT claim_hash FROM {attr} WHERE {attr} IN ({values})
            )
            """
        constraints[f'#_not_{attr}'] = fragment
|