diff --git a/lbry/__init__.py b/lbry/__init__.py index bd50f5284..477bdf565 100644 --- a/lbry/__init__.py +++ b/lbry/__init__.py @@ -1,10 +1,8 @@ __version__ = "1.0.0" from lbry.wallet import Account, Wallet, WalletManager -from lbry.blockchain import ( - Ledger, RegTestLedger, TestNetLedger, - Transaction, Output, Input, - dewies_to_lbc, lbc_to_dewies, dict_values_to_lbc -) +from lbry.blockchain import Ledger, RegTestLedger, TestNetLedger +from lbry.blockchain import Transaction, Output, Input +from lbry.blockchain import dewies_to_lbc, lbc_to_dewies, dict_values_to_lbc from lbry.service import API, Daemon, FullNode, LightClient from lbry.db.database import Database from lbry.conf import Config diff --git a/lbry/blockchain/database.py b/lbry/blockchain/database.py index e4c35e013..2257db211 100644 --- a/lbry/blockchain/database.py +++ b/lbry/blockchain/database.py @@ -155,7 +155,8 @@ class BlockchainDB: def sync_get_takeover_count(self, start_height: int, end_height: int) -> int: sql = """ SELECT COUNT(*) FROM claim WHERE name IN ( - SELECT name FROM takeover WHERE claimID IS NOT NULL AND height BETWEEN ? AND ? + SELECT name FROM takeover + WHERE name IS NOT NULL AND height BETWEEN ? AND ? ) """, (start_height, end_height) return self.connection.execute(*sql).fetchone()[0] @@ -166,7 +167,7 @@ class BlockchainDB: def sync_get_takeovers(self, start_height: int, end_height: int) -> List[dict]: sql = """ SELECT name, claimID, MAX(height) AS height FROM takeover - WHERE claimID IS NOT NULL AND height BETWEEN ? AND ? + WHERE name IS NOT NULL AND height BETWEEN ? AND ? GROUP BY name """, (start_height, end_height) return [{ diff --git a/lbry/blockchain/hash.py b/lbry/blockchain/hash.py index 872a39506..0f92b471b 100644 --- a/lbry/blockchain/hash.py +++ b/lbry/blockchain/hash.py @@ -29,28 +29,35 @@ class TXRef: class TXRefImmutable(TXRef): - __slots__ = ('_height',) + __slots__ = ('_height', '_timestamp') def __init__(self): super().__init__() self._height = -1 + self._timestamp = -1 @classmethod - def from_hash(cls, tx_hash: bytes, height: int) -> 'TXRefImmutable': + def from_hash(cls, tx_hash: bytes, height: int, timestamp: int) -> 'TXRefImmutable': ref = cls() ref._hash = tx_hash ref._id = hexlify(tx_hash[::-1]).decode() ref._height = height + ref._timestamp = timestamp return ref @classmethod - def from_id(cls, tx_id: str, height: int) -> 'TXRefImmutable': + def from_id(cls, tx_id: str, height: int, timestamp: int) -> 'TXRefImmutable': ref = cls() ref._id = tx_id ref._hash = unhexlify(tx_id)[::-1] ref._height = height + ref._timestamp = timestamp return ref @property def height(self): return self._height + + @property + def timestamp(self): + return self._timestamp diff --git a/lbry/blockchain/lbrycrd.py b/lbry/blockchain/lbrycrd.py index f2f652cf4..8a1e9a0ad 100644 --- a/lbry/blockchain/lbrycrd.py +++ b/lbry/blockchain/lbrycrd.py @@ -90,8 +90,15 @@ class Lbrycrd: def temp_regtest(cls): return cls(RegTestLedger(Config.with_same_dir(tempfile.mkdtemp()))) - def get_block_file_path_from_number(self, block_file_number): - return os.path.join(self.actual_data_dir, 'blocks', f'blk{block_file_number:05}.dat') + @staticmethod + def get_block_file_name(block_file_number): + return f'blk{block_file_number:05}.dat' + + def get_block_file_path(self, block_file_number): + return os.path.join( + self.actual_data_dir, 'blocks', + self.get_block_file_name(block_file_number) + ) @property def is_regtest(self): diff --git a/lbry/blockchain/ledger.py b/lbry/blockchain/ledger.py index 04f938fbb..533439e97 100644 
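An aside on the hash.py hunk above: TXRefImmutable now carries the block timestamp next to the height, so both factory methods take a third argument. A minimal usage sketch, assuming the module path shown in that hunk; the tx id, height and timestamp below are made-up placeholders:

    from binascii import unhexlify
    from lbry.blockchain.hash import TXRefImmutable

    tx_id = "a0" * 32  # placeholder hex for a 32-byte tx hash, not a real txid
    ref = TXRefImmutable.from_id(tx_id, height=500_000, timestamp=1_590_000_000)
    assert ref.height == 500_000 and ref.timestamp == 1_590_000_000
    # context-free parsing (e.g. Input.deserialize_from later in this diff) passes -1 for both:
    orphan = TXRefImmutable.from_hash(unhexlify(tx_id)[::-1], -1, -1)
    assert orphan.height == -1 and orphan.timestamp == -1
    # the lbrycrd.py hunk above splits path building similarly:
    # Lbrycrd.get_block_file_name(2) -> 'blk00002.dat'
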
--- a/lbry/blockchain/ledger.py +++ b/lbry/blockchain/ledger.py @@ -1,15 +1,19 @@ +import typing from binascii import unhexlify from string import hexdigits from lbry.crypto.hash import hash160, double_sha256 from lbry.crypto.base58 import Base58 -from lbry.conf import Config from lbry.schema.url import URL from .header import Headers, UnvalidatedHeaders from .checkpoints import HASHES from .dewies import lbc_to_dewies +if typing.TYPE_CHECKING: + from lbry.conf import Config + + class Ledger: name = 'LBRY Credits' symbol = 'LBC' @@ -33,8 +37,8 @@ class Ledger: checkpoints = HASHES - def __init__(self, conf: Config = None): - self.conf = conf or Config.with_same_dir('/dev/null') + def __init__(self, conf: 'Config'): + self.conf = conf self.coin_selection_strategy = None @classmethod diff --git a/lbry/blockchain/sync/blocks.py b/lbry/blockchain/sync/blocks.py new file mode 100644 index 000000000..e2be98184 --- /dev/null +++ b/lbry/blockchain/sync/blocks.py @@ -0,0 +1,177 @@ +import logging + +from sqlalchemy import table, bindparam, text, func, union +from sqlalchemy.future import select +from sqlalchemy.schema import CreateTable + +from lbry.db.tables import Block as BlockTable, TX, TXO, TXI +from lbry.db.tables import ( + pg_add_txo_constraints_and_indexes, + pg_add_txi_constraints_and_indexes, +) +from lbry.db.query_context import ProgressContext, event_emitter, context +from lbry.db.sync import set_input_addresses, update_spent_outputs +from lbry.blockchain.block import Block, create_block_filter +from lbry.blockchain.bcd_data_stream import BCDataStream + +from .context import get_or_initialize_lbrycrd + + +log = logging.getLogger(__name__) + + +def get_best_block_height_for_file(file_number): + return context().fetchone( + select(func.coalesce(func.max(BlockTable.c.height), -1).label('height')) + .where(BlockTable.c.file_number == file_number) + )['height'] + + +@event_emitter("blockchain.sync.block.file", "blocks", "txs", throttle=50) +def sync_block_file( + file_number: int, start_height: int, txs: int, flush_size: int, p: ProgressContext +): + chain = get_or_initialize_lbrycrd(p.ctx) + new_blocks = chain.db.sync_get_blocks_in_file(file_number, start_height) + if not new_blocks: + return -1 + file_name = chain.get_block_file_name(file_number) + p.start(len(new_blocks), txs, progress_id=file_number, label=file_name) + block_file_path = chain.get_block_file_path(file_number) + done_blocks = done_txs = 0 + last_block_processed, loader = -1, p.ctx.get_bulk_loader() + with open(block_file_path, "rb") as fp: + stream = BCDataStream(fp=fp) + for done_blocks, block_info in enumerate(new_blocks, start=1): + block_height = block_info["height"] + fp.seek(block_info["data_offset"]) + block = Block.from_data_stream(stream, block_height, file_number) + loader.add_block(block) + if len(loader.txs) >= flush_size: + done_txs += loader.flush(TX) + p.step(done_blocks, done_txs) + last_block_processed = block_height + if p.ctx.stop_event.is_set(): + return last_block_processed + if loader.txs: + done_txs += loader.flush(TX) + p.step(done_blocks, done_txs) + return last_block_processed + + +@event_emitter("blockchain.sync.txoi.main", "steps") +def sync_txoi(initial_sync: bool, p: ProgressContext): + if initial_sync: + p.start(9) + # A. Update TXIs to have the address of TXO they are spending. + # 1. add tx constraints + if p.ctx.is_postgres: + p.ctx.execute(text("ALTER TABLE tx ADD PRIMARY KEY (tx_hash);")) + p.step() + # 2. 
txi table reshuffling + p.ctx.execute(text("ALTER TABLE txi RENAME TO old_txi;")) + p.ctx.execute(CreateTable(TXI, include_foreign_key_constraints=[])) + if p.ctx.is_postgres: + p.ctx.execute(text("ALTER TABLE txi DROP CONSTRAINT txi_pkey;")) + p.step() + # 3. insert + old_txi = table("old_txi", *(c.copy() for c in TXI.columns)) + columns = [c for c in old_txi.columns if c.name != "address"] + [TXO.c.address] + join_txi_on_txo = old_txi.join(TXO, old_txi.c.txo_hash == TXO.c.txo_hash) + select_txis = select(*columns).select_from(join_txi_on_txo) + insert_txis = TXI.insert().from_select(columns, select_txis) + p.ctx.execute(insert_txis) + p.step() + # 4. drop old txi and vacuum + p.ctx.execute(text("DROP TABLE old_txi;")) + if p.ctx.is_postgres: + with p.ctx.engine.connect() as c: + c.execute(text("COMMIT;")) + c.execute(text("VACUUM ANALYZE txi;")) + p.step() + # 5. restore integrity constraint + if p.ctx.is_postgres: + pg_add_txi_constraints_and_indexes(p.ctx.execute) + p.step() + # 6. txo table reshuffling + p.ctx.execute(text("ALTER TABLE txo RENAME TO old_txo;")) + p.ctx.execute(CreateTable(TXO, include_foreign_key_constraints=[])) + if p.ctx.is_postgres: + p.ctx.execute(text("ALTER TABLE txo DROP CONSTRAINT txo_pkey;")) + p.step() + # 7. insert + old_txo = table("old_txo", *(c.copy() for c in TXO.columns)) + columns = [c for c in old_txo.columns if c.name != "spent_height"] + insert_columns = columns + [TXO.c.spent_height] + select_columns = columns + [ + func.coalesce(TXI.c.height, 0).label("spent_height") + ] + join_txo_on_txi = old_txo.join( + TXI, old_txo.c.txo_hash == TXI.c.txo_hash, isouter=True + ) + select_txos = select(*select_columns).select_from(join_txo_on_txi) + insert_txos = TXO.insert().from_select(insert_columns, select_txos) + p.ctx.execute(insert_txos) + p.step() + # 8. drop old txo + p.ctx.execute(text("DROP TABLE old_txo;")) + if p.ctx.is_postgres: + with p.ctx.engine.connect() as c: + c.execute(text("COMMIT;")) + c.execute(text("VACUUM ANALYZE txo;")) + p.step() + # 9. restore integrity constraint + if p.ctx.is_postgres: + pg_add_txo_constraints_and_indexes(p.ctx.execute) + p.step() + else: + p.start(2) + # 1. Update TXIs to have the address of TXO they are spending. + set_input_addresses(p.ctx) + p.step() + # 2. 
Update spent TXOs setting spent_height + update_spent_outputs(p.ctx) + p.step() + + +@event_emitter("blockchain.sync.filter.generate", "blocks") +def sync_block_filters(p: ProgressContext): + blocks = [] + all_filters = [] + all_addresses = [] + for block in get_blocks_without_filters(): + addresses = { + p.ctx.ledger.address_to_hash160(r["address"]) + for r in get_block_tx_addresses(block_hash=block["block_hash"]) + } + all_addresses.extend(addresses) + block_filter = create_block_filter(addresses) + all_filters.append(block_filter) + blocks.append({"pk": block["block_hash"], "block_filter": block_filter}) + p.ctx.execute( + BlockTable.update().where(BlockTable.c.block_hash == bindparam("pk")), blocks + ) + + +def get_blocks_without_filters(): + return context().fetchall( + select(BlockTable.c.block_hash) + .where(BlockTable.c.block_filter.is_(None)) + ) + + +def get_block_tx_addresses(block_hash=None, tx_hash=None): + if block_hash is not None: + constraint = (TX.c.block_hash == block_hash) + elif tx_hash is not None: + constraint = (TX.c.tx_hash == tx_hash) + else: + raise ValueError('block_hash or tx_hash must be provided.') + return context().fetchall( + union( + select(TXO.c.address).select_from(TXO.join(TX)) + .where((TXO.c.address.isnot_(None)) & constraint), + select(TXI.c.address).select_from(TXI.join(TX)) + .where((TXI.c.address.isnot_(None)) & constraint), + ) + ) diff --git a/lbry/blockchain/sync/claims.py b/lbry/blockchain/sync/claims.py new file mode 100644 index 000000000..006a23f5f --- /dev/null +++ b/lbry/blockchain/sync/claims.py @@ -0,0 +1,253 @@ +import logging +from typing import Tuple, Union + +from sqlalchemy import case, func, desc +from sqlalchemy.future import select + +from lbry.db.queries.txio import ( + minimum_txo_columns, row_to_txo, + where_unspent_txos, where_claims_with_changed_supports, + count_unspent_txos, where_channels_with_changed_content, + where_abandoned_claims +) +from lbry.db.query_context import ProgressContext, event_emitter +from lbry.db.tables import TX, TXO, Claim, Support +from lbry.db.utils import least +from lbry.db.constants import TXO_TYPES +from lbry.blockchain.transaction import Output + +from .context import get_or_initialize_lbrycrd + + +log = logging.getLogger(__name__) + + +def channel_content_count_calc(signable): + return ( + select(func.count(signable.c.claim_hash)) + .where((signable.c.channel_hash == Claim.c.claim_hash) & signable.c.is_signature_valid) + .scalar_subquery() + ) + + +support = TXO.alias('support') + + +def staked_support_aggregation(aggregate): + return ( + select(aggregate).where( + (support.c.txo_type == TXO_TYPES['support']) & + (support.c.spent_height == 0) + ).scalar_subquery() + ) + + +def staked_support_amount_calc(other): + return ( + staked_support_aggregation(func.coalesce(func.sum(support.c.amount), 0)) + .where(support.c.claim_hash == other.c.claim_hash) + ) + + +def staked_support_count_calc(other): + return ( + staked_support_aggregation(func.coalesce(func.count('*'), 0)) + .where(support.c.claim_hash == other.c.claim_hash) + ) + + +def make_label(action, blocks): + if blocks[0] == blocks[-1]: + return f"{action} {blocks[0]}" + else: + return f"{action} {blocks[0]}-{blocks[-1]}" + + +def select_claims_for_saving( + txo_types: Union[int, Tuple[int, ...]], + blocks: Tuple[int, int], + missing_in_claims_table=False, + missing_or_stale_in_claims_table=False, +): + select_claims = select( + *minimum_txo_columns, TXO.c.claim_hash, + staked_support_amount_calc(TXO).label('staked_support_amount'), + 
staked_support_count_calc(TXO).label('staked_support_count') + ).where( + where_unspent_txos( + txo_types, blocks, + missing_in_claims_table=missing_in_claims_table, + missing_or_stale_in_claims_table=missing_or_stale_in_claims_table, + ) + ) + if txo_types != TXO_TYPES['channel']: + channel_txo = TXO.alias('channel_txo') + channel_claim = Claim.alias('channel_claim') + return ( + select_claims.add_columns( + TXO.c.signature, TXO.c.signature_digest, + case([( + TXO.c.channel_hash.isnot(None), + select(channel_txo.c.public_key).select_from(channel_txo).where( + (channel_txo.c.txo_type == TXO_TYPES['channel']) & + (channel_txo.c.claim_hash == TXO.c.channel_hash) & + (channel_txo.c.height <= TXO.c.height) + ).order_by(desc(channel_txo.c.height)).limit(1).scalar_subquery() + )]).label('channel_public_key'), + channel_claim.c.short_url.label('channel_url') + ).select_from( + TXO.join(TX).join( + channel_claim, channel_claim.c.claim_hash == TXO.c.channel_hash, isouter=True + ) + ) + ) + return select_claims.select_from(TXO.join(TX)) + + +def row_to_claim_for_saving(row) -> Tuple[Output, dict]: + txo = row_to_txo(row) + extra = { + 'staked_support_amount': int(row.staked_support_amount), + 'staked_support_count': int(row.staked_support_count), + } + if hasattr(row, 'signature'): + extra.update({ + 'signature': row.signature, + 'signature_digest': row.signature_digest, + 'channel_public_key': row.channel_public_key, + 'channel_url': row.channel_url + }) + return txo, extra + + +@event_emitter("blockchain.sync.claims.insert", "claims") +def claims_insert( + txo_types: Union[int, Tuple[int, ...]], + blocks: Tuple[int, int], + missing_in_claims_table: bool, + p: ProgressContext +): + chain = get_or_initialize_lbrycrd(p.ctx) + + p.start( + count_unspent_txos( + txo_types, blocks, + missing_in_claims_table=missing_in_claims_table, + ), progress_id=blocks[0], label=make_label("add claims at", blocks) + ) + + with p.ctx.engine.connect().execution_options(stream_results=True) as c: + loader = p.ctx.get_bulk_loader() + cursor = c.execute(select_claims_for_saving( + txo_types, blocks, missing_in_claims_table=missing_in_claims_table + ).order_by(TXO.c.claim_hash)) + for rows in cursor.partitions(900): + claim_metadata = iter(chain.db.sync_get_claim_metadata( + claim_hashes=[row['claim_hash'] for row in rows] + )) + for row in rows: + metadata = next(claim_metadata, None) + if metadata is None or metadata['claim_hash'] != row.claim_hash: + log.error( + r"During sync'ing a claim in our db couldn't find a " + r"match in lbrycrd's db. This could be because lbrycrd " + r"moved a block forward and updated its own claim table " + r"while we were still on a previous block, or it could be " + r"a more fundamental issue... 
¯\_(ツ)_/¯" + ) + if metadata is None: + break + if metadata['claim_hash'] != row.claim_hash: + continue + txo, extra = row_to_claim_for_saving(row) + extra.update({ + 'short_url': metadata['short_url'], + 'creation_height': metadata['creation_height'], + 'activation_height': metadata['activation_height'], + 'expiration_height': metadata['expiration_height'], + 'takeover_height': metadata['takeover_height'], + }) + loader.add_claim(txo, **extra) + if len(loader.claims) >= 25_000: + p.add(loader.flush(Claim)) + p.add(loader.flush(Claim)) + + +@event_emitter("blockchain.sync.claims.update", "claims") +def claims_update(txo_types: Union[int, Tuple[int, ...]], blocks: Tuple[int, int], p: ProgressContext): + p.start( + count_unspent_txos(txo_types, blocks, missing_or_stale_in_claims_table=True), + progress_id=blocks[0], label=make_label("update claims at", blocks) + ) + with p.ctx.engine.connect().execution_options(stream_results=True) as c: + loader = p.ctx.get_bulk_loader() + cursor = c.execute(select_claims_for_saving( + txo_types, blocks, missing_or_stale_in_claims_table=True + )) + for row in cursor: + txo, extra = row_to_claim_for_saving(row) + loader.update_claim(txo, **extra) + if len(loader.update_claims) >= 500: + p.add(loader.flush(Claim)) + p.add(loader.flush(Claim)) + + +@event_emitter("blockchain.sync.claims.delete", "claims") +def claims_delete(claims, p: ProgressContext): + p.start(claims, label="delete claims") + deleted = p.ctx.execute(Claim.delete().where(where_abandoned_claims())) + p.step(deleted.rowcount) + + +@event_emitter("blockchain.sync.claims.takeovers", "claims") +def update_takeovers(blocks: Tuple[int, int], takeovers, p: ProgressContext): + p.start(takeovers, label="winning") + chain = get_or_initialize_lbrycrd(p.ctx) + for takeover in chain.db.sync_get_takeovers(start_height=blocks[0], end_height=blocks[-1]): + update_claims = ( + Claim.update() + .where(Claim.c.normalized == takeover['normalized']) + .values( + is_controlling=case( + [(Claim.c.claim_hash == takeover['claim_hash'], True)], + else_=False + ), + takeover_height=case( + [(Claim.c.claim_hash == takeover['claim_hash'], takeover['height'])], + else_=None + ), + activation_height=least(Claim.c.activation_height, takeover['height']), + ) + ) + result = p.ctx.execute(update_claims) + p.add(result.rowcount) + + +@event_emitter("blockchain.sync.claims.stakes", "claims") +def update_stakes(blocks: Tuple[int, int], claims: int, p: ProgressContext): + p.start(claims) + sql = ( + Claim.update() + .where(where_claims_with_changed_supports(blocks)) + .values( + staked_support_amount=staked_support_amount_calc(Claim), + staked_support_count=staked_support_count_calc(Claim), + ) + ) + result = p.ctx.execute(sql) + p.step(result.rowcount) + + +@event_emitter("blockchain.sync.claims.channels", "channels") +def update_channel_stats(blocks: Tuple[int, int], initial_sync: int, channels: int, p: ProgressContext): + p.start(channels, label="channel stats") + update_sql = Claim.update().values( + signed_claim_count=channel_content_count_calc(Claim.alias('content')), + signed_support_count=channel_content_count_calc(Support), + ) + if initial_sync: + update_sql = update_sql.where(Claim.c.claim_type == TXO_TYPES['channel']) + else: + update_sql = update_sql.where(where_channels_with_changed_content(blocks)) + result = p.ctx.execute(update_sql) + p.step(result.rowcount) diff --git a/lbry/blockchain/sync/context.py b/lbry/blockchain/sync/context.py new file mode 100644 index 000000000..88e2fc913 --- /dev/null +++ 
b/lbry/blockchain/sync/context.py @@ -0,0 +1,17 @@ +from contextvars import ContextVar +from lbry.db import query_context + +from lbry.blockchain.lbrycrd import Lbrycrd + + +_chain: ContextVar[Lbrycrd] = ContextVar('chain') + + +def get_or_initialize_lbrycrd(ctx=None) -> Lbrycrd: + chain = _chain.get(None) + if chain is not None: + return chain + chain = Lbrycrd((ctx or query_context.context()).ledger) + chain.db.sync_open() + _chain.set(chain) + return chain diff --git a/lbry/blockchain/sync/steps.py b/lbry/blockchain/sync/steps.py deleted file mode 100644 index f6189db9b..000000000 --- a/lbry/blockchain/sync/steps.py +++ /dev/null @@ -1,526 +0,0 @@ -# pylint: disable=singleton-comparison -import logging -from contextvars import ContextVar -from functools import partial -from typing import Optional, Tuple - -from sqlalchemy import table, bindparam, case, distinct, text, func, between, desc -from sqlalchemy.future import select -from sqlalchemy.schema import CreateTable - -from lbry.db import queries -from lbry.db.tables import ( - Block as BlockTable, TX, TXO, TXI, Claim, Support, - pg_add_txo_constraints_and_indexes, pg_add_txi_constraints_and_indexes -) -from lbry.db.query_context import ProgressContext, context, event_emitter -from lbry.db.queries import rows_to_txos -from lbry.db.sync import ( - select_missing_supports, - condition_spent_claims, - condition_spent_supports, condition_missing_supports, - set_input_addresses, update_spent_outputs, -) -from lbry.db.utils import least -from lbry.db.constants import TXO_TYPES, CLAIM_TYPE_CODES - -from lbry.blockchain.lbrycrd import Lbrycrd -from lbry.blockchain.block import Block, create_block_filter -from lbry.blockchain.bcd_data_stream import BCDataStream -from lbry.blockchain.transaction import Output, OutputScript, TXRefImmutable - - -log = logging.getLogger(__name__) -_chain: ContextVar[Lbrycrd] = ContextVar('chain') - - -def get_or_initialize_lbrycrd(ctx=None) -> Lbrycrd: - chain = _chain.get(None) - if chain is not None: - return chain - chain = Lbrycrd((ctx or context()).ledger) - chain.db.sync_open() - _chain.set(chain) - return chain - - -def process_block_file(block_file_number: int, starting_height: int): - ctx = context() - loader = ctx.get_bulk_loader() - last_block_processed = process_block_read(block_file_number, starting_height, loader) - process_block_save(block_file_number, loader) - return last_block_processed - - -@event_emitter("blockchain.sync.block.read", "blocks", step_size=100) -def process_block_read(block_file_number: int, starting_height: int, loader, p: ProgressContext): - chain = get_or_initialize_lbrycrd(p.ctx) - stop = p.ctx.stop_event - new_blocks = chain.db.sync_get_blocks_in_file(block_file_number, starting_height) - if not new_blocks: - return -1 - done, total, last_block_processed = 0, len(new_blocks), -1 - block_file_path = chain.get_block_file_path_from_number(block_file_number) - p.start(total, {'block_file': block_file_number}) - with open(block_file_path, 'rb') as fp: - stream = BCDataStream(fp=fp) - for done, block_info in enumerate(new_blocks, start=1): - if stop.is_set(): - return -1 - block_height = block_info['height'] - fp.seek(block_info['data_offset']) - block = Block.from_data_stream(stream, block_height, block_file_number) - loader.add_block(block) - last_block_processed = block_height - p.step(done) - return last_block_processed - - -@event_emitter("blockchain.sync.block.save", "txs") -def process_block_save(block_file_number: int, loader, p: ProgressContext): - p.extra = {'block_file': 
block_file_number} - loader.save(TX) - - -@event_emitter("blockchain.sync.block.filters", "blocks") -def process_block_filters(p: ProgressContext): - blocks = [] - all_filters = [] - all_addresses = [] - for block in queries.get_blocks_without_filters(): - addresses = { - p.ctx.ledger.address_to_hash160(r['address']) - for r in queries.get_block_tx_addresses(block_hash=block['block_hash']) - } - all_addresses.extend(addresses) - block_filter = create_block_filter(addresses) - all_filters.append(block_filter) - blocks.append({'pk': block['block_hash'], 'block_filter': block_filter}) - # filters = [get_block_filter(f) for f in all_filters] - p.ctx.execute(BlockTable.update().where(BlockTable.c.block_hash == bindparam('pk')), blocks) - -# txs = [] -# for tx in queries.get_transactions_without_filters(): -# tx_filter = create_block_filter( -# {r['address'] for r in queries.get_block_tx_addresses(tx_hash=tx['tx_hash'])} -# ) -# txs.append({'pk': tx['tx_hash'], 'tx_filter': tx_filter}) -# execute(TX.update().where(TX.c.tx_hash == bindparam('pk')), txs) - - -@event_emitter("blockchain.sync.spends", "steps") -def process_spends(initial_sync: bool, p: ProgressContext): - - step = 0 - - def next_step(): - nonlocal step - step += 1 - return step - - if initial_sync: - p.start(9) - else: - p.start(2) - - if initial_sync: - # A. add tx constraints - if p.ctx.is_postgres: - p.ctx.execute(text("ALTER TABLE tx ADD PRIMARY KEY (tx_hash);")) - p.step(next_step()) - - # 1. Update TXIs to have the address of TXO they are spending. - if initial_sync: - # B. txi table reshuffling - p.ctx.execute(text("ALTER TABLE txi RENAME TO old_txi;")) - p.ctx.execute(CreateTable(TXI, include_foreign_key_constraints=[])) - if p.ctx.is_postgres: - p.ctx.execute(text("ALTER TABLE txi DROP CONSTRAINT txi_pkey;")) - p.step(next_step()) - # C. insert - old_txi = TXI.alias('old_txi') - columns = [c for c in old_txi.columns if c.name != 'address'] + [TXO.c.address] - select_txis = select(*columns).select_from(old_txi.join(TXO)) - insert_txis = TXI.insert().from_select(columns, select_txis) - p.ctx.execute(text( - str(insert_txis.compile(p.ctx.engine)).replace('txi AS old_txi', 'old_txi') - )) - p.step(next_step()) - # D. drop old txi and vacuum - p.ctx.execute(text("DROP TABLE old_txi;")) - if p.ctx.is_postgres: - with p.ctx.engine.connect() as c: - c.execute(text("COMMIT;")) - c.execute(text("VACUUM ANALYZE txi;")) - p.step(next_step()) - # E. restore integrity constraint - if p.ctx.is_postgres: - pg_add_txi_constraints_and_indexes(p.ctx.execute) - p.step(next_step()) - else: - set_input_addresses(p.ctx) - p.step(next_step()) - - # 2. Update spent TXOs setting spent_height - if initial_sync: - # F. txo table reshuffling - p.ctx.execute(text("ALTER TABLE txo RENAME TO old_txo;")) - p.ctx.execute(CreateTable(TXO, include_foreign_key_constraints=[])) - if p.ctx.is_postgres: - p.ctx.execute(text("ALTER TABLE txo DROP CONSTRAINT txo_pkey;")) - p.step(next_step()) - # G. insert - old_txo = table('old_txo', *(c.copy() for c in TXO.columns)) - columns = [c for c in old_txo.columns if c.name != 'spent_height'] - select_columns = columns + [func.coalesce(TXI.c.height, 0).label('spent_height')] - insert_columns = columns + [TXO.c.spent_height] - join_txo_on_txi = old_txo.join(TXI, old_txo.c.txo_hash == TXI.c.txo_hash, isouter=True) - select_txos = (select(*select_columns).select_from(join_txo_on_txi)) - insert_txos = TXO.insert().from_select(insert_columns, select_txos) - p.ctx.execute(insert_txos) - p.step(next_step()) - # H. 
drop old txo - p.ctx.execute(text("DROP TABLE old_txo;")) - if p.ctx.is_postgres: - with p.ctx.engine.connect() as c: - c.execute(text("COMMIT;")) - c.execute(text("VACUUM ANALYZE txo;")) - p.step(next_step()) - # I. restore integrity constraint - if p.ctx.is_postgres: - pg_add_txo_constraints_and_indexes(p.ctx.execute) - p.step(next_step()) - else: - update_spent_outputs(p.ctx) - p.step(next_step()) - - -def insert_claims_with_lbrycrd(done, chain, p: ProgressContext, cursor): - loader = p.ctx.get_bulk_loader() - for rows in cursor.partitions(900): - claim_metadata = iter(chain.db.sync_get_claim_metadata(claim_hashes=[row['claim_hash'] for row in rows])) - for row in rows: - metadata = next(claim_metadata, None) - if metadata is None or metadata['claim_hash'] != row.claim_hash: - log.error( - r"During sync'ing a claim in our db couldn't find a " - r"match in lbrycrd's db. This could be because lbrycrd " - r"moved a block forward and updated its own claim table " - r"while we were still on a previous block, or it could be " - r"a more fundamental issue... ¯\_(ツ)_/¯" - ) - if metadata is None: - break - if metadata['claim_hash'] != row.claim_hash: - continue - txo = Output( - amount=row.amount, - script=OutputScript(row.src), - tx_ref=TXRefImmutable.from_hash(row.tx_hash, row.height), - position=row.txo_position, - ) - extra = { - 'timestamp': row.timestamp, - 'staked_support_amount': int(row.staked_support_amount), - 'staked_support_count': int(row.staked_support_count), - 'short_url': metadata['short_url'], - 'creation_height': metadata['creation_height'], - 'activation_height': metadata['activation_height'], - 'expiration_height': metadata['expiration_height'], - 'takeover_height': metadata['takeover_height'], - } - if hasattr(row, 'signature'): - extra.update({ - 'signature': row.signature, - 'signature_digest': row.signature_digest, - 'channel_public_key': row.channel_public_key, - 'channel_url': row.channel_url - }) - loader.add_claim(txo, **extra) - if len(loader.claims) >= 10_000: - done += loader.flush(Claim) - p.step(done) - done += loader.flush(Claim) - p.step(done) - return done - - -def channel_content_count_calc(signable): - return ( - select(func.count('*')) - .select_from(signable) - .where((signable.c.channel_hash == Claim.c.claim_hash) & signable.c.is_signature_valid) - .scalar_subquery() - ) - - -@event_emitter("blockchain.sync.claims", "claims") -def process_claims(starting_height: int, blocks_added: Optional[Tuple[int, int]], p: ProgressContext): - chain = get_or_initialize_lbrycrd(p.ctx) - initial_sync = not p.ctx.has_records(Claim) - to_be_modified = p.ctx.fetchtotal( - (TXO.c.txo_type.in_(CLAIM_TYPE_CODES)) & - (TXO.c.spent_height == 0) & - (TXO.c.txo_hash.notin_(select(Claim.c.txo_hash))) - ) - to_be_deleted = to_be_synced = to_be_overtaken = to_be_counted_channel_members = 0 - condition_changed_stakes = condition_changed_channel_content = None - if initial_sync: - to_be_counted_channel_members = p.ctx.fetchtotal( - (TXO.c.txo_type == TXO_TYPES['channel']) & - (TXO.c.spent_height == 0) - ) - else: - to_be_deleted = p.ctx.fetchtotal(condition_spent_claims()) - if blocks_added: - condition_changed_stakes = ( - (TXO.c.txo_type == TXO_TYPES['support']) & ( - between(TXO.c.height, blocks_added[0], blocks_added[-1]) | - between(TXO.c.spent_height, blocks_added[0], blocks_added[-1]) - ) - ) - sql = ( - select(func.count(distinct(TXO.c.claim_hash)).label('total')) - .where(condition_changed_stakes) - ) - to_be_synced = p.ctx.fetchone(sql)['total'] - - 
condition_changed_channel_content = ( - (TXO.c.channel_hash != None) & ( - between(TXO.c.height, blocks_added[0], blocks_added[-1]) | - between(TXO.c.spent_height, blocks_added[0], blocks_added[-1]) - ) - ) - sql = ( - select(func.count(distinct(TXO.c.channel_hash)).label('total')) - .where(condition_changed_channel_content) - ) - to_be_synced += p.ctx.fetchone(sql)['total'] - - to_be_overtaken = chain.db.sync_get_takeover_count( - start_height=blocks_added[0], end_height=blocks_added[-1]) - - p.start(to_be_deleted + to_be_modified + to_be_synced + to_be_overtaken + to_be_counted_channel_members) - - done = 0 - - if to_be_deleted: - deleted = p.ctx.execute(Claim.delete().where(condition_spent_claims())) - assert to_be_deleted == deleted.rowcount, \ - f"Expected claims to be deleted {to_be_deleted}, actual deleted {deleted.rowcount}." - done += deleted.rowcount - p.step(done) - - support = TXO.alias('support') - staked_support_amount_calc = ( - select(func.coalesce(func.sum(support.c.amount), 0)).where( - (support.c.txo_type == TXO_TYPES['support']) & - (support.c.spent_height == 0) - ) - ) - staked_support_count_calc = ( - select(func.coalesce(func.count('*'), 0)).where( - (support.c.txo_type == TXO_TYPES['support']) & - (support.c.spent_height == 0) - ) - ) - select_claims = ( - select( - TXO.c.claim_hash, TXO.c.amount, TXO.c.position.label('txo_position'), - TX.c.tx_hash, TX.c.height, TX.c.timestamp, - func.substr(TX.c.raw, TXO.c.script_offset+1, TXO.c.script_length).label('src'), - (staked_support_amount_calc - .where(support.c.claim_hash == TXO.c.claim_hash) - .label('staked_support_amount')), - (staked_support_count_calc - .where(support.c.claim_hash == TXO.c.claim_hash) - .label('staked_support_count')) - ).order_by(TXO.c.claim_hash) - ) - - with p.ctx.engine.connect().execution_options(stream_results=True) as c: - # all channels need to be inserted first because channel short_url will needed to - # set the contained claims canonical_urls when those are inserted next - done = insert_claims_with_lbrycrd( - done, chain, p, c.execute( - select_claims.select_from(TXO.join(TX)).where( - (TXO.c.txo_type == TXO_TYPES['channel']) & - (TXO.c.spent_height == 0) & - (TXO.c.claim_hash.notin_(select(Claim.c.claim_hash))) - ) - ) - ) - - channel_txo = TXO.alias('channel_txo') - channel_claim = Claim.alias('channel_claim') - select_claims = ( - select_claims.add_columns( - TXO.c.signature, TXO.c.signature_digest, - case([( - TXO.c.channel_hash != None, - select(channel_txo.c.public_key).select_from(channel_txo).where( - (channel_txo.c.txo_type == TXO_TYPES['channel']) & - (channel_txo.c.claim_hash == TXO.c.channel_hash) & - (channel_txo.c.height <= TXO.c.height) - ).order_by(desc(channel_txo.c.height)).limit(1).scalar_subquery() - )]).label('channel_public_key'), - channel_claim.c.short_url.label('channel_url') - ).select_from( - TXO - .join(TX) - .join(channel_claim, channel_claim.c.claim_hash == TXO.c.channel_hash, isouter=True) - ) - ) - - with p.ctx.engine.connect().execution_options(stream_results=True) as c: - done = insert_claims_with_lbrycrd( - done, chain, p, c.execute( - select_claims.where( - (TXO.c.txo_type.in_(list(set(CLAIM_TYPE_CODES) - {TXO_TYPES['channel']}))) & - (TXO.c.spent_height == 0) & - (TXO.c.claim_hash.notin_(select(Claim.c.claim_hash))) - ) - ) - ) - - if initial_sync: - channel_update_member_count_sql = ( - Claim.update() - .where(Claim.c.claim_type == TXO_TYPES['channel']) - .values( - signed_claim_count=channel_content_count_calc(Claim.alias('content')), - 
signed_support_count=channel_content_count_calc(Support), - ) - ) - result = p.ctx.execute(channel_update_member_count_sql) - done += result.rowcount - p.step(done) - - if initial_sync: - return - - select_stale_claims = select_claims.where( - (TXO.c.txo_type.in_(CLAIM_TYPE_CODES)) & - (TXO.c.spent_height == 0) & - (TXO.c.txo_hash.notin_(select(Claim.c.txo_hash))) - ) - loader = p.ctx.get_bulk_loader() - for row in p.ctx.connection.execution_options(stream_results=True).execute(select_stale_claims): - txo = Output( - amount=row['amount'], - script=OutputScript(row['src']), - tx_ref=TXRefImmutable.from_hash(row['tx_hash'], row['height']), - position=row['txo_position'], - ) - loader.update_claim( - txo, channel_url=row['channel_url'], timestamp=row['timestamp'], - staked_support_amount=int(row['staked_support_amount']), - staked_support_count=int(row['staked_support_count']), - signature=row['signature'], signature_digest=row['signature_digest'], - channel_public_key=row['channel_public_key'], - ) - if len(loader.update_claims) >= 1000: - done += loader.flush(Claim) - p.step(done) - done += loader.flush(Claim) - p.step(done) - - for takeover in chain.db.sync_get_takeovers(start_height=blocks_added[0], end_height=blocks_added[-1]): - update_claims = ( - Claim.update() - .where(Claim.c.normalized == takeover['normalized']) - .values( - is_controlling=case( - [(Claim.c.claim_hash == takeover['claim_hash'], True)], - else_=False - ), - takeover_height=case( - [(Claim.c.claim_hash == takeover['claim_hash'], takeover['height'])], - else_=None - ), - activation_height=least(Claim.c.activation_height, takeover['height']), - ) - ) - result = p.ctx.execute(update_claims) - done += result.rowcount - p.step(done) - - channel_update_member_count_sql = ( - Claim.update() - .where( - (Claim.c.claim_type == TXO_TYPES['channel']) & - Claim.c.claim_hash.in_(select(TXO.c.channel_hash).where(condition_changed_channel_content)) - ).values( - signed_claim_count=channel_content_count_calc(Claim.alias('content')), - signed_support_count=channel_content_count_calc(Support), - ) - ) - p.ctx.execute(channel_update_member_count_sql) - - claim_update_supports_sql = ( - Claim.update() - .where(Claim.c.claim_hash.in_(select(TXO.c.claim_hash).where(condition_changed_stakes))) - .values( - staked_support_amount=( - staked_support_amount_calc - .where(support.c.claim_hash == Claim.c.claim_hash) - .scalar_subquery()) - , - staked_support_count=( - staked_support_count_calc - .where(support.c.claim_hash == Claim.c.claim_hash) - .scalar_subquery() - ), - ) - ) - result = p.ctx.execute(claim_update_supports_sql) - p.step(done+result.rowcount) - - -@event_emitter("blockchain.sync.supports", "supports") -def process_supports(starting_height: int, blocks_added: Optional[Tuple[int, int]], p: ProgressContext): - done = 0 - to_be_deleted = p.ctx.fetchtotal(condition_spent_supports) - to_be_inserted = p.ctx.fetchtotal(condition_missing_supports) - p.start(to_be_deleted + to_be_inserted) - - sql = Support.delete().where(condition_spent_supports) - deleted = p.ctx.execute(sql) - assert to_be_deleted == deleted.rowcount,\ - f"Expected supports to be deleted {to_be_deleted}, actual deleted {deleted.rowcount}." 
- done += deleted.rowcount - p.step(done) - - if p.ctx.is_postgres: - insert_supports = partial(p.ctx.pg_copy, Support) - else: - insert_supports = partial(p.ctx.execute, Support.insert()) - loader = p.ctx.get_bulk_loader() - inserted_supports, supports = 0, [] - for txo in rows_to_txos(p.ctx.fetchall(select_missing_supports)): - supports.append(loader.support_to_row(txo)) - if len(supports) >= 50_000: - insert_supports(supports) - inserted_supports += len(supports) - supports = [] - if supports: - insert_supports(supports) - inserted_supports += len(supports) - assert to_be_inserted == inserted_supports, \ - f"Expected supports to be inserted {to_be_inserted}, actual inserted {inserted_supports}." - return - - p.start(get_unvalidated_signable_count(p.ctx, Support)) - support_updates = [] - for support in p.ctx.execute(select_unvalidated_signables(Support, Support.c.txo_hash)): - support_updates.append( - signature_validation({'pk': support['txo_hash']}, support, support['public_key']) - ) - if changes is not None: - changes.channels_with_changed_content.add(support['channel_hash']) - if len(support_updates) > 1000: - p.ctx.execute(Support.update().where(Support.c.txo_hash == bindparam('pk')), support_updates) - p.step(len(support_updates)) - support_updates.clear() - if support_updates: - p.ctx.execute(Support.update().where(Support.c.txo_hash == bindparam('pk')), support_updates) diff --git a/lbry/blockchain/sync/supports.py b/lbry/blockchain/sync/supports.py new file mode 100644 index 000000000..737cca53b --- /dev/null +++ b/lbry/blockchain/sync/supports.py @@ -0,0 +1,70 @@ +import logging +from typing import Tuple + +from sqlalchemy import case, desc +from sqlalchemy.future import select + +from lbry.db.tables import TX, TXO, Support +from lbry.db.query_context import ProgressContext, event_emitter +from lbry.db.queries import row_to_txo +from lbry.db.constants import TXO_TYPES +from lbry.db.queries.txio import ( + minimum_txo_columns, + where_unspent_txos, where_abandoned_supports, + count_unspent_txos, +) + +from .claims import make_label + + +log = logging.getLogger(__name__) + + +@event_emitter("blockchain.sync.supports.insert", "supports") +def supports_insert(blocks: Tuple[int, int], missing_in_supports_table: bool, p: ProgressContext): + p.start( + count_unspent_txos( + TXO_TYPES['support'], blocks, + missing_in_supports_table=missing_in_supports_table, + ), progress_id=blocks[0], label=make_label("add supports at", blocks) + ) + channel_txo = TXO.alias('channel_txo') + select_supports = select( + *minimum_txo_columns, TXO.c.claim_hash, + TXO.c.signature, TXO.c.signature_digest, + case([( + TXO.c.channel_hash.isnot(None), + select(channel_txo.c.public_key).select_from(channel_txo).where( + (channel_txo.c.txo_type == TXO_TYPES['channel']) & + (channel_txo.c.claim_hash == TXO.c.channel_hash) & + (channel_txo.c.height <= TXO.c.height) + ).order_by(desc(channel_txo.c.height)).limit(1).scalar_subquery() + )]).label('channel_public_key'), + ).select_from( + TXO.join(TX) + ).where( + where_unspent_txos( + TXO_TYPES['support'], blocks, + missing_in_supports_table=missing_in_supports_table, + ) + ) + with p.ctx.engine.connect().execution_options(stream_results=True) as c: + loader = p.ctx.get_bulk_loader() + for row in c.execute(select_supports): + txo = row_to_txo(row) + loader.add_support( + txo, + signature=row.signature, + signature_digest=row.signature_digest, + channel_public_key=row.channel_public_key + ) + if len(loader.supports) >= 25_000: + p.add(loader.flush(Support)) + 
p.add(loader.flush(Support)) + + +@event_emitter("blockchain.sync.supports.delete", "supports") +def supports_delete(supports, p: ProgressContext): + p.start(supports, label="delete supports") + deleted = p.ctx.execute(Support.delete().where(where_abandoned_supports())) + p.step(deleted.rowcount) diff --git a/lbry/blockchain/sync/synchronizer.py b/lbry/blockchain/sync/synchronizer.py index 00ad8fc08..9c5073f18 100644 --- a/lbry/blockchain/sync/synchronizer.py +++ b/lbry/blockchain/sync/synchronizer.py @@ -1,37 +1,55 @@ import os import asyncio import logging -from typing import Optional, Tuple +from functools import partial +from typing import Optional, Tuple, Set, List, Coroutine from lbry.db import Database -from lbry.db.query_context import Event +from lbry.db import queries as q +from lbry.db.constants import TXO_TYPES, CONTENT_TYPE_CODES +from lbry.db.query_context import Event, Progress from lbry.event import BroadcastSubscription from lbry.service.base import Sync, BlockEvent from lbry.blockchain.lbrycrd import Lbrycrd -from . import steps +from . import blocks as block_phase, claims as claim_phase, supports as support_phase log = logging.getLogger(__name__) +BLOCK_INIT_EVENT = Event.add("blockchain.sync.block.init", "steps") +BLOCK_MAIN_EVENT = Event.add("blockchain.sync.block.main", "blocks", "txs") +FILTER_INIT_EVENT = Event.add("blockchain.sync.filter.init", "steps") +FILTER_MAIN_EVENT = Event.add("blockchain.sync.filter.main", "blocks") +CLAIM_INIT_EVENT = Event.add("blockchain.sync.claims.init", "steps") +CLAIM_MAIN_EVENT = Event.add("blockchain.sync.claims.main", "claims") +SUPPORT_INIT_EVENT = Event.add("blockchain.sync.supports.init", "steps") +SUPPORT_MAIN_EVENT = Event.add("blockchain.sync.supports.main", "supports") +TREND_INIT_EVENT = Event.add("blockchain.sync.trends.init", "steps") +TREND_MAIN_EVENT = Event.add("blockchain.sync.trends.main", "blocks") + class BlockchainSync(Sync): + TX_FLUSH_SIZE = 20_000 # flush to db after processing this many TXs and update progress + FILTER_CHUNK_SIZE = 100_000 # split filter generation tasks into this size block chunks + FILTER_FLUSH_SIZE = 10_000 # flush to db after processing this many filters and update progress + CLAIM_CHUNK_SIZE = 50_000 # split claim sync tasks into this size block chunks + CLAIM_FLUSH_SIZE = 10_000 # flush to db after processing this many claims and update progress + SUPPORT_CHUNK_SIZE = 50_000 # split support sync tasks into this size block chunks + SUPPORT_FLUSH_SIZE = 10_000 # flush to db after processing this many supports and update progress + def __init__(self, chain: Lbrycrd, db: Database): super().__init__(chain.ledger, db) self.chain = chain + self.pid = os.getpid() self.on_block_subscription: Optional[BroadcastSubscription] = None self.advance_loop_task: Optional[asyncio.Task] = None self.advance_loop_event = asyncio.Event() async def start(self): - for _ in range(1): # range(2): - # initial sync can take a long time, new blocks may have been - # created while sync was running; therefore, run a second sync - # after first one finishes to possibly sync those new blocks. - # run advance as a task so that it can be stop()'ed if necessary. 
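A rough sketch of the wiring this refactor introduces, using hypothetical event and function names (the real phases live in blocks.py, claims.py and supports.py above): each phase is a plain function decorated with @event_emitter, which injects the trailing ProgressContext when the function runs on the database executor, while the coordinator reports overall progress through Progress(...) blocks bound to the *.init/*.main events registered above.

    from lbry.db.query_context import Event, Progress, ProgressContext, event_emitter

    # hypothetical event and phase, for illustration only
    DEMO_MAIN_EVENT = Event.add("blockchain.sync.demo.main", "items")

    @event_emitter("blockchain.sync.demo.task", "items")
    def demo_task(batch_size: int, p: ProgressContext):
        p.start(batch_size, label=f"demo batch of {batch_size}")
        # a real phase would do its work here, calling p.step()/p.add() as it goes
        p.step(batch_size)

    # coordinator side (inside BlockchainSync), roughly:
    #     with Progress(self.db.message_queue, DEMO_MAIN_EVENT) as p:
    #         p.start(total_items)
    #         await self.run_tasks([self.db.run(demo_task, n) for n in batch_sizes])
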
- self.advance_loop_task = asyncio.create_task(self.advance()) - await self.advance_loop_task + self.advance_loop_task = asyncio.create_task(self.advance()) + await self.advance_loop_task self.chain.subscribe() self.advance_loop_task = asyncio.create_task(self.advance_loop()) self.on_block_subscription = self.chain.on_block.listen( @@ -46,50 +64,7 @@ class BlockchainSync(Sync): if self.advance_loop_task is not None: self.advance_loop_task.cancel() - async def run(self, f, *args): - return await asyncio.get_running_loop().run_in_executor( - self.db.executor, f, *args - ) - - async def load_blocks(self) -> Optional[Tuple[int, int]]: - tasks = [] - starting_height, ending_height = None, await self.chain.db.get_best_height() - tx_count = block_count = 0 - for chain_file in await self.chain.db.get_block_files(): - # block files may be read and saved out of order, need to check - # each file individually to see if we have missing blocks - our_best_file_height = await self.db.get_best_block_height_for_file(chain_file['file_number']) - if our_best_file_height == chain_file['best_height']: - # we have all blocks in this file, skipping - continue - if -1 < our_best_file_height < chain_file['best_height']: - # we have some blocks, need to figure out what we're missing - # call get_block_files again limited to this file and current_height - chain_file = (await self.chain.db.get_block_files( - file_number=chain_file['file_number'], start_height=our_best_file_height+1 - ))[0] - tx_count += chain_file['txs'] - block_count += chain_file['blocks'] - starting_height = min( - our_best_file_height+1 if starting_height is None else starting_height, our_best_file_height+1 - ) - tasks.append(self.run( - steps.process_block_file, chain_file['file_number'], our_best_file_height+1 - )) - if not tasks: - return - await self._on_progress_controller.add({ - "event": "blockchain.sync.start", - "data": { - "starting_height": starting_height, - "ending_height": ending_height, - "files": len(tasks), - "blocks": block_count, - "txs": tx_count, - "claims": await self.chain.db.get_claim_metadata_count(starting_height, ending_height), - "supports": await self.chain.db.get_support_metadata_count(starting_height, ending_height), - } - }) + async def run_tasks(self, tasks: List[Coroutine]) -> Optional[Set[asyncio.Future]]: done, pending = await asyncio.wait( tasks, return_when=asyncio.FIRST_EXCEPTION ) @@ -100,23 +75,245 @@ class BlockchainSync(Sync): for future in done: future.result() return - best_height_processed = max(f.result() for f in done) - return starting_height, best_height_processed + return done + + async def get_best_block_height_for_file(self, file_number) -> int: + return await self.db.run( + block_phase.get_best_block_height_for_file, file_number + ) + + async def sync_blocks(self) -> Optional[Tuple[int, int]]: + tasks = [] + starting_height = None + tx_count = block_count = 0 + with Progress(self.db.message_queue, BLOCK_INIT_EVENT) as p: + ending_height = await self.chain.db.get_best_height() + for chain_file in p.iter(await self.chain.db.get_block_files()): + # block files may be read and saved out of order, need to check + # each file individually to see if we have missing blocks + our_best_file_height = await self.get_best_block_height_for_file( + chain_file['file_number'] + ) + if our_best_file_height == chain_file['best_height']: + # we have all blocks in this file, skipping + continue + if -1 < our_best_file_height < chain_file['best_height']: + # we have some blocks, need to figure out what we're 
missing + # call get_block_files again limited to this file and current_height + chain_file = (await self.chain.db.get_block_files( + file_number=chain_file['file_number'], start_height=our_best_file_height+1, + ))[0] + tx_count += chain_file['txs'] + block_count += chain_file['blocks'] + starting_height = min( + our_best_file_height+1 if starting_height is None else starting_height, our_best_file_height+1 + ) + tasks.append(self.db.run( + block_phase.sync_block_file, chain_file['file_number'], our_best_file_height+1, + chain_file['txs'], self.TX_FLUSH_SIZE + )) + with Progress(self.db.message_queue, BLOCK_MAIN_EVENT) as p: + p.start(block_count, tx_count, extra={ + "starting_height": starting_height, + "ending_height": ending_height, + "files": len(tasks), + "claims": await self.chain.db.get_claim_metadata_count(starting_height, ending_height), + "supports": await self.chain.db.get_support_metadata_count(starting_height, ending_height), + }) + completed = await self.run_tasks(tasks) + if completed: + best_height_processed = max(f.result() for f in completed) + return starting_height, best_height_processed + + async def sync_filters(self): + if not self.conf.spv_address_filters: + return + with Progress(self.db.message_queue, FILTER_MAIN_EVENT) as p: + blocks = 0 + tasks = [] + # for chunk in range(select min(height), max(height) from block where filter is null): + # tasks.append(self.db.run(block_phase.sync_filters, chunk)) + p.start(blocks) + await self.run_tasks(tasks) + + async def sync_txios(self, blocks_added): + if blocks_added: + await self.db.run(block_phase.sync_txoi, blocks_added[0] == 0) + + async def count_unspent_txos( + self, + txo_types: Tuple[int, ...], + blocks: Tuple[int, int] = None, + missing_in_supports_table: bool = False, + missing_in_claims_table: bool = False, + missing_or_stale_in_claims_table: bool = False, + ) -> int: + return await self.db.run( + q.count_unspent_txos, txo_types, blocks, + missing_in_supports_table, + missing_in_claims_table, + missing_or_stale_in_claims_table, + ) + + async def distribute_unspent_txos( + self, + txo_types: Tuple[int, ...], + blocks: Tuple[int, int] = None, + missing_in_supports_table: bool = False, + missing_in_claims_table: bool = False, + missing_or_stale_in_claims_table: bool = False, + ) -> int: + return await self.db.run( + q.distribute_unspent_txos, txo_types, blocks, + missing_in_supports_table, + missing_in_claims_table, + missing_or_stale_in_claims_table, + ) + + async def count_abandoned_supports(self) -> int: + return await self.db.run(q.count_abandoned_supports) + + async def count_abandoned_claims(self) -> int: + return await self.db.run(q.count_abandoned_claims) + + async def count_claims_with_changed_supports(self, blocks) -> int: + return await self.db.run(q.count_claims_with_changed_supports, blocks) + + async def count_channels_with_changed_content(self, blocks) -> int: + return await self.db.run(q.count_channels_with_changed_content, blocks) + + async def count_takeovers(self, blocks) -> int: + return await self.chain.db.get_takeover_count( + start_height=blocks[0], end_height=blocks[-1] + ) + + async def sync_claims(self, blocks): + total = delete_claims = takeovers = claims_with_changed_supports = 0 + initial_sync = not await self.db.has_claims() + with Progress(self.db.message_queue, CLAIM_INIT_EVENT) as p: + if initial_sync: + p.start(2) + # 1. 
distribute channel insertion load + channels, channel_batches = await self.distribute_unspent_txos(TXO_TYPES['channel']) + channels_with_changed_content = channels + total += channels + channels_with_changed_content + p.step() + # 2. distribute content insertion load + content, content_batches = await self.distribute_unspent_txos(CONTENT_TYPE_CODES) + total += content + p.step() + elif blocks: + p.start(6) + # 1. channel claims to be inserted or updated + channels = await self.count_unspent_txos( + TXO_TYPES['channel'], blocks, missing_or_stale_in_claims_table=True + ) + channel_batches = [blocks] if channels else [] + total += channels + p.step() + # 2. content claims to be inserted or updated + content = await self.count_unspent_txos( + CONTENT_TYPE_CODES, blocks, missing_or_stale_in_claims_table=True + ) + content_batches = [blocks] if content else [] + total += content + p.step() + # 3. claims to be deleted + delete_claims = await self.count_abandoned_claims() + total += delete_claims + p.step() + # 4. claims to be updated with new support totals + claims_with_changed_supports = await self.count_claims_with_changed_supports(blocks) + total += claims_with_changed_supports + p.step() + # 5. channels to be updated with changed content totals + channels_with_changed_content = await self.count_channels_with_changed_content(blocks) + total += channels_with_changed_content + p.step() + # 6. claims to be updated due to name takeovers + takeovers = await self.count_takeovers(blocks) + total += takeovers + p.step() + else: + return + with Progress(self.db.message_queue, CLAIM_MAIN_EVENT) as p: + p.start(total) + insertions = [ + (TXO_TYPES['channel'], channel_batches), + (CONTENT_TYPE_CODES, content_batches), + ] + for txo_type, batches in insertions: + if batches: + await self.run_tasks([ + self.db.run( + claim_phase.claims_insert, txo_type, batch, not initial_sync + ) for batch in batches + ]) + if not initial_sync: + await self.run_tasks([ + self.db.run(claim_phase.claims_update, txo_type, batch) + for batch in batches + ]) + if delete_claims: + await self.db.run(claim_phase.claims_delete, delete_claims) + if takeovers: + await self.db.run(claim_phase.update_takeovers, blocks, takeovers) + if claims_with_changed_supports: + await self.db.run(claim_phase.update_stakes, blocks, claims_with_changed_supports) + if channels_with_changed_content: + return initial_sync, channels_with_changed_content + + async def sync_supports(self, blocks): + delete_supports = 0 + initial_sync = not await self.db.has_supports() + with Progress(self.db.message_queue, SUPPORT_INIT_EVENT) as p: + if initial_sync: + total, support_batches = await self.distribute_unspent_txos(TXO_TYPES['support']) + elif blocks: + p.start(2) + # 1. supports to be inserted + total = await self.count_unspent_txos( + TXO_TYPES['support'], blocks, missing_in_supports_table=True + ) + support_batches = [blocks] if total else [] + p.step() + # 2. 
supports to be deleted + delete_supports = await self.count_abandoned_supports() + total += delete_supports + p.step() + else: + return + with Progress(self.db.message_queue, SUPPORT_MAIN_EVENT) as p: + p.start(total) + if support_batches: + await self.run_tasks([ + self.db.run( + support_phase.supports_insert, batch, not initial_sync + ) for batch in support_batches + ]) + if delete_supports: + await self.db.run(support_phase.supports_delete, delete_supports) + + async def sync_channel_stats(self, blocks, initial_sync, channels_with_changed_content): + if channels_with_changed_content: + await self.db.run( + claim_phase.update_channel_stats, blocks, initial_sync, channels_with_changed_content + ) + + async def sync_trends(self): + pass async def advance(self): - starting_height = await self.db.get_best_block_height() - blocks_added = await self.load_blocks() - process_block_filters = ( - self.run(steps.process_block_filters) - if blocks_added and self.conf.spv_address_filters else asyncio.sleep(0) - ) - if blocks_added: - await self.run(steps.process_spends, blocks_added[0] == 0) - await asyncio.wait([ - process_block_filters, - self.run(steps.process_claims, starting_height, blocks_added), - self.run(steps.process_supports, starting_height, blocks_added), - ]) + blocks_added = await self.sync_blocks() + sync_filters_task = asyncio.create_task(self.sync_filters()) + sync_trends_task = asyncio.create_task(self.sync_trends()) + await self.sync_txios(blocks_added) + channel_stats = await self.sync_claims(blocks_added) + await self.sync_supports(blocks_added) + if channel_stats: + await self.sync_channel_stats(blocks_added, *channel_stats) + await sync_trends_task + await sync_filters_task if blocks_added: await self._on_block_controller.add(BlockEvent(blocks_added[-1])) diff --git a/lbry/blockchain/transaction.py b/lbry/blockchain/transaction.py index 928a574df..165110038 100644 --- a/lbry/blockchain/transaction.py +++ b/lbry/blockchain/transaction.py @@ -170,7 +170,7 @@ class Input(InputOutput): @classmethod def deserialize_from(cls, stream): - tx_ref = TXRefImmutable.from_hash(stream.read(32), -1) + tx_ref = TXRefImmutable.from_hash(stream.read(32), -1, -1) position = stream.read_uint32() script = stream.read_string() sequence = stream.read_uint32() @@ -374,6 +374,13 @@ class Output(InputOutput): self._signable = self.support return self._signable + @property + def can_decode_signable(self) -> Signable: + try: + return self.signable + except Exception: + return False + @property def permanent_url(self) -> str: if self.script.is_claim_involved: diff --git a/lbry/cli.py b/lbry/cli.py index fa53ade7f..2745cc944 100644 --- a/lbry/cli.py +++ b/lbry/cli.py @@ -169,7 +169,7 @@ async def execute_command(conf, method, params): await client.connect() resp = await client.send(method, **params) print(await resp.first) - await client.close() + await client.disconnect() def normalize_value(x, key=None): diff --git a/lbry/conf.py b/lbry/conf.py index 7b81e528e..80d3904c5 100644 --- a/lbry/conf.py +++ b/lbry/conf.py @@ -659,6 +659,10 @@ class Config(CLIConfig): super().__init__(**kwargs) self.set_default_paths() + @classmethod + def with_null_dir(cls): + return cls.with_same_dir('/dev/null') + @classmethod def with_same_dir(cls, same_dir): return cls( diff --git a/lbry/console.py b/lbry/console.py index 44cdeb72c..86b68e081 100644 --- a/lbry/console.py +++ b/lbry/console.py @@ -1,10 +1,12 @@ import os import sys import time +import itertools from typing import Dict, Any from tempfile import TemporaryFile 
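Two hunks above interact: Ledger.__init__ no longer falls back to Config.with_same_dir('/dev/null') when no conf is given, and conf.py adds with_null_dir() as an explicit spelling of that old default. A guess at the intended call-site migration, not taken verbatim from the diff:

    from lbry.conf import Config
    from lbry.blockchain.ledger import Ledger

    # previously Ledger() built a '/dev/null' config implicitly; now callers are explicit
    conf = Config.with_null_dir()   # equivalent to Config.with_same_dir('/dev/null')
    ledger = Ledger(conf)           # conf is a required argument after this change
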
-import tqdm +from tqdm.std import tqdm, Bar +from tqdm.utils import FormatReplace, _unicode, disp_len, disp_trim, _is_ascii, _unich from lbry import __version__ from lbry.service.base import Service @@ -93,8 +95,7 @@ class Basic(Console): s.append(f'({os.cpu_count()} CPU(s) available)') print(' '.join(s)) - @staticmethod - def stopping(): + def stopping(self): print('bye.') @staticmethod @@ -102,18 +103,226 @@ class Basic(Console): print(event) +class Bar2(Bar): + + def __init__(self, frac, default_len=10, charset=None): + super().__init__(frac[0], default_len, charset) + self.frac2 = frac[1] + + def __format__(self, format_spec): + width = self.default_len + row1 = (1,)*int(self.frac * width * 2) + row2 = (2,)*int(self.frac2 * width * 2) + fill = [] + for one, two, _ in itertools.zip_longest(row1, row2, range(width*2)): + fill.append((one or 0)+(two or 0)) + bar = [] + for i in range(0, width*2, 2): + if fill[i] == 1: + if fill[i+1] == 1: + bar.append('▀') + else: + bar.append('▘') + elif fill[i] == 2: + if fill[i+1] == 2: + bar.append('▄') + else: + bar.append('▖') + elif fill[i] == 3: + if fill[i+1] == 1: + bar.append('▛') + elif fill[i+1] == 2: + bar.append('▙') + elif fill[i+1] == 3: + bar.append('█') + else: + bar.append('▌') + else: + bar.append(' ') + return ''.join(bar) + + +class tqdm2(tqdm): + + def __init__(self, initial=(0, 0), unit=('it', 'it'), total=(None, None), **kwargs): + self.n2 = self.last_print_n2 = initial[1] + self.unit2 = unit[1] + self.total2 = total[1] + super().__init__(initial=initial[0], unit=unit[0], total=total[0], **kwargs) + + @property + def format_dict(self): + d = super().format_dict + d.update({ + 'n2': self.n2, + 'unit2': self.unit2, + 'total2': self.total2, + }) + return d + + def update(self, n=(1, 1)): + if self.disable: + return + last_last_print_t = self.last_print_t + self.n2 += n[1] + super().update(n[0]) + if last_last_print_t != self.last_print_t: + self.last_print_n2 = self.n2 + + @staticmethod + def format_meter( + n, total, elapsed, ncols=None, prefix='', ascii=False, + unit='it', unit_scale=False, rate=None, bar_format=None, + postfix=None, unit_divisor=1000, **extra_kwargs + ): + + # sanity check: total + if total and n >= (total + 0.5): # allow float imprecision (#849) + total = None + + # apply custom scale if necessary + if unit_scale and unit_scale not in (True, 1): + if total: + total *= unit_scale + n *= unit_scale + if rate: + rate *= unit_scale # by default rate = 1 / self.avg_time + unit_scale = False + + elapsed_str = tqdm.format_interval(elapsed) + + # if unspecified, attempt to use rate = average speed + # (we allow manual override since predicting time is an arcane art) + if rate is None and elapsed: + rate = n / elapsed + inv_rate = 1 / rate if rate else None + format_sizeof = tqdm.format_sizeof + rate_noinv_fmt = ((format_sizeof(rate) if unit_scale else + '{0:5.2f}'.format(rate)) + if rate else '?') + unit + '/s' + rate_inv_fmt = ((format_sizeof(inv_rate) if unit_scale else + '{0:5.2f}'.format(inv_rate)) + if inv_rate else '?') + 's/' + unit + rate_fmt = rate_inv_fmt if inv_rate and inv_rate > 1 else rate_noinv_fmt + + if unit_scale: + n_fmt = format_sizeof(n, divisor=unit_divisor) + total_fmt = format_sizeof(total, divisor=unit_divisor) \ + if total is not None else '?' + else: + n_fmt = str(n) + total_fmt = str(total) if total is not None else '?' 
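    # An aside on Bar2, defined earlier in this hunk: each character cell stacks two
    # half-height bars -- the top half tracks `frac` and the bottom half tracks `frac2` --
    # by combining two half-cell fill values, where 1 = top bar only, 2 = bottom bar only,
    # 3 = both, 0 = neither:
    #     left 1, right 1 -> '▀'    left 1, other right -> '▘'
    #     left 2, right 2 -> '▄'    left 2, other right -> '▖'
    #     left 3, right 1 -> '▛'    left 3, right 2     -> '▙'
    #     left 3, right 3 -> '█'    left 3, other right -> '▌'
    #     left 0          -> ' '
    # e.g. frac=0.75, frac2=0.25 at the default width of 10 renders '██▛▀▀▀▀▘  '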
+ + try: + postfix = ', ' + postfix if postfix else '' + except TypeError: + pass + + remaining = (total - n) / rate if rate and total else 0 + remaining_str = tqdm.format_interval(remaining) if rate else '?' + + # format the stats displayed to the left and right sides of the bar + if prefix: + # old prefix setup work around + bool_prefix_colon_already = (prefix[-2:] == ": ") + l_bar = prefix if bool_prefix_colon_already else prefix + ": " + else: + l_bar = '' + + r_bar = '| {0}/{1} [{2}<{3}, {4}{5}]'.format( + n_fmt, total_fmt, elapsed_str, remaining_str, rate_fmt, postfix) + + # Custom bar formatting + # Populate a dict with all available progress indicators + format_dict = dict( + # slight extension of self.format_dict + n=n, n_fmt=n_fmt, total=total, total_fmt=total_fmt, + elapsed=elapsed_str, elapsed_s=elapsed, + ncols=ncols, desc=prefix or '', unit=unit, + rate=inv_rate if inv_rate and inv_rate > 1 else rate, + rate_fmt=rate_fmt, rate_noinv=rate, + rate_noinv_fmt=rate_noinv_fmt, rate_inv=inv_rate, + rate_inv_fmt=rate_inv_fmt, + postfix=postfix, unit_divisor=unit_divisor, + # plus more useful definitions + remaining=remaining_str, remaining_s=remaining, + l_bar=l_bar, r_bar=r_bar, + **extra_kwargs) + + # total is known: we can predict some stats + if total: + n2, total2 = extra_kwargs['n2'], extra_kwargs['total2'] + + # fractional and percentage progress + frac = n / total + frac2 = n2 / total2 + percentage = frac * 100 + + l_bar += '{0:3.0f}%|'.format(percentage) + + if ncols == 0: + return l_bar[:-1] + r_bar[1:] + + format_dict.update(l_bar=l_bar) + if bar_format: + format_dict.update(percentage=percentage) + + # auto-remove colon for empty `desc` + if not prefix: + bar_format = bar_format.replace("{desc}: ", '') + else: + bar_format = "{l_bar}{bar}{r_bar}" + + full_bar = FormatReplace() + try: + nobar = bar_format.format(bar=full_bar, **format_dict) + except UnicodeEncodeError: + bar_format = _unicode(bar_format) + nobar = bar_format.format(bar=full_bar, **format_dict) + if not full_bar.format_called: + # no {bar}, we can just format and return + return nobar + + # Formatting progress bar space available for bar's display + full_bar = Bar2( + (frac, frac2), + max(1, ncols - disp_len(nobar)) + if ncols else 10, + charset=Bar2.ASCII if ascii is True else ascii or Bar2.UTF) + if not _is_ascii(full_bar.charset) and _is_ascii(bar_format): + bar_format = _unicode(bar_format) + res = bar_format.format(bar=full_bar, **format_dict) + return disp_trim(res, ncols) if ncols else res + + elif bar_format: + # user-specified bar_format but no total + l_bar += '|' + format_dict.update(l_bar=l_bar, percentage=0) + full_bar = FormatReplace() + nobar = bar_format.format(bar=full_bar, **format_dict) + if not full_bar.format_called: + return nobar + full_bar = Bar2( + (0, 0), + max(1, ncols - disp_len(nobar)) + if ncols else 10, + charset=Bar2.BLANK) + res = bar_format.format(bar=full_bar, **format_dict) + return disp_trim(res, ncols) if ncols else res + else: + # no total: no progressbar, ETA, just progress stats + return ((prefix + ": ") if prefix else '') + \ + '{0}{1} [{2}, {3}{4}]'.format( + n_fmt, unit, elapsed_str, rate_fmt, postfix) + + class Advanced(Basic): FORMAT = '{l_bar}{bar}| {n_fmt:>8}/{total_fmt:>8} [{elapsed:>7}<{remaining:>8}, {rate_fmt:>17}]' def __init__(self, service: Service): super().__init__(service) - self.bars: Dict[Any, tqdm.tqdm] = {} - self.is_single_sync_bar = False - self.single_bar_relative_steps = 0 - self.last_stats = "" - self.block_savers = 0 - self.block_readers = 
0 + self.bars: Dict[Any, tqdm] = {} self.stderr = RedirectOutput('stderr') def starting(self): @@ -121,36 +330,75 @@ class Advanced(Basic): super().starting() def stopping(self): + for bar in self.bars.values(): + bar.close() super().stopping() - self.stderr.flush(self.bars['read'].write, True) - self.stderr.release() + #self.stderr.flush(self.bars['read'].write, True) + #self.stderr.release() - def get_or_create_bar(self, name, desc, unit, total, leave=False, bar_format=None, postfix=None, position=None): + def get_or_create_bar(self, name, desc, units, totals, leave=False, bar_format=None, postfix=None, position=None): bar = self.bars.get(name) if bar is None: - bar = self.bars[name] = tqdm.tqdm( - desc=desc, unit=unit, total=total, - bar_format=bar_format or self.FORMAT, leave=leave, - postfix=postfix, position=position - ) + if len(units) == 2: + bar = self.bars[name] = tqdm2( + desc=desc, unit=units, total=totals, + bar_format=bar_format or self.FORMAT, leave=leave, + postfix=postfix, position=position + ) + else: + bar = self.bars[name] = tqdm( + desc=desc, unit=units[0], total=totals[0], + bar_format=bar_format or self.FORMAT, leave=leave, + postfix=postfix, position=position + ) return bar - def sync_start(self, d): - self.bars.clear() - if d['ending_height']-d['starting_height'] > 0: - label = f"sync {d['starting_height']:,d}-{d['ending_height']:,d}" + def sync_init(self, name, d): + bar_name = f"{name}#{d['id']}" + bar = self.bars.get(bar_name) + if bar is None: + label = d.get('label', name[-10:]) + self.get_or_create_bar(bar_name, label, d['units'], d['total'], True) else: - label = f"sync {d['ending_height']:,d}" - print(label) - self.last_stats = f"{d['txs']:,d} txs, {d['claims']:,d} claims and {d['supports']:,d} supports" - self.get_or_create_bar("read", "├─ blocks read", "blocks", d['blocks'], True) - self.get_or_create_bar("save", "└─┬ txs saved", "txs", d['txs'], True) + if d['done'][0] != -1: + bar.update(d['done'][0] - bar.last_print_n) + if d['done'][0] == -1 or d['done'][0] == bar.total: + bar.close() - def update_progress(self, e, d): - if e in ('blockchain.sync.block.read', 'blockchain.sync.block.save'): - self.update_block_bars(e, d) + def sync_main(self, name, d): + bar = self.bars.get(name) + if bar is None: + label = d.get('label', name[-10:]) + self.get_or_create_bar(name, label, d['units'], d['total'], True) + #self.last_stats = f"{d['txs']:,d} txs, {d['claims']:,d} claims and {d['supports']:,d} supports" + #self.get_or_create_bar("read", "├─ blocks read", "blocks", d['blocks'], True) + #self.get_or_create_bar("save", "└─┬ txs saved", "txs", d['txs'], True) else: - self.update_other_bars(e, d) + base_name = name[:name.rindex('.')] + for child_name, child_bar in self.bars.items(): + if child_name.startswith(base_name): + child_bar.close() + bar.close() + + def sync_task(self, name, d): + bar_name = f"{name}#{d['id']}" + bar = self.bars.get(bar_name) + if bar is None: + assert d['done'][0] == 0 + label = d.get('label', name[-10:]) + self.get_or_create_bar(f"{name}#{d['id']}", label, d['units'], d['total']) + else: + if d['done'][0] != -1: + main_bar_name = f"{name[:name.rindex('.')]}.main" + last = (bar.last_print_n,) if len(d['done']) == 1 else (bar.last_print_n, bar.last_print_n2) + diff = tuple(a-b for a, b in zip(d['done'], last)) + if main_bar_name != name: + main_bar = self.bars.get(main_bar_name) + if main_bar.unit == bar.unit: + main_bar.update(diff) + bar.update(diff) + if d['done'][0] == -1 or d['done'][0] == bar.total: + bar.close() def 
update_block_bars(self, event, d):
+        total_bar = self.bars[event[-4:]]
@@ -213,12 +461,19 @@ class Advanced(Basic):
     def on_sync_progress(self, event):
         e, d = event['event'], event.get('data', {})
-        if e.endswith("sync.start"):
-            self.sync_start(d)
-            self.stderr.flush(self.bars['read'].write)
-        elif e.endswith("sync.complete"):
-            self.stderr.flush(self.bars['read'].write, True)
-            self.sync_complete()
+        if e.endswith(".init"):
+            self.sync_init(e, d)
+        elif e.endswith(".main"):
+            self.sync_main(e, d)
         else:
-            self.stderr.flush(self.bars['read'].write)
-            self.update_progress(e, d)
+            self.sync_task(e, d)
+
+#        if e.endswith("sync.start"):
+#            self.sync_start(d)
+#            self.stderr.flush(self.bars['read'].write)
+#        elif e.endswith("sync.complete"):
+#            self.stderr.flush(self.bars['read'].write, True)
+#            self.sync_complete()
+#        else:
+#            self.stderr.flush(self.bars['read'].write)
+#            self.update_progress(e, d)
diff --git a/lbry/db/constants.py b/lbry/db/constants.py
index 217edcedd..0f901b13f 100644
--- a/lbry/db/constants.py
+++ b/lbry/db/constants.py
@@ -1,3 +1,5 @@
+MAX_QUERY_VARIABLES = 900
+
 TXO_TYPES = {
     "other": 0,
     "stream": 1,
@@ -15,10 +17,18 @@ CLAIM_TYPE_NAMES = [
     'repost',
 ]
 
+CONTENT_TYPE_NAMES = [
+    name for name in CLAIM_TYPE_NAMES if name != "channel"
+]
+
 CLAIM_TYPE_CODES = [
     TXO_TYPES[name] for name in CLAIM_TYPE_NAMES
 ]
 
+CONTENT_TYPE_CODES = [
+    TXO_TYPES[name] for name in CONTENT_TYPE_NAMES
+]
+
 SPENDABLE_TYPE_CODES = [
     TXO_TYPES['other'],
     TXO_TYPES['purchase']
diff --git a/lbry/db/database.py b/lbry/db/database.py
index 0cb6f6f8e..821f95915 100644
--- a/lbry/db/database.py
+++ b/lbry/db/database.py
@@ -128,7 +128,7 @@ class Database:
     @classmethod
     def from_url(cls, db_url):
         from lbry import Config, Ledger  # pylint: disable=import-outside-toplevel
-        return cls(Ledger(Config.with_same_dir('/dev/null').set(db_url=db_url)))
+        return cls(Ledger(Config.with_null_dir().set(db_url=db_url)))
 
     @classmethod
     def in_memory(cls):
@@ -166,17 +166,17 @@ class Database:
             self.executor = ProcessPoolExecutor(max_workers=self.processes, **kwargs)
         else:
             self.executor = ThreadPoolExecutor(max_workers=1, **kwargs)
-        return await self.run_in_executor(q.check_version_and_create_tables)
+        return await self.run(q.check_version_and_create_tables)
 
     async def close(self):
         self.progress_publisher.stop()
         if self.executor is not None:
             if isinstance(self.executor, ThreadPoolExecutor):
-                await self.run_in_executor(uninitialize)
+                await self.run(uninitialize)
             self.executor.shutdown()
             self.executor = None
 
-    async def run_in_executor(self, func, *args, **kwargs):
+    async def run(self, func, *args, **kwargs):
         if kwargs:
             clean_wallet_account_ids(kwargs)
         return await asyncio.get_event_loop().run_in_executor(
@@ -184,52 +184,40 @@ class Database:
         )
 
     async def fetch_result(self, func, *args, **kwargs) -> Result:
-        rows, total = await self.run_in_executor(func, *args, **kwargs)
+        rows, total = await self.run(func, *args, **kwargs)
         return Result(rows, total)
 
     async def execute(self, sql):
-        return await self.run_in_executor(q.execute, sql)
+        return await self.run(q.execute, sql)
 
     async def execute_fetchall(self, sql):
-        return await self.run_in_executor(q.execute_fetchall, sql)
+        return await self.run(q.execute_fetchall, sql)
 
-    async def process_all_things_after_sync(self):
-        return await self.run_in_executor(sync.process_all_things_after_sync)
+    async def has_claims(self):
+        return await self.run(q.has_claims)
+
+    async def has_supports(self):
+        return await self.run(q.has_supports)
 
     async def get_best_block_height(self) -> int:
-        return 
await self.run_in_executor(q.get_best_block_height) + return await self.run(q.get_best_block_height) - async def get_best_block_height_for_file(self, file_number) -> int: - return await self.run_in_executor(q.get_best_block_height_for_file, file_number) - - async def get_blocks_without_filters(self): - return await self.run_in_executor(q.get_blocks_without_filters) - - async def get_transactions_without_filters(self): - return await self.run_in_executor(q.get_transactions_without_filters) - - async def get_block_tx_addresses(self, block_hash=None, tx_hash=None): - return await self.run_in_executor(q.get_block_tx_addresses, block_hash, tx_hash) - - async def get_block_address_filters(self): - return await self.run_in_executor(q.get_block_address_filters) - - async def get_transaction_address_filters(self, block_hash): - return await self.run_in_executor(q.get_transaction_address_filters, block_hash) + async def process_all_things_after_sync(self): + return await self.run(sync.process_all_things_after_sync) async def insert_block(self, block): - return await self.run_in_executor(q.insert_block, block) + return await self.run(q.insert_block, block) async def insert_transaction(self, block_hash, tx): - return await self.run_in_executor(q.insert_transaction, block_hash, tx) + return await self.run(q.insert_transaction, block_hash, tx) async def update_address_used_times(self, addresses): - return await self.run_in_executor(q.update_address_used_times, addresses) + return await self.run(q.update_address_used_times, addresses) async def reserve_outputs(self, txos, is_reserved=True): txo_hashes = [txo.hash for txo in txos] if txo_hashes: - return await self.run_in_executor( + return await self.run( q.reserve_outputs, txo_hashes, is_reserved ) @@ -240,13 +228,13 @@ class Database: return await self.release_outputs([txi.txo_ref.txo for txi in tx.inputs]) async def release_all_outputs(self, account): - return await self.run_in_executor(q.release_all_outputs, account.id) + return await self.run(q.release_all_outputs, account.id) async def get_balance(self, **constraints): - return await self.run_in_executor(q.get_balance, **constraints) + return await self.run(q.get_balance, **constraints) async def get_report(self, accounts): - return await self.run_in_executor(q.get_report, accounts=accounts) + return await self.run(q.get_report, accounts=accounts) async def get_addresses(self, **constraints) -> Result[dict]: addresses = await self.fetch_result(q.get_addresses, **constraints) @@ -259,14 +247,14 @@ class Database: return addresses async def get_all_addresses(self): - return await self.run_in_executor(q.get_all_addresses) + return await self.run(q.get_all_addresses) async def get_address(self, **constraints): for address in await self.get_addresses(limit=1, **constraints): return address async def add_keys(self, account, chain, pubkeys): - return await self.run_in_executor(q.add_keys, account, chain, pubkeys) + return await self.run(q.add_keys, account, chain, pubkeys) async def get_transactions(self, **constraints) -> Result[Transaction]: return await self.fetch_result(q.get_transactions, **constraints) @@ -282,20 +270,20 @@ class Database: async def search_claims(self, **constraints) -> Result[Output]: #assert set(constraints).issubset(SEARCH_PARAMS), \ # f"Search query contains invalid arguments: {set(constraints).difference(SEARCH_PARAMS)}" - claims, total, censor = await self.run_in_executor(q.search_claims, **constraints) + claims, total, censor = await self.run(q.search_claims, **constraints) 
return Result(claims, total, censor) async def search_supports(self, **constraints) -> Result[Output]: return await self.fetch_result(q.search_supports, **constraints) async def resolve(self, *urls) -> Dict[str, Output]: - return await self.run_in_executor(q.resolve, *urls) + return await self.run(q.resolve, *urls) async def get_txo_sum(self, **constraints) -> int: - return await self.run_in_executor(q.get_txo_sum, **constraints) + return await self.run(q.get_txo_sum, **constraints) async def get_txo_plot(self, **constraints) -> List[dict]: - return await self.run_in_executor(q.get_txo_plot, **constraints) + return await self.run(q.get_txo_plot, **constraints) async def get_txos(self, **constraints) -> Result[Output]: txos = await self.fetch_result(q.get_txos, **constraints) diff --git a/lbry/db/queries.py b/lbry/db/queries.py deleted file mode 100644 index bc580dad3..000000000 --- a/lbry/db/queries.py +++ /dev/null @@ -1,973 +0,0 @@ -# pylint: disable=singleton-comparison -import struct -import logging -import itertools -from datetime import date -from decimal import Decimal -from binascii import unhexlify -from operator import itemgetter -from typing import Tuple, List, Dict, Optional - -from sqlalchemy import union, func, text -from sqlalchemy.future import select, Select - -from lbry.schema.tags import clean_tags -from lbry.schema.result import Censor -from lbry.schema.url import URL, normalize_name -from lbry.error import ResolveCensoredError -from lbry.blockchain.transaction import Transaction, Output, OutputScript, TXRefImmutable - -from .utils import query, in_account_ids -from .query_context import context -from .constants import ( - TXO_TYPES, STREAM_TYPES, ATTRIBUTE_ARRAY_MAX_LENGTH, - SEARCH_INTEGER_PARAMS, SEARCH_ORDER_FIELDS -) -from .tables import ( - metadata, - SCHEMA_VERSION, Version, - Block, TX, TXO, TXI, txi_join_account, txo_join_account, - Claim, Support, PubkeyAddress, AccountAddress -) - - -MAX_QUERY_VARIABLES = 900 - - -log = logging.getLogger(__name__) - - -def check_version_and_create_tables(): - with context("db.connecting") as ctx: - if ctx.has_table('version'): - version = ctx.fetchone(select(Version.c.version).limit(1)) - if version and version['version'] == SCHEMA_VERSION: - return - metadata.drop_all(ctx.engine) - metadata.create_all(ctx.engine) - ctx.execute(Version.insert().values(version=SCHEMA_VERSION)) - for table in metadata.sorted_tables: - disable_trigger_and_constraints(table.name) - - -def disable_trigger_and_constraints(table_name): - ctx = context() - if ctx.is_postgres: - ctx.execute(text(f"ALTER TABLE {table_name} DISABLE TRIGGER ALL;")) - if table_name == 'tag': - return - if ctx.is_postgres: - ctx.execute(text(f"ALTER TABLE {table_name} DROP CONSTRAINT {table_name}_pkey CASCADE;")) - - -def insert_block(block): - context().get_bulk_loader().add_block(block).save() - - -def insert_transaction(block_hash, tx): - context().get_bulk_loader().add_transaction(block_hash, tx).save() - - -def execute(sql): - return context().execute(text(sql)) - - -def execute_fetchall(sql): - return context().fetchall(text(sql)) - - -def get_best_block_height(): - return context().fetchone( - select(func.coalesce(func.max(Block.c.height), -1).label('height')) - )['height'] - - -def get_best_block_height_for_file(file_number): - return context().fetchone( - select(func.coalesce(func.max(Block.c.height), -1).label('height')) - .where(Block.c.file_number == file_number) - )['height'] - - -def get_blocks_without_filters(): - return context().fetchall( - 
select(Block.c.block_hash) - .select_from(Block) - .where(Block.c.block_filter == None) - ) - - -def get_transactions_without_filters(): - return context().fetchall( - select(TX.c.tx_hash) - .select_from(TX) - .where(TX.c.tx_filter == None) - ) - - -def get_block_tx_addresses(block_hash=None, tx_hash=None): - if block_hash is not None: - constraint = (TX.c.block_hash == block_hash) - elif tx_hash is not None: - constraint = (TX.c.tx_hash == tx_hash) - else: - raise ValueError('block_hash or tx_hash must be provided.') - return context().fetchall( - union( - select(TXO.c.address).select_from(TXO.join(TX)).where((TXO.c.address != None) & constraint), - select(TXI.c.address).select_from(TXI.join(TX)).where((TXI.c.address != None) & constraint), - ) - ) - - -def get_block_address_filters(): - return context().fetchall( - select(Block.c.block_hash, Block.c.block_filter).select_from(Block) - ) - - -def get_transaction_address_filters(block_hash): - return context().fetchall( - select(TX.c.tx_hash, TX.c.tx_filter) - .select_from(TX) - .where(TX.c.block_hash == block_hash) - ) - - -def update_address_used_times(addresses): - context().execute( - PubkeyAddress.update() - .values(used_times=( - select(func.count(TXO.c.address)).where((TXO.c.address == PubkeyAddress.c.address)), - )) - .where(PubkeyAddress.c.address._in(addresses)) - ) - - -def reserve_outputs(txo_hashes, is_reserved=True): - context().execute( - TXO.update().values(is_reserved=is_reserved).where(TXO.c.txo_hash.in_(txo_hashes)) - ) - - -def release_all_outputs(account_id): - context().execute( - TXO.update().values(is_reserved=False).where( - (TXO.c.is_reserved == True) & - (TXO.c.address.in_(select(AccountAddress.c.address).where(in_account_ids(account_id)))) - ) - ) - - -def select_transactions(cols, account_ids=None, **constraints): - s: Select = select(*cols).select_from(TX) - if not {'tx_hash', 'tx_hash__in'}.intersection(constraints): - assert account_ids, "'accounts' argument required when no 'tx_hash' constraint is present" - where = in_account_ids(account_ids) - tx_hashes = union( - select(TXO.c.tx_hash).select_from(txo_join_account).where(where), - select(TXI.c.tx_hash).select_from(txi_join_account).where(where) - ) - s = s.where(TX.c.tx_hash.in_(tx_hashes)) - return context().fetchall(query([TX], s, **constraints)) - - -TXO_NOT_MINE = Output(None, None, is_my_output=False) - - -def get_raw_transactions(tx_hashes): - return context().fetchall( - select(TX.c.tx_hash, TX.c.raw).where(TX.c.tx_hash.in_(tx_hashes)) - ) - - -def get_transactions(**constraints) -> Tuple[List[Transaction], Optional[int]]: - txs = [] - sql = select(TX.c.raw, TX.c.height, TX.c.position).select_from(TX) - rows = context().fetchall(query([TX], sql, **constraints)) - for row in rows: - txs.append(Transaction(row['raw'], height=row['height'], position=row['position'])) - return txs, 0 - - -def _get_transactions(wallet=None, include_total=False, **constraints) -> Tuple[List[Transaction], Optional[int]]: - include_is_my_input = constraints.pop('include_is_my_input', False) - include_is_my_output = constraints.pop('include_is_my_output', False) - - tx_rows = select_transactions( - [TX.c.tx_hash, TX.c.raw, TX.c.height, TX.c.position, TX.c.is_verified], - order_by=constraints.pop('order_by', ["height=0 DESC", "height DESC", "position DESC"]), - **constraints - ) - - txids, txs, txi_txoids = [], [], [] - for row in tx_rows: - txids.append(row['tx_hash']) - txs.append(Transaction( - raw=row['raw'], height=row['height'], position=row['position'], - 
is_verified=bool(row['is_verified']) - )) - for txi in txs[-1].inputs: - txi_txoids.append(txi.txo_ref.hash) - - annotated_txos = {} - for offset in range(0, len(txids), MAX_QUERY_VARIABLES): - annotated_txos.update({ - txo.id: txo for txo in - get_txos( - wallet=wallet, - tx_hash__in=txids[offset:offset + MAX_QUERY_VARIABLES], order_by='txo.tx_hash', - include_is_my_input=include_is_my_input, - include_is_my_output=include_is_my_output, - )[0] - }) - - referenced_txos = {} - for offset in range(0, len(txi_txoids), MAX_QUERY_VARIABLES): - referenced_txos.update({ - txo.id: txo for txo in - get_txos( - wallet=wallet, - txo_hash__in=txi_txoids[offset:offset + MAX_QUERY_VARIABLES], order_by='txo.txo_hash', - include_is_my_output=include_is_my_output, - )[0] - }) - - for tx in txs: - for txi in tx.inputs: - txo = referenced_txos.get(txi.txo_ref.id) - if txo: - txi.txo_ref = txo.ref - for txo in tx.outputs: - _txo = annotated_txos.get(txo.id) - if _txo: - txo.update_annotations(_txo) - else: - txo.update_annotations(TXO_NOT_MINE) - - for tx in txs: - txos = tx.outputs - if len(txos) >= 2 and txos[1].can_decode_purchase_data: - txos[0].purchase = txos[1] - - return txs, get_transaction_count(**constraints) if include_total else None - - -def get_transaction_count(**constraints): - constraints.pop('wallet', None) - constraints.pop('offset', None) - constraints.pop('limit', None) - constraints.pop('order_by', None) - count = select_transactions([func.count().label('total')], **constraints) - return count[0]['total'] or 0 - - -BASE_SELECT_TXO_COLUMNS = [ - TX.c.tx_hash, TX.c.raw, TX.c.height, TX.c.position.label('tx_position'), - TX.c.is_verified, TX.c.timestamp, - TXO.c.txo_type, TXO.c.position.label('txo_position'), TXO.c.amount, TXO.c.spent_height, - TXO.c.script_offset, TXO.c.script_length, -] - - -def select_txos( - cols=None, account_ids=None, is_my_input=None, - is_my_output=True, is_my_input_or_output=None, exclude_internal_transfers=False, - include_is_my_input=False, claim_id_not_in_claim_table=None, - txo_id_not_in_claim_table=None, txo_id_not_in_support_table=None, - **constraints) -> Select: - if cols is None: - cols = BASE_SELECT_TXO_COLUMNS - s: Select = select(*cols) - if account_ids: - my_addresses = select(AccountAddress.c.address).where(in_account_ids(account_ids)) - if is_my_input_or_output: - include_is_my_input = True - s = s.where( - TXO.c.address.in_(my_addresses) | ( - (TXI.c.address != None) & - (TXI.c.address.in_(my_addresses)) - ) - ) - else: - if is_my_output: - s = s.where(TXO.c.address.in_(my_addresses)) - elif is_my_output is False: - s = s.where(TXO.c.address.notin_(my_addresses)) - if is_my_input: - include_is_my_input = True - s = s.where( - (TXI.c.address != None) & - (TXI.c.address.in_(my_addresses)) - ) - elif is_my_input is False: - include_is_my_input = True - s = s.where( - (TXI.c.address == None) | - (TXI.c.address.notin_(my_addresses)) - ) - if exclude_internal_transfers: - include_is_my_input = True - s = s.where( - (TXO.c.txo_type != TXO_TYPES['other']) | - (TXO.c.address.notin_(my_addresses)) - (TXI.c.address == None) | - (TXI.c.address.notin_(my_addresses)) - ) - joins = TXO.join(TX) - #if constraints.get('is_spent', None) is False: - # s = s.where((TXO.c.is_spent == False) & (TXO.c.is_reserved == False)) - if include_is_my_input: - joins = joins.join(TXI, (TXI.c.position == 0) & (TXI.c.tx_hash == TXO.c.tx_hash), isouter=True) - if claim_id_not_in_claim_table: - s = s.where(TXO.c.claim_hash.notin_(select(Claim.c.claim_hash))) - elif 
txo_id_not_in_claim_table: - s = s.where(TXO.c.txo_hash.notin_(select(Claim.c.txo_hash))) - elif txo_id_not_in_support_table: - s = s.where(TXO.c.txo_hash.notin_(select(Support.c.txo_hash))) - return query([TXO, TX], s.select_from(joins), **constraints) - - -META_ATTRS = ( - 'activation_height', 'takeover_height', 'creation_height', 'staked_amount', - 'short_url', 'canonical_url', 'staked_support_amount', 'staked_support_count', - 'signed_claim_count', 'signed_support_count', 'is_signature_valid', -) - - -def rows_to_txos(rows: List[dict], include_tx=True) -> List[Output]: - txos = [] - tx_cache = {} - for row in rows: - if include_tx: - if row['tx_hash'] not in tx_cache: - tx_cache[row['tx_hash']] = Transaction( - row['raw'], height=row['height'], position=row['tx_position'], - is_verified=bool(row['is_verified']), - ) - txo = tx_cache[row['tx_hash']].outputs[row['txo_position']] - else: - source = row['raw'][row['script_offset']:row['script_offset']+row['script_length']] - txo = Output( - amount=row['amount'], - script=OutputScript(source), - tx_ref=TXRefImmutable.from_hash(row['tx_hash'], row['height']), - position=row['txo_position'], - ) - txo.spent_height = bool(row['spent_height']) - if 'is_my_input' in row: - txo.is_my_input = bool(row['is_my_input']) - if 'is_my_output' in row: - txo.is_my_output = bool(row['is_my_output']) - if 'is_my_input' in row and 'is_my_output' in row: - if txo.is_my_input and txo.is_my_output and row['txo_type'] == TXO_TYPES['other']: - txo.is_internal_transfer = True - else: - txo.is_internal_transfer = False - if 'received_tips' in row: - txo.received_tips = row['received_tips'] - for attr in META_ATTRS: - if attr in row: - txo.meta[attr] = row[attr] - txos.append(txo) - return txos - - -def get_txos(no_tx=False, include_total=False, **constraints) -> Tuple[List[Output], Optional[int]]: - wallet_account_ids = constraints.pop('wallet_account_ids', []) - include_is_my_input = constraints.get('include_is_my_input', False) - include_is_my_output = constraints.pop('include_is_my_output', False) - include_received_tips = constraints.pop('include_received_tips', False) - - select_columns = BASE_SELECT_TXO_COLUMNS + [ - TXO.c.claim_name - ] - - my_accounts = None - if wallet_account_ids: - my_accounts = select(AccountAddress.c.address).where(in_account_ids(wallet_account_ids)) - - if include_is_my_output and my_accounts is not None: - if constraints.get('is_my_output', None) in (True, False): - select_columns.append(text(f"{1 if constraints['is_my_output'] else 0} AS is_my_output")) - else: - select_columns.append(TXO.c.address.in_(my_accounts).label('is_my_output')) - - if include_is_my_input and my_accounts is not None: - if constraints.get('is_my_input', None) in (True, False): - select_columns.append(text(f"{1 if constraints['is_my_input'] else 0} AS is_my_input")) - else: - select_columns.append(( - (TXI.c.address != None) & - (TXI.c.address.in_(my_accounts)) - ).label('is_my_input')) - - if include_received_tips: - support = TXO.alias('support') - select_columns.append( - select(func.coalesce(func.sum(support.c.amount), 0)) - .select_from(support).where( - (support.c.claim_hash == TXO.c.claim_hash) & - (support.c.txo_type == TXO_TYPES['support']) & - (support.c.address.in_(my_accounts)) & - (support.c.txo_hash.notin_(select(TXI.c.txo_hash))) - ).label('received_tips') - ) - - if 'order_by' not in constraints or constraints['order_by'] == 'height': - constraints['order_by'] = [ - "tx.height=0 DESC", "tx.height DESC", "tx.position DESC", "txo.position" - ] - 
elif constraints.get('order_by', None) == 'none': - del constraints['order_by'] - - rows = context().fetchall(select_txos(select_columns, **constraints)) - txos = rows_to_txos(rows, not no_tx) - - channel_hashes = set() - for txo in txos: - if txo.is_claim and txo.can_decode_claim: - if txo.claim.is_signed: - channel_hashes.add(txo.claim.signing_channel_hash) - - if channel_hashes: - channels = { - txo.claim_hash: txo for txo in - get_txos( - txo_type=TXO_TYPES['channel'], spent_height=0, - wallet_account_ids=wallet_account_ids, claim_hash__in=channel_hashes - )[0] - } - for txo in txos: - if txo.is_claim and txo.can_decode_claim: - txo.channel = channels.get(txo.claim.signing_channel_hash, None) - - return txos, get_txo_count(**constraints) if include_total else None - - -def _clean_txo_constraints_for_aggregation(constraints): - constraints.pop('include_is_my_input', None) - constraints.pop('include_is_my_output', None) - constraints.pop('include_received_tips', None) - constraints.pop('wallet_account_ids', None) - constraints.pop('offset', None) - constraints.pop('limit', None) - constraints.pop('order_by', None) - - -def get_txo_count(**constraints): - _clean_txo_constraints_for_aggregation(constraints) - count = context().fetchall(select_txos([func.count().label('total')], **constraints)) - return count[0]['total'] or 0 - - -def get_txo_sum(**constraints): - _clean_txo_constraints_for_aggregation(constraints) - result = context().fetchall(select_txos([func.sum(TXO.c.amount).label('total')], **constraints)) - return result[0]['total'] or 0 - - -def get_balance(**constraints): - return get_txo_sum(spent_height=0, **constraints) - - -def get_report(account_ids): - return - - -def get_txo_plot(start_day=None, days_back=0, end_day=None, days_after=None, **constraints): - _clean_txo_constraints_for_aggregation(constraints) - if start_day is None: - # TODO: Fix - current_ordinal = 0 # self.ledger.headers.estimated_date(self.ledger.headers.height).toordinal() - constraints['day__gte'] = current_ordinal - days_back - else: - constraints['day__gte'] = date.fromisoformat(start_day).toordinal() - if end_day is not None: - constraints['day__lte'] = date.fromisoformat(end_day).toordinal() - elif days_after is not None: - constraints['day__lte'] = constraints['day__gte'] + days_after - plot = context().fetchall(select_txos( - [TX.c.day, func.sum(TXO.c.amount).label('total')], - group_by='day', order_by='day', **constraints - )) - for row in plot: - row['day'] = date.fromordinal(row['day']) - return plot - - -BASE_SELECT_SUPPORT_COLUMNS = BASE_SELECT_TXO_COLUMNS + [ - Support.c.channel_hash, - Support.c.is_signature_valid, -] - - -def select_supports(cols: List = None, **constraints) -> Select: - if cols is None: - cols = BASE_SELECT_SUPPORT_COLUMNS - joins = Support.join(TXO, ).join(TX) - return query([Support], select(*cols).select_from(joins), **constraints) - - -def search_supports(**constraints) -> Tuple[List[Output], Optional[int]]: - total = None - if not constraints.pop('no_totals', False): - total = search_support_count(**constraints) - rows = context().fetchall(select_supports(**constraints)) - txos = rows_to_txos(rows, include_tx=False) - return txos, total - - -def search_support_count(**constraints) -> int: - constraints.pop('offset', None) - constraints.pop('limit', None) - constraints.pop('order_by', None) - count = context().fetchall(select_supports([func.count().label('total')], **constraints)) - return count[0]['total'] or 0 - - -BASE_SELECT_CLAIM_COLUMNS = BASE_SELECT_TXO_COLUMNS + 
[ - Claim.c.activation_height, - Claim.c.takeover_height, - Claim.c.creation_height, - Claim.c.is_controlling, - Claim.c.channel_hash, - Claim.c.reposted_claim_hash, - Claim.c.short_url, - Claim.c.canonical_url, - Claim.c.signed_claim_count, - Claim.c.signed_support_count, - (Claim.c.amount + Claim.c.staked_support_amount).label('staked_amount'), - Claim.c.staked_support_amount, - Claim.c.staked_support_count, - Claim.c.is_signature_valid, -] - - -def select_claims(cols: List = None, for_count=False, **constraints) -> Select: - if cols is None: - cols = BASE_SELECT_CLAIM_COLUMNS - if 'order_by' in constraints: - order_by_parts = constraints['order_by'] - if isinstance(order_by_parts, str): - order_by_parts = [order_by_parts] - sql_order_by = [] - for order_by in order_by_parts: - is_asc = order_by.startswith('^') - column = order_by[1:] if is_asc else order_by - if column not in SEARCH_ORDER_FIELDS: - raise NameError(f'{column} is not a valid order_by field') - if column == 'name': - column = 'claim_name' - sql_order_by.append( - f"claim.{column} ASC" if is_asc else f"claim.{column} DESC" - ) - constraints['order_by'] = sql_order_by - - ops = {'<=': '__lte', '>=': '__gte', '<': '__lt', '>': '__gt'} - for constraint in SEARCH_INTEGER_PARAMS: - if constraint in constraints: - value = constraints.pop(constraint) - postfix = '' - if isinstance(value, str): - if len(value) >= 2 and value[:2] in ops: - postfix, value = ops[value[:2]], value[2:] - elif len(value) >= 1 and value[0] in ops: - postfix, value = ops[value[0]], value[1:] - if constraint == 'fee_amount': - value = Decimal(value)*1000 - constraints[f'{constraint}{postfix}'] = int(value) - - if 'sequence' in constraints: - constraints['order_by'] = 'activation_height ASC' - constraints['offset'] = int(constraints.pop('sequence')) - 1 - constraints['limit'] = 1 - if 'amount_order' in constraints: - constraints['order_by'] = 'effective_amount DESC' - constraints['offset'] = int(constraints.pop('amount_order')) - 1 - constraints['limit'] = 1 - - if 'claim_id' in constraints: - claim_id = constraints.pop('claim_id') - if len(claim_id) == 40: - constraints['claim_id'] = claim_id - else: - constraints['claim_id__like'] = f'{claim_id[:40]}%' - elif 'claim_ids' in constraints: - constraints['claim_id__in'] = set(constraints.pop('claim_ids')) - - if 'reposted_claim_id' in constraints: - constraints['reposted_claim_hash'] = unhexlify(constraints.pop('reposted_claim_id'))[::-1] - - if 'name' in constraints: - constraints['claim_name'] = normalize_name(constraints.pop('name')) - - if 'public_key_id' in constraints: - constraints['public_key_hash'] = ( - context().ledger.address_to_hash160(constraints.pop('public_key_id'))) - if 'channel_hash' in constraints: - constraints['channel_hash'] = constraints.pop('channel_hash') - if 'channel_ids' in constraints: - channel_ids = constraints.pop('channel_ids') - if channel_ids: - constraints['channel_hash__in'] = { - unhexlify(cid)[::-1] for cid in channel_ids - } - if 'not_channel_ids' in constraints: - not_channel_ids = constraints.pop('not_channel_ids') - if not_channel_ids: - not_channel_ids_binary = { - unhexlify(ncid)[::-1] for ncid in not_channel_ids - } - constraints['claim_hash__not_in#not_channel_ids'] = not_channel_ids_binary - if constraints.get('has_channel_signature', False): - constraints['channel_hash__not_in'] = not_channel_ids_binary - else: - constraints['null_or_not_channel__or'] = { - 'signature_valid__is_null': True, - 'channel_hash__not_in': not_channel_ids_binary - } - if 
'signature_valid' in constraints: - has_channel_signature = constraints.pop('has_channel_signature', False) - if has_channel_signature: - constraints['signature_valid'] = constraints.pop('signature_valid') - else: - constraints['null_or_signature__or'] = { - 'signature_valid__is_null': True, - 'signature_valid': constraints.pop('signature_valid') - } - elif constraints.pop('has_channel_signature', False): - constraints['signature_valid__is_not_null'] = True - - if 'txid' in constraints: - tx_hash = unhexlify(constraints.pop('txid'))[::-1] - nout = constraints.pop('nout', 0) - constraints['txo_hash'] = tx_hash + struct.pack(' Tuple[List[Output], Optional[int], Optional[Censor]]: - total = None - if not constraints.pop('no_totals', False): - total = search_claim_count(**constraints) - constraints['offset'] = abs(constraints.get('offset', 0)) - constraints['limit'] = min(abs(constraints.get('limit', 10)), 50) - ctx = context() - search_censor = ctx.get_search_censor() - rows = context().fetchall(select_claims(**constraints)) - txos = rows_to_txos(rows, include_tx=False) - return txos, total, search_censor - - -def search_claim_count(**constraints) -> int: - constraints.pop('offset', None) - constraints.pop('limit', None) - constraints.pop('order_by', None) - count = context().fetchall(select_claims([func.count().label('total')], **constraints)) - return count[0]['total'] or 0 - - -def _get_referenced_rows(txo_rows: List[dict], censor_channels: List[bytes]): - # censor = context().get_resolve_censor() - repost_hashes = set(filter(None, map(itemgetter('reposted_claim_hash'), txo_rows))) - channel_hashes = set(itertools.chain( - filter(None, map(itemgetter('channel_hash'), txo_rows)), - censor_channels - )) - - reposted_txos = [] - if repost_hashes: - reposted_txos = search_claims(**{'claim.claim_hash__in': repost_hashes}) - channel_hashes |= set(filter(None, map(itemgetter('channel_hash'), reposted_txos))) - - channel_txos = [] - if channel_hashes: - channel_txos = search_claims(**{'claim.claim_hash__in': channel_hashes}) - - # channels must come first for client side inflation to work properly - return channel_txos + reposted_txos - - -def get_purchases(**constraints) -> Tuple[List[Output], Optional[int]]: - accounts = constraints.pop('accounts', None) - assert accounts, "'accounts' argument required to find purchases" - if not {'purchased_claim_hash', 'purchased_claim_hash__in'}.intersection(constraints): - constraints['purchased_claim_hash__is_not_null'] = True - constraints['tx_hash__in'] = ( - select(TXI.c.tx_hash).select_from(txi_join_account).where(in_account_ids(accounts)) - ) - txs, count = get_transactions(**constraints) - return [tx.outputs[0] for tx in txs], count - - -def select_addresses(cols, **constraints): - return context().fetchall(query( - [AccountAddress, PubkeyAddress], - select(*cols).select_from(PubkeyAddress.join(AccountAddress)), - **constraints - )) - - -def get_addresses(cols=None, include_total=False, **constraints) -> Tuple[List[dict], Optional[int]]: - if cols is None: - cols = ( - PubkeyAddress.c.address, - PubkeyAddress.c.used_times, - AccountAddress.c.account, - AccountAddress.c.chain, - AccountAddress.c.pubkey, - AccountAddress.c.chain_code, - AccountAddress.c.n, - AccountAddress.c.depth - ) - return ( - select_addresses(cols, **constraints), - get_address_count(**constraints) if include_total else None - ) - - -def get_address_count(**constraints): - count = select_addresses([func.count().label('total')], **constraints) - return count[0]['total'] or 0 - - 
-def get_all_addresses(self): - return context().execute(select(PubkeyAddress.c.address)) - - -def add_keys(account, chain, pubkeys): - c = context() - c.execute( - c.insert_or_ignore(PubkeyAddress) - .values([{'address': k.address} for k in pubkeys]) - ) - c.execute( - c.insert_or_ignore(AccountAddress) - .values([{ - 'account': account.id, - 'address': k.address, - 'chain': chain, - 'pubkey': k.pubkey_bytes, - 'chain_code': k.chain_code, - 'n': k.n, - 'depth': k.depth - } for k in pubkeys]) - ) - - -def get_supports_summary(self, **constraints): - return get_txos( - txo_type=TXO_TYPES['support'], - spent_height=0, is_my_output=True, - include_is_my_input=True, - no_tx=True, - **constraints - ) - - -def resolve(*urls) -> Dict[str, Output]: - return {url: resolve_url(url) for url in urls} - #txo_rows = [resolve_url(raw_url) for raw_url in urls] - #extra_txo_rows = _get_referenced_rows( - # [txo for txo in txo_rows if isinstance(txo, dict)], - # [txo.censor_hash for txo in txo_rows if isinstance(txo, ResolveCensoredError)] - #) - #return txo_rows, extra_txo_rows - - -def resolve_url(raw_url): - censor = context().get_resolve_censor() - - try: - url = URL.parse(raw_url) - except ValueError as e: - return e - - channel = None - - if url.has_channel: - q = url.channel.to_dict() - if set(q) == {'name'}: - q['is_controlling'] = True - else: - q['order_by'] = ['^creation_height'] - #matches = search_claims(censor, **q, limit=1) - matches = search_claims(**q, limit=1)[0] - if matches: - channel = matches[0] - elif censor.censored: - return ResolveCensoredError(raw_url, next(iter(censor.censored))) - else: - return LookupError(f'Could not find channel in "{raw_url}".') - - if url.has_stream: - q = url.stream.to_dict() - if channel is not None: - q['order_by'] = ['^creation_height'] - q['channel_hash'] = channel.claim_hash - q['is_signature_valid'] = True - elif set(q) == {'name'}: - q['is_controlling'] = True - # matches = search_claims(censor, **q, limit=1) - matches = search_claims(**q, limit=1)[0] - if matches: - return matches[0] - elif censor.censored: - return ResolveCensoredError(raw_url, next(iter(censor.censored))) - else: - return LookupError(f'Could not find claim at "{raw_url}".') - - return channel - - -CLAIM_HASH_OR_REPOST_HASH_SQL = f""" -CASE WHEN claim.claim_type = {TXO_TYPES['repost']} - THEN claim.reposted_claim_hash - ELSE claim.claim_hash -END -""" - - -def _apply_constraints_for_array_attributes(constraints, attr, cleaner, for_count=False): - any_items = set(cleaner(constraints.pop(f'any_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) - all_items = set(cleaner(constraints.pop(f'all_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) - not_items = set(cleaner(constraints.pop(f'not_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) - - all_items = {item for item in all_items if item not in not_items} - any_items = {item for item in any_items if item not in not_items} - - any_queries = {} - -# if attr == 'tag': -# common_tags = any_items & COMMON_TAGS.keys() -# if common_tags: -# any_items -= common_tags -# if len(common_tags) < 5: -# for item in common_tags: -# index_name = COMMON_TAGS[item] -# any_queries[f'#_common_tag_{index_name}'] = f""" -# EXISTS( -# SELECT 1 FROM tag INDEXED BY tag_{index_name}_idx -# WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash -# AND tag = '{item}' -# ) -# """ -# elif len(common_tags) >= 5: -# constraints.update({ -# f'$any_common_tag{i}': item for i, item in enumerate(common_tags) -# }) -# values = ', '.join( -# f':$any_common_tag{i}' for i in 
range(len(common_tags)) -# ) -# any_queries[f'#_any_common_tags'] = f""" -# EXISTS( -# SELECT 1 FROM tag WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash -# AND tag IN ({values}) -# ) -# """ - - if any_items: - - constraints.update({ - f'$any_{attr}{i}': item for i, item in enumerate(any_items) - }) - values = ', '.join( - f':$any_{attr}{i}' for i in range(len(any_items)) - ) - if for_count or attr == 'tag': - any_queries[f'#_any_{attr}'] = f""" - {CLAIM_HASH_OR_REPOST_HASH_SQL} IN ( - SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) - ) - """ - else: - any_queries[f'#_any_{attr}'] = f""" - EXISTS( - SELECT 1 FROM {attr} WHERE - {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash - AND {attr} IN ({values}) - ) - """ - - if len(any_queries) == 1: - constraints.update(any_queries) - elif len(any_queries) > 1: - constraints[f'ORed_{attr}_queries__any'] = any_queries - - if all_items: - constraints[f'$all_{attr}_count'] = len(all_items) - constraints.update({ - f'$all_{attr}{i}': item for i, item in enumerate(all_items) - }) - values = ', '.join( - f':$all_{attr}{i}' for i in range(len(all_items)) - ) - if for_count: - constraints[f'#_all_{attr}'] = f""" - {CLAIM_HASH_OR_REPOST_HASH_SQL} IN ( - SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) - GROUP BY claim_hash HAVING COUNT({attr}) = :$all_{attr}_count - ) - """ - else: - constraints[f'#_all_{attr}'] = f""" - {len(all_items)}=( - SELECT count(*) FROM {attr} WHERE - {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash - AND {attr} IN ({values}) - ) - """ - - if not_items: - constraints.update({ - f'$not_{attr}{i}': item for i, item in enumerate(not_items) - }) - values = ', '.join( - f':$not_{attr}{i}' for i in range(len(not_items)) - ) - if for_count: - constraints[f'#_not_{attr}'] = f""" - {CLAIM_HASH_OR_REPOST_HASH_SQL} NOT IN ( - SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) - ) - """ - else: - constraints[f'#_not_{attr}'] = f""" - NOT EXISTS( - SELECT 1 FROM {attr} WHERE - {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash - AND {attr} IN ({values}) - ) - """ diff --git a/lbry/db/queries/__init__.py b/lbry/db/queries/__init__.py new file mode 100644 index 000000000..4de770595 --- /dev/null +++ b/lbry/db/queries/__init__.py @@ -0,0 +1,5 @@ +from .base import * +from .txio import * +from .search import * +from .resolve import * +from .address import * diff --git a/lbry/db/queries/address.py b/lbry/db/queries/address.py new file mode 100644 index 000000000..35ec51daf --- /dev/null +++ b/lbry/db/queries/address.py @@ -0,0 +1,78 @@ +import logging +from typing import Tuple, List, Optional + +from sqlalchemy import func +from sqlalchemy.future import select + +from ..utils import query +from ..query_context import context +from ..tables import TXO, PubkeyAddress, AccountAddress + + +log = logging.getLogger(__name__) + + +def update_address_used_times(addresses): + context().execute( + PubkeyAddress.update() + .values(used_times=( + select(func.count(TXO.c.address)) + .where((TXO.c.address == PubkeyAddress.c.address)), + )) + .where(PubkeyAddress.c.address._in(addresses)) + ) + + +def select_addresses(cols, **constraints): + return context().fetchall(query( + [AccountAddress, PubkeyAddress], + select(*cols).select_from(PubkeyAddress.join(AccountAddress)), + **constraints + )) + + +def get_addresses(cols=None, include_total=False, **constraints) -> Tuple[List[dict], Optional[int]]: + if cols is None: + cols = ( + PubkeyAddress.c.address, + PubkeyAddress.c.used_times, + AccountAddress.c.account, + AccountAddress.c.chain, 
+            AccountAddress.c.pubkey,
+            AccountAddress.c.chain_code,
+            AccountAddress.c.n,
+            AccountAddress.c.depth
+        )
+    return (
+        select_addresses(cols, **constraints),
+        get_address_count(**constraints) if include_total else None
+    )
+
+
+def get_address_count(**constraints):
+    count = select_addresses([func.count().label('total')], **constraints)
+    return count[0]['total'] or 0
+
+
+def get_all_addresses():
+    return context().execute(select(PubkeyAddress.c.address))
+
+
+def add_keys(account, chain, pubkeys):
+    c = context()
+    c.execute(
+        c.insert_or_ignore(PubkeyAddress)
+        .values([{'address': k.address} for k in pubkeys])
+    )
+    c.execute(
+        c.insert_or_ignore(AccountAddress)
+        .values([{
+            'account': account.id,
+            'address': k.address,
+            'chain': chain,
+            'pubkey': k.pubkey_bytes,
+            'chain_code': k.chain_code,
+            'n': k.n,
+            'depth': k.depth
+        } for k in pubkeys])
+    )
diff --git a/lbry/db/queries/base.py b/lbry/db/queries/base.py
new file mode 100644
index 000000000..253674447
--- /dev/null
+++ b/lbry/db/queries/base.py
@@ -0,0 +1,58 @@
+from sqlalchemy import text
+from sqlalchemy.future import select
+
+from ..query_context import context
+from ..tables import SCHEMA_VERSION, metadata, Version, Claim, Support, Block
+
+
+def execute(sql):
+    return context().execute(text(sql))
+
+
+def execute_fetchall(sql):
+    return context().fetchall(text(sql))
+
+
+def has_claims():
+    return context().has_records(Claim)
+
+
+def has_supports():
+    return context().has_records(Support)
+
+
+def get_best_block_height():
+    return context().fetchmax(Block.c.height, -1)
+
+
+def insert_block(block):
+    context().get_bulk_loader().add_block(block).flush()
+
+
+def insert_transaction(block_hash, tx):
+    context().get_bulk_loader().add_transaction(block_hash, tx).flush()
+
+
+def check_version_and_create_tables():
+    with context("db.connecting") as ctx:
+        if ctx.has_table('version'):
+            version = ctx.fetchone(select(Version.c.version).limit(1))
+            if version and version['version'] == SCHEMA_VERSION:
+                return
+        metadata.drop_all(ctx.engine)
+        metadata.create_all(ctx.engine)
+        ctx.execute(Version.insert().values(version=SCHEMA_VERSION))
+        for table in metadata.sorted_tables:
+            disable_trigger_and_constraints(table.name)
+
+
+def disable_trigger_and_constraints(table_name):
+    ctx = context()
+    if ctx.is_postgres:
+        ctx.execute(text(f"ALTER TABLE {table_name} DISABLE TRIGGER ALL;"))
+    if table_name == 'tag':
+        return
+    if ctx.is_postgres:
+        ctx.execute(text(
+            f"ALTER TABLE {table_name} DROP CONSTRAINT {table_name}_pkey CASCADE;"
+        ))
diff --git a/lbry/db/queries/resolve.py b/lbry/db/queries/resolve.py
new file mode 100644
index 000000000..e93262357
--- /dev/null
+++ b/lbry/db/queries/resolve.py
@@ -0,0 +1,90 @@
+import logging
+import itertools
+from operator import itemgetter
+from typing import List, Dict
+
+from lbry.schema.url import URL
+from lbry.error import ResolveCensoredError
+from lbry.blockchain.transaction import Output
+
+from ..query_context import context
+from .search import search_claims
+
+
+log = logging.getLogger(__name__)
+
+
+def _get_referenced_rows(txo_rows: List[dict], censor_channels: List[bytes]):
+    # censor = context().get_resolve_censor()
+    repost_hashes = set(filter(None, map(itemgetter('reposted_claim_hash'), txo_rows)))
+    channel_hashes = set(itertools.chain(
+        filter(None, map(itemgetter('channel_hash'), txo_rows)),
+        censor_channels
+    ))
+
+    reposted_txos = []
+    if repost_hashes:
+        reposted_txos = search_claims(**{'claim.claim_hash__in': repost_hashes})
+        channel_hashes |= set(filter(None, 
map(itemgetter('channel_hash'), reposted_txos))) + + channel_txos = [] + if channel_hashes: + channel_txos = search_claims(**{'claim.claim_hash__in': channel_hashes}) + + # channels must come first for client side inflation to work properly + return channel_txos + reposted_txos + + +def resolve(*urls) -> Dict[str, Output]: + return {url: resolve_url(url) for url in urls} + #txo_rows = [resolve_url(raw_url) for raw_url in urls] + #extra_txo_rows = _get_referenced_rows( + # [txo for txo in txo_rows if isinstance(txo, dict)], + # [txo.censor_hash for txo in txo_rows if isinstance(txo, ResolveCensoredError)] + #) + #return txo_rows, extra_txo_rows + + +def resolve_url(raw_url): + censor = context().get_resolve_censor() + + try: + url = URL.parse(raw_url) + except ValueError as e: + return e + + channel = None + + if url.has_channel: + q = url.channel.to_dict() + if set(q) == {'name'}: + q['is_controlling'] = True + else: + q['order_by'] = ['^creation_height'] + #matches = search_claims(censor, **q, limit=1) + matches = search_claims(**q, limit=1)[0] + if matches: + channel = matches[0] + elif censor.censored: + return ResolveCensoredError(raw_url, next(iter(censor.censored))) + else: + return LookupError(f'Could not find channel in "{raw_url}".') + + if url.has_stream: + q = url.stream.to_dict() + if channel is not None: + q['order_by'] = ['^creation_height'] + q['channel_hash'] = channel.claim_hash + q['is_signature_valid'] = True + elif set(q) == {'name'}: + q['is_controlling'] = True + # matches = search_claims(censor, **q, limit=1) + matches = search_claims(**q, limit=1)[0] + if matches: + return matches[0] + elif censor.censored: + return ResolveCensoredError(raw_url, next(iter(censor.censored))) + else: + return LookupError(f'Could not find claim at "{raw_url}".') + + return channel diff --git a/lbry/db/queries/search.py b/lbry/db/queries/search.py new file mode 100644 index 000000000..6eb17cb9b --- /dev/null +++ b/lbry/db/queries/search.py @@ -0,0 +1,347 @@ +import struct +import logging +from decimal import Decimal +from binascii import unhexlify +from typing import Tuple, List, Optional + +from sqlalchemy import func +from sqlalchemy.future import select, Select + +from lbry.schema.tags import clean_tags +from lbry.schema.result import Censor +from lbry.schema.url import normalize_name +from lbry.blockchain.transaction import Output + +from ..utils import query +from ..query_context import context +from ..tables import TX, TXO, Claim, Support +from ..constants import ( + TXO_TYPES, STREAM_TYPES, ATTRIBUTE_ARRAY_MAX_LENGTH, + SEARCH_INTEGER_PARAMS, SEARCH_ORDER_FIELDS +) + +from .txio import BASE_SELECT_TXO_COLUMNS, rows_to_txos + + +log = logging.getLogger(__name__) + + +BASE_SELECT_SUPPORT_COLUMNS = BASE_SELECT_TXO_COLUMNS + [ + Support.c.channel_hash, + Support.c.is_signature_valid, +] + + +def select_supports(cols: List = None, **constraints) -> Select: + if cols is None: + cols = BASE_SELECT_SUPPORT_COLUMNS + joins = Support.join(TXO, ).join(TX) + return query([Support], select(*cols).select_from(joins), **constraints) + + +def search_supports(**constraints) -> Tuple[List[Output], Optional[int]]: + total = None + if not constraints.pop('no_totals', False): + total = search_support_count(**constraints) + rows = context().fetchall(select_supports(**constraints)) + txos = rows_to_txos(rows, include_tx=False) + return txos, total + + +def search_support_count(**constraints) -> int: + constraints.pop('offset', None) + constraints.pop('limit', None) + constraints.pop('order_by', None) + 
count = context().fetchall(select_supports([func.count().label('total')], **constraints)) + return count[0]['total'] or 0 + + +BASE_SELECT_CLAIM_COLUMNS = BASE_SELECT_TXO_COLUMNS + [ + Claim.c.activation_height, + Claim.c.takeover_height, + Claim.c.creation_height, + Claim.c.is_controlling, + Claim.c.channel_hash, + Claim.c.reposted_claim_hash, + Claim.c.short_url, + Claim.c.canonical_url, + Claim.c.signed_claim_count, + Claim.c.signed_support_count, + (Claim.c.amount + Claim.c.staked_support_amount).label('staked_amount'), + Claim.c.staked_support_amount, + Claim.c.staked_support_count, + Claim.c.is_signature_valid, +] + +def select_claims(cols: List = None, for_count=False, **constraints) -> Select: + if cols is None: + cols = BASE_SELECT_CLAIM_COLUMNS + if 'order_by' in constraints: + order_by_parts = constraints['order_by'] + if isinstance(order_by_parts, str): + order_by_parts = [order_by_parts] + sql_order_by = [] + for order_by in order_by_parts: + is_asc = order_by.startswith('^') + column = order_by[1:] if is_asc else order_by + if column not in SEARCH_ORDER_FIELDS: + raise NameError(f'{column} is not a valid order_by field') + if column == 'name': + column = 'claim_name' + sql_order_by.append( + f"claim.{column} ASC" if is_asc else f"claim.{column} DESC" + ) + constraints['order_by'] = sql_order_by + + ops = {'<=': '__lte', '>=': '__gte', '<': '__lt', '>': '__gt'} + for constraint in SEARCH_INTEGER_PARAMS: + if constraint in constraints: + value = constraints.pop(constraint) + postfix = '' + if isinstance(value, str): + if len(value) >= 2 and value[:2] in ops: + postfix, value = ops[value[:2]], value[2:] + elif len(value) >= 1 and value[0] in ops: + postfix, value = ops[value[0]], value[1:] + if constraint == 'fee_amount': + value = Decimal(value)*1000 + constraints[f'{constraint}{postfix}'] = int(value) + + if 'sequence' in constraints: + constraints['order_by'] = 'activation_height ASC' + constraints['offset'] = int(constraints.pop('sequence')) - 1 + constraints['limit'] = 1 + if 'amount_order' in constraints: + constraints['order_by'] = 'effective_amount DESC' + constraints['offset'] = int(constraints.pop('amount_order')) - 1 + constraints['limit'] = 1 + + if 'claim_id' in constraints: + claim_id = constraints.pop('claim_id') + if len(claim_id) == 40: + constraints['claim_id'] = claim_id + else: + constraints['claim_id__like'] = f'{claim_id[:40]}%' + elif 'claim_ids' in constraints: + constraints['claim_id__in'] = set(constraints.pop('claim_ids')) + + if 'reposted_claim_id' in constraints: + constraints['reposted_claim_hash'] = unhexlify(constraints.pop('reposted_claim_id'))[::-1] + + if 'name' in constraints: + constraints['claim_name'] = normalize_name(constraints.pop('name')) + + if 'public_key_id' in constraints: + constraints['public_key_hash'] = ( + context().ledger.address_to_hash160(constraints.pop('public_key_id'))) + if 'channel_hash' in constraints: + constraints['channel_hash'] = constraints.pop('channel_hash') + if 'channel_ids' in constraints: + channel_ids = constraints.pop('channel_ids') + if channel_ids: + constraints['channel_hash__in'] = { + unhexlify(cid)[::-1] for cid in channel_ids + } + if 'not_channel_ids' in constraints: + not_channel_ids = constraints.pop('not_channel_ids') + if not_channel_ids: + not_channel_ids_binary = { + unhexlify(ncid)[::-1] for ncid in not_channel_ids + } + constraints['claim_hash__not_in#not_channel_ids'] = not_channel_ids_binary + if constraints.get('has_channel_signature', False): + constraints['channel_hash__not_in'] = 
not_channel_ids_binary + else: + constraints['null_or_not_channel__or'] = { + 'signature_valid__is_null': True, + 'channel_hash__not_in': not_channel_ids_binary + } + if 'signature_valid' in constraints: + has_channel_signature = constraints.pop('has_channel_signature', False) + if has_channel_signature: + constraints['signature_valid'] = constraints.pop('signature_valid') + else: + constraints['null_or_signature__or'] = { + 'signature_valid__is_null': True, + 'signature_valid': constraints.pop('signature_valid') + } + elif constraints.pop('has_channel_signature', False): + constraints['signature_valid__is_not_null'] = True + + if 'txid' in constraints: + tx_hash = unhexlify(constraints.pop('txid'))[::-1] + nout = constraints.pop('nout', 0) + constraints['txo_hash'] = tx_hash + struct.pack(' Tuple[List[Output], Optional[int], Optional[Censor]]: + total = None + if not constraints.pop('no_totals', False): + total = search_claim_count(**constraints) + constraints['offset'] = abs(constraints.get('offset', 0)) + constraints['limit'] = min(abs(constraints.get('limit', 10)), 50) + ctx = context() + search_censor = ctx.get_search_censor() + rows = context().fetchall(select_claims(**constraints)) + txos = rows_to_txos(rows, include_tx=False) + return txos, total, search_censor + + +def search_claim_count(**constraints) -> int: + constraints.pop('offset', None) + constraints.pop('limit', None) + constraints.pop('order_by', None) + count = context().fetchall(select_claims([func.count().label('total')], **constraints)) + return count[0]['total'] or 0 +CLAIM_HASH_OR_REPOST_HASH_SQL = f""" +CASE WHEN claim.claim_type = {TXO_TYPES['repost']} + THEN claim.reposted_claim_hash + ELSE claim.claim_hash +END +""" + + +def _apply_constraints_for_array_attributes(constraints, attr, cleaner, for_count=False): + any_items = set(cleaner(constraints.pop(f'any_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) + all_items = set(cleaner(constraints.pop(f'all_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) + not_items = set(cleaner(constraints.pop(f'not_{attr}s', []))[:ATTRIBUTE_ARRAY_MAX_LENGTH]) + + all_items = {item for item in all_items if item not in not_items} + any_items = {item for item in any_items if item not in not_items} + + any_queries = {} + + # if attr == 'tag': + # common_tags = any_items & COMMON_TAGS.keys() + # if common_tags: + # any_items -= common_tags + # if len(common_tags) < 5: + # for item in common_tags: + # index_name = COMMON_TAGS[item] + # any_queries[f'#_common_tag_{index_name}'] = f""" + # EXISTS( + # SELECT 1 FROM tag INDEXED BY tag_{index_name}_idx + # WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash + # AND tag = '{item}' + # ) + # """ + # elif len(common_tags) >= 5: + # constraints.update({ + # f'$any_common_tag{i}': item for i, item in enumerate(common_tags) + # }) + # values = ', '.join( + # f':$any_common_tag{i}' for i in range(len(common_tags)) + # ) + # any_queries[f'#_any_common_tags'] = f""" + # EXISTS( + # SELECT 1 FROM tag WHERE {CLAIM_HASH_OR_REPOST_HASH_SQL}=tag.claim_hash + # AND tag IN ({values}) + # ) + # """ + + if any_items: + + constraints.update({ + f'$any_{attr}{i}': item for i, item in enumerate(any_items) + }) + values = ', '.join( + f':$any_{attr}{i}' for i in range(len(any_items)) + ) + if for_count or attr == 'tag': + any_queries[f'#_any_{attr}'] = f""" + {CLAIM_HASH_OR_REPOST_HASH_SQL} IN ( + SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) + ) + """ + else: + any_queries[f'#_any_{attr}'] = f""" + EXISTS( + SELECT 1 FROM {attr} WHERE + 
{CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash + AND {attr} IN ({values}) + ) + """ + + if len(any_queries) == 1: + constraints.update(any_queries) + elif len(any_queries) > 1: + constraints[f'ORed_{attr}_queries__any'] = any_queries + + if all_items: + constraints[f'$all_{attr}_count'] = len(all_items) + constraints.update({ + f'$all_{attr}{i}': item for i, item in enumerate(all_items) + }) + values = ', '.join( + f':$all_{attr}{i}' for i in range(len(all_items)) + ) + if for_count: + constraints[f'#_all_{attr}'] = f""" + {CLAIM_HASH_OR_REPOST_HASH_SQL} IN ( + SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) + GROUP BY claim_hash HAVING COUNT({attr}) = :$all_{attr}_count + ) + """ + else: + constraints[f'#_all_{attr}'] = f""" + {len(all_items)}=( + SELECT count(*) FROM {attr} WHERE + {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash + AND {attr} IN ({values}) + ) + """ + + if not_items: + constraints.update({ + f'$not_{attr}{i}': item for i, item in enumerate(not_items) + }) + values = ', '.join( + f':$not_{attr}{i}' for i in range(len(not_items)) + ) + if for_count: + constraints[f'#_not_{attr}'] = f""" + {CLAIM_HASH_OR_REPOST_HASH_SQL} NOT IN ( + SELECT claim_hash FROM {attr} WHERE {attr} IN ({values}) + ) + """ + else: + constraints[f'#_not_{attr}'] = f""" + NOT EXISTS( + SELECT 1 FROM {attr} WHERE + {CLAIM_HASH_OR_REPOST_HASH_SQL}={attr}.claim_hash + AND {attr} IN ({values}) + ) + """ diff --git a/lbry/db/queries/txio.py b/lbry/db/queries/txio.py new file mode 100644 index 000000000..fb6572145 --- /dev/null +++ b/lbry/db/queries/txio.py @@ -0,0 +1,574 @@ +import logging +from datetime import date +from typing import Tuple, List, Optional, Union + +from sqlalchemy import union, func, text, between, distinct +from sqlalchemy.future import select, Select + +from ...blockchain.transaction import ( + Transaction, Output, OutputScript, TXRefImmutable +) +from ..tables import ( + TX, TXO, TXI, txi_join_account, txo_join_account, + Claim, Support, AccountAddress +) +from ..utils import query, in_account_ids +from ..query_context import context +from ..constants import ( + TXO_TYPES, CLAIM_TYPE_CODES, CONTENT_TYPE_CODES, MAX_QUERY_VARIABLES +) + + +log = logging.getLogger(__name__) + + +minimum_txo_columns = ( + TXO.c.amount, TXO.c.position.label('txo_position'), + TX.c.tx_hash, TX.c.height, TX.c.timestamp, + func.substr(TX.c.raw, TXO.c.script_offset + 1, TXO.c.script_length).label('src'), +) + + +def row_to_txo(row): + return Output( + amount=row.amount, + script=OutputScript(row.src), + tx_ref=TXRefImmutable.from_hash(row.tx_hash, row.height, row.timestamp), + position=row.txo_position, + ) + + +def where_txo_type_in(txo_type: Optional[Union[tuple, int]] = None): + if txo_type is not None: + if isinstance(txo_type, int): + return TXO.c.txo_type == txo_type + assert len(txo_type) > 0 + if len(txo_type) == 1: + return TXO.c.txo_type == txo_type[0] + else: + return TXO.c.txo_type.in_(txo_type) + return TXO.c.txo_type.in_(CLAIM_TYPE_CODES) + + +def where_unspent_txos( + txo_types: Tuple[int, ...], + blocks: Tuple[int, int] = None, + missing_in_supports_table: bool = False, + missing_in_claims_table: bool = False, + missing_or_stale_in_claims_table: bool = False, +): + condition = where_txo_type_in(txo_types) & (TXO.c.spent_height == 0) + if blocks is not None: + condition &= between(TXO.c.height, *blocks) + if missing_in_supports_table: + condition &= TXO.c.txo_hash.notin_(select(Support.c.txo_hash)) + elif missing_or_stale_in_claims_table: + condition &= 
TXO.c.txo_hash.notin_(select(Claim.c.txo_hash)) + elif missing_in_claims_table: + condition &= TXO.c.claim_hash.notin_(select(Claim.c.claim_hash)) + return condition + + +def where_abandoned_claims(): + return Claim.c.claim_hash.notin_( + select(TXO.c.claim_hash).where(where_unspent_txos(CLAIM_TYPE_CODES)) + ) + + +def count_abandoned_claims(): + return context().fetchtotal(where_abandoned_claims()) + + +def where_abandoned_supports(): + return Support.c.txo_hash.notin_( + select(TXO.c.txo_hash).where(where_unspent_txos(TXO_TYPES['support'])) + ) + + +def count_abandoned_supports(): + return context().fetchtotal(where_abandoned_supports()) + + +def count_unspent_txos( + txo_types: Tuple[int, ...], + blocks: Tuple[int, int] = None, + missing_in_supports_table: bool = False, + missing_in_claims_table: bool = False, + missing_or_stale_in_claims_table: bool = False, +): + return context().fetchtotal( + where_unspent_txos( + txo_types, blocks, + missing_in_supports_table, + missing_in_claims_table, + missing_or_stale_in_claims_table, + ) + ) + + +def distribute_unspent_txos( + txo_types: Tuple[int, ...], + blocks: Tuple[int, int] = None, + missing_in_supports_table: bool = False, + missing_in_claims_table: bool = False, + missing_or_stale_in_claims_table: bool = False, +) -> Tuple[int, List[Tuple[int, int]]]: + chunks = ( + select(func.ntile(10).over(order_by=TXO.c.height).label('chunk'), TXO.c.height) + .where( + where_unspent_txos( + txo_types, blocks, + missing_in_supports_table, + missing_in_claims_table, + missing_or_stale_in_claims_table, + ) + ).cte('chunks') + ) + sql = ( + select( + func.count('*').label('items'), + func.min(chunks.c.height).label('start_height'), + func.max(chunks.c.height).label('end_height'), + ).group_by(chunks.c.chunk) + ) + total = 0 + buckets = [] + for bucket in context().fetchall(sql): + if len(buckets) > 0 and buckets[-1][-1] == bucket['start_height']: + bucket['start_height'] += 1 + total += bucket['items'] + buckets.append((bucket['start_height'], bucket['end_height'])) + return total, buckets + + +def where_changed_support_txos(blocks: Optional[Tuple[int, int]]): + return ( + (TXO.c.txo_type == TXO_TYPES['support']) & ( + between(TXO.c.height, blocks[0], blocks[-1]) | + between(TXO.c.spent_height, blocks[0], blocks[-1]) + ) + ) + + +def where_claims_with_changed_supports(blocks: Optional[Tuple[int, int]]): + return Claim.c.claim_hash.in_( + select(TXO.c.claim_hash).where( + where_changed_support_txos(blocks) + ) + ) + + +def count_claims_with_changed_supports(blocks: Optional[Tuple[int, int]]) -> int: + sql = ( + select(func.count(distinct(TXO.c.claim_hash)).label('total')) + .where(where_changed_support_txos(blocks)) + ) + return context().fetchone(sql)['total'] + + +def where_changed_content_txos(blocks: Optional[Tuple[int, int]]): + return ( + (TXO.c.channel_hash.isnot(None)) & ( + between(TXO.c.height, blocks[0], blocks[-1]) | + between(TXO.c.spent_height, blocks[0], blocks[-1]) + ) + ) + + +def where_channels_with_changed_content(blocks: Optional[Tuple[int, int]]): + return Claim.c.claim_hash.in_( + select(TXO.c.channel_hash).where( + where_changed_content_txos(blocks) + ) + ) + + +def count_channels_with_changed_content(blocks: Optional[Tuple[int, int]]): + sql = ( + select(func.count(distinct(TXO.c.channel_hash)).label('total')) + .where(where_changed_content_txos(blocks)) + ) + return context().fetchone(sql)['total'] + + +def select_transactions(cols, account_ids=None, **constraints): + s: Select = select(*cols).select_from(TX) + if not {'tx_hash', 
'tx_hash__in'}.intersection(constraints): + assert account_ids, ( + "'accounts' argument required when " + "no 'tx_hash' constraint is present" + ) + where = in_account_ids(account_ids) + tx_hashes = union( + select(TXO.c.tx_hash).select_from(txo_join_account).where(where), + select(TXI.c.tx_hash).select_from(txi_join_account).where(where) + ) + s = s.where(TX.c.tx_hash.in_(tx_hashes)) + return context().fetchall(query([TX], s, **constraints)) + + +TXO_NOT_MINE = Output(None, None, is_my_output=False) + + +def get_raw_transactions(tx_hashes): + return context().fetchall( + select(TX.c.tx_hash, TX.c.raw).where(TX.c.tx_hash.in_(tx_hashes)) + ) + + +def get_transactions(**constraints) -> Tuple[List[Transaction], Optional[int]]: + txs = [] + sql = select(TX.c.raw, TX.c.height, TX.c.position).select_from(TX) + rows = context().fetchall(query([TX], sql, **constraints)) + for row in rows: + txs.append(Transaction(row['raw'], height=row['height'], position=row['position'])) + return txs, 0 + + +def _get_transactions( + wallet=None, include_total=False, **constraints +) -> Tuple[List[Transaction], Optional[int]]: + include_is_my_input = constraints.pop('include_is_my_input', False) + include_is_my_output = constraints.pop('include_is_my_output', False) + + tx_rows = select_transactions( + [TX.c.tx_hash, TX.c.raw, TX.c.height, TX.c.position, TX.c.is_verified], + order_by=constraints.pop('order_by', ["height=0 DESC", "height DESC", "position DESC"]), + **constraints + ) + + txids, txs, txi_txoids = [], [], [] + for row in tx_rows: + txids.append(row['tx_hash']) + txs.append(Transaction( + raw=row['raw'], height=row['height'], position=row['position'], + is_verified=bool(row['is_verified']) + )) + for txi in txs[-1].inputs: + txi_txoids.append(txi.txo_ref.hash) + + annotated_txos = {} + for offset in range(0, len(txids), MAX_QUERY_VARIABLES): + annotated_txos.update({ + txo.id: txo for txo in + get_txos( + wallet=wallet, + tx_hash__in=txids[offset:offset + MAX_QUERY_VARIABLES], order_by='txo.tx_hash', + include_is_my_input=include_is_my_input, + include_is_my_output=include_is_my_output, + )[0] + }) + + referenced_txos = {} + for offset in range(0, len(txi_txoids), MAX_QUERY_VARIABLES): + referenced_txos.update({ + txo.id: txo for txo in + get_txos( + wallet=wallet, + txo_hash__in=txi_txoids[offset:offset + MAX_QUERY_VARIABLES], order_by='txo.txo_hash', + include_is_my_output=include_is_my_output, + )[0] + }) + + for tx in txs: + for txi in tx.inputs: + txo = referenced_txos.get(txi.txo_ref.id) + if txo: + txi.txo_ref = txo.ref + for txo in tx.outputs: + _txo = annotated_txos.get(txo.id) + if _txo: + txo.update_annotations(_txo) + else: + txo.update_annotations(TXO_NOT_MINE) + + for tx in txs: + txos = tx.outputs + if len(txos) >= 2 and txos[1].can_decode_purchase_data: + txos[0].purchase = txos[1] + + return txs, get_transaction_count(**constraints) if include_total else None + + +def get_transaction_count(**constraints): + constraints.pop('wallet', None) + constraints.pop('offset', None) + constraints.pop('limit', None) + constraints.pop('order_by', None) + count = select_transactions([func.count().label('total')], **constraints) + return count[0]['total'] or 0 + + +BASE_SELECT_TXO_COLUMNS = [ + TX.c.tx_hash, TX.c.raw, TX.c.height, TX.c.position.label('tx_position'), + TX.c.is_verified, TX.c.timestamp, + TXO.c.txo_type, TXO.c.position.label('txo_position'), TXO.c.amount, TXO.c.spent_height, + TXO.c.script_offset, TXO.c.script_length, +] + + +def select_txos( + cols=None, account_ids=None, 
is_my_input=None,
+    is_my_output=True, is_my_input_or_output=None, exclude_internal_transfers=False,
+    include_is_my_input=False, claim_id_not_in_claim_table=None,
+    txo_id_not_in_claim_table=None, txo_id_not_in_support_table=None,
+    **constraints
+) -> Select:
+    if cols is None:
+        cols = BASE_SELECT_TXO_COLUMNS
+    s: Select = select(*cols)
+    if account_ids:
+        my_addresses = select(AccountAddress.c.address).where(in_account_ids(account_ids))
+        if is_my_input_or_output:
+            include_is_my_input = True
+            s = s.where(
+                TXO.c.address.in_(my_addresses) | (
+                    (TXI.c.address != None) &
+                    (TXI.c.address.in_(my_addresses))
+                )
+            )
+        else:
+            if is_my_output:
+                s = s.where(TXO.c.address.in_(my_addresses))
+            elif is_my_output is False:
+                s = s.where(TXO.c.address.notin_(my_addresses))
+            if is_my_input:
+                include_is_my_input = True
+                s = s.where(
+                    (TXI.c.address != None) &
+                    (TXI.c.address.in_(my_addresses))
+                )
+            elif is_my_input is False:
+                include_is_my_input = True
+                s = s.where(
+                    (TXI.c.address == None) |
+                    (TXI.c.address.notin_(my_addresses))
+                )
+        if exclude_internal_transfers:
+            include_is_my_input = True
+            s = s.where(
+                (TXO.c.txo_type != TXO_TYPES['other']) |
+                (TXO.c.address.notin_(my_addresses)) |
+                (TXI.c.address == None) |
+                (TXI.c.address.notin_(my_addresses))
+            )
+    joins = TXO.join(TX)
+    #if constraints.get('is_spent', None) is False:
+    #    s = s.where((TXO.c.is_spent == False) & (TXO.c.is_reserved == False))
+    if include_is_my_input:
+        joins = joins.join(TXI, (TXI.c.position == 0) & (TXI.c.tx_hash == TXO.c.tx_hash), isouter=True)
+    if claim_id_not_in_claim_table:
+        s = s.where(TXO.c.claim_hash.notin_(select(Claim.c.claim_hash)))
+    elif txo_id_not_in_claim_table:
+        s = s.where(TXO.c.txo_hash.notin_(select(Claim.c.txo_hash)))
+    elif txo_id_not_in_support_table:
+        s = s.where(TXO.c.txo_hash.notin_(select(Support.c.txo_hash)))
+    return query([TXO, TX], s.select_from(joins), **constraints)
+
+
+META_ATTRS = (
+    'activation_height', 'takeover_height', 'creation_height', 'staked_amount',
+    'short_url', 'canonical_url', 'staked_support_amount', 'staked_support_count',
+    'signed_claim_count', 'signed_support_count', 'is_signature_valid',
+)
+
+
+def rows_to_txos(rows: List[dict], include_tx=True) -> List[Output]:
+    txos = []
+    tx_cache = {}
+    for row in rows:
+        if include_tx:
+            if row['tx_hash'] not in tx_cache:
+                tx_cache[row['tx_hash']] = Transaction(
+                    row['raw'], height=row['height'], position=row['tx_position'],
+                    is_verified=bool(row['is_verified']),
+                )
+            txo = tx_cache[row['tx_hash']].outputs[row['txo_position']]
+        else:
+            source = row['raw'][row['script_offset']:row['script_offset']+row['script_length']]
+            txo = Output(
+                amount=row['amount'],
+                script=OutputScript(source),
+                tx_ref=TXRefImmutable.from_hash(row['tx_hash'], row['height'], row['timestamp']),
+                position=row['txo_position'],
+            )
+        txo.spent_height = bool(row['spent_height'])
+        if 'is_my_input' in row:
+            txo.is_my_input = bool(row['is_my_input'])
+        if 'is_my_output' in row:
+            txo.is_my_output = bool(row['is_my_output'])
+        if 'is_my_input' in row and 'is_my_output' in row:
+            if txo.is_my_input and txo.is_my_output and row['txo_type'] == TXO_TYPES['other']:
+                txo.is_internal_transfer = True
+            else:
+                txo.is_internal_transfer = False
+        if 'received_tips' in row:
+            txo.received_tips = row['received_tips']
+        for attr in META_ATTRS:
+            if attr in row:
+                txo.meta[attr] = row[attr]
+        txos.append(txo)
+    return txos
+
+
+def get_txos(no_tx=False, include_total=False, **constraints) -> Tuple[List[Output], Optional[int]]:
+
wallet_account_ids = constraints.pop('wallet_account_ids', []) + include_is_my_input = constraints.get('include_is_my_input', False) + include_is_my_output = constraints.pop('include_is_my_output', False) + include_received_tips = constraints.pop('include_received_tips', False) + + select_columns = BASE_SELECT_TXO_COLUMNS + [ + TXO.c.claim_name + ] + + my_accounts = None + if wallet_account_ids: + my_accounts = select(AccountAddress.c.address).where(in_account_ids(wallet_account_ids)) + + if include_is_my_output and my_accounts is not None: + if constraints.get('is_my_output', None) in (True, False): + select_columns.append(text(f"{1 if constraints['is_my_output'] else 0} AS is_my_output")) + else: + select_columns.append(TXO.c.address.in_(my_accounts).label('is_my_output')) + + if include_is_my_input and my_accounts is not None: + if constraints.get('is_my_input', None) in (True, False): + select_columns.append(text(f"{1 if constraints['is_my_input'] else 0} AS is_my_input")) + else: + select_columns.append(( + (TXI.c.address != None) & + (TXI.c.address.in_(my_accounts)) + ).label('is_my_input')) + + if include_received_tips: + support = TXO.alias('support') + select_columns.append( + select(func.coalesce(func.sum(support.c.amount), 0)) + .select_from(support).where( + (support.c.claim_hash == TXO.c.claim_hash) & + (support.c.txo_type == TXO_TYPES['support']) & + (support.c.address.in_(my_accounts)) & + (support.c.txo_hash.notin_(select(TXI.c.txo_hash))) + ).label('received_tips') + ) + + if 'order_by' not in constraints or constraints['order_by'] == 'height': + constraints['order_by'] = [ + "tx.height=0 DESC", "tx.height DESC", "tx.position DESC", "txo.position" + ] + elif constraints.get('order_by', None) == 'none': + del constraints['order_by'] + + rows = context().fetchall(select_txos(select_columns, **constraints)) + txos = rows_to_txos(rows, not no_tx) + + channel_hashes = set() + for txo in txos: + if txo.is_claim and txo.can_decode_claim: + if txo.claim.is_signed: + channel_hashes.add(txo.claim.signing_channel_hash) + + if channel_hashes: + channels = { + txo.claim_hash: txo for txo in + get_txos( + txo_type=TXO_TYPES['channel'], spent_height=0, + wallet_account_ids=wallet_account_ids, claim_hash__in=channel_hashes + )[0] + } + for txo in txos: + if txo.is_claim and txo.can_decode_claim: + txo.channel = channels.get(txo.claim.signing_channel_hash, None) + + return txos, get_txo_count(**constraints) if include_total else None + + +def _clean_txo_constraints_for_aggregation(constraints): + constraints.pop('include_is_my_input', None) + constraints.pop('include_is_my_output', None) + constraints.pop('include_received_tips', None) + constraints.pop('wallet_account_ids', None) + constraints.pop('offset', None) + constraints.pop('limit', None) + constraints.pop('order_by', None) + + +def get_txo_count(**constraints): + _clean_txo_constraints_for_aggregation(constraints) + count = context().fetchall(select_txos([func.count().label('total')], **constraints)) + return count[0]['total'] or 0 + + +def get_txo_sum(**constraints): + _clean_txo_constraints_for_aggregation(constraints) + result = context().fetchall(select_txos([func.sum(TXO.c.amount).label('total')], **constraints)) + return result[0]['total'] or 0 + + +def get_balance(**constraints): + return get_txo_sum(spent_height=0, **constraints) + + +def get_report(account_ids): + return + + +def get_txo_plot(start_day=None, days_back=0, end_day=None, days_after=None, **constraints): + _clean_txo_constraints_for_aggregation(constraints) + 
if start_day is None: + # TODO: Fix + current_ordinal = 0 # self.ledger.headers.estimated_date(self.ledger.headers.height).toordinal() + constraints['day__gte'] = current_ordinal - days_back + else: + constraints['day__gte'] = date.fromisoformat(start_day).toordinal() + if end_day is not None: + constraints['day__lte'] = date.fromisoformat(end_day).toordinal() + elif days_after is not None: + constraints['day__lte'] = constraints['day__gte'] + days_after + plot = context().fetchall(select_txos( + [TX.c.day, func.sum(TXO.c.amount).label('total')], + group_by='day', order_by='day', **constraints + )) + for row in plot: + row['day'] = date.fromordinal(row['day']) + return plot + + +def get_purchases(**constraints) -> Tuple[List[Output], Optional[int]]: + accounts = constraints.pop('accounts', None) + assert accounts, "'accounts' argument required to find purchases" + if not {'purchased_claim_hash', 'purchased_claim_hash__in'}.intersection(constraints): + constraints['purchased_claim_hash__is_not_null'] = True + constraints['tx_hash__in'] = ( + select(TXI.c.tx_hash).select_from(txi_join_account).where(in_account_ids(accounts)) + ) + txs, count = get_transactions(**constraints) + return [tx.outputs[0] for tx in txs], count + + +def get_supports_summary(self, **constraints): + return get_txos( + txo_type=TXO_TYPES['support'], + spent_height=0, is_my_output=True, + include_is_my_input=True, + no_tx=True, + **constraints + ) + + +def reserve_outputs(txo_hashes, is_reserved=True): + context().execute( + TXO.update() + .values(is_reserved=is_reserved) + .where(TXO.c.txo_hash.in_(txo_hashes)) + ) + + +def release_all_outputs(account_id): + context().execute( + TXO.update().values(is_reserved=False).where( + TXO.c.is_reserved & TXO.c.address.in_( + select(AccountAddress.c.address).where(in_account_ids(account_id)) + ) + ) + ) diff --git a/lbry/db/query_context.py b/lbry/db/query_context.py index 4d0de0ac7..09172814f 100644 --- a/lbry/db/query_context.py +++ b/lbry/db/query_context.py @@ -8,7 +8,7 @@ from typing import Dict, List, Optional, Tuple from dataclasses import dataclass, field from contextvars import ContextVar -from sqlalchemy import create_engine, inspect, bindparam, func, exists, case, event +from sqlalchemy import create_engine, inspect, bindparam, func, exists, case, event as sqlalchemy_event from sqlalchemy.future import select from sqlalchemy.engine import Engine, Connection from sqlalchemy.sql import Insert @@ -49,7 +49,6 @@ class QueryContext: pid: int # QueryContext __enter__/__exit__ state - print_timers: List current_timer_name: Optional[str] = None current_timer_time: float = 0 current_progress: Optional['ProgressContext'] = None @@ -94,17 +93,17 @@ class QueryContext: rows = self.connection.execute(sql, *args).fetchall() return [dict(row._mapping) for row in rows] - def fetchtotal(self, condition): + def fetchtotal(self, condition) -> int: sql = select(func.count('*').label('total')).where(condition) return self.fetchone(sql)['total'] - def fetchmax(self, column): - sql = select(func.max(column).label('max_result')) + def fetchmax(self, column, default: int) -> int: + sql = select(func.coalesce(func.max(column), default).label('max_result')) return self.fetchone(sql)['max_result'] - def has_records(self, table): + def has_records(self, table) -> bool: sql = select(exists([1], from_obj=table).label('result')) - return self.fetchone(sql)['result'] + return bool(self.fetchone(sql)['result']) def insert_or_ignore(self, table): if self.is_sqlite: @@ -139,14 +138,15 @@ class 
QueryContext: self.current_timer_name = timer_name return self + @property + def elapsed(self): + return time.perf_counter() - self.current_timer_time + def __enter__(self) -> 'QueryContext': self.current_timer_time = time.perf_counter() return self def __exit__(self, exc_type, exc_val, exc_tb): - if self.current_timer_name and self.current_timer_name in self.print_timers: - elapsed = time.perf_counter() - self.current_timer_time - print(f"{self.print_timers} in {elapsed:.6f}s", flush=True) self.current_timer_name = None self.current_timer_time = 0 self.current_progress = None @@ -172,13 +172,13 @@ def set_sqlite_settings(connection, _): def initialize( ledger: Ledger, message_queue: mp.Queue, stop_event: mp.Event, - track_metrics=False, block_and_filter=None, print_timers=None): + track_metrics=False, block_and_filter=None): url = ledger.conf.db_url_or_default engine = create_engine(url) if engine.name == "postgresql": - event.listen(engine, "connect", set_postgres_settings) + sqlalchemy_event.listen(engine, "connect", set_postgres_settings) elif engine.name == "sqlite": - event.listen(engine, "connect", set_sqlite_settings) + sqlalchemy_event.listen(engine, "connect", set_sqlite_settings) connection = engine.connect() if block_and_filter is not None: blocked_streams, blocked_channels, filtered_streams, filtered_channels = block_and_filter @@ -192,7 +192,6 @@ def initialize( stack=[], metrics={}, is_tracking_metrics=track_metrics, blocked_streams=blocked_streams, blocked_channels=blocked_channels, filtered_streams=filtered_streams, filtered_channels=filtered_channels, - print_timers=print_timers or [] ) ) @@ -209,12 +208,11 @@ def uninitialize(): class Event: _events: List['Event'] = [] - __slots__ = 'id', 'name', 'unit', 'step_size' + __slots__ = 'id', 'name', 'units' - def __init__(self, name: str, unit: str, step_size: int): + def __init__(self, name: str, units: Tuple[str]): self.name = name - self.unit = unit - self.step_size = step_size + self.units = units @classmethod def get_by_id(cls, event_id) -> 'Event': @@ -227,21 +225,22 @@ class Event: return event @classmethod - def add(cls, name: str, unit: str, step_size: int) -> 'Event': + def add(cls, name: str, *units: str) -> 'Event': assert cls.get_by_name(name) is None, f"Event {name} already exists." 
- event = cls(name, unit, step_size) + assert name.count('.') == 3, f"Event {name} does not follow pattern of: [module].sync.[phase].[task]" + event = cls(name, units) cls._events.append(event) event.id = cls._events.index(event) return event -def event_emitter(name: str, unit: str, step_size=1): - event = Event.add(name, unit, step_size) +def event_emitter(name: str, *units: str, throttle=1): + event = Event.add(name, *units) def wrapper(f): @functools.wraps(f) def with_progress(*args, **kwargs): - with progress(event, step_size=step_size) as p: + with progress(event, throttle=throttle) as p: return f(*args, **kwargs, p=p) return with_progress @@ -251,18 +250,23 @@ def event_emitter(name: str, unit: str, step_size=1): class ProgressPublisher(EventQueuePublisher): def message_to_event(self, message): - event = Event.get_by_id(message[0]) + total, extra = None, None + if len(message) == 3: + event_id, progress_id, done = message + elif len(message) == 5: + event_id, progress_id, done, total, extra = message + else: + raise TypeError("progress message must be tuple of 3 or 5 values.") + event = Event.get_by_id(event_id) d = { "event": event.name, - "data": { - "pid": message[1], - "step": message[2], - "total": message[3], - "unit": event.unit - } + "data": {"id": progress_id, "done": done} } - if len(message) > 4 and isinstance(message[4], dict): - d['data'].update(message[4]) + if total is not None: + d['data']['total'] = total + d['data']['units'] = event.units + if isinstance(extra, dict): + d['data'].update(extra) return d @@ -270,56 +274,105 @@ class BreakProgress(Exception): """Break out of progress when total is 0.""" -class ProgressContext: +class Progress: - def __init__(self, ctx: QueryContext, event: Event, step_size=1): - self.ctx = ctx + def __init__(self, message_queue: mp.Queue, event: Event, throttle=1): + self.message_queue = message_queue self.event = event - self.extra = None - self.step_size = step_size - self.last_step = -1 - self.total = 0 + self.progress_id = 0 + self.throttle = throttle + self.last_done = (0,)*len(event.units) + self.last_done_queued = (0,)*len(event.units) + self.totals = (0,)*len(event.units) + + def __enter__(self) -> 'Progress': + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.last_done != self.last_done_queued: + self.message_queue.put((self.event.id, self.progress_id, self.last_done)) + self.last_done_queued = self.last_done + if exc_type == BreakProgress: + return True + if self.last_done != self.totals: # or exc_type is not None: + # TODO: add exception info into closing message if there is any + self.message_queue.put(( + self.event.id, self.progress_id, (-1,)*len(self.event.units) + )) + + def start(self, *totals: int, progress_id=0, label=None, extra=None): + assert len(totals) == len(self.event.units), \ + f"Totals {totals} do not match up with units {self.event.units}." + if not any(totals): + raise BreakProgress + self.totals = totals + self.progress_id = progress_id + extra = {} if extra is None else extra.copy() + if label is not None: + extra['label'] = label + self.step(*((0,)*len(totals)), force=True, extra=extra) + + def step(self, *done: int, force=False, extra=None): + if done == (): + assert len(self.totals) == 1, "Incrementing step() only works with one unit progress." + done = (self.last_done[0]+1,) + assert len(done) == len(self.totals), \ + f"Done elements {done} don't match total elements {self.totals}." 
+ self.last_done = done + send_condition = force or extra is not None or ( + # throttle rate of events being generated (only throttles first unit value) + (self.throttle == 1 or done[0] % self.throttle == 0) and + # deduplicate finish event by not sending a step where done == total + any(i < j for i, j in zip(done, self.totals)) and + # deduplicate same event + done != self.last_done_queued + ) + if send_condition: + if extra is not None: + self.message_queue.put_nowait( + (self.event.id, self.progress_id, done, self.totals, extra) + ) + else: + self.message_queue.put_nowait( + (self.event.id, self.progress_id, done) + ) + self.last_done_queued = done + + def add(self, *done: int, force=False, extra=None): + assert len(done) == len(self.last_done), \ + f"Done elements {done} don't match total elements {self.last_done}." + self.step( + *(i+j for i, j in zip(self.last_done, done)), + force=force, extra=extra + ) + + def iter(self, items: List): + self.start(len(items)) + for item in items: + yield item + self.step() + + +class ProgressContext(Progress): + + def __init__(self, ctx: QueryContext, event: Event, throttle=1): + super().__init__(ctx.message_queue, event, throttle) + self.ctx = ctx def __enter__(self) -> 'ProgressContext': self.ctx.__enter__() return self def __exit__(self, exc_type, exc_val, exc_tb): - self.ctx.message_queue.put(self.get_event_args(self.total)) - self.ctx.__exit__(exc_type, exc_val, exc_tb) - if exc_type == BreakProgress: - return True - - def start(self, total, extra=None): - if not total: - raise BreakProgress - self.total = total - if extra is not None: - self.extra = extra - self.step(0) - - def step(self, done): - send_condition = ( - # enforce step rate - (self.step_size == 1 or done % self.step_size == 0) and - # deduplicate finish event by not sending a step where done == total - done < self.total and - # deduplicate same step - done != self.last_step - ) - if send_condition: - self.ctx.message_queue.put_nowait(self.get_event_args(done)) - self.last_step = done - - def get_event_args(self, done): - if self.extra is not None: - return self.event.id, self.ctx.pid, done, self.total, self.extra - return self.event.id, self.ctx.pid, done, self.total + return any(( + self.ctx.__exit__(exc_type, exc_val, exc_tb), + super().__exit__(exc_type, exc_val, exc_tb) + )) -def progress(e: Event, step_size=1) -> ProgressContext: +def progress(e: Event, throttle=1) -> ProgressContext: ctx = context(e.name) - ctx.current_progress = ProgressContext(ctx, e, step_size=step_size) + ctx.current_progress = ProgressContext(ctx, e, throttle=throttle) return ctx.current_progress @@ -400,10 +453,6 @@ class BulkLoader: if txo.can_decode_claim: claim = txo.claim row['txo_type'] = TXO_TYPES.get(claim.claim_type, TXO_TYPES['stream']) - if claim.is_signed: - row['channel_hash'] = claim.signing_channel_hash - row['signature'] = txo.get_encoded_signature() - row['signature_digest'] = txo.get_signature_digest(self.ledger) if claim.is_channel: row['public_key'] = claim.channel.public_key_bytes row['public_key_hash'] = self.ledger.address_to_hash160( @@ -413,15 +462,16 @@ class BulkLoader: row['txo_type'] = TXO_TYPES['stream'] elif txo.is_support: row['txo_type'] = TXO_TYPES['support'] - if txo.can_decode_support: - claim = txo.support - if claim.is_signed: - row['channel_hash'] = claim.signing_channel_hash elif txo.purchase is not None: row['txo_type'] = TXO_TYPES['purchase'] row['claim_id'] = txo.purchased_claim_id row['claim_hash'] = txo.purchased_claim_hash if txo.script.is_claim_involved: 
+ signable = txo.can_decode_signable + if signable and signable.is_signed: + row['channel_hash'] = signable.signing_channel_hash + row['signature'] = txo.get_encoded_signature() + row['signature_digest'] = txo.get_signature_digest(self.ledger) row['claim_id'] = txo.claim_id row['claim_hash'] = txo.claim_hash try: @@ -431,17 +481,18 @@ class BulkLoader: return row def claim_to_rows( - self, txo: Output, timestamp: int, staked_support_amount: int, staked_support_count: int, - signature: bytes = None, signature_digest: bytes = None, channel_public_key: bytes = None, - ) -> Tuple[dict, List]: + self, txo: Output, staked_support_amount: int, staked_support_count: int, + signature: bytes = None, signature_digest: bytes = None, channel_public_key: bytes = None, + ) -> Tuple[dict, List]: + tx = txo.tx_ref d = { 'claim_type': None, 'address': txo.get_address(self.ledger), 'txo_hash': txo.hash, 'amount': txo.amount, - 'height': txo.tx_ref.height, - 'timestamp': timestamp, + 'height': tx.height, + 'timestamp': tx.timestamp, # support 'staked_amount': txo.amount + staked_support_amount, 'staked_support_amount': staked_support_amount, @@ -508,26 +559,30 @@ class BulkLoader: return d, tags - def support_to_row(self, txo): - tx = txo.tx_ref.tx + def support_to_row( + self, txo: Output, channel_public_key: bytes = None, + signature: bytes = None, signature_digest: bytes = None + ): + tx = txo.tx_ref d = { 'txo_hash': txo.ref.hash, 'claim_hash': txo.claim_hash, 'address': txo.get_address(self.ledger), 'amount': txo.amount, 'height': tx.height, + 'timestamp': tx.timestamp, 'emoji': None, 'channel_hash': None, - 'signature': None, - 'signature_digest': None, + 'is_signature_valid': None, } support = txo.can_decode_support if support: d['emoji'] = support.emoji if support.is_signed: d['channel_hash'] = support.signing_channel_hash - d['signature'] = txo.get_encoded_signature() - d['signature_digest'] = txo.get_signature_digest(None) + d['is_signature_valid'] = Output.is_signature_valid( + signature, signature_digest, channel_public_key + ) return d def add_block(self, block: Block): @@ -545,13 +600,14 @@ class BulkLoader: self.txos.append(self.txo_to_row(tx, txo)) return self - def add_support(self, txo: Output): - self.supports.append(self.support_to_row(txo)) + def add_support(self, txo: Output, **extra): + self.supports.append(self.support_to_row(txo, **extra)) def add_claim( - self, txo: Output, short_url: str, - creation_height: int, activation_height: int, expiration_height: int, - takeover_height: int = None, channel_url: str = None, **extra): + self, txo: Output, short_url: str, + creation_height: int, activation_height: int, expiration_height: int, + takeover_height: int = None, channel_url: str = None, **extra + ): try: claim_name = txo.claim_name.replace('\x00', '') normalized_name = txo.normalized_name @@ -576,7 +632,7 @@ class BulkLoader: self.tags.extend(tags) return self - def update_claim(self, txo: Output, channel_url: Optional[str], **extra): + def update_claim(self, txo: Output, channel_url: str = None, **extra): d, tags = self.claim_to_rows(txo, **extra) d['pk'] = txo.claim_hash d['channel_url'] = channel_url @@ -603,56 +659,7 @@ class BulkLoader: (Support.insert(), self.supports), ) - def save(self, unit_table, batch_size=10000): - queries = self.get_queries() - - p = self.ctx.current_progress - done = row_scale = 0 - if p: - progress_total, row_total = 0, sum(len(q[1]) for q in queries) - for sql, rows in queries: - if sql.table == unit_table: - progress_total += len(rows) - if not 
progress_total: - assert row_total == 0, "Rows used for progress are empty but other rows present." - return - row_scale = row_total / progress_total - p.start(progress_total) - - execute = self.ctx.connection.execute - for sql, rows in queries: - if not rows: - continue - if self.ctx.is_postgres and isinstance(sql, Insert): - self.ctx.pg_copy(sql.table, rows) - if p: - done += int(len(rows) / row_scale) - p.step(done) - else: - for chunk_rows in chunk(rows, batch_size): - try: - execute(sql, chunk_rows) - except Exception: - for row in chunk_rows: - try: - execute(sql, [row]) - except Exception: - p.ctx.message_queue.put_nowait( - (Event.COMPLETE.value, os.getpid(), 1, 1) - ) - with open('badrow', 'a') as badrow: - badrow.write(repr(sql)) - badrow.write('\n') - badrow.write(repr(row)) - badrow.write('\n') - print(sql) - print(row) - raise - if p: - done += int(len(chunk_rows)/row_scale) - p.step(done) - - def flush(self, done_counter_table) -> int: + def flush(self, return_row_count_for_table) -> int: execute = self.ctx.connection.execute done = 0 for sql, rows in self.get_queries(): @@ -662,7 +669,7 @@ class BulkLoader: self.ctx.pg_copy(sql.table, rows) else: execute(sql, rows) - if sql.table == done_counter_table: + if sql.table == return_row_count_for_table: done += len(rows) rows.clear() return done diff --git a/lbry/db/sync.py b/lbry/db/sync.py index 9463e540d..b0423f6f2 100644 --- a/lbry/db/sync.py +++ b/lbry/db/sync.py @@ -1,12 +1,8 @@ -# pylint: disable=singleton-comparison from sqlalchemy.future import select -from lbry.db.constants import CLAIM_TYPE_CODES, TXO_TYPES -from lbry.db.queries import select_txos, rows_to_txos from lbry.db.query_context import progress, Event -from lbry.db.tables import ( - TXO, TXI, Claim, Support -) +from lbry.db.tables import TXI, TXO +from .queries import rows_to_txos def process_all_things_after_sync(): @@ -48,15 +44,14 @@ def set_input_addresses(ctx): set_addresses = ( TXI.update() .values(address=address_query.scalar_subquery()) - .where(TXI.c.address == None) + .where(TXI.c.address.is_(None)) ) else: set_addresses = ( TXI.update() .values({TXI.c.address: TXO.c.address}) - .where((TXI.c.address == None) & (TXI.c.txo_hash == TXO.c.txo_hash)) + .where((TXI.c.address.is_(None)) & (TXI.c.txo_hash == TXO.c.txo_hash)) ) - ctx.execute(set_addresses) @@ -76,56 +71,3 @@ def update_spent_outputs(ctx): ) ) ctx.execute(set_spent_height) - - -def condition_spent_claims(claim_type: list = None): - if claim_type is not None: - if len(claim_type) == 0: - raise ValueError("Missing 'claim_type'.") - if len(claim_type) == 1: - type_filter = TXO.c.txo_type == claim_type[0] - else: - type_filter = TXO.c.txo_type.in_(claim_type) - else: - type_filter = TXO.c.txo_type.in_(CLAIM_TYPE_CODES) - return Claim.c.claim_hash.notin_( - select(TXO.c.claim_hash).where(type_filter & (TXO.c.spent_height == 0)) - ) - - -# find UTXOs that are claims and their claim_id is not in claim table, -# this means they need to be inserted -select_missing_claims = ( - select_txos(txo_type__in=CLAIM_TYPE_CODES, spent_height=0, claim_id_not_in_claim_table=True) -) - - -# find UTXOs that are claims and their txo_id is not in claim table, -# this ONLY works if you first ran select_missing_claims and inserted the missing claims, then -# all claims_ids should match between TXO and Claim table but txo_hashes will not match for -# claims that are not up-to-date -select_stale_claims = ( - select_txos(txo_type__in=CLAIM_TYPE_CODES, spent_height=0, txo_id_not_in_claim_table=True) -) - - 
-condition_spent_supports = ( - Support.c.txo_hash.notin_( - select(TXO.c.txo_hash).where( - (TXO.c.txo_type == TXO_TYPES['support']) & - (TXO.c.spent_height == 0) - ) - ) -) - - -condition_missing_supports = ( - (TXO.c.txo_type == TXO_TYPES['support']) & - (TXO.c.spent_height == 0) & - (TXO.c.txo_hash.notin_(select(Support.c.txo_hash))) -) - - -select_missing_supports = ( - select_txos(txo_type=TXO_TYPES['support'], spent_height=0, txo_id_not_in_support_table=True) -) diff --git a/scripts/simulate_sync_console.py b/scripts/simulate_sync_console.py index ee7f3dba8..a5a065199 100644 --- a/scripts/simulate_sync_console.py +++ b/scripts/simulate_sync_console.py @@ -1,14 +1,13 @@ import asyncio +import argparse from random import randrange from typing import List from binascii import unhexlify from google.protobuf.message import DecodeError -from lbry.schema.claim import Claim -from lbry.blockchain import Ledger -from lbry.service import FullNode +from lbry import Config, Ledger, FullNode from lbry.console import Advanced, Basic -from lbry.conf import Config +from lbry.schema.claim import Claim from lbry.db.utils import chunk from lbry.db.query_context import Event @@ -44,19 +43,78 @@ class Simulator: return self.ending_height-self.starting_height return (self.ending_height-self.starting_height)+1 + async def generate(self, name, units, eid, label, total, steps): + loop_time = min(5.0 / (total[0]/steps[0]), 1.0) + done = (0,)*len(total) + while not all(d >= t for d, t in zip(done, total)): + if done[0] == 0: + first_event = { + "event": name, + "data": { + "id": eid, + "done": done, + "total": total, + "units": units, + } + } + if label is not None: + first_event["data"]["label"] = label + await self.progress.add(first_event) + await asyncio.sleep(loop_time) + done = tuple(min(d+s, t) for d, s, t in zip(done, steps, total)) + await self.progress.add({ + "event": name, + "data": { + "id": eid, + "done": done, + } + }) + + async def generate_group(self, name, unit, init_steps, total, increment): + await self.generate(f"{name}.init", ("steps",), 0, None, (init_steps,), (1,)) + await self.progress.add({ + "event": f"{name}.main", + "data": {"id": 0, "done": (0,), "total": (total,), "units": (unit,)} + }) + tasks = [] + for group_range in self.make_ranges(total, max(int(total/self.processes), 1)): + tasks.append(self.generate( + f"{name}.insert", (unit,), + group_range[0], f"add {unit} at {group_range[0]}-{group_range[1]}", + (group_range[1] - group_range[0],), (increment,) + )) + await asyncio.wait(tasks) + await self.close_event(f"{name}.main") + + async def close_event(self, name): + await self.progress.add({"event": name, "data": {"id": 0, "done": (-1, -1)}}) + + @staticmethod + def make_ranges(num, size=1000): + ranges = [] + for i in range(0, num, size): + if ranges: + ranges[-1][-1] = i-1 + ranges.append([i, 0]) + ranges[-1][-1] = num + return ranges + async def advance(self, initial_sync: bool, ending_height: int, files: List[int], txs: int): self.ending_height = ending_height self.txs = txs self.claims = int(txs/4) self.supports = int(txs/2) + await self.generate("blockchain.sync.block.init", ("steps",), 0, None, (3,), (1,)) await self.progress.add({ - "event": "blockchain.sync.start", + "event": "blockchain.sync.block.main", "data": { + "id": 0, + "done": (0, 0), + "total": (self.blocks, self.txs), + "units": ("blocks", "txs"), "starting_height": self.starting_height, "ending_height": ending_height, "files": len(files), - "blocks": self.blocks, - "txs": self.txs, "claims": self.claims, 
"supports": self.supports, } @@ -67,90 +125,63 @@ class Simulator: for file in file_group: if file == files[-1]: cause_protobuf_stderr() - tasks.append(self.sync_block_file(file, self.blocks-blocks_synced, self.txs-txs_synced)) + tasks.append(self.generate( + "blockchain.sync.block.file", ("blocks", "txs"), file, f"blk0000{file}.dat", + (self.blocks-blocks_synced, self.txs-txs_synced), + (50, 100) + )) cause_protobuf_stderr() else: blocks = int(self.blocks / len(files)) blocks_synced += blocks txs = int(self.txs / len(files)) txs_synced += txs - tasks.append(self.sync_block_file(file, blocks, txs)) + tasks.append(self.generate( + "blockchain.sync.block.file", ("blocks", "txs"), file, f"blk0000{file}.dat", + (blocks, txs), (50, 100) + )) await asyncio.wait(tasks) - for step in Event._events: - if step.name in ("blockchain.sync.block.read", "blockchain.sync.block.save"): - continue - await getattr(self, step.name.replace('.', '_'))() - #await self.progress.add({ - # "event": "blockchain.sync.complete", - # "data": {"step": len(self.steps), "total": len(self.steps), "unit": "tasks"} - #}) + await self.close_event("blockchain.sync.block.main") + return + if initial_sync: + await self.generate("blockchain.sync.txoi.main", ("steps",), 0, None, (9,), (1,)) + else: + await self.generate("blockchain.sync.txoi.main", ("steps",), 0, None, (2,), (1,)) + if initial_sync: + await self.generate_group("blockchain.sync.claims", "claims", 4, self.claims, 100) + else: + await self.generate_group("blockchain.sync.claims", "claims", 2, self.claims, 100) + if initial_sync: + await self.generate_group("blockchain.sync.supports", "supports", 4, self.supports, 100) + else: + await self.generate_group("blockchain.sync.supports", "supports", 2, self.supports, 100) self.ending_height = ending_height+1 self.starting_height = self.ending_height - async def sync_block_file(self, block_file, blocks, txs): - for i in range(0, blocks, 1000): - await self.progress.add({ - "event": "blockchain.sync.block.read", - "data": {"step": i, "total": blocks, "unit": "blocks", "block_file": block_file} - }) - await asyncio.sleep(randrange(1, 10)/10) - await self.progress.add({ - "event": "blockchain.sync.block.read", - "data": {"step": blocks, "total": blocks, "unit": "blocks", "block_file": block_file} - }) - await asyncio.sleep(0.5) - for i in range(0, txs, 2000): - await self.progress.add({ - "event": "blockchain.sync.block.save", - "data": {"step": i, "total": txs, "unit": "txs", "block_file": block_file} - }) - await asyncio.sleep(randrange(1, 10) / 10) - await self.progress.add({ - "event": "blockchain.sync.block.save", - "data": {"step": txs, "total": txs, "unit": "txs", "block_file": block_file} - }) - async def generate_steps(self, event, steps, unit, delay=1.0, step=1): - await self.progress.add({"event": event, "data": {"step": 0, "total": steps, "unit": unit}}) - remaining = steps - for i in range(1, steps+1, step): - await asyncio.sleep(delay) - await self.progress.add({"event": event, "data": {"step": i, "total": steps, "unit": unit}}) - remaining -= i - if remaining: - await asyncio.sleep(delay) - await self.progress.add({"event": event, "data": {"step": steps, "total": steps, "unit": unit}}) - - async def blockchain_sync_block_filters(self): - await self.generate_steps("blockchain.sync.block.filters", 5, "blocks") - - async def blockchain_sync_spends(self): - await self.generate_steps("blockchain.sync.spends", 5, "steps") - - async def blockchain_sync_claims(self): - for i in range(0, self.claims, 1_000): - await 
self.progress.add({ - "event": "blockchain.sync.claims", - "data": {"step": i, "total": self.claims, "unit": "claims"} - }) - await asyncio.sleep(0.1) - await self.progress.add({ - "event": "blockchain.sync.claims", - "data": {"step": self.claims, "total": self.claims, "unit": "claims"} - }) - - async def blockchain_sync_supports(self): - await self.generate_steps("blockchain.sync.supports", 5, "supports") - - -async def main(): - console = Advanced(FullNode(Ledger(Config(processes=3, spv_address_filters=False)))) +async def main(console): sim = Simulator(console) - console.starting() - await sim.advance(True, 100_000, [1, 2, 3, 4, 5], 100_000) - await sim.advance(False, 100_001, [5], 100) - console.stopping() + await sim.advance(True, 10_000, [1, 2, 3, 4, 5], 10_000) + #await sim.advance(True, 100_000, [1, 2, 3, 4, 5], 100_000) + #await sim.advance(False, 100_001, [5], 100) if __name__ == "__main__": - asyncio.run(main()) + parser = argparse.ArgumentParser() + parser.add_argument("--basic", default=False, action="store_true") + parser.add_argument("--processes", default=3) + args = parser.parse_args() + + node = FullNode(Ledger(Config( + processes=args.processes, + spv_address_filters=False + ))) + console = Basic(node) if args.basic else Advanced(node) + + try: + console.starting() + asyncio.run(main(console)) + except KeyboardInterrupt: + pass + finally: + console.stopping() diff --git a/tests/integration/blockchain/test_blockchain.py b/tests/integration/blockchain/test_blockchain.py index 0bf92a98a..041f440b6 100644 --- a/tests/integration/blockchain/test_blockchain.py +++ b/tests/integration/blockchain/test_blockchain.py @@ -135,6 +135,11 @@ class SyncingBlockchainTestCase(BasicBlockchainTestCase): funded = await self.chain.fund_raw_transaction(hexlify(tx.raw).decode()) tx = Transaction(unhexlify(funded['hex'])) i = 1 + if '!' 
in claim_id_startswith: + claim_id_startswith, not_after_startswith = claim_id_startswith.split('!') + not_after_startswith = tuple(not_after_startswith) + else: + claim_id_startswith, not_after_startswith = claim_id_startswith, () while True: if sign: self.find_claim_txo(tx).sign(sign) @@ -144,7 +149,8 @@ class SyncingBlockchainTestCase(BasicBlockchainTestCase): txo = self.find_claim_txo(tx) claim = txo.claim.channel if is_channel else txo.claim.stream if txo.claim_id.startswith(claim_id_startswith): - break + if txo.claim_id[len(claim_id_startswith)] not in not_after_startswith: + break i += 1 claim.update(title=f'claim #{i:03}') txo.script.generate() @@ -375,10 +381,13 @@ class TestMultiBlockFileSyncing(BasicBlockchainTestCase): @staticmethod def extract_block_events(name, events): - return sorted([ - [p['data']['block_file'], p['data']['step'], p['data']['total']] - for p in events if p['event'] == name - ]) + return sorted([[ + p['data']['block_file'], + p['data']['step'], + p['data']['total'], + p['data']['txs_done'], + p['data']['txs_total'], + ] for p in events if p['event'] == name]) @staticmethod def extract_events(name, events): @@ -464,6 +473,41 @@ class TestMultiBlockFileSyncing(BasicBlockchainTestCase): for c in await db.get_support_metadata(0, 500)] ) + def assertConsumingEvents(self, events: list, name, units, expectation_generator): + expected_count = 0 + for expectation in expectation_generator: + expected_count += len(expectation[2:]) + self.assertGreaterEqual(len(events), expected_count) + extracted = [] + for _ in range(expected_count): + extracted.append(events.pop(0)) + actual = sorted(extracted, key=lambda e: (e["event"], e["data"]["id"], e["data"]["done"])) + expected = [] + for expectation in expectation_generator: + for i, done in enumerate(expectation[2:]): + if i == 0: + first_event = { + "event": name, + "data": { + "id": expectation[0], + "done": (0,) * len(units), + "total": expectation[2], + "units": units, + } + } + if expectation[1] is not None: + first_event["data"]["label"] = expectation[1] + expected.append(first_event) + else: + expected.append({ + "event": name, + "data": { + "id": expectation[0], + "done": done, + } + }) + self.assertEqual(expected, actual) + async def test_multi_block_file_sync(self): events = [] self.sync.on_progress.listen(events.append) @@ -471,90 +515,105 @@ class TestMultiBlockFileSyncing(BasicBlockchainTestCase): # initial_sync = True await self.sync.advance() await asyncio.sleep(1) # give it time to collect events + self.assertConsumingEvents( + events, "blockchain.sync.block.init", ("steps",), [ + (0, None, (3,), (1,), (2,), (3,)) + ] + ) self.assertEqual( - events[0], { - "event": "blockchain.sync.start", + events.pop(0), { + "event": "blockchain.sync.block.main", "data": { - "starting_height": 0, - "ending_height": 352, - "files": 3, - "blocks": 353, - "txs": 544, - "claims": 3610, - "supports": 2, + "id": 0, "done": (0, 0), "total": (353, 544), "units": ("blocks", "txs"), + "starting_height": 0, "ending_height": 352, + "files": 3, "claims": 3610, "supports": 2 } } ) - # 1 - blockchain.sync.block.read - self.assertEqual( - self.extract_block_events('blockchain.sync.block.read', events), [ - [0, 0, 191], - [0, 100, 191], - [0, 191, 191], - [1, 0, 89], - [1, 89, 89], - [2, 0, 73], - [2, 73, 73], + self.assertConsumingEvents( + events, "blockchain.sync.block.file", ("blocks", "txs"), [ + (0, "blk00000.dat", (191, 280), (50, 0), (100, 0), (150, 0), (191, 280)), + (1, "blk00001.dat", (89, 178), (50, 0), (89, 178)), + (2, 
"blk00002.dat", (73, 86), (50, 0), (73, 86)), ] ) - # 2 - blockchain.sync.block.save - self.assertEventsAlmostEqual( - self.extract_block_events('blockchain.sync.block.save', events), [ - [0, 0, 280], - [0, 19, 280], - [0, 47, 280], - [0, 267, 280], - [0, 278, 280], - [0, 280, 280], - [1, 0, 178], - [1, 6, 178], - [1, 19, 178], - [1, 167, 178], - [1, 175, 178], - [1, 178, 178], - [2, 0, 86], - [2, 11, 86], - [2, 24, 86], - [2, 83, 86], - [2, 85, 86], - [2, 86, 86], + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.block.main", + "data": {"id": 0, "done": (-1, -1)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.txoi.main", ("steps",), [ + (0, None, (9,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,)) ] ) - # 3 - blockchain.sync.spends - self.assertEventsAlmostEqual( - self.extract_events('blockchain.sync.spends', events), [ - [0, 9], - [1, 9], - [2, 9], - [3, 9], - [4, 9], - [5, 9], - [6, 9], - [7, 9], - [8, 9], - [9, 9], + self.assertConsumingEvents( + events, "blockchain.sync.claims.init", ("steps",), [ + (0, None, (2,), (1,), (2,)) ] ) - # 4 - blockchain.sync.claims self.assertEqual( - [[0, 3610], [3610, 3610]], - self.extract_events('blockchain.sync.claims', events) + events.pop(0), { + "event": "blockchain.sync.claims.main", + "data": {"id": 0, "done": (0,), "total": (3610,), "units": ("claims",)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.claims.insert", ("claims",), [ + (102, "add claims at 102-120", (361,), (361,)), + (121, "add claims at 121-139", (361,), (361,)), + (140, "add claims at 140-158", (361,), (361,)), + (159, "add claims at 159-177", (361,), (361,)), + (178, "add claims at 178-196", (361,), (361,)), + (197, "add claims at 197-215", (361,), (361,)), + (216, "add claims at 216-234", (361,), (361,)), + (235, "add claims at 235-253", (361,), (361,)), + (254, "add claims at 254-272", (361,), (361,)), + (273, "add claims at 273-291", (361,), (361,)), + ] ) - # 4 - blockchain.sync.supports self.assertEqual( - [[0, 2], [2, 2]], - self.extract_events('blockchain.sync.supports', events) + events.pop(0), { + "event": "blockchain.sync.claims.main", + "data": {"id": 0, "done": (-1,)} + } ) + self.assertConsumingEvents( + events, "blockchain.sync.supports.init", ("steps",), [ + (0, None, (2,), (1,), (2,)) + ] + ) + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.supports.main", + "data": {"id": 0, "done": (0,), "total": (2,), "units": ("supports",)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.supports.insert", ("supports",), [ + (0, "add supports at 0-352", (2,), (2,)), + ] + ) + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.supports.main", + "data": {"id": 0, "done": (-1,)} + } + ) + self.assertEqual(events, []) # initial_sync = False & no new blocks - events.clear() await self.sync.advance() # should be no-op await asyncio.sleep(1) # give it time to collect events - self.assertEqual([[0, 0]], self.extract_events('blockchain.sync.claims', events)) - self.assertEqual([[0, 0]], self.extract_events('blockchain.sync.supports', events)) + self.assertConsumingEvents( + events, "blockchain.sync.block.init", ("steps",), [ + (0, None, (3,), (1,), (2,), (3,)) + ] + ) + self.assertEqual(events, []) # initial_sync = False - events.clear() txid = await self.chain.claim_name('foo', 'beef', '0.01') await self.chain.generate(1) tx = Transaction(unhexlify(await self.chain.get_raw_transaction(txid))) @@ -563,52 +622,92 @@ class TestMultiBlockFileSyncing(BasicBlockchainTestCase): 
await self.chain.generate(1) await self.sync.advance() await asyncio.sleep(1) # give it time to collect events + self.assertConsumingEvents( + events, "blockchain.sync.block.init", ("steps",), [ + (0, None, (3,), (1,), (2,), (3,)) + ] + ) self.assertEqual( - events[0], { - "event": "blockchain.sync.start", + events.pop(0), { + "event": "blockchain.sync.block.main", "data": { - "starting_height": 353, - "ending_height": 354, - "files": 1, - "blocks": 2, - "txs": 4, - "claims": 1, - "supports": 1, + "id": 0, "done": (0, 0), "total": (2, 4), "units": ("blocks", "txs"), + "starting_height": 353, "ending_height": 354, + "files": 1, "claims": 1, "supports": 1 } } ) - # 1 - blockchain.sync.block.read - self.assertEqual( - self.extract_block_events('blockchain.sync.block.read', events), [ - [2, 0, 2], - [2, 2, 2], + self.assertConsumingEvents( + events, "blockchain.sync.block.file", ("blocks", "txs"), [ + (2, "blk00002.dat", (2, 4), (2, 4)), ] ) - # 2 - blockchain.sync.block.save self.assertEqual( - self.extract_block_events('blockchain.sync.block.save', events), [ - [2, 0, 4], - [2, 1, 4], - [2, 3, 4], - [2, 4, 4], + events.pop(0), { + "event": "blockchain.sync.block.main", + "data": {"id": 0, "done": (-1, -1)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.txoi.main", ("steps",), [ + (0, None, (2,), (1,), (2,)) ] ) - # 3 - blockchain.sync.spends - self.assertEqual( - self.extract_events('blockchain.sync.spends', events), [ - [0, 2], - [1, 2], - [2, 2], + self.assertConsumingEvents( + events, "blockchain.sync.claims.init", ("steps",), [ + (0, None, (6,), (1,), (2,), (3,), (4,), (5,), (6,)) ] ) - # 4 - blockchain.sync.claims self.assertEqual( - self.extract_events('blockchain.sync.claims', events), [ - [0, 3], [1, 3], [2, 3], [3, 3] + events.pop(0), { + "event": "blockchain.sync.claims.main", + "data": {"id": 0, "done": (0,), "total": (3,), "units": ("claims",)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.claims.insert", ("claims",), [ + (353, "add claims at 353-354", (1,), (1,)), ] ) - # 5 - blockchain.sync.supports - self.assertEqual([[0, 1], [1, 1]], self.extract_events('blockchain.sync.supports', events)) + self.assertConsumingEvents( + events, "blockchain.sync.claims.takeovers", ("claims",), [ + (0, "winning", (1,), (1,)), + ] + ) + self.assertConsumingEvents( + events, "blockchain.sync.claims.stakes", ("claims",), [ + (0, None, (1,), (1,)), + ] + ) + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.claims.main", + "data": {"id": 0, "done": (-1,)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.supports.init", ("steps",), [ + (0, None, (2,), (1,), (2,)) + ] + ) + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.supports.main", + "data": {"id": 0, "done": (0,), "total": (1,), "units": ("supports",)} + } + ) + self.assertConsumingEvents( + events, "blockchain.sync.supports.insert", ("supports",), [ + (353, "add supports at 353-354", (1,), (1,)), + ] + ) + self.assertEqual( + events.pop(0), { + "event": "blockchain.sync.supports.main", + "data": {"id": 0, "done": (-1,)} + } + ) + self.assertEqual(events, []) class TestGeneralBlockchainSync(SyncingBlockchainTestCase): @@ -846,7 +945,7 @@ class TestGeneralBlockchainSync(SyncingBlockchainTestCase): async def test_resolve(self): chan_a = await self.get_claim( - await self.create_claim(claim_id_startswith='a', is_channel=True)) + await self.create_claim(claim_id_startswith='a!b', is_channel=True)) await self.generate(1) chan_ab = await self.get_claim( await 
self.create_claim(claim_id_startswith='ab', is_channel=True)) @@ -855,7 +954,7 @@ class TestGeneralBlockchainSync(SyncingBlockchainTestCase): self.assertEqual(chan_ab.claim_id, await self.resolve_to_claim_id("@foo#ab")) stream_c = await self.get_claim( - await self.create_claim(claim_id_startswith='c', sign=chan_a)) + await self.create_claim(claim_id_startswith='c!d', sign=chan_a)) await self.generate(1) stream_cd = await self.get_claim( await self.create_claim(claim_id_startswith='cd', sign=chan_ab)) diff --git a/tests/unit/blockchain/test_transaction.py b/tests/unit/blockchain/test_transaction.py index 9d37dfb26..d522c07c5 100644 --- a/tests/unit/blockchain/test_transaction.py +++ b/tests/unit/blockchain/test_transaction.py @@ -1,9 +1,8 @@ from unittest import TestCase from binascii import hexlify, unhexlify -from lbry.blockchain.ledger import Ledger +from lbry import Config, Ledger, Transaction from lbry.constants import CENT, NULL_HASH32 -from lbry.blockchain.transaction import Transaction from lbry.testcase import ( get_transaction, get_input, get_output, get_claim_transaction ) @@ -15,7 +14,7 @@ FEE_PER_CHAR = 200000 class TestSizeAndFeeEstimation(TestCase): def setUp(self): - self.ledger = Ledger() + self.ledger = Ledger(Config.with_null_dir()) def test_output_size_and_fee(self): txo = get_output() diff --git a/tests/unit/crypto/test_bip32.py b/tests/unit/crypto/test_bip32.py index 562b8e718..a4642a7d8 100644 --- a/tests/unit/crypto/test_bip32.py +++ b/tests/unit/crypto/test_bip32.py @@ -1,7 +1,7 @@ from unittest import TestCase from binascii import unhexlify, hexlify -from lbry import Ledger +from lbry import Config, Ledger from lbry.crypto.bip32 import PubKey, PrivateKey, from_extended_key_string from tests.unit.crypto.key_fixtures import ( @@ -48,7 +48,7 @@ class BIP32Tests(TestCase): with self.assertRaisesRegex(ValueError, 'private key must be 32 bytes'): PrivateKey(None, b'abcd', b'abcd'*8, 0, 255) private_key = PrivateKey( - Ledger(), + Ledger(Config.with_null_dir()), unhexlify('2423f3dc6087d9683f73a684935abc0ccd8bc26370588f56653128c6a6f0bf7c'), b'abcd'*8, 0, 1 ) @@ -66,7 +66,7 @@ class BIP32Tests(TestCase): def test_private_key_derivation(self): private_key = PrivateKey( - Ledger(), + Ledger(Config.with_null_dir()), unhexlify('2423f3dc6087d9683f73a684935abc0ccd8bc26370588f56653128c6a6f0bf7c'), b'abcd'*8, 0, 1 ) @@ -82,14 +82,14 @@ class BIP32Tests(TestCase): def test_from_extended_keys(self): self.assertIsInstance( from_extended_key_string( - Ledger(), + Ledger(Config.with_null_dir()), 'xprv9s21ZrQH143K2dyhK7SevfRG72bYDRNv25yKPWWm6dqApNxm1Zb1m5gGcBWYfbsPjTr2v5joit8Af2Zp5P' '6yz3jMbycrLrRMpeAJxR8qDg8', ), PrivateKey ) self.assertIsInstance( from_extended_key_string( - Ledger(), + Ledger(Config.with_null_dir()), 'xpub661MyMwAqRbcF84AR8yfHoMzf4S2ct6mPJtvBtvNeyN9hBHuZ6uGJszkTSn5fQUCdz3XU17eBzFeAUwV6f' 'iW44g14WF52fYC5J483wqQ5ZP', ), PubKey diff --git a/tests/unit/schema/test_schema_signing.py b/tests/unit/schema/test_schema_signing.py index ad1bf56a7..b8164cb0c 100644 --- a/tests/unit/schema/test_schema_signing.py +++ b/tests/unit/schema/test_schema_signing.py @@ -1,9 +1,8 @@ from unittest import TestCase from binascii import unhexlify +from lbry import Config, Ledger, Transaction, Output from lbry.testcase import get_transaction, AsyncioTestCase -from lbry.blockchain.ledger import Ledger -from lbry.blockchain.transaction import Transaction, Output from lbry.constants import CENT from lbry.schema.claim import Claim @@ -93,4 +92,4 @@ class TestValidatingOldSignatures(TestCase): )) 
         channel = channel_tx.outputs[0]
-        self.assertTrue(stream.is_signed_by(channel, Ledger()))
+        self.assertTrue(stream.is_signed_by(channel, Ledger(Config.with_null_dir())))
diff --git a/tests/unit/test_console.py b/tests/unit/test_console.py
new file mode 100644
index 000000000..c21675489
--- /dev/null
+++ b/tests/unit/test_console.py
@@ -0,0 +1,28 @@
+import unittest
+from lbry.console import Bar2
+
+
+class TestBar2(unittest.TestCase):
+
+    def bar(self, top, bottom, expected):
+        self.assertEqual(expected, f"{Bar2((top, bottom))}")
+
+    def test_rendering(self):
+        self.bar(0.00, 0.00, '          ')
+        self.bar(0.00, 0.05, '▖         ')
+        self.bar(0.05, 0.00, '▘         ')
+        self.bar(0.05, 0.05, '▌         ')
+        self.bar(0.00, 0.10, '▄         ')
+        self.bar(0.10, 0.00, '▀         ')
+        self.bar(0.05, 0.10, '▙         ')
+        self.bar(0.10, 0.05, '▛         ')
+        self.bar(0.30, 0.50, '███▄▄     ')
+        self.bar(0.35, 0.55, '███▙▄▖    ')
+        self.bar(0.40, 0.60, '████▄▄    ')
+        self.bar(0.50, 0.30, '███▀▀     ')
+        self.bar(0.55, 0.35, '███▛▀▘    ')
+        self.bar(0.60, 0.40, '████▀▀    ')
+        self.bar(0.95, 0.90, '█████████▘')
+        self.bar(0.99, 0.98, '█████████▌')
+        self.bar(0.99, 1.00, '█████████▙')
+        self.bar(1.00, 1.00, '██████████')
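The expected strings in the new tests/unit/test_console.py hunk above read like a small spec for how Bar2 packs two progress fractions into one fixed-width row of Unicode quadrant characters: the first fraction fills the upper half of each of the ten cells, the second fills the lower half, and each half is drawn empty, half-covered or fully covered. The sketch below reproduces those expected strings under that reading; it is an inference from the test data, not the actual lbry.console.Bar2 implementation, and render_bar2, CELL and level are hypothetical names.

# A minimal sketch inferred from the expected strings in the test above;
# not the actual lbry.console.Bar2 code. Each cell is keyed by how much of
# the top and bottom fraction it holds: 0 = empty, 1 = half, 2 = full.
CELL = {
    (0, 0): ' ', (0, 1): '▖', (1, 0): '▘', (1, 1): '▌',
    (0, 2): '▄', (2, 0): '▀', (1, 2): '▙', (2, 1): '▛', (2, 2): '█',
}


def level(remaining):
    # Quantize the portion of a single cell covered by a fraction.
    if remaining >= 1:
        return 2
    if remaining >= 0.5:
        return 1
    return 0


def render_bar2(top, bottom, width=10):
    # Render two stacked progress fractions as one fixed-width row of cells.
    return ''.join(
        CELL[level(top * width - i), level(bottom * width - i)]
        for i in range(width)
    )


# These match two of the expectations exercised by TestBar2.test_rendering.
assert render_bar2(0.35, 0.55) == '███▙▄▖    '
assert render_bar2(0.99, 0.98) == '█████████▌'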