# lbry-sdk/lbry/blockchain/sync/steps.py

# pylint: disable=singleton-comparison
import logging
from contextvars import ContextVar
from functools import partial
from typing import Optional, Tuple
from sqlalchemy import bindparam, case, distinct, text, func, between, desc
from sqlalchemy.future import select
from sqlalchemy.schema import CreateTable
from lbry.db import queries
from lbry.db.tables import (
Block as BlockTable, TX, TXO, TXI, Claim, Support,
pg_add_txo_constraints_and_indexes, pg_add_txi_constraints_and_indexes
)
from lbry.db.query_context import ProgressContext, context, event_emitter
from lbry.db.queries import rows_to_txos
from lbry.db.sync import (
select_missing_supports,
condition_spent_claims,
condition_spent_supports, condition_missing_supports,
set_input_addresses, update_spent_outputs,
)
from lbry.db.utils import least
from lbry.db.constants import TXO_TYPES, CLAIM_TYPE_CODES
from lbry.blockchain.lbrycrd import Lbrycrd
from lbry.blockchain.block import Block, create_block_filter
from lbry.blockchain.bcd_data_stream import BCDataStream
from lbry.blockchain.transaction import Output, OutputScript, TXRefImmutable

log = logging.getLogger(__name__)

_chain: ContextVar[Lbrycrd] = ContextVar('chain')


def get_or_initialize_lbrycrd(ctx=None) -> Lbrycrd:
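    """Return the Lbrycrd instance for the current context, creating and caching it on first use."""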
chain = _chain.get(None)
if chain is not None:
return chain
chain = Lbrycrd((ctx or context()).ledger)
chain.db.sync_open()
_chain.set(chain)
return chain


def process_block_file(block_file_number: int, starting_height: int):
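    """Read new blocks from one block file and bulk-save their transactions; returns the last block height processed."""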
ctx = context()
loader = ctx.get_bulk_loader()
last_block_processed = process_block_read(block_file_number, starting_height, loader)
process_block_save(block_file_number, loader)
return last_block_processed


@event_emitter("blockchain.sync.block.read", "blocks", step_size=100)
def process_block_read(block_file_number: int, starting_height: int, loader, p: ProgressContext):
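    """
    Parse blocks from the given block file starting at starting_height and add them to the
    bulk loader. Returns the height of the last block read, or -1 if nothing was read.
    """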
chain = get_or_initialize_lbrycrd(p.ctx)
stop = p.ctx.stop_event
new_blocks = chain.db.sync_get_blocks_in_file(block_file_number, starting_height)
if not new_blocks:
return -1
done, total, last_block_processed = 0, len(new_blocks), -1
block_file_path = chain.get_block_file_path_from_number(block_file_number)
p.start(total, {'block_file': block_file_number})
with open(block_file_path, 'rb') as fp:
stream = BCDataStream(fp=fp)
for done, block_info in enumerate(new_blocks, start=1):
if stop.is_set():
return -1
block_height = block_info['height']
fp.seek(block_info['data_offset'])
block = Block.from_data_stream(stream, block_height, block_file_number)
loader.add_block(block)
last_block_processed = block_height
p.step(done)
return last_block_processed


@event_emitter("blockchain.sync.block.save", "txs")
def process_block_save(block_file_number: int, loader, p: ProgressContext):
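    """Save the rows accumulated by the bulk loader while reading this block file."""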
p.extra = {'block_file': block_file_number}
loader.save(TX)


@event_emitter("blockchain.sync.block.filters", "blocks")
def process_block_filters(p: ProgressContext):
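    """Create and store an address filter for every block that does not have one yet."""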
blocks = []
all_filters = []
all_addresses = []
for block in queries.get_blocks_without_filters():
addresses = {
p.ctx.ledger.address_to_hash160(r['address'])
for r in queries.get_block_tx_addresses(block_hash=block['block_hash'])
}
all_addresses.extend(addresses)
block_filter = create_block_filter(addresses)
all_filters.append(block_filter)
blocks.append({'pk': block['block_hash'], 'block_filter': block_filter})
# filters = [get_block_filter(f) for f in all_filters]
p.ctx.execute(BlockTable.update().where(BlockTable.c.block_hash == bindparam('pk')), blocks)
# txs = []
# for tx in queries.get_transactions_without_filters():
# tx_filter = create_block_filter(
# {r['address'] for r in queries.get_block_tx_addresses(tx_hash=tx['tx_hash'])}
# )
# txs.append({'pk': tx['tx_hash'], 'tx_filter': tx_filter})
# execute(TX.update().where(TX.c.tx_hash == bindparam('pk')), txs)
@event_emitter("blockchain.sync.spends", "steps")
def process_spends(initial_sync: bool, p: ProgressContext):
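    """
    Update TXIs with the address of the TXO they spend and set spent_height on spent TXOs.
    During the initial sync the txi and txo tables are rebuilt without constraints and the
    constraints are restored afterwards; on later syncs the existing rows are updated in place.
    """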
step = 0
def next_step():
nonlocal step
step += 1
return step
if initial_sync:
p.start(9)
else:
p.start(2)
if initial_sync:
# A. add tx constraints
if p.ctx.is_postgres:
p.ctx.execute(text("ALTER TABLE tx ADD PRIMARY KEY (tx_hash);"))
p.step(next_step())
# 1. Update TXIs to have the address of TXO they are spending.
if initial_sync:
# B. txi table reshuffling
p.ctx.execute(text("ALTER TABLE txi RENAME TO old_txi;"))
p.ctx.execute(CreateTable(TXI, include_foreign_key_constraints=[]))
if p.ctx.is_postgres:
p.ctx.execute(text("ALTER TABLE txi DROP CONSTRAINT txi_pkey;"))
p.step(next_step())
# C. insert
old_txi = TXI.alias('old_txi')
columns = [c for c in old_txi.columns if c.name != 'address'] + [TXO.c.address]
select_txis = select(*columns).select_from(old_txi.join(TXO))
insert_txis = TXI.insert().from_select(columns, select_txis)
p.ctx.execute(text(
str(insert_txis.compile(p.ctx.engine)).replace('txi AS old_txi', 'old_txi')
))
p.step(next_step())
# D. drop old txi and vacuum
p.ctx.execute(text("DROP TABLE old_txi;"))
if p.ctx.is_postgres:
with p.ctx.engine.connect() as c:
c.execute(text("COMMIT;"))
c.execute(text("VACUUM ANALYZE txi;"))
p.step(next_step())
# E. restore integrity constraint
if p.ctx.is_postgres:
pg_add_txi_constraints_and_indexes(p.ctx.execute)
p.step(next_step())
else:
set_input_addresses(p.ctx)
p.step(next_step())
# 2. Update spent TXOs setting spent_height
if initial_sync:
# F. txo table reshuffling
p.ctx.execute(text("ALTER TABLE txo RENAME TO old_txo;"))
p.ctx.execute(CreateTable(TXO, include_foreign_key_constraints=[]))
if p.ctx.is_postgres:
p.ctx.execute(text("ALTER TABLE txo DROP CONSTRAINT txo_pkey;"))
p.step(next_step())
# G. insert
old_txo = TXO.alias('old_txo')
columns = [c for c in old_txo.columns if c.name != 'spent_height']
select_columns = columns + [func.coalesce(TXI.c.height, 0).label('spent_height')]
insert_columns = columns + [TXO.c.spent_height]
select_txos = select(*select_columns).select_from(old_txo.join(TXI, isouter=True))
insert_txos = TXO.insert().from_select(insert_columns, select_txos)
p.ctx.execute(text(
str(insert_txos.compile(p.ctx.engine))
.replace('txo AS old_txo', 'old_txo')
.replace('%(coalesce_1)s', '0')
))
p.step(next_step())
# H. drop old txo
p.ctx.execute(text("DROP TABLE old_txo;"))
if p.ctx.is_postgres:
with p.ctx.engine.connect() as c:
c.execute(text("COMMIT;"))
c.execute(text("VACUUM ANALYZE txo;"))
p.step(next_step())
# I. restore integrity constraint
if p.ctx.is_postgres:
pg_add_txo_constraints_and_indexes(p.ctx.execute)
p.step(next_step())
else:
update_spent_outputs(p.ctx)
p.step(next_step())


def insert_claims_with_lbrycrd(done, chain, p: ProgressContext, cursor):
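    """Insert claims from the given cursor in batches, enriching each row with claim metadata fetched from lbrycrd."""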
loader = p.ctx.get_bulk_loader()
for rows in cursor.partitions(900):
        claim_metadata = iter(chain.db.sync_get_claim_metadata(
            claim_hashes=[row['claim_hash'] for row in rows]
        ))
for row in rows:
metadata = next(claim_metadata, None)
if metadata is None or metadata['claim_hash'] != row.claim_hash:
log.error(
r"During sync'ing a claim in our db couldn't find a "
r"match in lbrycrd's db. This could be because lbrycrd "
r"moved a block forward and updated its own claim table "
r"while we were still on a previous block, or it could be "
r"a more fundamental issue... ¯\_(ツ)_/¯"
)
if metadata is None:
break
if metadata['claim_hash'] != row.claim_hash:
continue
txo = Output(
amount=row.amount,
script=OutputScript(row.src),
tx_ref=TXRefImmutable.from_hash(row.tx_hash, row.height),
position=row.txo_position,
)
extra = {
'timestamp': row.timestamp,
'staked_support_amount': int(row.staked_support_amount),
'staked_support_count': int(row.staked_support_count),
'short_url': metadata['short_url'],
'creation_height': metadata['creation_height'],
'activation_height': metadata['activation_height'],
'expiration_height': metadata['expiration_height'],
'takeover_height': metadata['takeover_height'],
}
if hasattr(row, 'signature'):
extra.update({
'signature': row.signature,
'signature_digest': row.signature_digest,
'channel_public_key': row.channel_public_key,
'channel_url': row.channel_url
})
loader.add_claim(txo, **extra)
if len(loader.claims) >= 10_000:
done += loader.flush(Claim)
p.step(done)
done += loader.flush(Claim)
p.step(done)
return done


def channel_content_count_calc(signable):
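    """Scalar subquery counting `signable` rows signed with a valid signature by the channel claim being updated."""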
return (
select(func.count('*'))
.select_from(signable)
.where((signable.c.channel_hash == Claim.c.claim_hash) & signable.c.is_signature_valid)
.scalar_subquery()
)


@event_emitter("blockchain.sync.claims", "claims")
def process_claims(starting_height: int, blocks_added: Optional[Tuple[int, int]], p: ProgressContext):
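    """
    Delete spent claims and insert new ones (channels first, so their short_url is available
    when the claims they contain are inserted); on incremental syncs also refresh modified
    claims, takeovers, channel member counts and staked support totals.
    """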
chain = get_or_initialize_lbrycrd(p.ctx)
initial_sync = not p.ctx.has_records(Claim)
to_be_modified = p.ctx.fetchtotal(
(TXO.c.txo_type.in_(CLAIM_TYPE_CODES)) &
(TXO.c.spent_height == 0) &
(TXO.c.txo_hash.notin_(select(Claim.c.txo_hash)))
)
to_be_deleted = to_be_synced = to_be_overtaken = to_be_counted_channel_members = 0
condition_changed_stakes = condition_changed_channel_content = None
if initial_sync:
to_be_counted_channel_members = p.ctx.fetchtotal(
(TXO.c.txo_type == TXO_TYPES['channel']) &
(TXO.c.spent_height == 0)
)
else:
to_be_deleted = p.ctx.fetchtotal(condition_spent_claims())
if blocks_added:
condition_changed_stakes = (
(TXO.c.txo_type == TXO_TYPES['support']) & (
between(TXO.c.height, blocks_added[0], blocks_added[-1]) |
between(TXO.c.spent_height, blocks_added[0], blocks_added[-1])
)
)
sql = (
select(func.count(distinct(TXO.c.claim_hash)).label('total'))
.where(condition_changed_stakes)
)
to_be_synced = p.ctx.fetchone(sql)['total']
condition_changed_channel_content = (
(TXO.c.channel_hash != None) & (
between(TXO.c.height, blocks_added[0], blocks_added[-1]) |
between(TXO.c.spent_height, blocks_added[0], blocks_added[-1])
)
)
sql = (
select(func.count(distinct(TXO.c.channel_hash)).label('total'))
.where(condition_changed_channel_content)
)
to_be_synced += p.ctx.fetchone(sql)['total']
to_be_overtaken = chain.db.sync_get_takeover_count(
start_height=blocks_added[0], end_height=blocks_added[-1])
p.start(to_be_deleted + to_be_modified + to_be_synced + to_be_overtaken + to_be_counted_channel_members)
done = 0
if to_be_deleted:
deleted = p.ctx.execute(Claim.delete().where(condition_spent_claims()))
        assert to_be_deleted == deleted.rowcount, \
            f"Expected {to_be_deleted} claims to be deleted, but actually deleted {deleted.rowcount}."
done += deleted.rowcount
p.step(done)
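    # Scalar subqueries giving the total amount and count of unspent supports for a claim.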
support = TXO.alias('support')
staked_support_amount_calc = (
select(func.coalesce(func.sum(support.c.amount), 0)).where(
(support.c.txo_type == TXO_TYPES['support']) &
(support.c.spent_height == 0)
)
)
staked_support_count_calc = (
select(func.coalesce(func.count('*'), 0)).where(
(support.c.txo_type == TXO_TYPES['support']) &
(support.c.spent_height == 0)
)
)
select_claims = (
select(
TXO.c.claim_hash, TXO.c.amount, TXO.c.position.label('txo_position'),
TX.c.tx_hash, TX.c.height, TX.c.timestamp,
func.substr(TX.c.raw, TXO.c.script_offset+1, TXO.c.script_length).label('src'),
(staked_support_amount_calc
.where(support.c.claim_hash == TXO.c.claim_hash)
.label('staked_support_amount')),
(staked_support_count_calc
.where(support.c.claim_hash == TXO.c.claim_hash)
.label('staked_support_count'))
).order_by(TXO.c.claim_hash)
)
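    # Stream results so claim rows can be processed in partitions instead of loading them all at once.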
streaming_connection = p.ctx.connection.execution_options(stream_results=True)
    # all channels need to be inserted first because the channel short_url will be needed to
    # set the canonical_url of the claims they contain when those are inserted next
done = insert_claims_with_lbrycrd(
done, chain, p, streaming_connection.execute(
select_claims.select_from(TXO.join(TX)).where(
(TXO.c.txo_type == TXO_TYPES['channel']) &
(TXO.c.spent_height == 0) &
(TXO.c.claim_hash.notin_(select(Claim.c.claim_hash)))
)
)
)
channel_txo = TXO.alias('channel_txo')
channel_claim = Claim.alias('channel_claim')
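    # For each claim also select its signature data, the public key of the most recent channel
    # TXO at or below the claim's height, and the channel claim's short_url.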
select_claims = (
select_claims.add_columns(
TXO.c.signature, TXO.c.signature_digest,
case([(
TXO.c.channel_hash != None,
select(channel_txo.c.public_key).select_from(channel_txo).where(
(channel_txo.c.txo_type == TXO_TYPES['channel']) &
(channel_txo.c.claim_hash == TXO.c.channel_hash) &
(channel_txo.c.height <= TXO.c.height)
).order_by(desc(channel_txo.c.height)).limit(1).scalar_subquery()
)]).label('channel_public_key'),
channel_claim.c.short_url.label('channel_url')
).select_from(
TXO
.join(TX)
.join(channel_claim, channel_claim.c.claim_hash == TXO.c.channel_hash, isouter=True)
)
)
done = insert_claims_with_lbrycrd(
done, chain, p, streaming_connection.execute(
select_claims.where(
(TXO.c.txo_type.in_(list(set(CLAIM_TYPE_CODES) - {TXO_TYPES['channel']}))) &
(TXO.c.spent_height == 0) &
(TXO.c.claim_hash.notin_(select(Claim.c.claim_hash)))
)
)
)
if initial_sync:
channel_update_member_count_sql = (
Claim.update()
.where(Claim.c.claim_type == TXO_TYPES['channel'])
.values(
signed_claim_count=channel_content_count_calc(Claim.alias('content')),
signed_support_count=channel_content_count_calc(Support),
)
)
result = p.ctx.execute(channel_update_member_count_sql)
done += result.rowcount
p.step(done)
if initial_sync:
return
select_stale_claims = select_claims.where(
(TXO.c.txo_type.in_(CLAIM_TYPE_CODES)) &
(TXO.c.spent_height == 0) &
(TXO.c.txo_hash.notin_(select(Claim.c.txo_hash)))
)
loader = p.ctx.get_bulk_loader()
for row in p.ctx.connection.execution_options(stream_results=True).execute(select_stale_claims):
txo = Output(
amount=row['amount'],
script=OutputScript(row['src']),
tx_ref=TXRefImmutable.from_hash(row['tx_hash'], row['height']),
position=row['txo_position'],
)
loader.update_claim(
txo, channel_url=row['channel_url'], timestamp=row['timestamp'],
staked_support_amount=int(row['staked_support_amount']),
staked_support_count=int(row['staked_support_count']),
signature=row['signature'], signature_digest=row['signature_digest'],
channel_public_key=row['channel_public_key'],
)
if len(loader.update_claims) >= 1000:
done += loader.flush(Claim)
p.step(done)
done += loader.flush(Claim)
p.step(done)
for takeover in chain.db.sync_get_takeovers(start_height=blocks_added[0], end_height=blocks_added[-1]):
update_claims = (
Claim.update()
.where(Claim.c.normalized == takeover['normalized'])
.values(
is_controlling=case(
[(Claim.c.claim_hash == takeover['claim_hash'], True)],
else_=False
),
takeover_height=case(
[(Claim.c.claim_hash == takeover['claim_hash'], takeover['height'])],
else_=None
),
activation_height=least(Claim.c.activation_height, takeover['height']),
)
)
result = p.ctx.execute(update_claims)
done += result.rowcount
p.step(done)
channel_update_member_count_sql = (
Claim.update()
.where(
(Claim.c.claim_type == TXO_TYPES['channel']) &
Claim.c.claim_hash.in_(select(TXO.c.channel_hash).where(condition_changed_channel_content))
).values(
signed_claim_count=channel_content_count_calc(Claim.alias('content')),
signed_support_count=channel_content_count_calc(Support),
)
)
p.ctx.execute(channel_update_member_count_sql)
claim_update_supports_sql = (
Claim.update()
.where(Claim.c.claim_hash.in_(select(TXO.c.claim_hash).where(condition_changed_stakes)))
.values(
staked_support_amount=(
staked_support_amount_calc
.where(support.c.claim_hash == Claim.c.claim_hash)
.scalar_subquery())
,
staked_support_count=(
staked_support_count_calc
.where(support.c.claim_hash == Claim.c.claim_hash)
.scalar_subquery()
),
)
)
result = p.ctx.execute(claim_update_supports_sql)
    p.step(done + result.rowcount)


@event_emitter("blockchain.sync.supports", "supports")
def process_supports(starting_height: int, blocks_added: Optional[Tuple[int, int]], p: ProgressContext):
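    """Delete spent supports and bulk-insert supports that are missing from the database."""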
done = 0
to_be_deleted = p.ctx.fetchtotal(condition_spent_supports)
to_be_inserted = p.ctx.fetchtotal(condition_missing_supports)
p.start(to_be_deleted + to_be_inserted)
sql = Support.delete().where(condition_spent_supports)
deleted = p.ctx.execute(sql)
    assert to_be_deleted == deleted.rowcount, \
        f"Expected {to_be_deleted} supports to be deleted, but actually deleted {deleted.rowcount}."
done += deleted.rowcount
p.step(done)
if p.ctx.is_postgres:
insert_supports = partial(p.ctx.pg_copy, Support)
else:
insert_supports = partial(p.ctx.execute, Support.insert())
loader = p.ctx.get_bulk_loader()
inserted_supports, supports = 0, []
for txo in rows_to_txos(p.ctx.fetchall(select_missing_supports)):
supports.append(loader.support_to_row(txo))
if len(supports) >= 50_000:
insert_supports(supports)
inserted_supports += len(supports)
supports = []
if supports:
insert_supports(supports)
inserted_supports += len(supports)
    assert to_be_inserted == inserted_supports, \
        f"Expected {to_be_inserted} supports to be inserted, but actually inserted {inserted_supports}."
    # The signature-validation pass below is leftover, unreachable code (it followed an
    # unconditional return) and references helpers that are not defined or imported in this
    # module (get_unvalidated_signable_count, select_unvalidated_signables, signature_validation,
    # changes); it is kept commented out for reference.
    # p.start(get_unvalidated_signable_count(p.ctx, Support))
    # support_updates = []
    # for support in p.ctx.execute(select_unvalidated_signables(Support, Support.c.txo_hash)):
    #     support_updates.append(
    #         signature_validation({'pk': support['txo_hash']}, support, support['public_key'])
    #     )
    #     if changes is not None:
    #         changes.channels_with_changed_content.add(support['channel_hash'])
    #     if len(support_updates) > 1000:
    #         p.ctx.execute(Support.update().where(Support.c.txo_hash == bindparam('pk')), support_updates)
    #         p.step(len(support_updates))
    #         support_updates.clear()
    # if support_updates:
    #     p.ctx.execute(Support.update().where(Support.c.txo_hash == bindparam('pk')), support_updates)