lbry-sdk/lbrynet/stream/stream_manager.py

424 lines
20 KiB
Python
Raw Normal View History

2019-01-22 18:54:17 +01:00
import os
import asyncio
import typing
import binascii
import logging
2019-01-25 21:05:22 +01:00
import random
2019-03-20 06:46:23 +01:00
from decimal import Decimal
from lbrynet.error import ResolveError, InvalidStreamDescriptorError, KeyFeeAboveMaxAllowed, InsufficientFundsError
2019-03-31 19:42:27 +02:00
from lbrynet.error import DownloadSDTimeout, DownloadDataTimeout, ResolveTimeout
from lbrynet.utils import cache_concurrent
from lbrynet.stream.descriptor import StreamDescriptor
2019-01-22 18:54:17 +01:00
from lbrynet.stream.managed_stream import ManagedStream
2019-03-20 06:46:23 +01:00
from lbrynet.schema.claim import Claim
from lbrynet.schema.uri import parse_lbry_uri
2019-01-31 18:40:49 +01:00
from lbrynet.extras.daemon.storage import lbc_to_dewies
2019-01-22 18:54:17 +01:00
if typing.TYPE_CHECKING:
2019-01-28 15:51:02 +01:00
from lbrynet.conf import Config
2019-03-28 19:51:55 +01:00
from lbrynet.blob.blob_manager import BlobManager
2019-01-22 18:54:17 +01:00
from lbrynet.dht.node import Node
from lbrynet.extras.daemon.analytics import AnalyticsManager
from lbrynet.extras.daemon.storage import SQLiteStorage, StoredStreamClaim
2019-03-20 07:00:03 +01:00
from lbrynet.wallet import LbryWalletManager
from lbrynet.extras.daemon.exchange_rate_manager import ExchangeRateManager
2019-01-22 18:54:17 +01:00
log = logging.getLogger(__name__)
filter_fields = [
2019-02-15 22:44:31 +01:00
'rowid',
2019-01-22 18:54:17 +01:00
'status',
'file_name',
'sd_hash',
'stream_hash',
'claim_name',
'claim_height',
'claim_id',
'outpoint',
'txid',
'nout',
'channel_claim_id',
'channel_name',
'full_status', # TODO: remove
'blobs_remaining',
'blobs_in_stream'
2019-01-22 18:54:17 +01:00
]
comparison_operators = {
'eq': lambda a, b: a == b,
'ne': lambda a, b: a != b,
'g': lambda a, b: a > b,
'l': lambda a, b: a < b,
'ge': lambda a, b: a >= b,
'le': lambda a, b: a <= b,
}
def path_or_none(p) -> typing.Optional[str]:
try:
return binascii.unhexlify(p).decode()
except binascii.Error as err:
if p == '{stream}':
return None
raise err
2019-01-22 18:54:17 +01:00
class StreamManager:
def __init__(self, loop: asyncio.BaseEventLoop, config: 'Config', blob_manager: 'BlobManager',
wallet: 'LbryWalletManager', storage: 'SQLiteStorage', node: typing.Optional['Node'],
analytics_manager: typing.Optional['AnalyticsManager'] = None):
2019-01-22 18:54:17 +01:00
self.loop = loop
self.config = config
2019-01-22 18:54:17 +01:00
self.blob_manager = blob_manager
self.wallet = wallet
self.storage = storage
self.node = node
self.analytics_manager = analytics_manager
self.streams: typing.Dict[str, ManagedStream] = {}
2019-01-22 18:54:17 +01:00
self.resume_downloading_task: asyncio.Task = None
self.re_reflect_task: asyncio.Task = None
2019-01-22 18:54:17 +01:00
self.update_stream_finished_futs: typing.List[asyncio.Future] = []
self.running_reflector_uploads: typing.List[asyncio.Task] = []
2019-01-22 18:54:17 +01:00
async def _update_content_claim(self, stream: ManagedStream):
claim_info = await self.storage.get_content_claim(stream.stream_hash)
2019-03-20 06:46:23 +01:00
stream.set_claim(claim_info, claim_info['value'])
async def stop_stream(self, stream: ManagedStream):
stream.stop_download()
if not stream.finished and stream.output_file_exists:
try:
os.remove(stream.full_path)
except OSError as err:
log.warning("Failed to delete partial download %s from downloads directory: %s", stream.full_path,
str(err))
if stream.running:
stream.update_status(ManagedStream.STATUS_STOPPED)
await self.storage.change_file_status(stream.stream_hash, ManagedStream.STATUS_STOPPED)
async def start_stream(self, stream: ManagedStream):
stream.update_status(ManagedStream.STATUS_RUNNING)
await self.storage.change_file_status(stream.stream_hash, ManagedStream.STATUS_RUNNING)
2019-03-31 19:42:27 +02:00
await stream.setup(self.node, save_file=not self.config.streaming_only)
self.storage.content_claim_callbacks[stream.stream_hash] = lambda: self._update_content_claim(stream)
2019-02-01 21:46:31 +01:00
async def recover_streams(self, file_infos: typing.List[typing.Dict]):
to_restore = []
async def recover_stream(sd_hash: str, stream_hash: str, stream_name: str,
suggested_file_name: str, key: str) -> typing.Optional[StreamDescriptor]:
sd_blob = self.blob_manager.get_blob(sd_hash)
blobs = await self.storage.get_blobs_for_stream(stream_hash)
descriptor = await StreamDescriptor.recover(
self.blob_manager.blob_dir, sd_blob, stream_hash, stream_name, suggested_file_name, key, blobs
)
if not descriptor:
return
to_restore.append((descriptor, sd_blob))
await asyncio.gather(*[
recover_stream(
file_info['sd_hash'], file_info['stream_hash'], binascii.unhexlify(file_info['stream_name']).decode(),
binascii.unhexlify(file_info['suggested_file_name']).decode(), file_info['key']
) for file_info in file_infos
])
if to_restore:
await self.storage.recover_streams(to_restore, self.config.download_dir)
# if self.blob_manager._save_blobs:
# log.info("Recovered %i/%i attempted streams", len(to_restore), len(file_infos))
async def add_stream(self, rowid: int, sd_hash: str, file_name: typing.Optional[str],
download_directory: typing.Optional[str], status: str,
claim: typing.Optional['StoredStreamClaim']):
try:
descriptor = await self.blob_manager.get_stream_descriptor(sd_hash)
except InvalidStreamDescriptorError as err:
log.warning("Failed to start stream for sd %s - %s", sd_hash, str(err))
return
stream = ManagedStream(
self.loop, self.config, self.blob_manager, descriptor.sd_hash, download_directory, file_name, status,
2019-03-31 19:42:27 +02:00
claim, rowid=rowid, descriptor=descriptor, analytics_manager=self.analytics_manager
)
self.streams[sd_hash] = stream
2019-02-01 17:37:51 +01:00
2019-01-22 18:54:17 +01:00
async def load_streams_from_database(self):
to_recover = []
to_start = []
2019-03-31 19:42:27 +02:00
await self.storage.sync_files_to_blobs()
for file_info in await self.storage.get_all_lbry_files():
if not self.blob_manager.get_blob(file_info['sd_hash']).get_is_verified():
to_recover.append(file_info)
to_start.append(file_info)
if to_recover:
# if self.blob_manager._save_blobs:
# log.info("Attempting to recover %i streams", len(to_recover))
await self.recover_streams(to_recover)
log.info("Initializing %i files", len(to_start))
if to_start:
await asyncio.gather(*[
self.add_stream(
file_info['rowid'], file_info['sd_hash'], path_or_none(file_info['file_name']),
path_or_none(file_info['download_directory']), file_info['status'],
file_info['claim']
) for file_info in to_start
])
log.info("Started stream manager with %i files", len(self.streams))
2019-01-22 18:54:17 +01:00
async def resume(self):
if not self.node:
2019-02-13 23:57:12 +01:00
log.warning("no DHT node given, resuming downloads trusting that we can contact reflector")
2019-02-04 00:39:01 +01:00
t = [
self.loop.create_task(self.start_stream(stream)) for stream in self.streams.values()
if stream.running
2019-02-04 00:39:01 +01:00
]
if t:
log.info("resuming %i downloads", len(t))
await asyncio.gather(*t, loop=self.loop)
2019-01-22 18:54:17 +01:00
2019-01-25 21:05:22 +01:00
async def reflect_streams(self):
while True:
2019-02-09 02:13:26 +01:00
if self.config.reflect_streams and self.config.reflector_servers:
sd_hashes = await self.storage.get_streams_to_re_reflect()
2019-03-31 19:42:27 +02:00
sd_hashes = [sd for sd in sd_hashes if sd in self.streams]
2019-01-25 21:05:22 +01:00
batch = []
2019-03-31 19:42:27 +02:00
while sd_hashes:
stream = self.streams[sd_hashes.pop()]
if not stream.fully_reflected.is_set():
host, port = random.choice(self.config.reflector_servers)
batch.append(stream.upload_to_reflector(host, port))
if len(batch) >= self.config.concurrent_reflector_uploads:
await asyncio.gather(*batch)
batch = []
if batch:
await asyncio.gather(*batch)
await asyncio.sleep(300, loop=self.loop)
2019-01-25 21:05:22 +01:00
2019-01-22 18:54:17 +01:00
async def start(self):
await self.load_streams_from_database()
self.resume_downloading_task = self.loop.create_task(self.resume())
2019-03-31 19:42:27 +02:00
self.re_reflect_task = self.loop.create_task(self.reflect_streams())
2019-01-22 18:54:17 +01:00
def stop(self):
2019-01-22 18:54:17 +01:00
if self.resume_downloading_task and not self.resume_downloading_task.done():
self.resume_downloading_task.cancel()
if self.re_reflect_task and not self.re_reflect_task.done():
self.re_reflect_task.cancel()
2019-01-22 18:54:17 +01:00
while self.streams:
_, stream = self.streams.popitem()
stream.stop_download()
2019-01-22 18:54:17 +01:00
while self.update_stream_finished_futs:
self.update_stream_finished_futs.pop().cancel()
while self.running_reflector_uploads:
self.running_reflector_uploads.pop().cancel()
2019-01-22 18:54:17 +01:00
2019-01-25 21:05:22 +01:00
async def create_stream(self, file_path: str, key: typing.Optional[bytes] = None,
iv_generator: typing.Optional[typing.Generator[bytes, None, None]] = None) -> ManagedStream:
stream = await ManagedStream.create(self.loop, self.config, self.blob_manager, file_path, key, iv_generator)
self.streams[stream.sd_hash] = stream
self.storage.content_claim_callbacks[stream.stream_hash] = lambda: self._update_content_claim(stream)
2019-02-09 02:13:26 +01:00
if self.config.reflect_streams and self.config.reflector_servers:
host, port = random.choice(self.config.reflector_servers)
task = self.loop.create_task(stream.upload_to_reflector(host, port))
self.running_reflector_uploads.append(task)
task.add_done_callback(
lambda _: None
if task not in self.running_reflector_uploads else self.running_reflector_uploads.remove(task)
)
2019-01-22 18:54:17 +01:00
return stream
async def delete_stream(self, stream: ManagedStream, delete_file: typing.Optional[bool] = False):
await self.stop_stream(stream)
if stream.sd_hash in self.streams:
del self.streams[stream.sd_hash]
blob_hashes = [stream.sd_hash] + [b.blob_hash for b in stream.descriptor.blobs[:-1]]
await self.blob_manager.delete_blobs(blob_hashes, delete_from_db=False)
2019-01-22 18:54:17 +01:00
await self.storage.delete_stream(stream.descriptor)
if delete_file and stream.output_file_exists:
os.remove(stream.full_path)
2019-01-22 18:54:17 +01:00
def get_stream_by_stream_hash(self, stream_hash: str) -> typing.Optional[ManagedStream]:
streams = tuple(filter(lambda stream: stream.stream_hash == stream_hash, self.streams.values()))
if streams:
return streams[0]
2019-01-22 18:54:17 +01:00
def get_filtered_streams(self, sort_by: typing.Optional[str] = None, reverse: typing.Optional[bool] = False,
comparison: typing.Optional[str] = None,
**search_by) -> typing.List[ManagedStream]:
"""
Get a list of filtered and sorted ManagedStream objects
:param sort_by: field to sort by
:param reverse: reverse sorting
:param comparison: comparison operator used for filtering
:param search_by: fields and values to filter by
"""
if sort_by and sort_by not in filter_fields:
raise ValueError(f"'{sort_by}' is not a valid field to sort by")
if comparison and comparison not in comparison_operators:
raise ValueError(f"'{comparison}' is not a valid comparison")
if 'full_status' in search_by:
del search_by['full_status']
2019-01-22 18:54:17 +01:00
for search in search_by.keys():
if search not in filter_fields:
raise ValueError(f"'{search}' is not a valid search operation")
if search_by:
comparison = comparison or 'eq'
streams = []
for stream in self.streams.values():
2019-01-22 18:54:17 +01:00
for search, val in search_by.items():
if comparison_operators[comparison](getattr(stream, search), val):
streams.append(stream)
break
else:
streams = list(self.streams.values())
2019-01-22 18:54:17 +01:00
if sort_by:
streams.sort(key=lambda s: getattr(s, sort_by))
if reverse:
streams.reverse()
return streams
2019-03-22 07:51:27 +01:00
async def _check_update_or_replace(self, outpoint: str, claim_id: str, claim: Claim) -> typing.Tuple[
2019-03-11 02:55:33 +01:00
typing.Optional[ManagedStream], typing.Optional[ManagedStream]]:
existing = self.get_filtered_streams(outpoint=outpoint)
if existing:
2019-03-31 19:42:27 +02:00
if existing[0].status == ManagedStream.STATUS_STOPPED:
await self.start_stream(existing[0])
2019-03-11 02:55:33 +01:00
return existing[0], None
2019-04-20 07:12:43 +02:00
existing = self.get_filtered_streams(sd_hash=claim.stream.source.sd_hash)
2019-03-11 02:55:33 +01:00
if existing and existing[0].claim_id != claim_id:
raise ResolveError(f"stream for {existing[0].claim_id} collides with existing "
f"download {claim_id}")
if existing:
log.info("claim contains a metadata only update to a stream we have")
await self.storage.save_content_claim(
existing[0].stream_hash, outpoint
)
await self._update_content_claim(existing[0])
if not existing[0].running:
await self.start_stream(existing[0])
2019-03-11 02:55:33 +01:00
return existing[0], None
else:
existing_for_claim_id = self.get_filtered_streams(claim_id=claim_id)
if existing_for_claim_id:
log.info("claim contains an update to a stream we have, downloading it")
return None, existing_for_claim_id[0]
return None, None
@cache_concurrent
2019-03-31 19:42:27 +02:00
async def download_stream_from_uri(self, uri, exchange_rate_manager: 'ExchangeRateManager',
timeout: typing.Optional[float] = None,
file_name: typing.Optional[str] = None,
download_directory: typing.Optional[str] = None,
save_file: bool = True, resolve_timeout: float = 3.0) -> ManagedStream:
timeout = timeout or self.config.download_timeout
2019-03-11 02:55:33 +01:00
start_time = self.loop.time()
2019-03-31 19:42:27 +02:00
resolved_time = None
stream = None
error = None
outpoint = None
try:
# resolve the claim
parsed_uri = parse_lbry_uri(uri)
if parsed_uri.is_channel:
raise ResolveError("cannot download a channel claim, specify a /path")
try:
resolved_result = await asyncio.wait_for(self.wallet.ledger.resolve(0, 1, uri), resolve_timeout)
except asyncio.TimeoutError:
raise ResolveTimeout(uri)
await self.storage.save_claims_for_resolve([
value for value in resolved_result.values() if 'error' not in value
])
resolved = resolved_result.get(uri, {})
resolved = resolved if 'value' in resolved else resolved.get('claim')
if not resolved:
raise ResolveError(f"Failed to resolve stream at '{uri}'")
if 'error' in resolved:
raise ResolveError(f"error resolving stream: {resolved['error']}")
2019-04-05 05:10:18 +02:00
claim = Claim.from_bytes(binascii.unhexlify(resolved['protobuf']))
outpoint = f"{resolved['txid']}:{resolved['nout']}"
resolved_time = self.loop.time() - start_time
2019-03-11 02:55:33 +01:00
2019-03-31 19:42:27 +02:00
# resume or update an existing stream, if the stream changed download it and delete the old one after
updated_stream, to_replace = await self._check_update_or_replace(outpoint, resolved['claim_id'], claim)
if updated_stream:
return updated_stream
2019-04-05 05:10:18 +02:00
content_fee = None
2019-03-31 19:42:27 +02:00
# check that the fee is payable
if not to_replace and claim.stream.has_fee:
fee_amount = round(exchange_rate_manager.convert_currency(
claim.stream.fee.currency, "LBC", claim.stream.fee.amount
), 5)
max_fee_amount = round(exchange_rate_manager.convert_currency(
self.config.max_key_fee['currency'], "LBC", Decimal(self.config.max_key_fee['amount'])
), 5)
if fee_amount > max_fee_amount:
msg = f"fee of {fee_amount} exceeds max configured to allow of {max_fee_amount}"
log.warning(msg)
raise KeyFeeAboveMaxAllowed(msg)
balance = await self.wallet.default_account.get_balance()
if lbc_to_dewies(str(fee_amount)) > balance:
msg = f"fee of {fee_amount} exceeds max available balance"
log.warning(msg)
raise InsufficientFundsError(msg)
fee_address = claim.stream.fee.address
2019-04-05 05:10:18 +02:00
content_fee = await self.wallet.send_amount_to_address(
2019-03-31 19:42:27 +02:00
lbc_to_dewies(str(fee_amount)), fee_address.encode('latin1')
)
2019-04-05 05:10:18 +02:00
2019-03-31 19:42:27 +02:00
log.info("paid fee of %s for %s", fee_amount, uri)
download_directory = download_directory or self.config.download_dir
if not file_name and (self.config.streaming_only or not save_file):
download_dir, file_name = None, None
stream = ManagedStream(
self.loop, self.config, self.blob_manager, claim.stream.source.sd_hash, download_directory,
2019-04-05 05:10:18 +02:00
file_name, ManagedStream.STATUS_RUNNING, content_fee=content_fee,
analytics_manager=self.analytics_manager
2019-03-31 19:42:27 +02:00
)
try:
await asyncio.wait_for(stream.setup(
self.node, save_file=save_file, file_name=file_name, download_directory=download_directory
), timeout, loop=self.loop)
except asyncio.TimeoutError:
if not stream.descriptor:
raise DownloadSDTimeout(stream.sd_hash)
raise DownloadDataTimeout(stream.sd_hash)
if to_replace: # delete old stream now that the replacement has started downloading
await self.delete_stream(to_replace)
stream.set_claim(resolved, claim)
await self.storage.save_content_claim(stream.stream_hash, outpoint)
self.streams[stream.sd_hash] = stream
return stream
except Exception as err:
error = err
if stream and stream.descriptor:
await self.storage.delete_stream(stream.descriptor)
2019-04-05 04:02:33 +02:00
await self.blob_manager.delete_blob(stream.sd_hash)
2019-03-31 19:42:27 +02:00
finally:
if self.analytics_manager and (error or (stream and (stream.downloader.time_to_descriptor or
stream.downloader.time_to_first_bytes))):
self.loop.create_task(
self.analytics_manager.send_time_to_first_bytes(
resolved_time, self.loop.time() - start_time, None if not stream else stream.download_id,
uri, outpoint,
None if not stream else len(stream.downloader.blob_downloader.active_connections),
None if not stream else len(stream.downloader.blob_downloader.scores),
False if not stream else stream.downloader.added_fixed_peers,
self.config.fixed_peer_delay if not stream else stream.downloader.fixed_peers_delay,
None if not stream else stream.sd_hash,
None if not stream else stream.downloader.time_to_descriptor,
None if not (stream and stream.descriptor) else stream.descriptor.blobs[0].blob_hash,
None if not (stream and stream.descriptor) else stream.descriptor.blobs[0].length,
None if not stream else stream.downloader.time_to_first_bytes,
None if not error else error.__class__.__name__
)
)
if error:
raise error