import os
import asyncio
import binascii
import logging
import random
import typing
from typing import Optional

from aiohttp.web import Request

from lbry.error import InvalidStreamDescriptorError
from lbry.file.source_manager import SourceManager
from lbry.stream.descriptor import StreamDescriptor
from lbry.stream.managed_stream import ManagedStream
from lbry.file.source import ManagedDownloadSource

# These names are only referenced inside string annotations, so they are
# imported for type checkers only and never at runtime.
if typing.TYPE_CHECKING:
    from lbry.conf import Config
    from lbry.blob.blob_manager import BlobManager
    from lbry.dht.node import Node
    from lbry.wallet.wallet import WalletManager
    from lbry.wallet.transaction import Transaction
    from lbry.extras.daemon.analytics import AnalyticsManager
    from lbry.extras.daemon.storage import SQLiteStorage, StoredContentClaim

# Module-level logger, named after this module.
log = logging.getLogger(__name__)


def path_or_none(encoded_path: Optional[typing.Union[str, bytes]]) -> Optional[str]:
    """
    Decode a hex-encoded path.

    :param encoded_path: hexadecimal text (str or bytes) or a falsy value
    :return: the decoded string, or None when `encoded_path` is falsy (None, '' or b'')
    """
    if not encoded_path:
        return None
    return binascii.unhexlify(encoded_path).decode()


class StreamManager(SourceManager):
    """
    Source manager for ManagedStream objects.

    Loaded streams are kept in `_sources`, keyed by sd hash. The manager loads streams from
    storage, creates new streams from local files, deletes them, serves them over HTTP and
    uploads them to the configured reflector servers.
    """
    _sources: typing.Dict[str, ManagedStream]

    # NOTE(review): this binds the very same object as SourceManager.filter_fields, so the
    # update() below also changes the base class attribute. Kept as is because code outside
    # this file may depend on it - confirm before replacing it with a copy.
    filter_fields = SourceManager.filter_fields
    filter_fields.update({
        'sd_hash',
        'stream_hash',
        'full_status',  # TODO: remove
        'blobs_remaining',
        'blobs_in_stream',
        'uploading_to_reflector',
        'is_fully_reflected'
    })

    def __init__(self, loop: asyncio.AbstractEventLoop, config: 'Config', blob_manager: 'BlobManager',
                 wallet_manager: 'WalletManager', storage: 'SQLiteStorage', node: Optional['Node'],
                 analytics_manager: Optional['AnalyticsManager'] = None):
        """
        :param loop: event loop used to schedule the manager's tasks
        :param config: configuration object (reflector and download settings are read from it)
        :param blob_manager: blob manager used to look up, verify and delete blobs
        :param wallet_manager: wallet manager, stored on the instance
        :param storage: database storage backend
        :param node: optional DHT node handed to stream downloaders that have none
        :param analytics_manager: optional analytics manager passed on to loaded streams
        """
        super().__init__(loop, config, storage, analytics_manager)
        self.blob_manager = blob_manager
        self.wallet_manager = wallet_manager
        self.node = node
        self.resume_saving_task: Optional[asyncio.Task] = None
        self.re_reflect_task: Optional[asyncio.Task] = None
        self.update_stream_finished_futs: typing.List[asyncio.Future] = []
        # in-flight reflector uploads, keyed by sd hash
        self.running_reflector_uploads: typing.Dict[str, asyncio.Task] = {}
        self.started = asyncio.Event()

    @property
    def streams(self):
        """The dictionary of managed streams (the same object as `_sources`)."""
        return self._sources

    def add(self, source: ManagedStream):
        """Register `source` and install the storage callback that refreshes its content claim."""
        super().add(source)
        self.storage.content_claim_callbacks[source.stream_hash] = lambda: self._update_content_claim(source)

    async def _update_content_claim(self, stream: ManagedStream):
        """
        Read the content claim stored for `stream` and apply it to the stream registered under
        its sd hash (registering `stream` itself when nothing is registered yet).
        """
        claim_info = await self.storage.get_content_claim(stream.stream_hash)
        self._sources.setdefault(stream.sd_hash, stream).set_claim(claim_info, claim_info['value'])

    async def recover_streams(self, file_infos: typing.List[typing.Dict]):
        """
        Try to rebuild the stream descriptors of the given file rows.

        Every descriptor that StreamDescriptor.recover returns is passed to
        storage.recover_streams, and the affected blob hashes are passed to
        blob_manager.ensure_completed_blobs_status.

        :param file_infos: file rows; 'sd_hash', 'stream_hash', 'stream_name',
            'suggested_file_name', 'key' and 'content_fee' are read from each
        """
        to_restore = []
        to_check = []

        async def recover_stream(sd_hash: str, stream_hash: str, stream_name: str,
                                 suggested_file_name: str, key: str,
                                 content_fee: Optional['Transaction']) -> None:
            sd_blob = self.blob_manager.get_blob(sd_hash)
            blobs = await self.storage.get_blobs_for_stream(stream_hash)
            descriptor = await StreamDescriptor.recover(
                self.blob_manager.blob_dir, sd_blob, stream_hash, stream_name, suggested_file_name, key, blobs
            )
            if not descriptor:
                return
            to_restore.append((descriptor, sd_blob, content_fee))
            # the sd blob plus every blob of the descriptor except the last one
            to_check.extend([sd_blob.blob_hash] + [blob.blob_hash for blob in descriptor.blobs[:-1]])

        await asyncio.gather(*[
            recover_stream(
                file_info['sd_hash'], file_info['stream_hash'], binascii.unhexlify(file_info['stream_name']).decode(),
                binascii.unhexlify(file_info['suggested_file_name']).decode(), file_info['key'],
                file_info['content_fee']
            ) for file_info in file_infos
        ])

        if to_restore:
            await self.storage.recover_streams(to_restore, self.config.download_dir)
        if to_check:
            await self.blob_manager.ensure_completed_blobs_status(to_check)

        # if self.blob_manager._save_blobs:
        #     log.info("Recovered %i/%i attempted streams", len(to_restore), len(file_infos))

    async def _load_stream(self, rowid: int, sd_hash: str, file_name: Optional[str],
                           download_directory: Optional[str], status: str,
                           claim: Optional['StoredContentClaim'], content_fee: Optional['Transaction'],
                           added_on: Optional[int], fully_reflected: Optional[bool]):
        """
        Build a ManagedStream for one stored file row and register it with `add`.

        When the stream descriptor is invalid a warning is logged and nothing is registered.
        """
        try:
            descriptor = await self.blob_manager.get_stream_descriptor(sd_hash)
        except InvalidStreamDescriptorError as err:
            log.warning("Failed to start stream for sd %s - %s", sd_hash, str(err))
            return
        stream = ManagedStream(
            self.loop, self.config, self.blob_manager, descriptor.sd_hash, download_directory, file_name, status,
            claim, content_fee=content_fee, rowid=rowid, descriptor=descriptor,
            analytics_manager=self.analytics_manager, added_on=added_on
        )
        if fully_reflected:
            stream.fully_reflected.set()
        self.add(stream)

    async def initialize_from_database(self):
        """
        Load every file known to storage as a managed stream.

        Streams whose sd blob is not verified are passed to `recover_streams` first. Files that
        have a file name and download directory, are not saved yet and have status 'running'
        get their `save_file` scheduled again in `resume_saving_task`.
        """
        to_recover = []
        to_start = []

        await self.storage.update_manually_removed_files_since_last_run()

        for file_info in await self.storage.get_all_lbry_files():
            # if the sd blob is not verified, try to reconstruct it from the database
            # this could either be because the blob files were deleted manually or save_blobs was not true when
            # the stream was downloaded
            if not self.blob_manager.is_blob_verified(file_info['sd_hash']):
                to_recover.append(file_info)
            to_start.append(file_info)
        if to_recover:
            await self.recover_streams(to_recover)

        log.info("Initializing %i files", len(to_start))
        to_resume_saving = []
        add_stream_tasks = []
        for file_info in to_start:
            file_name = path_or_none(file_info['file_name'])
            download_directory = path_or_none(file_info['download_directory'])
            if file_name and download_directory and not file_info['saved_file'] \
                    and file_info['status'] == 'running':
                to_resume_saving.append((file_name, download_directory, file_info['sd_hash']))
            add_stream_tasks.append(self.loop.create_task(self._load_stream(
                file_info['rowid'], file_info['sd_hash'], file_name,
                download_directory, file_info['status'],
                file_info['claim'], file_info['content_fee'],
                file_info['added_on'], file_info['fully_reflected']
            )))
        if add_stream_tasks:
            await asyncio.gather(*add_stream_tasks)
        log.info("Started stream manager with %i files", len(self._sources))
        if not self.node:
            log.info("no DHT node given, resuming downloads trusting that we can contact reflector")
        if to_resume_saving:
            resumable = []
            for file_name, download_directory, sd_hash in to_resume_saving:
                # _load_stream registers nothing when the descriptor is invalid, so a queued
                # sd hash may be missing here; skip it instead of failing with a KeyError
                stream = self._sources.get(sd_hash)
                if stream is None:
                    log.warning("Cannot resume saving %s, stream for sd %s was not loaded", file_name, sd_hash)
                    continue
                resumable.append(stream.save_file(file_name, download_directory))
            if resumable:
                log.info("Resuming saving %i files", len(resumable))
                self.resume_saving_task = asyncio.ensure_future(asyncio.gather(*resumable))

    async def reflect_streams(self):
        """Run `_reflect_streams`, logging any unexpected exception instead of raising it."""
        try:
            return await self._reflect_streams()
        except Exception:
            log.exception("reflector task encountered an unexpected error!")

    async def _reflect_streams(self):
        """
        Loop forever: when reflecting is enabled and reflector servers are configured, upload
        the streams storage reports for re-reflection in batches of at most
        `config.concurrent_reflector_uploads`, then sleep for 300 seconds.
        """
        # todo: those debug statements are temporary for #2987 - remove them if its closed
        while True:
            if self.config.reflect_streams and self.config.reflector_servers:
                log.debug("collecting streams to reflect")
                sd_hashes = await self.storage.get_streams_to_re_reflect()
                sd_hashes = [sd for sd in sd_hashes if sd in self._sources]
                batch = []
                while sd_hashes:
                    # a stream can be deleted while an earlier batch is awaited, so look it up
                    # defensively rather than letting a KeyError end the reflector loop
                    stream = self.streams.get(sd_hashes.pop())
                    if stream is None:
                        continue
                    if self.blob_manager.is_blob_verified(stream.sd_hash) and stream.blobs_completed and \
                            stream.sd_hash not in self.running_reflector_uploads and not \
                            stream.fully_reflected.is_set():
                        batch.append(self.reflect_stream(stream))
                    if len(batch) >= self.config.concurrent_reflector_uploads:
                        log.debug("waiting for batch of %s reflecting streams", len(batch))
                        await asyncio.gather(*batch)
                        log.debug("done processing %s streams", len(batch))
                        batch = []
                if batch:
                    log.debug("waiting for batch of %s reflecting streams", len(batch))
                    await asyncio.gather(*batch)
                    log.debug("done processing %s streams", len(batch))
            # sleep on every pass, also when reflecting is disabled, so the loop never spins
            await asyncio.sleep(300)

    async def start(self):
        """Start the base manager, then schedule the periodic reflector task."""
        await super().start()
        self.re_reflect_task = self.loop.create_task(self.reflect_streams())

    def stop(self):
        """Stop the base manager and cancel every task and future tracked by this manager."""
        super().stop()
        if self.resume_saving_task and not self.resume_saving_task.done():
            self.resume_saving_task.cancel()
        if self.re_reflect_task and not self.re_reflect_task.done():
            self.re_reflect_task.cancel()
        while self.update_stream_finished_futs:
            self.update_stream_finished_futs.pop().cancel()
        while self.running_reflector_uploads:
            _, t = self.running_reflector_uploads.popitem()
            t.cancel()
        self.started.clear()
        log.info("finished stopping the stream manager")

    def reflect_stream(self, stream: ManagedStream, server: Optional[str] = None,
                       port: Optional[int] = None) -> asyncio.Task:
        """
        Upload `stream` to a reflector server.

        :param stream: the stream to upload
        :param server: reflector host; a random entry of `config.reflector_servers` is used
            when `server` or `port` is not given
        :param port: reflector port
        :return: the upload task; the already running task is returned when an upload of this
            stream is in flight
        """
        # check for a running upload first so that no server has to be picked in that case
        if stream.sd_hash in self.running_reflector_uploads:
            return self.running_reflector_uploads[stream.sd_hash]
        if not server or not port:
            server, port = random.choice(self.config.reflector_servers)
        task = self.loop.create_task(self._retriable_reflect_stream(stream, server, port))
        self.running_reflector_uploads[stream.sd_hash] = task
        # forget the upload once it is done; the entry may already be gone (see stop())
        task.add_done_callback(lambda _: self.running_reflector_uploads.pop(stream.sd_hash, None))
        return task

    async def _retriable_reflect_stream(self, stream, host, port):
        """
        Upload `stream` to host:port, repeating the upload for as long as the stream is not
        fully reflected and the previous attempt made progress and sent something.

        :return: the result of the last `upload_to_reflector` call
        """
        sent = await stream.upload_to_reflector(host, port)
        while not stream.is_fully_reflected and stream.reflector_progress > 0 and len(sent) > 0:
            stream.reflector_progress = 0
            sent = await stream.upload_to_reflector(host, port)
        return sent

    async def create(self, file_path: str, key: Optional[bytes] = None,
                     iv_generator: Optional[typing.Generator[bytes, None, None]] = None) -> ManagedStream:
        """
        Create a stream from a local file, store it and register it with this manager.

        :param file_path: path of the file to turn into a stream
        :param key: optional key passed to StreamDescriptor.create_stream
        :param iv_generator: optional iv generator passed to StreamDescriptor.create_stream
        :return: the new ManagedStream, created with status STATUS_FINISHED; its reflector
            upload is started when reflecting is enabled and servers are configured
        """
        descriptor = await StreamDescriptor.create_stream(
            self.loop, self.blob_manager.blob_dir, file_path, key=key, iv_generator=iv_generator,
            blob_completed_callback=self.blob_manager.blob_completed
        )
        await self.storage.store_stream(
            self.blob_manager.get_blob(descriptor.sd_hash, is_mine=True), descriptor
        )
        row_id = await self.storage.save_published_file(
            descriptor.stream_hash, os.path.basename(file_path), os.path.dirname(file_path), 0
        )
        stream = ManagedStream(
            self.loop, self.config, self.blob_manager, descriptor.sd_hash, os.path.dirname(file_path),
            os.path.basename(file_path), status=ManagedDownloadSource.STATUS_FINISHED,
            rowid=row_id, descriptor=descriptor
        )
        self.streams[stream.sd_hash] = stream
        self.storage.content_claim_callbacks[stream.stream_hash] = lambda: self._update_content_claim(stream)
        if self.config.reflect_streams and self.config.reflector_servers:
            self.reflect_stream(stream)
        return stream

    async def delete(self, source: ManagedDownloadSource, delete_file: Optional[bool] = False):
        """
        Remove a stream from the manager, delete its blobs and delete it from storage.

        Sources that are not ManagedStream instances are ignored.

        :param source: the stream to delete
        :param delete_file: also remove the output file from disk when it exists
        """
        if not isinstance(source, ManagedStream):
            return
        if source.identifier in self.running_reflector_uploads:
            self.running_reflector_uploads[source.identifier].cancel()
        source.stop_tasks()
        if source.identifier in self.streams:
            del self.streams[source.identifier]
        # the source identifier plus every blob of the descriptor except the last one
        blob_hashes = [source.identifier] + [b.blob_hash for b in source.descriptor.blobs[:-1]]
        await self.blob_manager.delete_blobs(blob_hashes, delete_from_db=False)
        await self.storage.delete_stream(source.descriptor)
        if delete_file and source.output_file_exists:
            os.remove(source.full_path)

    async def stream_partial_content(self, request: Request, sd_hash: str):
        """
        Serve the stream registered under `sd_hash` for an aiohttp request.

        The stream's downloader is given this manager's node when it has none.

        :raises KeyError: when no stream is registered under `sd_hash`
        """
        stream = self._sources[sd_hash]
        if not stream.downloader.node:
            stream.downloader.node = self.node
        return await stream.stream_file(request)