lbry-sdk/lbrynet/core/StreamDescriptor.py

473 lines
17 KiB
Python
Raw Normal View History

import binascii
2015-08-20 17:27:15 +02:00
from collections import defaultdict
2018-07-21 20:12:29 +02:00
import json
2015-08-20 17:27:15 +02:00
import logging
from twisted.internet import threads, defer
from lbrynet.core.cryptoutils import get_lbry_hash_obj
2015-08-20 17:27:15 +02:00
from lbrynet.core.client.StandaloneBlobDownloader import StandaloneBlobDownloader
from lbrynet.core.Error import UnknownStreamTypeError, InvalidStreamDescriptorError
from lbrynet.core.HTTPBlobDownloader import HTTPBlobDownloader
2015-08-20 17:27:15 +02:00
log = logging.getLogger(__name__)
class JSONBytesEncoder(json.JSONEncoder):
    """JSON encoder which serializes ``bytes`` values as their decoded text"""

    def default(self, obj):  # pylint: disable=E0202
        """Return a serializable stand-in for an object json cannot encode.

        ``bytes`` are decoded with the default codec; anything else is handed
        to the base encoder.
        """
        if not isinstance(obj, bytes):
            return super().default(obj)
        return obj.decode()
class StreamDescriptorReader:
    """Base class for objects which read a stream descriptor and return a
    dictionary containing the fields stored in it"""

    def __init__(self):
        """Nothing to set up in the base class"""

    def _get_raw_data(self):
        """Hook which subclasses must override. It should return a deferred
        which fires with the raw data in the stream descriptor"""

    def get_info(self):
        """Return a deferred which fires with the JSON-decoded descriptor fields"""
        raw_data_deferred = self._get_raw_data()
        raw_data_deferred.addCallback(json.loads)
        return raw_data_deferred
class PlainStreamDescriptorReader(StreamDescriptorReader):
    """Reads a stream descriptor which is stored as a regular file rather than a blob"""

    def __init__(self, stream_descriptor_filename):
        super().__init__()
        self.stream_descriptor_filename = stream_descriptor_filename

    def _read_descriptor_file(self):
        """Blocking read of the whole descriptor file; meant to run off the reactor thread"""
        with open(self.stream_descriptor_filename) as sd_file:
            return sd_file.read()

    def _get_raw_data(self):
        """Return a deferred firing with the file contents, read in a worker thread"""
        return threads.deferToThread(self._read_descriptor_file)
class BlobStreamDescriptorReader(StreamDescriptorReader):
    """Read a stream descriptor file which is a blob"""

    def __init__(self, blob):
        super().__init__()
        self.blob = blob

    def _get_raw_data(self):
        """Return a deferred which fires with the raw contents of the blob.

        The blob is read in a worker thread. The deferred fails with
        ValueError when the blob cannot be opened for reading.
        """
        def get_data():
            f = self.blob.open_for_reading()
            if f is None:
                raise ValueError("Could not open the blob for reading")
            try:
                return f.read()
            finally:
                # release the read handle even when read() raises
                f.close()
        return threads.deferToThread(get_data)
class StreamDescriptorWriter:
    """Base class for objects which write a dictionary of fields out as a
    stream descriptor"""

    def __init__(self):
        """Nothing to set up in the base class"""

    def create_descriptor(self, sd_info):
        """Serialize sd_info to key-sorted JSON, encode it to bytes and pass it
        to _write_stream_descriptor, returning whatever that returns"""
        serialized = json.dumps(sd_info, sort_keys=True, cls=JSONBytesEncoder)
        return self._write_stream_descriptor(serialized.encode())

    def _write_stream_descriptor(self, raw_data):
        """This method must be overridden by subclasses to write raw data to
        the stream descriptor
        """
class PlainStreamDescriptorWriter(StreamDescriptorWriter):
    """Write a stream descriptor to a regular file"""

    def __init__(self, sd_file_name):
        super().__init__()
        self.sd_file_name = sd_file_name

    def _write_stream_descriptor(self, raw_data):
        """Write raw_data to self.sd_file_name in a worker thread.

        @param raw_data: the serialized descriptor, as bytes (what
            create_descriptor passes) or as str

        @return: a deferred which fires with the name of the written file
        """
        def write_file():
            log.info("Writing the sd file to disk")
            # create_descriptor() hands over encoded bytes, which a text-mode
            # file object rejects with TypeError, so always write binary
            data = raw_data.encode() if isinstance(raw_data, str) else raw_data
            with open(self.sd_file_name, 'wb') as sd_file:
                sd_file.write(data)
            return self.sd_file_name
        return threads.deferToThread(write_file)
class BlobStreamDescriptorWriter(StreamDescriptorWriter):
    """Writes a stream descriptor into a new blob obtained from the blob manager"""

    def __init__(self, blob_manager):
        super().__init__()
        self.blob_manager = blob_manager

    @defer.inlineCallbacks
    def _write_stream_descriptor(self, raw_data):
        """Store raw_data in a freshly created blob; the returned deferred
        fires with the value produced by closing the blob creator (the sd hash)"""
        log.debug("Creating the new blob for the stream descriptor")
        creator = self.blob_manager.get_blob_creator()
        creator.write(raw_data)
        log.debug("Wrote the data to the new blob")
        descriptor_hash = yield creator.close()
        yield self.blob_manager.creator_finished(creator, should_announce=True)
        defer.returnValue(descriptor_hash)
2015-08-20 17:27:15 +02:00
class StreamMetadata:
    """Bundle of the validator, options and downloader factories found for a
    stream descriptor, plus a record of where the descriptor came from"""

    # possible values of metadata_source
    FROM_BLOB = 1
    FROM_PLAIN = 2

    def __init__(self, validator, options, factories):
        self.validator, self.options, self.factories = validator, options, factories
        # origin details start out unset and are filled in by whoever builds the object
        self.metadata_source = self.source_blob_hash = self.source_file = None
class StreamDescriptorIdentifier:
    """Tries to determine the type of stream described by the stream descriptor using the
    'stream_type' field. Keeps a list of StreamDescriptorValidators and StreamDownloaderFactorys
    and returns the appropriate ones based on the type of the stream descriptor given
    """

    def __init__(self):
        # {stream_type: IStreamDescriptorValidator}
        self._sd_info_validators = {}
        # {stream_type: IStreamOptions}
        self._stream_options = {}
        # {stream_type: [IStreamDownloaderFactory]}
        self._stream_downloader_factories = defaultdict(list)

    def add_stream_type(self, stream_type, sd_info_validator, stream_options):
        """This is how the StreamDescriptorIdentifier learns about new types of stream descriptors.

        There can only be one StreamDescriptorValidator for each type of stream.

        @param stream_type: A string representing the type of stream
            descriptor. This must be unique to this stream descriptor.

        @param sd_info_validator: A class implementing the
            IStreamDescriptorValidator interface. This class's
            constructor will be passed the raw metadata in the stream
            descriptor file and its 'validate' method will then be
            called. If the validation step fails, an exception will be
            thrown, preventing the stream descriptor from being
            further processed.

        @param stream_options: A class implementing the IStreamOptions
            interface. This class's constructor will be passed the
            sd_info_validator object containing the raw metadata from
            the stream descriptor file.

        @return: None

        """
        self._sd_info_validators[stream_type] = sd_info_validator
        self._stream_options[stream_type] = stream_options

    def add_stream_downloader_factory(self, stream_type, factory):
        """Register a stream downloader factory with the StreamDescriptorIdentifier.

        This is how the StreamDescriptorIdentifier determines what
        factories may be used to process different stream descriptor
        files. There must be at least one factory for each type of
        stream added via "add_stream_type".

        @param stream_type: A string representing the type of stream
            descriptor which the factory knows how to process.

        @param factory: An object implementing the IStreamDownloaderFactory interface.

        @return: None

        """
        self._stream_downloader_factories[stream_type].append(factory)

    def _return_metadata(self, options_validator_factories, source_type, source):
        """Build a StreamMetadata from a (validator, options, factories) tuple
        and record where the descriptor came from."""
        validator, options, factories = options_validator_factories
        m = StreamMetadata(validator, options, factories)
        m.metadata_source = source_type
        if source_type == StreamMetadata.FROM_BLOB:
            m.source_blob_hash = source
        if source_type == StreamMetadata.FROM_PLAIN:
            m.source_file = source
        return m

    def get_metadata_for_sd_file(self, sd_path):
        """Return a deferred firing with the StreamMetadata for the plain
        stream descriptor file at sd_path."""
        sd_reader = PlainStreamDescriptorReader(sd_path)
        d = sd_reader.get_info()
        d.addCallback(self._return_options_and_validator_and_factories)
        d.addCallback(self._return_metadata, StreamMetadata.FROM_PLAIN, sd_path)
        return d

    def get_metadata_for_sd_blob(self, sd_blob):
        """Return a deferred firing with the StreamMetadata for the stream
        descriptor stored in sd_blob."""
        sd_reader = BlobStreamDescriptorReader(sd_blob)
        d = sd_reader.get_info()
        d.addCallback(self._return_options_and_validator_and_factories)
        d.addCallback(self._return_metadata, StreamMetadata.FROM_BLOB, sd_blob.blob_hash)
        return d

    def _get_factories(self, stream_type):
        """Return the factories registered for stream_type, raising
        UnknownStreamTypeError when none have been registered."""
        # membership test first: indexing the defaultdict would create an empty entry
        if stream_type not in self._stream_downloader_factories:
            raise UnknownStreamTypeError(stream_type)
        return self._stream_downloader_factories[stream_type]

    def _get_validator(self, stream_type):
        """Return the validator registered for stream_type, raising
        UnknownStreamTypeError when the type is unknown."""
        if stream_type not in self._sd_info_validators:
            raise UnknownStreamTypeError(stream_type)
        return self._sd_info_validators[stream_type]

    def _get_options(self, stream_type):
        """Return the options registered for stream_type, raising
        UnknownStreamTypeError when the type is unknown."""
        # look in the mapping that is actually indexed below (not the factories mapping)
        if stream_type not in self._stream_options:
            raise UnknownStreamTypeError(stream_type)
        return self._stream_options[stream_type]

    def _return_options_and_validator_and_factories(self, sd_info):
        """Validate sd_info and return a deferred firing with a
        (validator, options, factories) tuple for its stream type.

        Raises InvalidStreamDescriptorError when sd_info has no 'stream_type'
        and UnknownStreamTypeError when the type has not been registered.
        """
        if 'stream_type' not in sd_info:
            raise InvalidStreamDescriptorError('No stream_type parameter in stream descriptor.')
        stream_type = sd_info['stream_type']
        validator = self._get_validator(stream_type)(sd_info)
        factories = [f for f in self._get_factories(stream_type) if f.can_download(validator)]

        d = validator.validate()

        def get_options():
            options = self._get_options(stream_type)
            return validator, options, factories

        d.addCallback(lambda _: get_options())
        return d
# Value get_sd_info() places in the 'stream_type' field of the descriptors it builds
EncryptedFileStreamType = "lbryfile"
@defer.inlineCallbacks
def save_sd_info(blob_manager, sd_hash, sd_info):
    """
    Make sure the sd blob and the stream it describes are known to storage

    The descriptor blob is (re)written when the blob manager has no verified
    blob for sd_hash, and the stream is stored when storage has no stream
    hash for sd_hash yet.

    :param blob_manager: blob manager providing `blobs` and `storage`
    :param sd_hash: expected hash of the stream descriptor blob
    :param sd_info: (dict) stream descriptor fields
    :return: deferred firing with the stream hash
    :raises InvalidStreamDescriptorError: if the hash of the written
        descriptor does not match sd_hash
    """
    sd_blob = blob_manager.blobs.get(sd_hash)
    if not sd_blob or not sd_blob.get_is_verified():
        descriptor_writer = BlobStreamDescriptorWriter(blob_manager)
        calculated_sd_hash = yield descriptor_writer.create_descriptor(sd_info)
        if calculated_sd_hash != sd_hash:
            raise InvalidStreamDescriptorError("%s does not match calculated %s" %
                                               (sd_hash, calculated_sd_hash))
    stream_hash = yield blob_manager.storage.get_stream_hash_for_sd_hash(sd_hash)
    if not stream_hash:
        # the 'hex' codec of Python 2 is gone in Python 3; unhexlify accepts
        # both str and bytes hex input
        log.debug("Saving info for %s", binascii.unhexlify(sd_info['stream_name']))
        stream_name = sd_info['stream_name']
        key = sd_info['key']
        stream_hash = sd_info['stream_hash']
        stream_blobs = sd_info['blobs']
        suggested_file_name = sd_info['suggested_file_name']
        yield blob_manager.storage.add_known_blobs(stream_blobs)
        yield blob_manager.storage.store_stream(
            stream_hash, sd_hash, stream_name, key, suggested_file_name, stream_blobs
        )
    defer.returnValue(stream_hash)
def format_blobs(crypt_blob_infos):
    """Convert blob info objects into the list of dicts used in a stream descriptor.

    Each dict carries 'blob_num', 'iv' and 'length'; 'blob_hash' is included
    only for blobs whose length is not zero.
    """
    formatted = []
    for info in crypt_blob_infos:
        entry = {} if info.length == 0 else {'blob_hash': info.blob_hash}
        entry['blob_num'] = info.blob_num
        entry['iv'] = info.iv
        entry['length'] = info.length
        formatted.append(entry)
    return formatted
def format_sd_info(stream_type, stream_name, key, suggested_file_name, stream_hash, blobs):
    """Assemble the stream descriptor dictionary from its individual fields"""
    field_names = (
        "stream_type", "stream_name", "key", "suggested_file_name", "stream_hash", "blobs"
    )
    field_values = (stream_type, stream_name, key, suggested_file_name, stream_hash, blobs)
    return dict(zip(field_names, field_values))
@defer.inlineCallbacks
def get_sd_info(storage, stream_hash, include_blobs):
    """
    Build the sd info dictionary of a stream from what is kept in storage

    :param storage: (SQLiteStorage) storage instance
    :param stream_hash: (str) stream hash
    :param include_blobs: (bool) whether to look up and include the stream blob infos;
        when false the "blobs" list is empty
    :return: {
        "stream_type": "lbryfile",
        "stream_name": <hex encoded stream name>,
        "key": <stream key>,
        "suggested_file_name": <hex encoded suggested file name>,
        "stream_hash": <stream hash>,
        "blobs": [
            {
                "blob_hash": <head blob_hash>,
                "blob_num": 0,
                "iv": <iv>,
                "length": <head blob length>
            }, ...
            {
                "blob_num": <stream length>,
                "iv": <iv>,
                "length": 0
            }
        ]
    }
    """
    stream_info = yield storage.get_stream_info(stream_hash)
    if include_blobs:
        stream_blobs = yield storage.get_blobs_for_stream(stream_hash)
    else:
        stream_blobs = []
    # stream_info holds, in order: stream name, key, suggested file name
    sd_info = format_sd_info(
        EncryptedFileStreamType, stream_info[0], stream_info[1],
        stream_info[2], stream_hash, format_blobs(stream_blobs)
    )
    defer.returnValue(sd_info)
def get_blob_hashsum(b):
    """Return the digest over one blob info dict.

    The hash is fed, in order: the encoded blob hash (only when the blob
    length is not zero), the blob number, the iv and the length.
    """
    length = b['length']
    has_data = length != 0
    blob_hash = b['blob_hash'] if has_data else None
    blob_num = b['blob_num']
    iv = b['iv']
    hasher = get_lbry_hash_obj()
    if has_data:
        hasher.update(blob_hash.encode())
    for piece in (str(blob_num).encode(), iv, str(length).encode()):
        hasher.update(piece)
    return hasher.digest()
def get_stream_hash(hex_stream_name, key, hex_suggested_file_name, blob_infos):
    """Return the hex digest identifying a stream.

    The digest covers the stream name, the key, the suggested file name and
    a digest over the hashsums of all blob infos, in that order.
    """
    stream_hasher = get_lbry_hash_obj()
    for field in (hex_stream_name, key, hex_suggested_file_name):
        stream_hasher.update(field)
    blobs_hasher = get_lbry_hash_obj()
    for blob_info in blob_infos:
        blobs_hasher.update(get_blob_hashsum(blob_info))
    stream_hasher.update(blobs_hasher.digest())
    return stream_hasher.hexdigest()
def verify_hex(text, field_name):
    """Check that text consists only of lowercase hexadecimal digits.

    Both bytes and str values are accepted; an empty value passes.

    @param text: the value to check

    @param field_name: name of the field, used in the error message

    @return: None

    @raise InvalidStreamDescriptorError: if text contains any other character
    """
    # iterating bytes yields ints while iterating str yields one-character
    # strings, so the allowed set has to match the type of text
    if isinstance(text, str):
        hex_digits = set('0123456789abcdef')
    else:
        hex_digits = set(b'0123456789abcdef')
    if not hex_digits.issuperset(text):
        raise InvalidStreamDescriptorError("%s is not a hex-encoded string" % field_name)
def validate_descriptor(stream_info):
    """Validate the fields of a stream descriptor dictionary.

    Checks that all required fields are present, that the blob list ends
    with a zero-length terminator which has no hash, that no other blob is
    zero-length, that key, suggested file name and stream hash are hex
    encoded, and that the stream hash matches the rest of the metadata.

    @param stream_info: dictionary of stream descriptor fields

    @return: True if the descriptor is valid

    @raise InvalidStreamDescriptorError: if any of the checks fails
    """
    try:
        hex_stream_name = stream_info['stream_name']
        key = stream_info['key']
        hex_suggested_file_name = stream_info['suggested_file_name']
        stream_hash = stream_info['stream_hash']
        blobs = stream_info['blobs']
    except KeyError as e:
        raise InvalidStreamDescriptorError("Missing '%s'" % (e.args[0]))
    # an empty blob list has no terminator either; report it as an invalid
    # descriptor instead of letting blobs[-1] raise IndexError
    if not blobs or blobs[-1]['length'] != 0:
        raise InvalidStreamDescriptorError("Does not end with a zero-length blob.")
    if any(not blob_info['length'] > 0 for blob_info in blobs[:-1]):
        raise InvalidStreamDescriptorError("Contains zero-length data blob")
    if 'blob_hash' in blobs[-1]:
        raise InvalidStreamDescriptorError("Stream terminator blob should not have a hash")

    verify_hex(key, "key")
    verify_hex(hex_suggested_file_name, "suggested file name")
    verify_hex(stream_hash, "stream_hash")

    # NOTE(review): the calculated hash is encoded to bytes, so this comparison
    # can only succeed when stream_hash is bytes - confirm against callers
    calculated_stream_hash = get_stream_hash(
        hex_stream_name, key, hex_suggested_file_name, blobs
    ).encode()
    if calculated_stream_hash != stream_hash:
        raise InvalidStreamDescriptorError("Stream hash does not match stream metadata")
    return True
class EncryptedFileStreamDescriptorValidator:
    """Wraps the raw fields of a stream descriptor and validates them with
    validate_descriptor"""

    def __init__(self, raw_info):
        self.raw_info = raw_info

    def validate(self):
        """Return an already-fired deferred carrying the result of
        validate_descriptor; validation errors are raised directly"""
        return defer.succeed(validate_descriptor(self.raw_info))

    def info_to_show(self):
        """Return a list of (name, value) tuples: the unhexlified stream name,
        the stream size as a string and the unhexlified suggested file name
        (None when the descriptor has no suggested file name)"""
        info = [
            ("stream_name", binascii.unhexlify(self.raw_info.get("stream_name"))),
            ("stream_size", str(self.get_length_of_stream())),
        ]
        suggested_file_name = self.raw_info.get("suggested_file_name", None)
        if suggested_file_name is not None:
            suggested_file_name = binascii.unhexlify(suggested_file_name)
        info.append(("suggested_file_name", suggested_file_name))
        return info

    def get_length_of_stream(self):
        """Return the sum of the 'length' fields of all blobs in the descriptor
        (0 when the descriptor has no 'blobs' field)"""
        return sum(int(blob_info['length']) for blob_info in self.raw_info.get("blobs", []))
@defer.inlineCallbacks
def download_sd_blob(blob_hash, blob_manager, peer_finder, rate_limiter, payment_rate_manager, wallet, timeout=None,
                     download_mirrors=None):
    """
    Downloads a single stream descriptor blob from the network, validates it
    and saves the stream info it contains

    @param blob_hash: hash of the sd blob to download
    @param blob_manager: blob manager handed to the downloaders; also used to
        delete an invalid blob and to reach storage
    @param peer_finder: passed through to StandaloneBlobDownloader
    @param rate_limiter: passed through to StandaloneBlobDownloader
    @param payment_rate_manager: passed through to StandaloneBlobDownloader
    @param wallet: passed through to StandaloneBlobDownloader
    @param timeout: passed through to StandaloneBlobDownloader
    @param download_mirrors: optional list of mirrors for HTTPBlobDownloader;
        None is treated as an empty list
    @return: a deferred firing with the downloaded sd blob
    @raise InvalidStreamDescriptorError: if the downloaded descriptor is not
        valid; the blob is deleted before the error is re-raised
    """
    downloader = StandaloneBlobDownloader(blob_hash,
                                          blob_manager,
                                          peer_finder,
                                          rate_limiter,
                                          payment_rate_manager,
                                          wallet,
                                          timeout)
    mirror = HTTPBlobDownloader(blob_manager, [blob_hash], download_mirrors or [], sd_hashes=[blob_hash], retry=False)
    mirror.start()
    try:
        sd_blob = yield downloader.download()
    finally:
        # stop the mirror download even when the download itself fails
        mirror.stop()
    sd_reader = BlobStreamDescriptorReader(sd_blob)
    # read the blob once; the raw data provides both the parsed descriptor
    # (the same json.loads step get_info() applies) and the blob length
    raw_sd = yield sd_reader._get_raw_data()
    sd_info = json.loads(raw_sd)
    try:
        validate_descriptor(sd_info)
    except InvalidStreamDescriptorError as err:
        yield blob_manager.delete_blobs([blob_hash])
        raise err
    yield blob_manager.storage.add_known_blob(blob_hash, len(raw_sd))
    yield save_sd_info(blob_manager, sd_blob.blob_hash, sd_info)
    defer.returnValue(sd_blob)