forked from LBRYCommunity/lbry-sdk
async lbrynet.blob
parent c713fac2d9
commit a5524d490c
17 changed files with 333 additions and 1330 deletions
lbrynet/blob
@@ -1,26 +1,27 @@
 import os
+import asyncio
+import binascii
 import logging
-from twisted.internet import defer
-from twisted.web.client import FileBodyProducer
-from twisted.python.failure import Failure
-from lbrynet.cryptoutils import get_lbry_hash_obj
-from lbrynet.p2p.Error import DownloadCanceledError, InvalidDataError, InvalidBlobHashError
+import typing
+from cryptography.hazmat.primitives.ciphers import Cipher, modes
+from cryptography.hazmat.primitives.ciphers.algorithms import AES
+from cryptography.hazmat.primitives.padding import PKCS7
+
+from lbrynet.cryptoutils import backend, get_lbry_hash_obj
+from lbrynet.error import DownloadCancelledError, InvalidBlobHashError, InvalidDataError
+
+from lbrynet.blob import MAX_BLOB_SIZE, blobhash_length
+from lbrynet.blob.blob_info import BlobInfo
+from lbrynet.blob.writer import HashBlobWriter
+from lbrynet.blob.reader import HashBlobReader

 log = logging.getLogger(__name__)

-MAX_BLOB_SIZE = 2 * 2 ** 20
-
-# digest_size is in bytes, and blob hashes are hex encoded
-blobhash_length = get_lbry_hash_obj().digest_size * 2
-

-def is_valid_hashcharacter(char):
+def is_valid_hashcharacter(char: str) -> bool:
     return char in "0123456789abcdef"


-def is_valid_blobhash(blobhash):
+def is_valid_blobhash(blobhash: str) -> bool:
     """Checks whether the blobhash is the correct length and contains only
     valid characters (0-9, a-f)
@@ -31,6 +32,16 @@ def is_valid_blobhash(blobhash):
     return len(blobhash) == blobhash_length and all(is_valid_hashcharacter(l) for l in blobhash)


+def encrypt_blob_bytes(key: bytes, iv: bytes, unencrypted: bytes) -> typing.Tuple[bytes, str]:
+    cipher = Cipher(AES(key), modes.CBC(iv), backend=backend)
+    padder = PKCS7(AES.block_size).padder()
+    encryptor = cipher.encryptor()
+    encrypted = encryptor.update(padder.update(unencrypted) + padder.finalize()) + encryptor.finalize()
+    digest = get_lbry_hash_obj()
+    digest.update(encrypted)
+    return encrypted, digest.hexdigest()
+
+
 class BlobFile:
     """
     A chunk of data available on the network which is specified by a hashsum
@@ -40,178 +51,137 @@ class BlobFile:
     Also can be used for reading from blobs on the local filesystem
     """

     def __str__(self):
         return self.blob_hash[:16]

     def __repr__(self):
         return '<{}({})>'.format(self.__class__.__name__, str(self))

-    def __init__(self, blob_dir, blob_hash, length=None):
+    def __init__(self, loop: asyncio.BaseEventLoop, blob_dir: str, blob_hash: str,
+                 length: typing.Optional[int] = None,
+                 blob_completed_callback: typing.Optional[typing.Callable[['BlobFile'], typing.Awaitable]] = None):
         if not is_valid_blobhash(blob_hash):
             raise InvalidBlobHashError(blob_hash)
+        self.loop = loop
         self.blob_hash = blob_hash
         self.length = length
-        self.writers = {}  # {Peer: writer, finished_deferred}
-        self._verified = False
-        self.readers = 0
         self.blob_dir = blob_dir
         self.file_path = os.path.join(blob_dir, self.blob_hash)
-        self.blob_write_lock = defer.DeferredLock()
-        self.saved_verified_blob = False
-        if os.path.isfile(self.file_path):
-            self.set_length(os.path.getsize(self.file_path))
-            # This assumes that the hash of the blob has already been
-            # checked as part of the blob creation process. It might
-            # be worth having a function that checks the actual hash;
-            # its probably too expensive to have that check be part of
-            # this call.
-            self._verified = True
+        self.writers: typing.List[HashBlobWriter] = []
+
+        self.verified: asyncio.Event = asyncio.Event(loop=self.loop)
+        self.finished_writing = asyncio.Event(loop=loop)
+        self.blob_write_lock = asyncio.Lock(loop=loop)
+        if os.path.isfile(os.path.join(blob_dir, blob_hash)):
+            length = length or int(os.stat(os.path.join(blob_dir, blob_hash)).st_size)
+            self.length = length
+            self.verified.set()
+            self.finished_writing.set()
+        self.saved_verified_blob = False
+        self.blob_completed_callback = blob_completed_callback
-    def open_for_writing(self, peer):
-        """
-        open a blob file to be written by peer, supports concurrent
-        writers, as long as they are from different peers.
-
-        returns tuple of (writer, finished_deferred)
-
-        writer - a file like object with a write() function, close() when finished
-        finished_deferred - deferred that is fired when write is finished and returns
-        a instance of itself as HashBlob
-        """
-        if peer not in self.writers:
-            log.debug("Opening %s to be written by %s", str(self), str(peer))
-            finished_deferred = defer.Deferred()
-            writer = HashBlobWriter(self.get_length, self.writer_finished)
-            self.writers[peer] = (writer, finished_deferred)
-            return writer, finished_deferred
-        log.warning("Tried to download the same file twice simultaneously from the same peer")
-        return None, None
-
-    def open_for_reading(self):
-        """
-        open blob for reading
-
-        returns a file like object that can be read() from, and closed() when
-        finished
-        """
-        if self._verified is True:
-            f = open(self.file_path, 'rb')
-            reader = HashBlobReader(f, self.reader_finished)
-            self.readers += 1
-            return reader
-        return None
-
-    def delete(self):
-        """
-        delete blob file from file system, prevent deletion
-        if a blob is being read from or written to
-
-        returns a deferred that firesback when delete is completed
-        """
-        if not self.writers and not self.readers:
-            self._verified = False
-            self.saved_verified_blob = False
-            try:
-                if os.path.isfile(self.file_path):
-                    os.remove(self.file_path)
-            except Exception as e:
-                log.exception("An error occurred deleting %s:", str(self.file_path), exc_info=e)
-        else:
-            raise ValueError("File is currently being read or written and cannot be deleted")
+    def writer_finished(self, writer: HashBlobWriter):
+        def callback(finished: asyncio.Future):
+            try:
+                error = finished.result()
+            except Exception as err:
+                error = err
+            if writer in self.writers:  # remove this download attempt
+                self.writers.remove(writer)
+            if not error:  # the blob downloaded, cancel all the other download attempts and set the result
+                while self.writers:
+                    other = self.writers.pop()
+                    other.finished.cancel()
+                t = self.loop.create_task(self.save_verified_blob(writer))
+                t.add_done_callback(lambda *_: self.finished_writing.set())
+                return
+            if isinstance(error, (InvalidBlobHashError, InvalidDataError)):
+                log.error("writer error downloading %s: %s", self.blob_hash[:8], str(error))
+            elif not isinstance(error, (DownloadCancelledError, asyncio.CancelledError, asyncio.TimeoutError)):
+                log.exception("something else")
+                raise error
+        return callback
-    @property
-    def verified(self):
-        """
-        Protect verified from being modified by other classes.
-        verified is True if a write to a blob has completed successfully,
-        or a blob has been read to have the same length as specified
-        in init
-        """
-        return self._verified
+    async def save_verified_blob(self, writer):
+        def _save_verified():
+            # log.debug(f"write blob file {self.blob_hash[:8]} from {writer.peer.address}")
+            if not self.saved_verified_blob and not os.path.isfile(self.file_path):
+                if self.get_length() == len(writer.verified_bytes):
+                    with open(self.file_path, 'wb') as write_handle:
+                        write_handle.write(writer.verified_bytes)
+                    self.saved_verified_blob = True
+                else:
+                    raise Exception("length mismatch")
+
+        if self.verified.is_set():
+            return
+        async with self.blob_write_lock:
+            await self.loop.run_in_executor(None, _save_verified)
+            if self.blob_completed_callback:
+                await self.blob_completed_callback(self)
+            self.verified.set()
+
+    def open_for_writing(self) -> HashBlobWriter:
+        if os.path.exists(self.file_path):
+            raise OSError(f"File already exists '{self.file_path}'")
+        fut = asyncio.Future(loop=self.loop)
+        writer = HashBlobWriter(self.blob_hash, self.get_length, fut)
+        self.writers.append(writer)
+        fut.add_done_callback(self.writer_finished(writer))
+        return writer
+
+    async def sendfile(self, writer: asyncio.StreamWriter) -> int:
+        """
+        Read and send the file to the writer and return the number of bytes sent
+        """
+        with open(self.file_path, 'rb') as handle:
+            return await self.loop.sendfile(writer.transport, handle)
+    async def close(self):
+        while self.writers:
+            self.writers.pop().finished.cancel()
+
+    async def delete(self):
+        await self.close()
+        async with self.blob_write_lock:
+            self.saved_verified_blob = False
+            if os.path.isfile(self.file_path):
+                os.remove(self.file_path)
+
+    def decrypt(self, key: bytes, iv: bytes) -> bytes:
+        """
+        Decrypt a BlobFile to plaintext bytes
+        """
+        with open(self.file_path, "rb") as f:
+            buff = f.read()
+        if len(buff) != self.length:
+            raise ValueError("unexpected length")
+        cipher = Cipher(AES(key), modes.CBC(iv), backend=backend)
+        unpadder = PKCS7(AES.block_size).unpadder()
+        decryptor = cipher.decryptor()
+        return unpadder.update(decryptor.update(buff) + decryptor.finalize()) + unpadder.finalize()
+
+    @classmethod
+    async def create_from_unencrypted(cls, loop: asyncio.BaseEventLoop, blob_dir: str, key: bytes,
+                                      iv: bytes, unencrypted: bytes, blob_num: int) -> BlobInfo:
+        """
+        Create an encrypted BlobFile from plaintext bytes
+        """
+        blob_bytes, blob_hash = encrypt_blob_bytes(key, iv, unencrypted)
+        length = len(blob_bytes)
+        blob = cls(loop, blob_dir, blob_hash, length)
+        writer = blob.open_for_writing()
+        writer.write(blob_bytes)
+        await blob.verified.wait()
+        return BlobInfo(blob_num, length, binascii.hexlify(iv).decode(), blob_hash)
     def set_length(self, length):
         if self.length is not None and length == self.length:
-            return True
+            return
         if self.length is None and 0 <= length <= MAX_BLOB_SIZE:
             self.length = length
-            return True
-        log.warning("Got an invalid length. Previous length: %s, Invalid length: %s",
-                    self.length, length)
-        return False
+            return
+        log.warning("Got an invalid length. Previous length: %s, Invalid length: %s", self.length, length)

     def get_length(self):
         return self.length

     def get_is_verified(self):
-        return self.verified
-
-    def is_downloading(self):
-        if self.writers:
-            return True
-        return False
-
-    def reader_finished(self, reader):
-        self.readers -= 1
-        return defer.succeed(True)
-
-    def writer_finished(self, writer, err=None):
-        def fire_finished_deferred():
-            self._verified = True
-            for p, (w, finished_deferred) in list(self.writers.items()):
-                if w == writer:
-                    del self.writers[p]
-                    finished_deferred.callback(self)
-                    return True
-            log.warning(
-                "Somehow, the writer that was accepted as being valid was already removed: %s",
-                writer)
-            return False
-
-        def errback_finished_deferred(err):
-            for p, (w, finished_deferred) in list(self.writers.items()):
-                if w == writer:
-                    del self.writers[p]
-                    finished_deferred.errback(err)
-
-        def cancel_other_downloads():
-            for p, (w, finished_deferred) in self.writers.items():
-                w.close()
-
-        if err is None:
-            if writer.len_so_far == self.length and writer.blob_hash == self.blob_hash:
-                if self._verified is False:
-                    d = self.save_verified_blob(writer)
-                    d.addCallbacks(lambda _: fire_finished_deferred(), errback_finished_deferred)
-                    d.addCallback(lambda _: cancel_other_downloads())
-                else:
-                    d = defer.succeed(None)
-                    fire_finished_deferred()
-            else:
-                if writer.len_so_far != self.length:
-                    err_string = "blob length is %i vs expected %i" % (writer.len_so_far, self.length)
-                else:
-                    err_string = f"blob hash is {writer.blob_hash} vs expected {self.blob_hash}"
-                errback_finished_deferred(Failure(InvalidDataError(err_string)))
-                d = defer.succeed(None)
-        else:
-            errback_finished_deferred(err)
-            d = defer.succeed(None)
-        d.addBoth(lambda _: writer.close_handle())
-        return d
-
-    def save_verified_blob(self, writer):
-        # we cannot have multiple _save_verified_blob interrupting
-        # each other, can happen since startProducing is a deferred
-        return self.blob_write_lock.run(self._save_verified_blob, writer)
-
-    @defer.inlineCallbacks
-    def _save_verified_blob(self, writer):
-        if self.saved_verified_blob is False:
-            writer.write_handle.seek(0)
-            out_path = os.path.join(self.blob_dir, self.blob_hash)
-            producer = FileBodyProducer(writer.write_handle)
-            yield producer.startProducing(open(out_path, 'wb'))
-            self.saved_verified_blob = True
-            defer.returnValue(True)
-        else:
-            raise DownloadCanceledError()
+        return self.verified.is_set()
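
Note (not part of the commit): the scheme used by encrypt_blob_bytes and BlobFile.decrypt above is plain AES-CBC with PKCS7 padding, with the blob hash computed over the ciphertext. Below is a minimal, self-contained round trip of that scheme using the cryptography package directly; hashlib.sha384 stands in for lbrynet's get_lbry_hash_obj, which is an assumption here.

import os
import hashlib

from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives.ciphers import Cipher, modes
from cryptography.hazmat.primitives.ciphers.algorithms import AES
from cryptography.hazmat.primitives.padding import PKCS7


def encrypt_blob_bytes(key: bytes, iv: bytes, unencrypted: bytes):
    # Mirrors the helper added in this diff: pad, encrypt, then hash the ciphertext.
    cipher = Cipher(AES(key), modes.CBC(iv), backend=default_backend())
    padder = PKCS7(AES.block_size).padder()
    encryptor = cipher.encryptor()
    encrypted = encryptor.update(padder.update(unencrypted) + padder.finalize()) + encryptor.finalize()
    digest = hashlib.sha384()  # assumption: get_lbry_hash_obj() wraps sha384
    digest.update(encrypted)
    return encrypted, digest.hexdigest()


def decrypt_blob_bytes(key: bytes, iv: bytes, encrypted: bytes) -> bytes:
    # Inverse of the above, matching the body of BlobFile.decrypt().
    cipher = Cipher(AES(key), modes.CBC(iv), backend=default_backend())
    unpadder = PKCS7(AES.block_size).unpadder()
    decryptor = cipher.decryptor()
    return unpadder.update(decryptor.update(encrypted) + decryptor.finalize()) + unpadder.finalize()


key, iv = os.urandom(32), os.urandom(16)
ciphertext, blob_hash = encrypt_blob_bytes(key, iv, b"some plaintext")
assert decrypt_blob_bytes(key, iv, ciphertext) == b"some plaintext"
assert len(blob_hash) == 96  # 48-byte sha384 digest, hex encoded

Because the hash is taken over the ciphertext, a peer can verify a downloaded blob against its hash without holding the decryption key.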
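The new methods give BlobFile a small async lifecycle: encrypt and write bytes through a HashBlobWriter, wait on the verified event, then read back with decrypt() or stream with sendfile(). A hypothetical driver for that flow is sketched below; the module path lbrynet.blob.blob_file and the BlobInfo attribute names are assumptions based on this diff, not verified against the rest of the commit.

import os
import asyncio

from lbrynet.blob.blob_file import BlobFile  # assumed module path


async def round_trip(blob_dir: str, key: bytes, iv: bytes, data: bytes) -> bytes:
    loop = asyncio.get_event_loop()
    # Encrypt `data`, write it through a HashBlobWriter, and wait for verification.
    info = await BlobFile.create_from_unencrypted(loop, blob_dir, key, iv, data, blob_num=0)
    # Reopen the stored blob by hash; the constructor sees the file on disk
    # and sets the `verified` event immediately.
    blob = BlobFile(loop, blob_dir, info.blob_hash, length=info.length)  # assumed BlobInfo attrs
    assert blob.get_is_verified()
    return blob.decrypt(key, iv)


blob_dir = "/tmp/blobs"  # hypothetical scratch directory
os.makedirs(blob_dir, exist_ok=True)
loop = asyncio.get_event_loop()
plaintext = loop.run_until_complete(round_trip(blob_dir, os.urandom(32), os.urandom(16), b"hello lbry"))
assert plaintext == b"hello lbry"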
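The core of the port is visible in writer_finished(): Twisted's per-peer (writer, finished_deferred) pairs and DeferredLock give way to one asyncio.Future per writer, a done-callback that lets the first successful writer cancel the rest, and an asyncio.Event for completion. A self-contained sketch of that pattern follows; the Download/start_attempt names are hypothetical, invented for illustration.

import asyncio
import typing


class Download:
    # Toy version of the bookkeeping introduced in this diff: each attempt
    # gets a Future, and the first success cancels the rest and fires an
    # Event, mirroring writer_finished()/finished_writing.
    def __init__(self, loop: asyncio.AbstractEventLoop):
        self.loop = loop
        self.attempts: typing.List[asyncio.Future] = []
        self.finished = asyncio.Event()

    def start_attempt(self) -> asyncio.Future:
        fut = self.loop.create_future()
        self.attempts.append(fut)
        fut.add_done_callback(self.attempt_finished(fut))
        return fut

    def attempt_finished(self, fut: asyncio.Future):
        def callback(finished: asyncio.Future):
            if finished.cancelled():
                return  # a losing attempt, already removed by the winner
            self.attempts.remove(fut)
            if finished.exception() is None:
                while self.attempts:  # first success cancels the others
                    self.attempts.pop().cancel()
                self.finished.set()
        return callback


async def main():
    dl = Download(asyncio.get_running_loop())
    first, second = dl.start_attempt(), dl.start_attempt()
    first.set_result(b"payload")  # second is cancelled by first's callback
    await dl.finished.wait()
    assert second.cancelled()


asyncio.run(main())

Compared with the removed Twisted version, errors no longer travel through errback chains: a failed attempt simply resolves its future with an exception, and the done-callback decides whether to log, ignore, or re-raise, as writer_finished() does above.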