2015-08-20 11:27:15 -04:00
|
|
|
import os
|
2019-02-08 19:54:59 -05:00
|
|
|
import re
|
2019-01-22 12:47:46 -05:00
|
|
|
import asyncio
|
|
|
|
import binascii
|
2018-11-07 15:15:05 -05:00
|
|
|
import logging
|
2019-01-22 12:47:46 -05:00
|
|
|
import typing
|
|
|
|
from cryptography.hazmat.primitives.ciphers import Cipher, modes
|
|
|
|
from cryptography.hazmat.primitives.ciphers.algorithms import AES
|
|
|
|
from cryptography.hazmat.primitives.padding import PKCS7
|
2015-08-20 11:27:15 -04:00
|
|
|
|
2019-01-22 12:47:46 -05:00
|
|
|
from lbrynet.cryptoutils import backend, get_lbry_hash_obj
|
|
|
|
from lbrynet.error import DownloadCancelledError, InvalidBlobHashError, InvalidDataError
|
2015-09-08 15:42:56 -04:00
|
|
|
|
2019-01-22 12:47:46 -05:00
|
|
|
from lbrynet.blob import MAX_BLOB_SIZE, blobhash_length
|
|
|
|
from lbrynet.blob.blob_info import BlobInfo
|
|
|
|
from lbrynet.blob.writer import HashBlobWriter
|
2017-09-13 15:27:43 -04:00
|
|
|
|
2019-01-22 12:47:46 -05:00
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
2018-11-09 13:20:58 -05:00
|
|
|
|
|
|
|
|
2019-02-08 19:57:26 -05:00
|
|
|
# Matches a non-empty string made only of lowercase hexadecimal digits.
# The character class deliberately has no comma (the previous "[a-f,0-9]"
# accepted "," as a hash character), and the pattern ends in \Z rather than $
# because $ also matches just before a trailing newline.
_hexmatch = re.compile(r"^[a-f0-9]+\Z")


def is_valid_hashcharacter(char: str) -> bool:
    """Checks whether the given string is exactly one valid hash character (0-9, a-f)

    @param char: string, the character to check

    @return: True/False
    """
    # Compare against None so that a real bool is returned, as annotated,
    # instead of a re.Match object.
    return len(char) == 1 and _hexmatch.fullmatch(char) is not None
|
2019-02-08 19:54:59 -05:00
|
|
|
|
|
|
|
|
2019-01-22 12:47:46 -05:00
|
|
|
def is_valid_blobhash(blobhash: str) -> bool:
    """Checks whether the blobhash is the correct length and contains only
    valid characters (0-9, a-f)

    @param blobhash: string, the blobhash to check

    @return: True/False
    """
    # Anything that is not a str (None, bytes, ...) is reported as invalid
    # instead of surfacing a TypeError from len() or the regex.
    if not isinstance(blobhash, str):
        return False
    # fullmatch() must consume the whole string, so a hash followed by a
    # trailing newline is rejected; comparing with None yields a real bool
    # rather than a re.Match object.
    return len(blobhash) == blobhash_length and _hexmatch.fullmatch(blobhash) is not None
|
2018-02-12 14:16:43 -05:00
|
|
|
|
2019-02-08 19:57:26 -05:00
|
|
|
|
2019-01-22 12:47:46 -05:00
|
|
|
def encrypt_blob_bytes(key: bytes, iv: bytes, unencrypted: bytes) -> typing.Tuple[bytes, str]:
    """Encrypts plaintext with AES in CBC mode after PKCS7 padding, and hashes
    the resulting ciphertext

    @param key: bytes, the AES key
    @param iv: bytes, the CBC initialization vector
    @param unencrypted: bytes, the plaintext to encrypt

    @return: tuple of (ciphertext bytes, hex digest of the ciphertext as
             produced by get_lbry_hash_obj())
    """
    aes_cbc = Cipher(AES(key), modes.CBC(iv), backend=backend)
    pkcs7 = PKCS7(AES.block_size).padder()
    enc = aes_cbc.encryptor()

    # Pad first, then push the padded plaintext through the cipher.
    padded = pkcs7.update(unencrypted) + pkcs7.finalize()
    ciphertext = enc.update(padded) + enc.finalize()

    # The hash is taken over the encrypted bytes, not the plaintext.
    hasher = get_lbry_hash_obj()
    hasher.update(ciphertext)
    return ciphertext, hasher.hexdigest()
|
|
|
|
|
|
|
|
|
2018-07-21 18:34:59 -04:00
|
|
|
class BlobFile:
    """
    A chunk of data available on the network which is specified by a hashsum

    This class is used to create blobs on the local filesystem
    when we already know the blob hash before hand (i.e., when downloading blobs)
    Also can be used for reading from blobs on the local filesystem
    """

    def __init__(self, loop: asyncio.BaseEventLoop, blob_dir: str, blob_hash: str,
                 length: typing.Optional[int] = None,
                 blob_completed_callback: typing.Optional[typing.Callable[['BlobFile'], typing.Awaitable]] = None):
        """
        @param loop: event loop used for the events, the lock, tasks and executor calls
        @param blob_dir: directory in which the blob file lives
        @param blob_hash: hash of the blob, also used as its file name inside blob_dir
        @param blob_completed_callback: optional coroutine function awaited with this
                                        blob after it has been saved
        @param length: expected length of the blob in bytes, if known; replaced by the
                       on-disk size when the blob file already exists

        @raise InvalidBlobHashError: if blob_hash does not pass is_valid_blobhash()
        """
        if not is_valid_blobhash(blob_hash):
            raise InvalidBlobHashError(blob_hash)
        self.loop = loop
        self.blob_hash = blob_hash
        self.length = length
        self.blob_dir = blob_dir
        self.file_path = os.path.join(blob_dir, self.blob_hash)
        # one writer per in-progress attempt to write this blob
        self.writers: typing.List[HashBlobWriter] = []

        # NOTE(review): the explicit `loop=` argument of asyncio.Event / asyncio.Lock
        # was deprecated in Python 3.8 and removed in 3.10; revisit these three
        # constructors when the supported interpreter versions change.
        self.verified: asyncio.Event = asyncio.Event(loop=self.loop)
        self.finished_writing = asyncio.Event(loop=loop)
        self.blob_write_lock = asyncio.Lock(loop=loop)
        if self.file_exists:
            # a file already on disk is treated as complete: adopt its size as the
            # length and mark the blob verified and written
            self.length = int(os.stat(self.file_path).st_size)
            self.verified.set()
            self.finished_writing.set()
        self.saved_verified_blob = False
        self.blob_completed_callback = blob_completed_callback

    @property
    def file_exists(self):
        """
        @return: True if file_path currently refers to a regular file
        """
        return os.path.isfile(self.file_path)

    def writer_finished(self, writer: HashBlobWriter):
        """
        Build the done-callback for the future that tracks `writer`

        @param writer: the writer whose future the returned callback is attached to

        @return: function taking the finished future; on success it cancels the other
                 writers and schedules save_verified_blob(), on failure it logs or
                 re-raises depending on the error type
        """
        def callback(finished: asyncio.Future):
            try:
                error = finished.exception()
            except (Exception, asyncio.CancelledError) as err:
                # Future.exception() raises CancelledError for a cancelled future
                # instead of returning it. Since Python 3.8 CancelledError derives
                # from BaseException, so it has to be named explicitly here.
                error = err
            if writer in self.writers:  # remove this download attempt
                self.writers.remove(writer)
            if not error:  # the blob downloaded, cancel all the other download attempts and set the result
                while self.writers:
                    other = self.writers.pop()
                    other.finished.cancel()
                t = self.loop.create_task(self.save_verified_blob(writer, finished.result()))
                # finished_writing is set whether or not saving succeeded
                t.add_done_callback(lambda *_: self.finished_writing.set())
                return
            if isinstance(error, (InvalidBlobHashError, InvalidDataError)):
                log.debug("writer error downloading %s: %s", self.blob_hash[:8], str(error))
            elif not isinstance(error, (DownloadCancelledError, asyncio.CancelledError, asyncio.TimeoutError)):
                # We are not inside an `except` block here, so log.exception() would
                # have no active exception to report; pass the error explicitly so
                # its traceback ends up in the log.
                log.error("something else", exc_info=error)
                raise error
        return callback

    async def save_verified_blob(self, writer, verified_bytes: bytes):
        """
        Write the verified bytes to file_path (unless already saved), await the
        completion callback if one was given, and mark the blob as verified

        @param writer: the writer that produced the bytes (not used by this method)
        @param verified_bytes: the complete blob contents to store

        @raise Exception: "length mismatch" if len(verified_bytes) differs from get_length()
        """
        def _save_verified():
            # runs in the loop's default executor so the file write does not block the loop
            if not self.saved_verified_blob and not os.path.isfile(self.file_path):
                if self.get_length() == len(verified_bytes):
                    with open(self.file_path, 'wb') as write_handle:
                        write_handle.write(verified_bytes)
                    self.saved_verified_blob = True
                else:
                    raise Exception("length mismatch")

        async with self.blob_write_lock:
            if self.verified.is_set():
                return
            await self.loop.run_in_executor(None, _save_verified)
            if self.blob_completed_callback:
                await self.blob_completed_callback(self)
            self.verified.set()

    def open_for_writing(self) -> HashBlobWriter:
        """
        Create and register a new writer for this blob

        @return: a HashBlobWriter whose future has the writer_finished() callback attached

        @raise OSError: if the blob file already exists
        """
        if self.file_exists:
            raise OSError(f"File already exists '{self.file_path}'")
        fut = asyncio.Future(loop=self.loop)
        # the bound method get_length (not its current value) is handed to the writer
        writer = HashBlobWriter(self.blob_hash, self.get_length, fut)
        self.writers.append(writer)
        fut.add_done_callback(self.writer_finished(writer))
        return writer

    async def sendfile(self, writer: asyncio.StreamWriter) -> int:
        """
        Read and send the file to the writer and return the number of bytes sent
        """
        with open(self.file_path, 'rb') as handle:
            return await self.loop.sendfile(writer.transport, handle, count=self.get_length())

    def close(self):
        """
        Cancel the futures of all pending writers and forget them
        """
        while self.writers:
            self.writers.pop().finished.cancel()

    def delete(self):
        """
        Close the blob, remove its file if present and reset it to the unverified,
        unknown-length state
        """
        self.close()
        self.saved_verified_blob = False
        if os.path.isfile(self.file_path):
            os.remove(self.file_path)
        self.verified.clear()
        self.finished_writing.clear()
        self.length = None

    def decrypt(self, key: bytes, iv: bytes) -> bytes:
        """
        Decrypt a BlobFile to plaintext bytes

        @param key: the AES key
        @param iv: the CBC initialization vector

        @return: the decrypted, unpadded contents of the blob file

        @raise ValueError: if the size of the file differs from self.length
        """
        with open(self.file_path, "rb") as f:
            buff = f.read()
        if len(buff) != self.length:
            raise ValueError("unexpected length")
        cipher = Cipher(AES(key), modes.CBC(iv), backend=backend)
        unpadder = PKCS7(AES.block_size).unpadder()
        decryptor = cipher.decryptor()
        return unpadder.update(decryptor.update(buff) + decryptor.finalize()) + unpadder.finalize()

    @classmethod
    async def create_from_unencrypted(cls, loop: asyncio.BaseEventLoop, blob_dir: str, key: bytes,
                                      iv: bytes, unencrypted: bytes, blob_num: int) -> BlobInfo:
        """
        Create an encrypted BlobFile from plaintext bytes

        @param loop: event loop passed to the new blob
        @param blob_dir: directory the blob file is written to
        @param key: the AES key
        @param iv: the CBC initialization vector
        @param unencrypted: the plaintext bytes to encrypt and store
        @param blob_num: number of the blob, passed through to the returned BlobInfo

        @return: BlobInfo built from blob_num, the encrypted length, the hex encoded
                 iv and the blob hash
        """
        blob_bytes, blob_hash = encrypt_blob_bytes(key, iv, unencrypted)
        length = len(blob_bytes)
        blob = cls(loop, blob_dir, blob_hash, length)
        # NOTE(review): open_for_writing() raises OSError when a file for this hash
        # already exists in blob_dir, e.g. if the same key/iv/plaintext was stored
        # before - confirm that callers expect that.
        writer = blob.open_for_writing()
        writer.write(blob_bytes)
        # verified is set at the end of save_verified_blob()
        await blob.verified.wait()
        return BlobInfo(blob_num, length, binascii.hexlify(iv).decode(), blob_hash)

    def set_length(self, length):
        """
        Set the blob length if it is not known yet

        A length equal to the one already stored is accepted silently. A new length
        is only stored while none is set and it lies within 0..MAX_BLOB_SIZE; any
        other value is ignored and a warning is logged.

        @param length: the length of the blob in bytes
        """
        if self.length is not None and length == self.length:
            return
        if self.length is None and 0 <= length <= MAX_BLOB_SIZE:
            self.length = length
            return
        log.warning("Got an invalid length. Previous length: %s, Invalid length: %s", self.length, length)

    def get_length(self):
        """
        @return: the length of the blob in bytes, or None if it is not known
        """
        return self.length

    def get_is_verified(self):
        """
        @return: True if the verified event is set
        """
        return self.verified.is_set()
|