2015-08-20 17:27:15 +02:00
|
|
|
import os
|
2018-11-07 21:15:05 +01:00
|
|
|
import logging
|
2018-12-15 21:29:25 +01:00
|
|
|
from twisted.internet import defer
|
2017-07-24 09:04:15 +02:00
|
|
|
from twisted.web.client import FileBodyProducer
|
2015-08-20 17:27:15 +02:00
|
|
|
from twisted.python.failure import Failure
|
2018-11-09 19:20:58 +01:00
|
|
|
from lbrynet.cryptoutils import get_lbry_hash_obj
|
2018-11-04 20:06:29 +01:00
|
|
|
from lbrynet.p2p.Error import DownloadCanceledError, InvalidDataError, InvalidBlobHashError
|
2017-09-13 21:46:39 +02:00
|
|
|
from lbrynet.blob.writer import HashBlobWriter
|
2017-09-29 20:45:11 +02:00
|
|
|
from lbrynet.blob.reader import HashBlobReader
|
2015-08-20 17:27:15 +02:00
|
|
|
|
2015-09-08 21:42:56 +02:00
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
2017-09-25 18:12:40 +02:00
|
|
|
# Maximum size of a single blob on the network, in bytes (2 MiB).
MAX_BLOB_SIZE = 2 * 2 ** 20

# digest_size is in bytes, and blob hashes are hex encoded
# (two hex characters per byte of digest)
blobhash_length = get_lbry_hash_obj().digest_size * 2
|
|
|
|
|
|
|
|
|
|
|
|
def is_valid_hashcharacter(char):
    """Return True if `char` is a character permitted in a blob hash (lowercase hex)."""
    hex_alphabet = "0123456789abcdef"
    return char in hex_alphabet
|
|
|
|
|
|
|
|
|
|
|
|
def is_valid_blobhash(blobhash):
    """Checks whether the blobhash is the correct length and contains only
    valid characters (0-9, a-f)

    @param blobhash: string, the blobhash to check

    @return: True/False
    """
    if len(blobhash) != blobhash_length:
        return False
    return all(is_valid_hashcharacter(character) for character in blobhash)
|
|
|
|
|
2018-02-12 20:16:43 +01:00
|
|
|
|
2018-07-22 00:34:59 +02:00
|
|
|
class BlobFile:
    """
    A chunk of data available on the network which is specified by a hashsum

    This class is used to create blobs on the local filesystem
    when we already know the blob hash before hand (i.e., when downloading blobs)
    Also can be used for reading from blobs on the local filesystem
    """

    def __str__(self):
        return self.blob_hash[:16]

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, str(self))

    def __init__(self, blob_dir, blob_hash, length=None):
        """
        @param blob_dir: directory where the blob file is (or will be) stored
        @param blob_hash: hex encoded hashsum identifying this blob
        @param length: known length of the blob in bytes, or None if unknown

        @raise InvalidBlobHashError: if blob_hash is not a valid blob hash
        """
        if not is_valid_blobhash(blob_hash):
            raise InvalidBlobHashError(blob_hash)
        self.blob_hash = blob_hash
        self.length = length
        self.writers = {}  # {Peer: (writer, finished_deferred)}
        self._verified = False
        self.readers = 0  # count of currently open readers
        self.blob_dir = blob_dir
        self.file_path = os.path.join(blob_dir, self.blob_hash)
        # serializes _save_verified_blob calls, which can otherwise interleave
        # because startProducing is a deferred
        self.blob_write_lock = defer.DeferredLock()
        self.saved_verified_blob = False
        if os.path.isfile(self.file_path):
            self.set_length(os.path.getsize(self.file_path))
            # This assumes that the hash of the blob has already been
            # checked as part of the blob creation process. It might
            # be worth having a function that checks the actual hash;
            # its probably too expensive to have that check be part of
            # this call.
            self._verified = True

    def open_for_writing(self, peer):
        """
        open a blob file to be written by peer, supports concurrent
        writers, as long as they are from different peers.

        returns tuple of (writer, finished_deferred)

        writer - a file like object with a write() function, close() when finished
        finished_deferred - deferred that is fired when write is finished and returns
        a instance of itself as HashBlob

        returns (None, None) if this peer already has an open writer for this blob
        """
        if peer not in self.writers:
            log.debug("Opening %s to be written by %s", str(self), str(peer))
            finished_deferred = defer.Deferred()
            writer = HashBlobWriter(self.get_length, self.writer_finished)
            self.writers[peer] = (writer, finished_deferred)
            return writer, finished_deferred
        log.warning("Tried to download the same file twice simultaneously from the same peer")
        return None, None

    def open_for_reading(self):
        """
        open blob for reading

        returns a file like object that can be read() from, and closed() when
        finished, or None if the blob is not verified
        """
        if self._verified is True:
            f = open(self.file_path, 'rb')
            reader = HashBlobReader(f, self.reader_finished)
            self.readers += 1
            return reader
        return None

    def delete(self):
        """
        delete blob file from file system, prevent deletion
        if a blob is being read from or written to

        Runs synchronously; filesystem errors during removal are logged and
        swallowed (best effort).

        @raise ValueError: if the blob is currently being read or written
        """
        if not self.writers and not self.readers:
            self._verified = False
            self.saved_verified_blob = False
            try:
                if os.path.isfile(self.file_path):
                    os.remove(self.file_path)
            except Exception as e:
                log.exception("An error occurred deleting %s:", str(self.file_path), exc_info=e)
        else:
            raise ValueError("File is currently being read or written and cannot be deleted")

    @property
    def verified(self):
        """
        Protect verified from being modified by other classes.
        verified is True if a write to a blob has completed successfully,
        or a blob has been read to have the same length as specified
        in init
        """
        return self._verified

    def set_length(self, length):
        """
        Record the blob length in bytes.

        Returns True if `length` equals the already-known length, or if no
        length was known and 0 <= length <= MAX_BLOB_SIZE; otherwise logs a
        warning and returns False.
        """
        if self.length is not None and length == self.length:
            return True
        if self.length is None and 0 <= length <= MAX_BLOB_SIZE:
            self.length = length
            return True
        log.warning("Got an invalid length. Previous length: %s, Invalid length: %s",
                    self.length, length)
        return False

    def get_length(self):
        """Return the known blob length in bytes, or None if not yet known."""
        return self.length

    def get_is_verified(self):
        """Return True if this blob has been verified (see `verified`)."""
        return self.verified

    def is_downloading(self):
        """Return True if at least one writer (download) is currently open."""
        if self.writers:
            return True
        return False

    def reader_finished(self, reader):
        """Callback fired when a HashBlobReader is closed; drops the reader count."""
        self.readers -= 1
        return defer.succeed(True)

    def writer_finished(self, writer, err=None):
        """
        Callback fired when a HashBlobWriter finishes or fails.

        On success (length and hash match), saves the verified blob to disk,
        fires this writer's finished_deferred with this blob and cancels any
        other concurrent writers. On mismatch or error, errbacks this
        writer's finished_deferred. Always closes the writer's handle.
        """
        def fire_finished_deferred():
            self._verified = True
            # iterate over a copy: firing callbacks may mutate self.writers
            for p, (w, finished_deferred) in list(self.writers.items()):
                if w == writer:
                    del self.writers[p]
                    finished_deferred.callback(self)
                    return True
            log.warning(
                "Somehow, the writer that was accepted as being valid was already removed: %s",
                writer)
            return False

        def errback_finished_deferred(err):
            for p, (w, finished_deferred) in list(self.writers.items()):
                if w == writer:
                    del self.writers[p]
                    finished_deferred.errback(err)

        def cancel_other_downloads():
            # w.close() re-enters writer_finished, whose errback path deletes
            # entries from self.writers - iterate over a copy to avoid
            # mutating the dict while iterating it
            for w, _ in list(self.writers.values()):
                w.close()

        if err is None:
            if writer.len_so_far == self.length and writer.blob_hash == self.blob_hash:
                if self._verified is False:
                    d = self.save_verified_blob(writer)
                    d.addCallbacks(lambda _: fire_finished_deferred(), errback_finished_deferred)
                    d.addCallback(lambda _: cancel_other_downloads())
                else:
                    d = defer.succeed(None)
                    fire_finished_deferred()
            else:
                # f-strings (used elsewhere in this file) also avoid the
                # TypeError that "%i" raises when self.length is still None
                if writer.len_so_far != self.length:
                    err_string = f"blob length is {writer.len_so_far} vs expected {self.length}"
                else:
                    err_string = f"blob hash is {writer.blob_hash} vs expected {self.blob_hash}"
                errback_finished_deferred(Failure(InvalidDataError(err_string)))
                d = defer.succeed(None)
        else:
            errback_finished_deferred(err)
            d = defer.succeed(None)
        d.addBoth(lambda _: writer.close_handle())
        return d

    def save_verified_blob(self, writer):
        """Save the verified blob contents to disk, serialized by blob_write_lock."""
        # we cannot have multiple _save_verified_blob interrupting
        # each other, can happen since startProducing is a deferred
        return self.blob_write_lock.run(self._save_verified_blob, writer)

    @defer.inlineCallbacks
    def _save_verified_blob(self, writer):
        """
        Copy the writer's buffered contents to the blob file on disk.

        @raise DownloadCanceledError: if the blob was already saved by
            another writer
        """
        if self.saved_verified_blob is False:
            writer.write_handle.seek(0)
            out_path = os.path.join(self.blob_dir, self.blob_hash)
            producer = FileBodyProducer(writer.write_handle)
            out_file = open(out_path, 'wb')
            try:
                yield producer.startProducing(out_file)
            finally:
                # startProducing does not close its consumer; close it here
                # so the file handle is not leaked
                out_file.close()
            self.saved_verified_blob = True
            defer.returnValue(True)
        else:
            raise DownloadCanceledError()
|