2015-08-20 11:27:15 -04:00
|
|
|
import os
|
2018-11-07 15:15:05 -05:00
|
|
|
import logging
|
2018-12-15 15:29:25 -05:00
|
|
|
from twisted.internet import defer
|
2017-07-24 04:04:15 -03:00
|
|
|
from twisted.web.client import FileBodyProducer
|
2015-08-20 11:27:15 -04:00
|
|
|
from twisted.python.failure import Failure
|
2018-11-09 13:20:58 -05:00
|
|
|
from lbrynet.cryptoutils import get_lbry_hash_obj
|
2018-11-04 14:06:29 -05:00
|
|
|
from lbrynet.p2p.Error import DownloadCanceledError, InvalidDataError, InvalidBlobHashError
|
2017-09-13 15:46:39 -04:00
|
|
|
from lbrynet.blob.writer import HashBlobWriter
|
2017-09-29 14:45:11 -04:00
|
|
|
from lbrynet.blob.reader import HashBlobReader
|
2015-08-20 11:27:15 -04:00
|
|
|
|
2015-09-08 15:42:56 -04:00
|
|
|
# Module-level logger, named after this module
log = logging.getLogger(__name__)

# Largest blob length, in bytes, that is accepted (2 MiB)
MAX_BLOB_SIZE = 2 * 1024 * 1024

# digest_size is in bytes, and blob hashes are hex encoded,
# so each digest byte contributes two characters to the hash string
blobhash_length = 2 * get_lbry_hash_obj().digest_size
|
|
|
|
|
|
|
|
|
|
|
|
def is_valid_hashcharacter(char):
    """Checks whether char is a single lowercase hexadecimal digit

    @param char: string, the character to check

    @return: True if char is exactly one of 0-9 or a-f, False otherwise
    """
    # A bare substring test would also accept "" and multi-character
    # runs such as "abc", so the length is checked explicitly.
    return len(char) == 1 and char in "0123456789abcdef"
|
|
|
|
|
|
|
|
|
|
|
|
def is_valid_blobhash(blobhash):
    """Report whether blobhash is a well formed blob hash

    A well formed hash is exactly blobhash_length characters long and
    every one of its characters passes is_valid_hashcharacter.

    @param blobhash: string, the blobhash to check

    @return: True/False
    """
    # cheap length test first, so the characters of wrongly sized input are never scanned
    if len(blobhash) != blobhash_length:
        return False
    for character in blobhash:
        if not is_valid_hashcharacter(character):
            return False
    return True
|
|
|
|
|
2018-02-12 14:16:43 -05:00
|
|
|
|
2018-07-21 18:34:59 -04:00
|
|
|
class BlobFile:
    """
    A chunk of data available on the network which is specified by a hashsum

    This class is used to create blobs on the local filesystem
    when we already know the blob hash before hand (i.e., when downloading blobs)
    Also can be used for reading from blobs on the local filesystem
    """

    def __str__(self):
        # only the first 16 characters of the hash are shown, to keep log lines short
        return self.blob_hash[:16]

    def __repr__(self):
        return '<{}({})>'.format(self.__class__.__name__, str(self))

    def __init__(self, blob_dir, blob_hash, length=None):
        """
        @param blob_dir: directory in which the blob file lives
        @param blob_hash: hex encoded hash of the blob, also used as its file name
        @param length: expected length of the blob in bytes, or None if not known yet

        @raise InvalidBlobHashError: if blob_hash fails is_valid_blobhash
        """
        if not is_valid_blobhash(blob_hash):
            raise InvalidBlobHashError(blob_hash)
        self.blob_hash = blob_hash
        self.length = length
        self.writers = {}  # {Peer: writer, finished_deferred}
        self._verified = False
        self.readers = 0  # number of readers currently open on the blob
        self.blob_dir = blob_dir
        self.file_path = os.path.join(blob_dir, self.blob_hash)
        # serializes calls to _save_verified_blob
        self.blob_write_lock = defer.DeferredLock()
        self.saved_verified_blob = False
        if os.path.isfile(self.file_path):
            # the result of set_length is deliberately not checked here; a
            # size that is rejected is only logged by set_length
            self.set_length(os.path.getsize(self.file_path))
            # This assumes that the hash of the blob has already been
            # checked as part of the blob creation process. It might
            # be worth having a function that checks the actual hash;
            # its probably too expensive to have that check be part of
            # this call.
            self._verified = True

    def open_for_writing(self, peer):
        """
        open a blob file to be written by peer, supports concurrent
        writers, as long as they are from different peers.

        returns tuple of (writer, finished_deferred)

        writer - a file like object with a write() function, close() when finished
        finished_deferred - deferred that is fired when write is finished and returns
            a instance of itself as HashBlob

        returns (None, None) if peer already has a writer open on this blob
        """
        if peer not in self.writers:
            log.debug("Opening %s to be written by %s", str(self), str(peer))
            finished_deferred = defer.Deferred()
            writer = HashBlobWriter(self.get_length, self.writer_finished)
            self.writers[peer] = (writer, finished_deferred)
            return writer, finished_deferred
        log.warning("Tried to download the same file twice simultaneously from the same peer")
        return None, None

    def open_for_reading(self):
        """
        open blob for reading

        returns a file like object that can be read() from, and closed() when
        finished, or None if the blob is not verified
        """
        if self._verified is True:
            f = open(self.file_path, 'rb')
            reader = HashBlobReader(f, self.reader_finished)
            self.readers += 1
            return reader
        return None

    def delete(self):
        """
        delete blob file from file system, prevent deletion
        if a blob is being read from or written to

        The blob is marked as unverified and unsaved before the file is
        removed. An error raised while removing the file is logged and
        not propagated.

        returns None

        @raise ValueError: if the blob currently has readers or writers
        """
        if not self.writers and not self.readers:
            self._verified = False
            self.saved_verified_blob = False
            try:
                if os.path.isfile(self.file_path):
                    os.remove(self.file_path)
            except Exception as e:
                log.exception("An error occurred deleting %s:", str(self.file_path), exc_info=e)
        else:
            raise ValueError("File is currently being read or written and cannot be deleted")

    @property
    def verified(self):
        """
        Protect verified from being modified by other classes.
        verified is True if a write to a blob has completed successfully,
        or if the blob file already existed on disk when this object
        was created
        """
        return self._verified

    def set_length(self, length):
        """
        Record the length of the blob if it is not known yet.

        @param length: length of the blob in bytes

        @return: True if length equals the length already recorded, or if no
            length was recorded and length is between 0 and MAX_BLOB_SIZE
            (inclusive); False otherwise, in which case the recorded length
            is left untouched and a warning is logged
        """
        if self.length is not None and length == self.length:
            return True
        if self.length is None and 0 <= length <= MAX_BLOB_SIZE:
            self.length = length
            return True
        log.warning("Got an invalid length. Previous length: %s, Invalid length: %s",
                    self.length, length)
        return False

    def get_length(self):
        return self.length

    def get_is_verified(self):
        return self.verified

    def is_downloading(self):
        """returns True while at least one writer is open on the blob"""
        return bool(self.writers)

    def reader_finished(self, reader):
        """
        callback handed to HashBlobReader, releases one reader slot

        returns a deferred that has already fired with True
        """
        self.readers -= 1
        return defer.succeed(True)

    def writer_finished(self, writer, err=None):
        """
        callback handed to HashBlobWriter

        @param writer: the writer that finished
        @param err: None if the writer finished normally, otherwise the
            failure that is passed on to the errback of the writer's
            finished_deferred

        When err is None and the writer's length and hash match this blob,
        the data is saved (unless the blob is already verified) and the
        writer's finished_deferred is fired with this blob. On a mismatch
        the finished_deferred errbacks with InvalidDataError.

        returns a deferred; writer.close_handle() is always called at the
        end of its callback chain
        """
        def fire_finished_deferred():
            self._verified = True
            for p, (w, finished_deferred) in list(self.writers.items()):
                if w == writer:
                    del self.writers[p]
                    finished_deferred.callback(self)
                    return True
            log.warning(
                "Somehow, the writer that was accepted as being valid was already removed: %s",
                writer)
            return False

        def errback_finished_deferred(err):
            for p, (w, finished_deferred) in list(self.writers.items()):
                if w == writer:
                    del self.writers[p]
                    finished_deferred.errback(err)

        def cancel_other_downloads():
            # Iterate over a snapshot, like the two loops above: closing a
            # writer may call back into writer_finished (presumably through
            # the callback given to HashBlobWriter - confirm), which deletes
            # entries from self.writers while this loop is still running.
            for w, _ in list(self.writers.values()):
                w.close()

        if err is None:
            if writer.len_so_far == self.length and writer.blob_hash == self.blob_hash:
                if self._verified is False:
                    d = self.save_verified_blob(writer)
                    d.addCallbacks(lambda _: fire_finished_deferred(), errback_finished_deferred)
                    d.addCallback(lambda _: cancel_other_downloads())
                else:
                    d = defer.succeed(None)
                    fire_finished_deferred()
            else:
                if writer.len_so_far != self.length:
                    # f-string rather than %i, so that a length that is
                    # still None is reported instead of raising TypeError
                    err_string = f"blob length is {writer.len_so_far} vs expected {self.length}"
                else:
                    err_string = f"blob hash is {writer.blob_hash} vs expected {self.blob_hash}"
                errback_finished_deferred(Failure(InvalidDataError(err_string)))
                d = defer.succeed(None)
        else:
            errback_finished_deferred(err)
            d = defer.succeed(None)
        d.addBoth(lambda _: writer.close_handle())
        return d

    def save_verified_blob(self, writer):
        """
        copy the data held by writer into the blob file, one writer at a time

        returns the deferred produced by running _save_verified_blob
        under blob_write_lock
        """
        # we cannot have multiple _save_verified_blob interrupting
        # each other, can happen since startProducing is a deferred
        return self.blob_write_lock.run(self._save_verified_blob, writer)

    @defer.inlineCallbacks
    def _save_verified_blob(self, writer):
        """
        write the contents of writer.write_handle to the blob file

        returns a deferred that fires with True once the data is written

        @raise DownloadCanceledError: if the blob has already been saved
        """
        if self.saved_verified_blob is False:
            writer.write_handle.seek(0)
            out_path = os.path.join(self.blob_dir, self.blob_hash)
            producer = FileBodyProducer(writer.write_handle)
            # the output file is closed (and so flushed) as soon as the
            # producer is done, before the blob is marked as saved
            with open(out_path, 'wb') as out_file:
                yield producer.startProducing(out_file)
            self.saved_verified_blob = True
            defer.returnValue(True)
        else:
            raise DownloadCanceledError()
|