From d68ca65e41d2aa76fafbd9031df269739dc0a120 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Fri, 29 Sep 2017 14:29:35 -0400 Subject: [PATCH 1/8] warn if reader is garbage collected but not closed, do the same for writer --- lbrynet/blob/reader.py | 6 +++++- lbrynet/blob/writer.py | 5 +++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/lbrynet/blob/reader.py b/lbrynet/blob/reader.py index dd248c8fd..4d1f51dff 100644 --- a/lbrynet/blob/reader.py +++ b/lbrynet/blob/reader.py @@ -40,11 +40,15 @@ class HashBlobReader(object): read(size) and close() """ def __init__(self, file_path, finished_cb): + self.file_path = file_path self.finished_cb = finished_cb self.finished_cb_d = None - self.read_handle = open(file_path, 'rb') + self.read_handle = open(self.file_path, 'rb') def __del__(self): + if self.finished_cb_d is None: + log.warn("Garbage collection was called, but reader for %s was not closed yet", + self.file_path) self.close() def read(self, size=-1): diff --git a/lbrynet/blob/writer.py b/lbrynet/blob/writer.py index a95430386..dc4d3d77a 100644 --- a/lbrynet/blob/writer.py +++ b/lbrynet/blob/writer.py @@ -16,6 +16,11 @@ class HashBlobWriter(object): self._hashsum = get_lbry_hash_obj() self.len_so_far = 0 + def __del__(self): + if self.finished_cb_d is None: + log.warn("Garbage collection was called, but writer was not closed yet") + self.close() + @property def blob_hash(self): return self._hashsum.hexdigest() From e07c2f7bd8d7152a592b614c9ccbc6a2e48d5ae0 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Mon, 2 Oct 2017 18:07:07 -0400 Subject: [PATCH 2/8] take read handle as argument instead of file path --- lbrynet/blob/blob_file.py | 3 ++- lbrynet/blob/reader.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/lbrynet/blob/blob_file.py b/lbrynet/blob/blob_file.py index 78cf974ad..0c4d2555c 100644 --- a/lbrynet/blob/blob_file.py +++ b/lbrynet/blob/blob_file.py @@ -78,7 +78,8 @@ class BlobFile(object): finished """ if self._verified is True: - reader = HashBlobReader(self.file_path, self.reader_finished) + f = open(self.file_path, 'rb') + reader = HashBlobReader(f, self.reader_finished) self.readers += 1 return reader return None diff --git a/lbrynet/blob/reader.py b/lbrynet/blob/reader.py index 4d1f51dff..745e62ef8 100644 --- a/lbrynet/blob/reader.py +++ b/lbrynet/blob/reader.py @@ -39,16 +39,15 @@ class HashBlobReader(object): This is a file like reader class that supports read(size) and close() """ - def __init__(self, file_path, finished_cb): - self.file_path = file_path + def __init__(self, read_handle, finished_cb): self.finished_cb = finished_cb self.finished_cb_d = None - self.read_handle = open(self.file_path, 'rb') + self.read_handle = read_handle def __del__(self): if self.finished_cb_d is None: log.warn("Garbage collection was called, but reader for %s was not closed yet", - self.file_path) + self.read_handle.name) self.close() def read(self, size=-1): From 136034539514d5d56f5c7539a5ebccd9033a99f5 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Fri, 29 Sep 2017 14:43:03 -0400 Subject: [PATCH 3/8] use blob.open_for_reading() in StreamBlobDecryptor --- lbrynet/cryptstream/CryptBlob.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lbrynet/cryptstream/CryptBlob.py b/lbrynet/cryptstream/CryptBlob.py index a7303a588..6d4bcbc5e 100644 --- a/lbrynet/cryptstream/CryptBlob.py +++ b/lbrynet/cryptstream/CryptBlob.py @@ -1,6 +1,6 @@ import binascii import logging -from twisted.internet import defer +from twisted.internet import defer, threads from cryptography.hazmat.primitives.ciphers import Cipher, modes from cryptography.hazmat.primitives.ciphers.algorithms import AES from cryptography.hazmat.primitives.padding import PKCS7 @@ -46,6 +46,10 @@ class StreamBlobDecryptor(object): write_func - function that takes decrypted string as arugment and writes it somewhere + + Returns: + + deferred that returns after decrypting blob and writing content """ def remove_padding(data): @@ -67,13 +71,17 @@ class StreamBlobDecryptor(object): last_chunk = self.cipher.update(data_to_decrypt) + self.cipher.finalize() write_func(remove_padding(last_chunk)) - def decrypt_bytes(data): + + read_handle = self.blob.open_for_reading() + + def decrypt_bytes(): + data = read_handle.read() self.buff += data self.len_read += len(data) write_bytes() + finish_decrypt() - d = self.blob.read(decrypt_bytes) - d.addCallback(lambda _: finish_decrypt()) + d = threads.deferToThread(decrypt_bytes) return d From 96d8cb17d91df4662fec9f648bc8a5a688d00dc9 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Fri, 29 Sep 2017 14:45:11 -0400 Subject: [PATCH 4/8] delete deprecated producer/consumer read methods from BlobFile --- lbrynet/blob/blob_file.py | 30 +----------------------------- lbrynet/blob/reader.py | 31 ------------------------------- 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/lbrynet/blob/blob_file.py b/lbrynet/blob/blob_file.py index 0c4d2555c..5c08b2059 100644 --- a/lbrynet/blob/blob_file.py +++ b/lbrynet/blob/blob_file.py @@ -1,14 +1,13 @@ import logging import os from twisted.internet import defer, threads -from twisted.protocols.basic import FileSender from twisted.web.client import FileBodyProducer from twisted.python.failure import Failure from lbrynet import conf from lbrynet.core.Error import DownloadCanceledError, InvalidDataError, InvalidBlobHashError from lbrynet.core.utils import is_valid_blobhash from lbrynet.blob.writer import HashBlobWriter -from lbrynet.blob.reader import HashBlobReader, HashBlobReader_v0 +from lbrynet.blob.reader import HashBlobReader log = logging.getLogger(__name__) @@ -143,33 +142,6 @@ class BlobFile(object): return True return False - def read(self, write_func): - """ - This function is only used in StreamBlobDecryptor - and should be deprecated in favor of open_for_reading() - """ - def close_self(*args): - self.close_read_handle(file_handle) - return args[0] - - file_sender = FileSender() - reader = HashBlobReader_v0(write_func) - file_handle = self.open_for_reading() - if file_handle is not None: - d = file_sender.beginFileTransfer(file_handle, reader) - d.addCallback(close_self) - else: - d = defer.fail(IOError("Could not read the blob")) - return d - - def close_read_handle(self, file_handle): - """ - This function is only used in StreamBlobDecryptor - and should be deprecated in favor of open_for_reading() - """ - if file_handle is not None: - file_handle.close() - def reader_finished(self, reader): self.readers -= 1 return defer.succeed(True) diff --git a/lbrynet/blob/reader.py b/lbrynet/blob/reader.py index 745e62ef8..afd62e57e 100644 --- a/lbrynet/blob/reader.py +++ b/lbrynet/blob/reader.py @@ -1,39 +1,8 @@ import logging -from twisted.internet import interfaces -from zope.interface import implements log = logging.getLogger(__name__) -class HashBlobReader_v0(object): - """ - This is a class that is only used in StreamBlobDecryptor - and should be deprecated - """ - implements(interfaces.IConsumer) - - def __init__(self, write_func): - self.write_func = write_func - - def registerProducer(self, producer, streaming): - from twisted.internet import reactor - - self.producer = producer - self.streaming = streaming - if self.streaming is False: - reactor.callLater(0, self.producer.resumeProducing) - - def unregisterProducer(self): - pass - - def write(self, data): - from twisted.internet import reactor - - self.write_func(data) - if self.streaming is False: - reactor.callLater(0, self.producer.resumeProducing) - - class HashBlobReader(object): """ This is a file like reader class that supports From 51d466876300a4e3dea366ee1749081fda0b3d43 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Fri, 29 Sep 2017 14:43:29 -0400 Subject: [PATCH 5/8] fix test --- lbrynet/tests/unit/cryptstream/test_cryptblob.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lbrynet/tests/unit/cryptstream/test_cryptblob.py b/lbrynet/tests/unit/cryptstream/test_cryptblob.py index 083d2e1fc..1631b68b0 100644 --- a/lbrynet/tests/unit/cryptstream/test_cryptblob.py +++ b/lbrynet/tests/unit/cryptstream/test_cryptblob.py @@ -9,6 +9,7 @@ from Crypto import Random from Crypto.Cipher import AES import random import string +import StringIO class MocBlob(object): def __init__(self): @@ -19,6 +20,9 @@ class MocBlob(object): write_func(data) return defer.succeed(True) + def open_for_reading(self): + return StringIO.StringIO(self.data) + def write(self, data): self.data += data @@ -64,7 +68,7 @@ class TestCryptBlob(unittest.TestCase): # decrypt string decryptor = CryptBlob.StreamBlobDecryptor(blob, key, iv, size_of_data) - decryptor.decrypt(write_func) + yield decryptor.decrypt(write_func) self.assertEqual(self.data_buf, string_to_encrypt) @defer.inlineCallbacks From 9470b318b0021df41a14494f23922d47b0ab46ae Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Mon, 25 Sep 2017 12:12:40 -0400 Subject: [PATCH 6/8] moved BLOB_SIZE in conf to MAX_BLOB_SIZE constant in blob.blob_file --- lbrynet/blob/blob_file.py | 5 ++--- lbrynet/conf.py | 1 - lbrynet/core/client/ClientRequest.py | 5 ++--- lbrynet/cryptstream/CryptBlob.py | 5 ++--- lbrynet/tests/unit/cryptstream/test_cryptblob.py | 4 ++-- 5 files changed, 8 insertions(+), 12 deletions(-) diff --git a/lbrynet/blob/blob_file.py b/lbrynet/blob/blob_file.py index 5c08b2059..42b402030 100644 --- a/lbrynet/blob/blob_file.py +++ b/lbrynet/blob/blob_file.py @@ -3,15 +3,14 @@ import os from twisted.internet import defer, threads from twisted.web.client import FileBodyProducer from twisted.python.failure import Failure -from lbrynet import conf from lbrynet.core.Error import DownloadCanceledError, InvalidDataError, InvalidBlobHashError from lbrynet.core.utils import is_valid_blobhash from lbrynet.blob.writer import HashBlobWriter from lbrynet.blob.reader import HashBlobReader - log = logging.getLogger(__name__) +MAX_BLOB_SIZE = 2 * 2 ** 20 class BlobFile(object): """ @@ -124,7 +123,7 @@ class BlobFile(object): def set_length(self, length): if self.length is not None and length == self.length: return True - if self.length is None and 0 <= length <= conf.settings['BLOB_SIZE']: + if self.length is None and 0 <= length <= MAX_BLOB_SIZE: self.length = length return True log.warning("Got an invalid length. Previous length: %s, Invalid length: %s", diff --git a/lbrynet/conf.py b/lbrynet/conf.py index 4b156ad89..cc290a3bf 100644 --- a/lbrynet/conf.py +++ b/lbrynet/conf.py @@ -206,7 +206,6 @@ FIXED_SETTINGS = { 'API_ADDRESS': 'lbryapi', 'APP_NAME': APP_NAME, 'BLOBFILES_DIR': 'blobfiles', - 'BLOB_SIZE': 2 * MB, 'CRYPTSD_FILE_EXTENSION': '.cryptsd', 'CURRENCIES': { 'BTC': {'type': 'crypto'}, diff --git a/lbrynet/core/client/ClientRequest.py b/lbrynet/core/client/ClientRequest.py index 04c3dac7f..1dee9b9d6 100644 --- a/lbrynet/core/client/ClientRequest.py +++ b/lbrynet/core/client/ClientRequest.py @@ -1,5 +1,4 @@ -from lbrynet import conf - +from lbrynet.blob.blob_file import MAX_BLOB_SIZE class ClientRequest(object): def __init__(self, request_dict, response_identifier=None): @@ -17,7 +16,7 @@ class ClientBlobRequest(ClientPaidRequest): def __init__(self, request_dict, response_identifier, write_func, finished_deferred, cancel_func, blob): if blob.length is None: - max_pay_units = conf.settings['BLOB_SIZE'] + max_pay_units = MAX_BLOB_SIZE else: max_pay_units = blob.length ClientPaidRequest.__init__(self, request_dict, response_identifier, max_pay_units) diff --git a/lbrynet/cryptstream/CryptBlob.py b/lbrynet/cryptstream/CryptBlob.py index 6d4bcbc5e..08c0fe09e 100644 --- a/lbrynet/cryptstream/CryptBlob.py +++ b/lbrynet/cryptstream/CryptBlob.py @@ -5,9 +5,8 @@ from cryptography.hazmat.primitives.ciphers import Cipher, modes from cryptography.hazmat.primitives.ciphers.algorithms import AES from cryptography.hazmat.primitives.padding import PKCS7 from cryptography.hazmat.backends import default_backend -from lbrynet import conf from lbrynet.core.BlobInfo import BlobInfo - +from lbrynet.blob.blob_file import MAX_BLOB_SIZE log = logging.getLogger(__name__) backend = default_backend() @@ -114,7 +113,7 @@ class CryptStreamBlobMaker(object): max bytes are written. num_bytes_to_write is the number of bytes that will be written from data in this call """ - max_bytes_to_write = conf.settings['BLOB_SIZE'] - self.length - 1 + max_bytes_to_write = MAX_BLOB_SIZE - self.length - 1 done = False if max_bytes_to_write <= len(data): num_bytes_to_write = max_bytes_to_write diff --git a/lbrynet/tests/unit/cryptstream/test_cryptblob.py b/lbrynet/tests/unit/cryptstream/test_cryptblob.py index 1631b68b0..2378c5770 100644 --- a/lbrynet/tests/unit/cryptstream/test_cryptblob.py +++ b/lbrynet/tests/unit/cryptstream/test_cryptblob.py @@ -1,7 +1,7 @@ from twisted.trial import unittest from twisted.internet import defer from lbrynet.cryptstream import CryptBlob -from lbrynet import conf +from lbrynet.blob.blob_file import MAX_BLOB_SIZE from lbrynet.tests.mocks import mock_conf_settings @@ -57,7 +57,7 @@ class TestCryptBlob(unittest.TestCase): expected_encrypted_blob_size = ((size_of_data / AES.block_size) + 1) * AES.block_size self.assertEqual(expected_encrypted_blob_size, len(blob.data)) - if size_of_data < conf.settings['BLOB_SIZE']-1: + if size_of_data < MAX_BLOB_SIZE-1: self.assertFalse(done) else: self.assertTrue(done) From a5293de44b837ce5823e50a5bf2f57b3c65f3dfd Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Fri, 29 Sep 2017 15:23:49 -0400 Subject: [PATCH 7/8] adding changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 820e64931..59498f825 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ at anytime. * ### Changed - * + * Moved BLOB_SIZE from conf.py to MAX_BLOB_SIZE in blob/blob_file.py * ### Added @@ -30,7 +30,7 @@ at anytime. * ### Removed - * + * Removed some alternate methods of reading from blob files * From 10ac86a99e052c18f6be80b6d4f678c0ed2c65a7 Mon Sep 17 00:00:00 2001 From: Kay Kurokawa Date: Wed, 25 Oct 2017 16:04:35 -0400 Subject: [PATCH 8/8] use FileBodyProducer to read --- lbrynet/cryptstream/CryptBlob.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lbrynet/cryptstream/CryptBlob.py b/lbrynet/cryptstream/CryptBlob.py index 08c0fe09e..c99465673 100644 --- a/lbrynet/cryptstream/CryptBlob.py +++ b/lbrynet/cryptstream/CryptBlob.py @@ -1,6 +1,8 @@ import binascii import logging -from twisted.internet import defer, threads +from io import BytesIO +from twisted.internet import defer +from twisted.web.client import FileBodyProducer from cryptography.hazmat.primitives.ciphers import Cipher, modes from cryptography.hazmat.primitives.ciphers.algorithms import AES from cryptography.hazmat.primitives.padding import PKCS7 @@ -73,14 +75,17 @@ class StreamBlobDecryptor(object): read_handle = self.blob.open_for_reading() + @defer.inlineCallbacks def decrypt_bytes(): - data = read_handle.read() - self.buff += data - self.len_read += len(data) + producer = FileBodyProducer(read_handle) + buff = BytesIO() + yield producer.startProducing(buff) + self.buff = buff.getvalue() + self.len_read += len(self.buff) write_bytes() finish_decrypt() - d = threads.deferToThread(decrypt_bytes) + d = decrypt_bytes() return d