Merge branch 'blobs_mirroring'

This commit is contained in:
Jack Robison 2018-07-25 12:24:37 -04:00
commit 826d269e0e
No known key found for this signature in database
GPG key ID: DF25C68FE0239BB2
9 changed files with 171 additions and 5 deletions

View file

@ -26,6 +26,7 @@ at anytime.
* `status` to no longer return a base58 encoded `lbry_id`, instead return this as the hex encoded `node_id` in a new `dht_node_status` field.
* `startup_status` field in the response to `status` to be a dict of component names to status booleans
* moved wallet, upnp and dht startup code from `Session` to `Components`
* attempt blob downloads from http mirror sources (by default) concurrently to p2p sources
### Added
* `skipped_components` list to the response from `status`
@ -36,6 +37,8 @@ at anytime.
* script to generate docs/api.json file (https://github.com/lbryio/lbry.tech/issues/42)
* additional information to the balance error message when editing a claim (https://github.com/lbryio/lbry/pull/1309)
* `address` and `port` arguments to `peer_ping` (https://github.com/lbryio/lbry/issues/1313)
* ability to download from HTTP mirrors by setting `download_mirrors`
*
### Removed
* most of the internal attributes from `Daemon`

View file

@ -268,6 +268,7 @@ ADJUSTABLE_SETTINGS = {
'dht_node_port': (int, 4444),
'download_directory': (str, default_download_dir),
'download_timeout': (int, 180),
'download_mirrors': (list, ['blobs.lbry.io']),
'is_generous_host': (bool, True),
'announce_head_blobs_only': (bool, True),
'concurrent_announcers': (int, DEFAULT_CONCURRENT_ANNOUNCERS),

View file

@ -0,0 +1,86 @@
from random import choice
import logging
from twisted.internet import defer
import treq
from twisted.internet.task import LoopingCall
log = logging.getLogger(__name__)
class HTTPBlobDownloader(object):
'''
A downloader that is able to get blobs from HTTP mirrors.
Note that when a blob gets downloaded from a mirror or from a peer, BlobManager will mark it as completed
and cause any other type of downloader to progress to the next missing blob. Also, BlobFile is naturally able
to cancel other writers when a writer finishes first. That's why there is no call to cancel/resume/stop between
different types of downloaders.
'''
def __init__(self, blob_manager, blob_hashes=None, servers=None, client=None):
self.blob_manager = blob_manager
self.servers = servers or []
self.client = client or treq
self.blob_hashes = blob_hashes or []
self.looping_call = LoopingCall(self._download_next_blob_hash_for_file)
self.failures = 0
self.max_failures = 3
self.interval = 1
@property
def running(self):
return self.looping_call.running
def start(self):
if not self.running and self.blob_hashes and self.servers:
return self.looping_call.start(self.interval, now=True)
defer.succeed(None)
def stop(self):
if self.running:
self.blob_hashes = []
return self.looping_call.stop()
@defer.inlineCallbacks
def _download_next_blob_hash_for_file(self):
for blob_hash in self.blob_hashes:
blob = yield self.blob_manager.get_blob(blob_hash)
if not blob.verified:
self.download_blob(blob)
return
self.stop()
@defer.inlineCallbacks
def download_blob(self, blob):
try:
yield self._download_blob(blob)
self.failures = 0
except Exception as exception:
self.failures += 1
log.exception('Mirror failed downloading')
if self.failures >= self.max_failures:
self.stop()
self.failures = 0
@defer.inlineCallbacks
def _download_blob(self, blob):
if not blob.get_is_verified() and not blob.is_downloading() and 'mirror' not in blob.writers:
response = yield self.client.get(url_for(choice(self.servers), blob.blob_hash))
if response.code != 200:
log.debug('[Mirror] Missing a blob: %s', blob.blob_hash)
if blob.blob_hash in self.blob_hashes:
self.blob_hashes.remove(blob.blob_hash)
defer.returnValue(blob.blob_hash)
log.debug('[Mirror] Download started: %s', blob.blob_hash)
blob.set_length(response.length)
writer, finished_deferred = blob.open_for_writing('mirror')
try:
yield self.client.collect(response, writer.write)
log.info('Mirror completed download for %s', blob.blob_hash)
except Exception as e:
writer.close(e)
yield finished_deferred
defer.returnValue(blob.blob_hash)
def url_for(server, blob_hash=''):
return 'http://{}/{}'.format(server, blob_hash)

View file

@ -32,7 +32,7 @@ class Session(object):
def __init__(self, blob_data_payment_rate, db_dir=None, node_id=None, dht_node_port=None,
known_dht_nodes=None, peer_finder=None, hash_announcer=None, blob_dir=None, blob_manager=None,
peer_port=None, rate_limiter=None, wallet=None, external_ip=None, storage=None,
dht_node=None, peer_manager=None):
dht_node=None, peer_manager=None, download_mirrors=None):
"""@param blob_data_payment_rate: The default payment rate for blob data
@param db_dir: The directory in which levelDB files should be stored
@ -104,6 +104,7 @@ class Session(object):
self.base_payment_rate_manager = BasePaymentRateManager(blob_data_payment_rate)
self.payment_rate_manager = OnlyFreePaymentsManager()
self.storage = storage or SQLiteStorage(self.db_dir)
self.download_mirrors = download_mirrors
def setup(self):
"""Create the blob directory and database if necessary, start all desired services"""

View file

@ -7,7 +7,7 @@ from twisted.internet import threads, defer
from lbrynet.core.cryptoutils import get_lbry_hash_obj
from lbrynet.core.client.StandaloneBlobDownloader import StandaloneBlobDownloader
from lbrynet.core.Error import UnknownStreamTypeError, InvalidStreamDescriptorError
from lbrynet.core.HTTPBlobDownloader import HTTPBlobDownloader
log = logging.getLogger(__name__)
@ -445,7 +445,10 @@ def download_sd_blob(session, blob_hash, payment_rate_manager, timeout=None):
payment_rate_manager,
session.wallet,
timeout)
mirror = HTTPBlobDownloader(session.blob_manager, [blob_hash], session.download_mirrors)
mirror.start()
sd_blob = yield downloader.download()
mirror.stop()
sd_reader = BlobStreamDescriptorReader(sd_blob)
sd_info = yield sd_reader.get_info()
try:

View file

@ -211,7 +211,8 @@ class SessionComponent(Component):
peer_port=GCS('peer_port'),
wallet=self.component_manager.get_component(WALLET_COMPONENT),
external_ip=CS.get_external_ip(),
storage=self.component_manager.get_component(DATABASE_COMPONENT)
storage=self.component_manager.get_component(DATABASE_COMPONENT),
download_mirrors=GCS('download_mirrors')
)
yield self.session.setup()

View file

@ -8,6 +8,7 @@ from zope.interface import implements
from twisted.internet import defer
from lbrynet.core.client.StreamProgressManager import FullStreamProgressManager
from lbrynet.core.HTTPBlobDownloader import HTTPBlobDownloader
from lbrynet.core.utils import short_hash
from lbrynet.lbry_file.client.EncryptedFileDownloader import EncryptedFileSaver
from lbrynet.lbry_file.client.EncryptedFileDownloader import EncryptedFileDownloader
@ -37,7 +38,7 @@ class ManagedEncryptedFileDownloader(EncryptedFileSaver):
def __init__(self, rowid, stream_hash, peer_finder, rate_limiter, blob_manager, storage, lbry_file_manager,
payment_rate_manager, wallet, download_directory, file_name, stream_name, sd_hash, key,
suggested_file_name):
suggested_file_name, download_mirrors=None):
EncryptedFileSaver.__init__(
self, stream_hash, peer_finder, rate_limiter, blob_manager, storage, payment_rate_manager, wallet,
download_directory, key, stream_name, file_name
@ -55,6 +56,7 @@ class ManagedEncryptedFileDownloader(EncryptedFileSaver):
self.channel_claim_id = None
self.channel_name = None
self.metadata = None
self.mirror = HTTPBlobDownloader(self.blob_manager, servers=download_mirrors) if download_mirrors else None
def set_claim_info(self, claim_info):
self.claim_id = claim_info['claim_id']
@ -94,6 +96,8 @@ class ManagedEncryptedFileDownloader(EncryptedFileSaver):
@defer.inlineCallbacks
def stop(self, err=None, change_status=True):
log.debug('Stopping download for stream %s', short_hash(self.stream_hash))
if self.mirror:
self.mirror.stop()
# EncryptedFileSaver deletes metadata when it's stopped. We don't want that here.
yield EncryptedFileDownloader.stop(self, err=err)
if change_status is True:
@ -123,6 +127,10 @@ class ManagedEncryptedFileDownloader(EncryptedFileSaver):
yield EncryptedFileSaver._start(self)
status = yield self._save_status()
log_status(self.sd_hash, status)
if self.mirror:
blobs = yield self.storage.get_blobs_for_stream(self.stream_hash)
self.mirror.blob_hashes = [b.blob_hash for b in blobs if b.blob_hash is not None]
self.mirror.start()
defer.returnValue(status)
def _get_finished_deferred_callback_value(self):

View file

@ -92,7 +92,8 @@ class EncryptedFileManager(object):
stream_name=stream_name,
sd_hash=sd_hash,
key=key,
suggested_file_name=suggested_file_name
suggested_file_name=suggested_file_name,
download_mirrors=self.session.download_mirrors
)
def _start_lbry_file(self, file_info, payment_rate_manager, claim_info):

View file

@ -0,0 +1,62 @@
from mock import MagicMock
from twisted.trial import unittest
from twisted.internet import defer
from lbrynet.blob import BlobFile
from lbrynet.core.HTTPBlobDownloader import HTTPBlobDownloader
from lbrynet.tests.util import mk_db_and_blob_dir, rm_db_and_blob_dir
class HTTPBlobDownloaderTest(unittest.TestCase):
def setUp(self):
self.db_dir, self.blob_dir = mk_db_and_blob_dir()
self.blob_manager = MagicMock()
self.client = MagicMock()
self.blob_hash = ('d17272b17a1ad61c4316ac13a651c2b0952063214a81333e'
'838364b01b2f07edbd165bb7ec60d2fb2f337a2c02923852')
self.blob = BlobFile(self.blob_dir, self.blob_hash)
self.blob_manager.get_blob.side_effect = lambda _: defer.succeed(self.blob)
self.response = MagicMock(code=200, length=400)
self.client.get.side_effect = lambda uri: defer.succeed(self.response)
self.downloader = HTTPBlobDownloader(self.blob_manager, [self.blob_hash], ['server1'], self.client)
self.downloader.interval = 0
def tearDown(self):
rm_db_and_blob_dir(self.db_dir, self.blob_dir)
@defer.inlineCallbacks
def test_download_successful(self):
self.client.collect.side_effect = collect
yield self.downloader.start()
self.blob_manager.get_blob.assert_called_with(self.blob_hash)
self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
self.client.collect.assert_called()
self.assertEqual(self.blob.get_length(), self.response.length)
self.assertEqual(self.blob.get_is_verified(), True)
self.assertEqual(self.blob.writers, {})
@defer.inlineCallbacks
def test_download_transfer_failed(self):
self.client.collect.side_effect = lambda response, write: defer.fail(Exception())
yield self.downloader.start()
self.assertEqual(len(self.client.collect.mock_calls), self.downloader.max_failures)
self.blob_manager.get_blob.assert_called_with(self.blob_hash)
self.assertEqual(self.blob.get_length(), self.response.length)
self.assertEqual(self.blob.get_is_verified(), False)
self.assertEqual(self.blob.writers, {})
@defer.inlineCallbacks
def test_blob_not_found(self):
self.response.code = 404
yield self.downloader.start()
self.blob_manager.get_blob.assert_called_with(self.blob_hash)
self.client.get.assert_called_with('http://{}/{}'.format('server1', self.blob_hash))
self.client.collect.assert_not_called()
self.assertEqual(self.blob.get_is_verified(), False)
self.assertEqual(self.blob.writers, {})
def collect(response, write):
write('f' * response.length)
defer.succeed(None)