lbry-sdk/lbrynet/blob_exchange/downloader.py

126 lines
5.6 KiB
Python
Raw Normal View History

2019-01-22 12:52:32 -05:00
import asyncio
import typing
import logging
2019-05-01 00:42:51 -03:00
from lbrynet.utils import cache_concurrent
from lbrynet.blob_exchange.client import request_blob
2019-01-22 12:52:32 -05:00
if typing.TYPE_CHECKING:
from lbrynet.conf import Config
2019-01-22 12:52:32 -05:00
from lbrynet.dht.node import Node
from lbrynet.dht.peer import KademliaPeer
2019-03-28 14:51:55 -04:00
from lbrynet.blob.blob_manager import BlobManager
from lbrynet.blob.blob_file import AbstractBlob
2019-01-22 12:52:32 -05:00
log = logging.getLogger(__name__)
class BlobDownloader:
    """Download a blob by racing requests against peers taken from a queue.

    Peers are drained from ``peer_queue`` in batches, ordered by their last
    observed score, and asked for the blob concurrently, up to
    ``config.max_connections_per_download`` requests at a time.  Peers whose
    request ends without a usable transport are ignored for a while (see
    ``clearbanned``).
    """
    BAN_FACTOR = 2.0  # fixme: when connection manager gets implemented, move it out from here

    def __init__(self, loop: asyncio.BaseEventLoop, config: 'Config', blob_manager: 'BlobManager',
                 peer_queue: asyncio.Queue):
        """
        :param loop: event loop used to create request tasks and read the clock
        :param config: read for ``max_connections_per_download``,
            ``peer_connect_timeout`` and ``blob_download_timeout``
        :param blob_manager: provides blobs through ``get_blob(blob_hash, length)``
        :param peer_queue: queue whose items are iterables of peers
        """
        self.loop = loop
        self.config = config
        self.blob_manager = blob_manager
        self.peer_queue = peer_queue
        self.active_connections: typing.Dict['KademliaPeer', asyncio.Task] = {}  # active request_blob calls
        # peer -> loop time at which the peer was dropped
        self.ignored: typing.Dict['KademliaPeer', float] = {}
        # peer -> bytes/second measured on the last request (higher is tried first)
        self.scores: typing.Dict['KademliaPeer', float] = {}
        # peer -> failure count, reset to 0 whenever a request keeps its transport
        self.failures: typing.Dict['KademliaPeer', int] = {}
        # peer -> transport handed back by request_blob, passed in again on the next request
        self.connections: typing.Dict['KademliaPeer', asyncio.Transport] = {}

    def should_race_continue(self, blob: 'AbstractBlob') -> bool:
        """Return True if another peer request may be started for ``blob``.

        No new request is started once the connection limit is reached, the
        blob is already verified, or the blob is no longer writeable.
        """
        if len(self.active_connections) >= self.config.max_connections_per_download:
            return False
        return not blob.get_is_verified() and blob.is_writeable()

    async def request_blob_from_peer(self, blob: 'AbstractBlob', peer: 'KademliaPeer', connection_id: int = 0):
        """Request ``blob`` from ``peer`` and record the outcome.

        Updates ``scores``, ``failures``, ``ignored`` and ``connections`` for
        the peer.  Returns immediately if the blob is already verified.
        """
        if blob.get_is_verified():
            return
        self.scores[peer] = self.scores.get(peer, 0) - 1  # starts losing score, to account for cancelled ones
        transport = self.connections.get(peer)
        start = self.loop.time()
        bytes_received, transport = await request_blob(
            self.loop, blob, peer.address, peer.tcp_port, self.config.peer_connect_timeout,
            self.config.blob_download_timeout, connected_transport=transport, connection_id=connection_id
        )
        if not transport and peer not in self.ignored:
            # no transport came back: start ignoring the peer and forget its old connection
            self.ignored[peer] = self.loop.time()
            log.debug("drop peer %s:%i", peer.address, peer.tcp_port)
            self.failures[peer] = self.failures.get(peer, 0) + 1
            if peer in self.connections:
                del self.connections[peer]
        elif transport:
            log.debug("keep peer %s:%i", peer.address, peer.tcp_port)
            self.failures[peer] = 0
            self.connections[peer] = transport
        elapsed = self.loop.time() - start
        # score is the transfer rate; zero when nothing was received or no time elapsed
        self.scores[peer] = bytes_received / elapsed if bytes_received and elapsed else 0

    async def new_peer_or_finished(self):
        """Wait until any active request finishes, or for one second at most."""
        active_tasks = list(self.active_connections.values())
        if not active_tasks:
            # asyncio.wait() raises ValueError on an empty set, so just pause
            await asyncio.sleep(1)
            return
        # A timeout replaces the former extra ``asyncio.sleep(1)`` coroutine: passing bare
        # coroutines (and ``loop=``) to asyncio.wait() is no longer supported on current
        # Python versions, and the extra sleep outlived the wait whenever a request won.
        await asyncio.wait(active_tasks, timeout=1, return_when=asyncio.FIRST_COMPLETED)

    def cleanup_active(self):
        """Forget finished request tasks and expire bans that have run out."""
        finished = [peer for peer, task in self.active_connections.items() if task.done()]
        for peer in finished:
            del self.active_connections[peer]
        self.clearbanned()

    def clearbanned(self):
        """Keep only the ignored peers whose ban has not yet expired.

        A ban lasts ``failures ** BAN_FACTOR`` seconds, capped at 30 seconds.
        """
        now = self.loop.time()
        self.ignored = {
            peer: when for peer, when in self.ignored.items()
            if (now - when) < min(30.0, self.failures.get(peer, 0) ** self.BAN_FACTOR)
        }

    @cache_concurrent
    async def download_blob(self, blob_hash: str, length: typing.Optional[int] = None,
                            connection_id: int = 0) -> 'AbstractBlob':
        """Download the blob with hash ``blob_hash`` and return it once verified.

        Loops until the blob reports itself verified: drains ``peer_queue``,
        starts requests to the best scoring peers that are neither active nor
        ignored, then waits for a request to finish or one second to pass.
        The blob is closed on the way out whenever the download loop was entered.
        """
        blob = self.blob_manager.get_blob(blob_hash, length)
        if blob.get_is_verified():
            return blob
        try:
            while not blob.get_is_verified():
                batch: typing.Set['KademliaPeer'] = set()
                while not self.peer_queue.empty():
                    batch.update(self.peer_queue.get_nowait())
                if batch:
                    # put the peers back so they are seen again on the next pass
                    self.peer_queue.put_nowait(list(batch))
                log.debug(
                    "running, %d peers, %d ignored, %d active",
                    len(batch), len(self.ignored), len(self.active_connections)
                )
                for peer in sorted(batch, key=lambda p: self.scores.get(p, 0), reverse=True):
                    if not self.should_race_continue(blob):
                        break
                    if peer not in self.active_connections and peer not in self.ignored:
                        log.debug("request %s from %s:%i", blob_hash[:8], peer.address, peer.tcp_port)
                        t = self.loop.create_task(self.request_blob_from_peer(blob, peer, connection_id))
                        self.active_connections[peer] = t
                await self.new_peer_or_finished()
                self.cleanup_active()
            log.debug("downloaded %s", blob_hash[:8])
            return blob
        finally:
            blob.close()

    def close(self):
        """Reset scores and bans and close every pooled transport."""
        self.scores.clear()
        self.ignored.clear()
        for transport in self.connections.values():
            transport.close()
        # drop the closed transports so they are never passed to a later request
        self.connections.clear()
2019-03-28 14:51:55 -04:00
async def download_blob(loop, config: 'Config', blob_manager: 'BlobManager', node: 'Node',
                        blob_hash: str) -> 'AbstractBlob':
    """Download a single blob, using ``node`` to find peers for it.

    Puts ``blob_hash`` on a search queue, hands that queue to
    ``node.accumulate_peers`` to obtain a peer queue and its feeding task,
    and runs a one-off ``BlobDownloader`` on the peer queue.  The feeding
    task is cancelled if still running, and the downloader closed, whether
    the download succeeds or raises.

    :param loop: event loop passed on to the ``BlobDownloader``
    :param config: read for ``max_connections_per_download``
    :param blob_manager: passed on to the ``BlobDownloader``
    :param node: object providing ``accumulate_peers(search_queue)``
    :param blob_hash: hash of the blob to download
    :return: the blob returned by ``BlobDownloader.download_blob``
    """
    # asyncio.Queue binds to the running loop itself; its ``loop=`` argument was
    # deprecated in Python 3.8 and removed in 3.10.
    search_queue = asyncio.Queue(maxsize=config.max_connections_per_download)
    search_queue.put_nowait(blob_hash)
    peer_queue, accumulate_task = node.accumulate_peers(search_queue)
    downloader = BlobDownloader(loop, config, blob_manager, peer_queue)
    try:
        return await downloader.download_blob(blob_hash)
    finally:
        if accumulate_task and not accumulate_task.done():
            accumulate_task.cancel()
        downloader.close()