2019-01-22 12:52:32 -05:00
|
|
|
import asyncio
|
|
|
|
import typing
|
|
|
|
import logging
|
2019-06-20 20:55:47 -04:00
|
|
|
from lbry.utils import cache_concurrent
|
|
|
|
from lbry.blob_exchange.client import request_blob
|
2021-07-20 16:50:24 +03:00
|
|
|
from lbry.dht.node import get_kademlia_peers_from_hosts
|
2019-01-22 12:52:32 -05:00
|
|
|
if typing.TYPE_CHECKING:
|
2019-06-20 20:55:47 -04:00
|
|
|
from lbry.conf import Config
|
|
|
|
from lbry.dht.node import Node
|
|
|
|
from lbry.dht.peer import KademliaPeer
|
|
|
|
from lbry.blob.blob_manager import BlobManager
|
|
|
|
from lbry.blob.blob_file import AbstractBlob
|
2019-08-16 15:52:02 -04:00
|
|
|
from lbry.blob_exchange.client import BlobExchangeClientProtocol
|
2019-01-22 12:52:32 -05:00
|
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2019-01-30 14:57:09 -05:00
|
|
|
class BlobDownloader:
    """
    Download blobs by racing `request_blob` calls against peers read from a queue.

    Peers are taken in batches from `peer_queue`, ordered by their recorded
    score, and each one is asked for the blob in its own task. Peers for which
    a request yields no protocol are put on a temporary ignore list; peers that
    yield a protocol have it cached in `connections` for later requests.
    """

    BAN_FACTOR = 2.0  # fixme: when connection manager gets implemented, move it out from here

    def __init__(self, loop: asyncio.AbstractEventLoop, config: 'Config', blob_manager: 'BlobManager',
                 peer_queue: asyncio.Queue):
        """
        :param loop: event loop used for timing, task creation and callbacks
        :param config: configuration; this class reads `max_connections_per_download`,
            `peer_connect_timeout` and `blob_download_timeout` from it
        :param blob_manager: provides `get_blob()` and the `connection_manager`
        :param peer_queue: queue whose items are iterables of peers to try
        """
        self.loop = loop
        self.config = config
        self.blob_manager = blob_manager
        self.peer_queue = peer_queue
        self.active_connections: typing.Dict['KademliaPeer', asyncio.Task] = {}  # active request_blob calls
        # peer -> loop time at which the peer was put on the ignore list
        self.ignored: typing.Dict['KademliaPeer', float] = {}
        # peer -> bytes per second of the last request (1 when nothing was measured)
        self.scores: typing.Dict['KademliaPeer', float] = {}
        # peer -> number of consecutive requests that produced no protocol
        self.failures: typing.Dict['KademliaPeer', int] = {}
        # peers for which a request returned neither bytes nor a protocol
        self.connection_failures: typing.Set['KademliaPeer'] = set()
        # peer -> protocol returned by the last successful request, passed to the next one
        self.connections: typing.Dict['KademliaPeer', 'BlobExchangeClientProtocol'] = {}
        # The `loop` keyword of asyncio primitives was removed in Python 3.10, so it
        # is not passed. Only set()/clear()/is_set() are used by this class.
        self.is_running = asyncio.Event()

    def should_race_continue(self, blob: 'AbstractBlob'):
        """
        Tell whether another peer request may be started for `blob`.

        Returns False once the number of active requests reaches the limit
        (`max_connections_per_download`, multiplied by 10 while no connection is
        cached), or when the blob is already verified or is not writeable.
        """
        max_probes = self.config.max_connections_per_download * (1 if self.connections else 10)
        if len(self.active_connections) >= max_probes:
            return False
        return not (blob.get_is_verified() or not blob.is_writeable())

    async def request_blob_from_peer(self, blob: 'AbstractBlob', peer: 'KademliaPeer', connection_id: int = 0,
                                     just_probe: bool = False):
        """
        Request `blob` from `peer` and update the per-peer bookkeeping.

        :param blob: blob to download; nothing is done if it is already verified
        :param peer: peer to contact at `peer.address`:`peer.tcp_port`
        :param connection_id: forwarded unchanged to `request_blob`
        :param just_probe: when True, `None` is passed to `request_blob` instead of
            the blob (presumably only checks the connection - confirm against `request_blob`)
        """
        if blob.get_is_verified():
            return
        start = self.loop.time()
        bytes_received, protocol = await request_blob(
            self.loop, blob if not just_probe else None, peer.address, peer.tcp_port, self.config.peer_connect_timeout,
            self.config.blob_download_timeout, connected_protocol=self.connections.get(peer),
            connection_id=connection_id, connection_manager=self.blob_manager.connection_manager
        )
        if not bytes_received and not protocol:
            # set.add() is idempotent, no membership test is needed first
            self.connection_failures.add(peer)
        if not protocol and peer not in self.ignored:
            # no usable protocol came back: ignore the peer from now and count the failure
            self.ignored[peer] = self.loop.time()
            log.debug("drop peer %s:%i", peer.address, peer.tcp_port)
            self.failures[peer] = self.failures.get(peer, 0) + 1
            self.connections.pop(peer, None)
        elif protocol:
            # keep the protocol so the next request to this peer can reuse it
            log.debug("keep peer %s:%i", peer.address, peer.tcp_port)
            self.failures[peer] = 0
            self.connections[peer] = protocol
        elapsed = self.loop.time() - start
        # score is the transfer rate; falls back to 1 when no bytes arrived or no time passed
        self.scores[peer] = bytes_received / elapsed if bytes_received and elapsed else 1

    async def new_peer_or_finished(self):
        """
        Wait until any active request finishes, or for one second, whichever is first.

        The one second timer is an explicit task: `asyncio.wait` no longer accepts
        bare coroutines (Python 3.11) nor a `loop` argument (Python 3.10). The timer
        is cancelled afterwards so it does not outlive this call.
        """
        timer = self.loop.create_task(asyncio.sleep(1))
        try:
            await asyncio.wait(
                [*self.active_connections.values(), timer], return_when=asyncio.FIRST_COMPLETED
            )
        finally:
            if not timer.done():
                timer.cancel()

    def cleanup_active(self):
        """
        Forget finished request tasks.

        When there are neither active requests nor cached connections, expired
        entries of the ignore list are dropped first (see `clearbanned`).
        """
        if not self.active_connections and not self.connections:
            self.clearbanned()
        finished = [peer for (peer, task) in self.active_connections.items() if task.done()]
        for peer in finished:
            del self.active_connections[peer]

    def clearbanned(self):
        """
        Drop ignore list entries that have expired.

        An entry is kept while its age in seconds is below
        `failures ** BAN_FACTOR`, capped at 30 seconds.
        """
        now = self.loop.time()
        self.ignored = {
            peer: when for (peer, when) in self.ignored.items()
            if (now - when) < min(30.0, (self.failures.get(peer, 0) ** self.BAN_FACTOR))
        }

    @cache_concurrent
    async def download_blob(self, blob_hash: str, length: typing.Optional[int] = None,
                            connection_id: int = 0) -> 'AbstractBlob':
        """
        Download the blob with the given hash, racing requests against known peers.

        :param blob_hash: hash of the blob, passed to `blob_manager.get_blob`
        :param length: optional blob length, passed to `blob_manager.get_blob`
        :param connection_id: forwarded to every peer request
        :return: the blob object; it is returned as soon as it is verified, or when
            `is_running` gets cleared (see `close`), in which case it may be unverified
        """
        blob = self.blob_manager.get_blob(blob_hash, length)
        if blob.get_is_verified():
            return blob
        self.is_running.set()
        tried_for_this_blob: typing.Set['KademliaPeer'] = set()
        try:
            while not blob.get_is_verified() and self.is_running.is_set():
                # start from the peers we already hold a connection to, then drain the queue
                batch: typing.Set['KademliaPeer'] = set(self.connections.keys())
                while not self.peer_queue.empty():
                    batch.update(self.peer_queue.get_nowait())
                log.debug(
                    "%s running, %d peers, %d ignored, %d active, %s connections", blob_hash[:6],
                    len(batch), len(self.ignored), len(self.active_connections), len(self.connections)
                )
                re_add: typing.Set['KademliaPeer'] = set()
                # best scored peers first
                for peer in sorted(batch, key=lambda peer: self.scores.get(peer, 0), reverse=True):
                    if peer in self.ignored:
                        continue
                    if peer in tried_for_this_blob:
                        continue
                    if peer in self.active_connections:
                        re_add.add(peer)
                        continue
                    if not self.should_race_continue(blob):
                        break
                    log.debug("request %s from %s:%i", blob_hash[:8], peer.address, peer.tcp_port)
                    t = self.loop.create_task(self.request_blob_from_peer(blob, peer, connection_id))
                    self.active_connections[peer] = t
                    tried_for_this_blob.add(peer)
                if not re_add:
                    # no batch peer is still busy with a request: hand the batch back to the queue
                    self.peer_queue.put_nowait(list(batch))
                await self.new_peer_or_finished()
                self.cleanup_active()
            log.debug("downloaded %s", blob_hash[:8])
            return blob
        finally:
            blob.close()
            if self.loop.is_running():
                # deferred so that it runs after the current callback completes
                self.loop.call_soon(self.cleanup_active)

    def close(self):
        """
        Reset peer bookkeeping, clear `is_running` and close every cached protocol.

        Clearing `is_running` makes a running `download_blob` loop exit at its
        next iteration.
        """
        self.connection_failures.clear()
        self.scores.clear()
        self.ignored.clear()
        self.is_running.clear()
        for protocol in self.connections.values():
            protocol.close()
|
2019-02-08 02:27:58 -03:00
|
|
|
|
2019-01-30 14:57:09 -05:00
|
|
|
|
2021-07-20 16:50:24 +03:00
|
|
|
async def download_blob(loop, config: 'Config', blob_manager: 'BlobManager', dht_node: 'Node',
                        blob_hash: str) -> 'AbstractBlob':
    """
    Download a single blob, using peers accumulated by the DHT node.

    The blob hash is put on a search queue given to `dht_node.accumulate_peers`,
    and the resulting peer queue feeds a one-off `BlobDownloader`. When
    `config.fixed_peers` is set, those peers are resolved and added to the peer
    queue after `config.fixed_peer_delay` seconds.

    :param loop: event loop used for the delayed callback and by the downloader
    :param config: configuration; reads `max_connections_per_download`,
        `fixed_peers` and `fixed_peer_delay`
    :param blob_manager: passed to the `BlobDownloader`
    :param dht_node: node providing `accumulate_peers(search_queue)`
    :param blob_hash: hash of the blob to download
    :return: the blob returned by `BlobDownloader.download_blob`
    """
    # The `loop` keyword of asyncio.Queue was removed in Python 3.10, so it is not passed.
    search_queue = asyncio.Queue(maxsize=config.max_connections_per_download)
    search_queue.put_nowait(blob_hash)
    peer_queue, accumulate_task = dht_node.accumulate_peers(search_queue)
    downloader = BlobDownloader(loop, config, blob_manager, peer_queue)
    fixed_peers_timer = None
    try:
        # Resolved inside the try block so a failure here still stops the accumulation task.
        fixed_peers = None if not config.fixed_peers else await get_kademlia_peers_from_hosts(config.fixed_peers)
        if fixed_peers:
            fixed_peers_timer = loop.call_later(config.fixed_peer_delay, peer_queue.put_nowait, fixed_peers)
        return await downloader.download_blob(blob_hash)
    finally:
        if fixed_peers_timer is not None:
            # nothing reads the peer queue once we are done; cancelling after it fired is a no-op
            fixed_peers_timer.cancel()
        if accumulate_task and not accumulate_task.done():
            accumulate_task.cancel()
        downloader.close()
|