2019-01-22 18:52:32 +01:00
|
|
|
import asyncio
|
|
|
|
import typing
|
|
|
|
import logging
|
|
|
|
from lbrynet.utils import drain_tasks
|
2019-01-30 20:57:09 +01:00
|
|
|
from lbrynet.blob_exchange.client import request_blob
|
2019-01-22 18:52:32 +01:00
|
|
|
if typing.TYPE_CHECKING:
|
2019-01-30 20:57:09 +01:00
|
|
|
from lbrynet.conf import Config
|
2019-01-22 18:52:32 +01:00
|
|
|
from lbrynet.dht.node import Node
|
|
|
|
from lbrynet.dht.peer import KademliaPeer
|
2019-03-28 19:51:55 +01:00
|
|
|
from lbrynet.blob.blob_manager import BlobManager
|
2019-01-22 18:52:32 +01:00
|
|
|
from lbrynet.blob.blob_file import BlobFile
|
|
|
|
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2019-01-30 20:57:09 +01:00
|
|
|
class BlobDownloader:
    """Download blobs by racing requests against the peers found in a queue.

    Peers arrive on ``peer_queue`` in batches (iterables of peers). For each
    blob, requests are started against the best scored peers, up to
    ``config.max_connections_per_download`` at a time. Peers whose request
    ends without a usable transport are ignored for ``BAN_TIME`` seconds.
    """

    BAN_TIME = 10.0  # fixme: when connection manager gets implemented, move it out from here

    def __init__(self, loop: asyncio.BaseEventLoop, config: 'Config', blob_manager: 'BlobManager',
                 peer_queue: asyncio.Queue):
        """Store the collaborators and start with empty per-peer bookkeeping.

        :param loop: event loop used for task creation and for ``loop.time()`` timestamps
        :param config: read for ``max_connections_per_download``, ``peer_connect_timeout``
            and ``blob_download_timeout``
        :param blob_manager: provides ``get_blob(blob_hash, length)``
        :param peer_queue: queue of peer batches to try
        """
        self.loop = loop
        self.config = config
        self.blob_manager = blob_manager
        self.peer_queue = peer_queue
        self.active_connections: typing.Dict['KademliaPeer', asyncio.Task] = {}  # active request_blob calls
        # peer -> loop.time() at which the peer was banned
        self.ignored: typing.Dict['KademliaPeer', float] = {}
        # peer -> rough speed (bytes per second) of its last request, or a negative placeholder
        self.scores: typing.Dict['KademliaPeer', float] = {}
        # peer -> transport kept from a previous request, passed back to request_blob
        self.connections: typing.Dict['KademliaPeer', asyncio.Transport] = {}
        # loop.time() of the last fully received blob (initialised to creation time)
        self.time_since_last_blob = loop.time()

    def should_race_continue(self, blob: 'BlobFile'):
        """Return True when another request for ``blob`` may be started.

        False once the connection limit is reached, or when the blob is
        already verified or its file already exists.
        """
        if len(self.active_connections) >= self.config.max_connections_per_download:
            return False
        # if a peer won 3 or more blob races and is active as a downloader, stop the race so bandwidth improves
        # the safe net side is that any failure will reset the peer score, triggering the race back
        # TODO: this is a good idea for low bandwidth, but doesn't play nice on high bandwidth
        # for peer, task in self.active_connections.items():
        #     if self.scores.get(peer, 0) >= 0 and self.rounds_won.get(peer, 0) >= 3 and not task.done():
        #         return False
        return not (blob.get_is_verified() or blob.file_exists)

    async def request_blob_from_peer(self, blob: 'BlobFile', peer: 'KademliaPeer'):
        """Request ``blob`` from ``peer`` and update that peer's bookkeeping.

        Does nothing if the blob is already verified. Otherwise calls
        ``request_blob`` (reusing a cached transport if there is one), then:

        * records the time if the full blob length was received,
        * caches the returned transport, or bans the peer and drops its
          cached transport when no transport came back,
        * sets the peer score to the rough speed of this request.
        """
        if blob.get_is_verified():
            return
        self.scores[peer] = self.scores.get(peer, 0) - 1  # starts losing score, to account for cancelled ones
        transport = self.connections.get(peer)
        start = self.loop.time()
        bytes_received, transport = await request_blob(
            self.loop, blob, peer.address, peer.tcp_port, self.config.peer_connect_timeout,
            self.config.blob_download_timeout, connected_transport=transport
        )
        if bytes_received == blob.get_length():
            self.time_since_last_blob = self.loop.time()
        if transport:
            log.debug("keep peer %s:%i", peer.address, peer.tcp_port)
            self.connections[peer] = transport
        else:
            if peer not in self.ignored:
                self.ignored[peer] = self.loop.time()
                log.debug("drop peer %s:%i", peer.address, peer.tcp_port)
            # No transport came back, so never keep a stale cached one, even for
            # a peer that was already banned.
            self.connections.pop(peer, None)
        # Floor the elapsed time: a coarse monotonic clock can report 0 elapsed
        # seconds, which would otherwise raise ZeroDivisionError.
        elapsed = max(self.loop.time() - start, 1e-6)
        rough_speed = (bytes_received / elapsed) if bytes_received else 0
        self.scores[peer] = rough_speed

    async def new_peer_or_finished(self, blob: 'BlobFile'):
        """Wait until something worth reacting to happens.

        Returns as soon as one of these completes: a new peer batch shows up
        on the queue (or one second passes without one), the blob's
        ``verified`` event is set, or any active request task finishes.
        """
        async def get_and_re_add_peers():
            # Peek at the queue: take a batch and put it straight back so the
            # caller still finds it on its next pass.
            try:
                new_peers = await asyncio.wait_for(self.peer_queue.get(), timeout=1.0)
                self.peer_queue.put_nowait(new_peers)
            except asyncio.TimeoutError:
                pass

        tasks = [self.loop.create_task(get_and_re_add_peers()), self.loop.create_task(blob.verified.wait())]
        active_tasks = list(self.active_connections.values())
        try:
            # The ``loop`` argument of asyncio.wait was removed in Python 3.10;
            # the tasks are already bound to self.loop.
            await asyncio.wait(tasks + active_tasks, return_when=asyncio.FIRST_COMPLETED)
        finally:
            # Only the two helper tasks are handed to drain_tasks; the active
            # request tasks keep running.
            drain_tasks(tasks)

    def cleanup_active(self):
        """Forget every active request whose task is done."""
        to_remove = [peer for (peer, task) in self.active_connections.items() if task.done()]
        for peer in to_remove:
            del self.active_connections[peer]

    def clearbanned(self):
        """Un-ban peers ignored for longer than ``BAN_TIME`` and re-queue them.

        Does nothing when more than 60 seconds have passed since the last
        fully received blob, or when no peer is due to be forgiven.
        """
        now = self.loop.time()
        if now - self.time_since_last_blob > 60.0:
            return
        forgiven = [banned_peer for banned_peer, when in self.ignored.items() if now - when > self.BAN_TIME]
        if not forgiven:
            # Don't enqueue an empty batch.
            return
        self.peer_queue.put_nowait(forgiven)
        for banned_peer in forgiven:
            del self.ignored[banned_peer]

    async def download_blob(self, blob_hash: str, length: typing.Optional[int] = None) -> 'BlobFile':
        """Download the blob with hash ``blob_hash`` and return it once verified.

        Loops until the blob reports itself verified: drains the peer queue
        into a batch, starts requests against the best scored peers, waits for
        progress, then re-queues the batch minus ignored peers (or tries to
        un-ban peers when the batch was empty). Any request still active on
        exit, including on error or cancellation, is cancelled.

        :param blob_hash: hash identifying the blob
        :param length: optional blob length, passed to ``blob_manager.get_blob``
        :return: the blob object obtained from the blob manager
        """
        blob = self.blob_manager.get_blob(blob_hash, length)
        if blob.get_is_verified():
            return blob
        try:
            while not blob.get_is_verified():
                batch: typing.List['KademliaPeer'] = []
                while not self.peer_queue.empty():
                    batch.extend(self.peer_queue.get_nowait())
                # Highest score first.
                batch.sort(key=lambda peer: self.scores.get(peer, 0), reverse=True)
                log.debug(
                    "running, %d peers, %d ignored, %d active",
                    len(batch), len(self.ignored), len(self.active_connections)
                )
                for peer in batch:
                    if not self.should_race_continue(blob):
                        break
                    if peer not in self.active_connections and peer not in self.ignored:
                        log.debug("request %s from %s:%i", blob_hash[:8], peer.address, peer.tcp_port)
                        t = self.loop.create_task(self.request_blob_from_peer(blob, peer))
                        self.active_connections[peer] = t
                await self.new_peer_or_finished(blob)
                self.cleanup_active()
                if batch:
                    self.peer_queue.put_nowait(set(batch).difference(self.ignored))
                else:
                    self.clearbanned()
            blob.close()
            log.debug("downloaded %s", blob_hash[:8])
            return blob
        finally:
            while self.active_connections:
                self.active_connections.popitem()[1].cancel()

    def close(self):
        """Reset scores and bans, and close and forget every cached transport."""
        self.scores.clear()
        self.ignored.clear()
        for transport in self.connections.values():
            transport.close()
        # Drop the closed transports so they are never handed to request_blob again.
        self.connections.clear()
|
2019-02-08 06:27:58 +01:00
|
|
|
|
2019-01-30 20:57:09 +01:00
|
|
|
|
2019-03-28 19:51:55 +01:00
|
|
|
async def download_blob(loop, config: 'Config', blob_manager: 'BlobManager', node: 'Node',
                        blob_hash: str) -> 'BlobFile':
    """Download a single blob, using ``node`` to find peers for it.

    Seeds a search queue with ``blob_hash``, hands it to
    ``node.accumulate_peers`` to obtain a peer queue and the accumulating
    task, and runs a one-off ``BlobDownloader`` on that peer queue. The
    accumulating task is cancelled (if still running) and the downloader is
    closed whether the download succeeds, fails or is cancelled.

    :param loop: event loop passed on to the downloader
    :param config: read for ``max_connections_per_download``; also passed to the downloader
    :param blob_manager: passed to the downloader
    :param node: provides ``accumulate_peers(search_queue)``
    :param blob_hash: hash of the blob to download
    :return: whatever ``BlobDownloader.download_blob`` returns for ``blob_hash``
    """
    # The ``loop`` argument of asyncio.Queue was removed in Python 3.10.
    search_queue = asyncio.Queue(maxsize=config.max_connections_per_download)
    search_queue.put_nowait(blob_hash)
    peer_queue, accumulate_task = node.accumulate_peers(search_queue)
    downloader = BlobDownloader(loop, config, blob_manager, peer_queue)
    try:
        return await downloader.download_blob(blob_hash)
    finally:
        if accumulate_task and not accumulate_task.done():
            accumulate_task.cancel()
        downloader.close()
|