Merge pull request #1825 from lbryio/fix-hanging-download

Fix hanging download
This commit is contained in:
Jack Robison 2019-01-28 23:44:42 -05:00 committed by GitHub
commit adf378b6e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 103 additions and 61 deletions

View file

@ -19,15 +19,14 @@ class BlobExchangeClientProtocol(asyncio.Protocol):
self.writer: 'HashBlobWriter' = None
self.blob: 'BlobFile' = None
self.download_running = asyncio.Event(loop=self.loop)
self._blob_bytes_received = 0
self._response_fut: asyncio.Future = None
self._request_lock = asyncio.Lock(loop=self.loop)
def handle_data_received(self, data: bytes):
if self.transport.is_closing():
if self._response_fut and not (self._response_fut.done() or self._response_fut.cancelled()):
def data_received(self, data: bytes):
if self.transport.is_closing(): # TODO: is this needed?
if self._response_fut and not self._response_fut.done():
self._response_fut.cancel()
return
@ -36,60 +35,72 @@ class BlobExchangeClientProtocol(asyncio.Protocol):
if response.responses and self.blob:
blob_response = response.get_blob_response()
if blob_response and not blob_response.error and blob_response.blob_hash == self.blob.blob_hash:
# set the expected length for the incoming blob if we didn't know it
self.blob.set_length(blob_response.length)
elif blob_response and not blob_response.error and self.blob.blob_hash != blob_response.blob_hash:
# the server started sending a blob we didn't request
log.warning("mismatch with self.blob %s", self.blob.blob_hash)
return
if response.responses:
log.debug("got response from %s:%i <- %s", self.peer_address, self.peer_port, response.to_dict())
# fire the Future with the response to our request
self._response_fut.set_result(response)
if response.blob_data and self.writer and not self.writer.closed():
log.debug("got %i blob bytes from %s:%i", len(response.blob_data), self.peer_address, self.peer_port)
# write blob bytes if we're writing a blob and have blob bytes to write
self._blob_bytes_received += len(response.blob_data)
try:
self.writer.write(response.blob_data)
return
except IOError as err:
log.error("error downloading blob: %s", err)
def data_received(self, data):
try:
return self.handle_data_received(data)
except (asyncio.CancelledError, asyncio.TimeoutError) as err:
log.error("error downloading blob from %s:%i: %s", self.peer_address, self.peer_port, err)
if self._response_fut and not self._response_fut.done():
self._response_fut.set_exception(err)
except (asyncio.CancelledError, asyncio.TimeoutError) as err: # TODO: is this needed?
log.error("%s downloading blob from %s:%i", str(err), self.peer_address, self.peer_port)
if self._response_fut and not self._response_fut.done():
self._response_fut.set_exception(err)
async def _download_blob(self) -> typing.Tuple[bool, bool]:
"""
:return: download success (bool), keep connection (bool)
"""
request = BlobRequest.make_request_for_blob_hash(self.blob.blob_hash)
try:
self.transport.write(request.serialize())
msg = request.serialize()
log.debug("send request to %s:%i -> %s", self.peer_address, self.peer_port, msg.decode())
self.transport.write(msg)
response: BlobResponse = await asyncio.wait_for(self._response_fut, self.peer_timeout, loop=self.loop)
availability_response = response.get_availability_response()
price_response = response.get_price_response()
blob_response = response.get_blob_response()
if (not blob_response or blob_response.error) and\
(not availability_response or not availability_response.available_blobs):
log.warning("blob not in availability response")
log.warning("blob not in availability response from %s:%i", self.peer_address, self.peer_port)
return False, True
elif availability_response.available_blobs and \
availability_response.available_blobs != [self.blob.blob_hash]:
log.warning("blob availability response doesn't match our request")
log.warning("blob availability response doesn't match our request from %s:%i",
self.peer_address, self.peer_port)
return False, False
if not price_response or price_response.blob_data_payment_rate != 'RATE_ACCEPTED':
log.warning("data rate rejected")
log.warning("data rate rejected by %s:%i", self.peer_address, self.peer_port)
return False, False
if not blob_response or blob_response.error:
log.warning("blob cant be downloaded from this peer")
log.warning("blob cant be downloaded from %s:%i", self.peer_address, self.peer_port)
return False, True
if not blob_response.error and blob_response.blob_hash != self.blob.blob_hash:
log.warning("incoming blob hash mismatch")
log.warning("incoming blob hash mismatch from %s:%i", self.peer_address, self.peer_port)
return False, False
if self.blob.length is not None and self.blob.length != blob_response.length:
log.warning("incoming blob unexpected length")
log.warning("incoming blob unexpected length from %s:%i", self.peer_address, self.peer_port)
return False, False
msg = f"downloading {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}," \
f" timeout in {self.peer_timeout}"
log.info(msg)
log.debug(msg)
msg = f"downloaded {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}"
await asyncio.wait_for(self.writer.finished, self.peer_timeout, loop=self.loop)
await self.blob.finished_writing.wait()
msg = f"downloaded {self.blob.blob_hash[:8]} from {self.peer_address}"
log.info(msg)
return True, True
except asyncio.CancelledError:
@ -106,7 +117,6 @@ class BlobExchangeClientProtocol(asyncio.Protocol):
self.writer.close_handle()
if self.blob:
await self.blob.close()
self.download_running.clear()
self._response_fut = None
self.writer = None
self.blob = None
@ -119,15 +129,11 @@ class BlobExchangeClientProtocol(asyncio.Protocol):
return False, True
async with self._request_lock:
try:
if self.download_running.is_set():
log.info("wait for download already running")
await self.download_running.wait()
self.blob, self.writer, self._blob_bytes_received = blob, blob.open_for_writing(), 0
self.download_running.set()
self._response_fut = asyncio.Future(loop=self.loop)
return await self._download_blob()
except OSError:
log.error("race happened")
log.error("race happened downloading from %s:%i", self.peer_address, self.peer_port)
# i'm not sure how to fix this race condition - jack
return False, True
except asyncio.TimeoutError:
@ -142,10 +148,11 @@ class BlobExchangeClientProtocol(asyncio.Protocol):
def connection_made(self, transport: asyncio.Transport):
self.transport = transport
self.peer_address, self.peer_port = self.transport.get_extra_info('peername')
# log.info("connection made to %s: %s", self.peer_address, transport)
log.debug("connection made to %s:%i", self.peer_address, self.peer_port)
def connection_lost(self, reason):
# log.info("connection lost to %s (reason: %s)", self.peer_address, reason)
log.debug("connection lost to %s:%i (reason: %s, %s)", self.peer_address, self.peer_port, str(reason),
str(type(reason)))
self.transport = None
self.loop.create_task(self.close())
@ -157,9 +164,6 @@ async def request_blob(loop: asyncio.BaseEventLoop, blob: 'BlobFile', protocol:
"""
if blob.get_is_verified():
return False, True
if blob.get_is_verified():
log.info("already verified")
return False, True
try:
await asyncio.wait_for(loop.create_connection(lambda: protocol, address, tcp_port),
peer_connect_timeout, loop=loop)

View file

@ -615,7 +615,9 @@ class UPnPComponent(Component):
else:
log.error("failed to setup upnp")
if self.component_manager.analytics_manager:
self.component_manager.analytics_manager.send_upnp_setup_success_fail(success, await self.get_status())
await self.component_manager.analytics_manager.send_upnp_setup_success_fail(
success, await self.get_status()
)
self._maintain_redirects_task = asyncio.create_task(self._repeatedly_maintain_redirects(now=False))
async def stop(self):

View file

@ -51,7 +51,7 @@ class StreamDownloader(StreamAssembler): # TODO: reduce duplication, refactor t
async def _request_blob(self, peer: 'KademliaPeer'):
if self.current_blob.get_is_verified():
log.info("already verified")
log.debug("already verified")
return
if peer not in self.active_connections:
log.warning("not active, adding: %s", str(peer))
@ -61,12 +61,12 @@ class StreamDownloader(StreamAssembler): # TODO: reduce duplication, refactor t
peer.address, peer.tcp_port, self.peer_connect_timeout)
await protocol.close()
if not keep_connection:
log.info("drop peer %s:%i", peer.address, peer.tcp_port)
log.debug("drop peer %s:%i", peer.address, peer.tcp_port)
if peer in self.active_connections:
async with self._lock:
del self.active_connections[peer]
return
log.info("keep peer %s:%i", peer.address, peer.tcp_port)
log.debug("keep peer %s:%i", peer.address, peer.tcp_port)
def _update_requests(self):
self.new_peer_event.clear()
@ -77,7 +77,7 @@ class StreamDownloader(StreamAssembler): # TODO: reduce duplication, refactor t
if peer not in self.requested_from[self.current_blob.blob_hash] and peer not in to_add:
to_add.append(peer)
if to_add or self.running_download_requests:
log.info("adding download probes for %i peers to %i already active",
log.debug("adding download probes for %i peers to %i already active",
min(len(to_add), 8 - len(self.running_download_requests)),
len(self.running_download_requests))
else:
@ -159,10 +159,8 @@ class StreamDownloader(StreamAssembler): # TODO: reduce duplication, refactor t
if self.fixed_peers:
def check_added_peers():
if not added_peers.is_set():
self._add_peer_protocols(self.fixed_peers)
log.info("no dht peers for download yet, adding fixed peer")
added_peers.set()
log.info("adding fixed peer %s:%i", self.fixed_peers[0].address, self.fixed_peers[0].tcp_port)
add_fixed_peers_timer = self.loop.call_later(2, check_added_peers)
@ -178,9 +176,7 @@ class StreamDownloader(StreamAssembler): # TODO: reduce duplication, refactor t
try:
async with node.stream_peer_search_junction(blob_queue) as search_junction:
async for peers in search_junction:
if not isinstance(peers, list): # TODO: what's up with this?
log.error("not a list: %s %s", peers, str(type(peers)))
else:
if peers:
self._add_peer_protocols(peers)
if not added_peers.is_set():
added_peers.set()

View file

@ -30,7 +30,6 @@ class ManagedStream:
self.stream_hash = descriptor.stream_hash
self.stream_claim_info = claim
self._status = status
self._store_after_finished: asyncio.Task = None
self.fully_reflected = asyncio.Event(loop=self.loop)
@property

View file

@ -141,11 +141,7 @@ class StreamManager:
blob_hashes = [stream.sd_hash]
for blob_info in stream.descriptor.blobs[:-1]:
blob_hashes.append(blob_info.blob_hash)
for blob_hash in blob_hashes:
blob = self.blob_manager.get_blob(blob_hash)
if blob.get_is_verified():
await blob.delete()
await self.blob_manager.delete_blobs(blob_hashes)
if delete_file:
path = os.path.join(stream.download_directory, stream.file_name)
if os.path.isfile(path):

View file

@ -0,0 +1,43 @@
import sys
import os
import asyncio
import socket
from lbrynet.conf import Config
from lbrynet.extras.daemon.storage import SQLiteStorage
from lbrynet.blob.blob_manager import BlobFileManager
from lbrynet.blob_exchange.client import BlobExchangeClientProtocol, request_blob
import logging
# Root logger for the lbrynet package: echo everything (DEBUG and up) to stderr
# so the blob-download diagnostic below is fully traced.
log = logging.getLogger("lbrynet")
_stream_handler = logging.StreamHandler()
log.addHandler(_stream_handler)
log.setLevel(logging.DEBUG)
async def main(blob_hash: str, url: str):
    """Download one blob from the peer at *url* ("host:port") and print the result.

    Standalone diagnostic: resolves the host, opens the local blob storage and
    blob manager, requests *blob_hash* over the blob-exchange client protocol,
    prints the (success, keep-connection) outcome, then deletes the blob again
    so the script can be re-run against the same hash.
    """
    conf = Config()
    loop = asyncio.get_running_loop()
    host_url, port = url.split(":")
    # Resolve the hostname to an address; 'https' only satisfies getaddrinfo's
    # service argument — the actual TCP port used below comes from *url*.
    host_info = await loop.getaddrinfo(
        host_url, 'https',
        proto=socket.IPPROTO_TCP,
    )
    host = host_info[0][4][0]  # first result -> sockaddr tuple -> IP address
    storage = SQLiteStorage(conf, os.path.join(conf.data_dir, "lbrynet.sqlite"))
    blob_manager = BlobFileManager(loop, os.path.join(conf.data_dir, "blobfiles"), storage)
    await storage.open()
    await blob_manager.setup()
    blob = blob_manager.get_blob(blob_hash)
    protocol = BlobExchangeClientProtocol(loop, conf.blob_download_timeout)
    # request_blob returns (download succeeded, keep connection to this peer)
    success, keep = await request_blob(loop, blob, protocol, host, int(port), conf.peer_connect_timeout)
    print(success, keep)
    if blob.get_is_verified():
        # Clean up: drop the just-downloaded blob so repeated runs re-download it.
        # NOTE(review): storage is never closed before exit — presumably fine for
        # a one-shot script, but confirm SQLiteStorage needs no explicit close.
        await blob_manager.delete_blobs([blob.blob_hash])
if __name__ == "__main__":  # usage: python download_blob_from_peer.py <blob_hash> [host url:port]
    # Second CLI argument overrides the default reflector server ("host:port").
    url = sys.argv[2] if len(sys.argv) > 2 else 'reflector.lbry.io:5567'
    asyncio.run(main(sys.argv[1], url))

View file

@ -1,3 +1,4 @@
import unittest
import asyncio
from torba.testcase import AsyncioTestCase
from lbrynet.dht.protocol.async_generator_junction import AsyncGeneratorJunction
@ -41,16 +42,17 @@ class TestAsyncGeneratorJunction(AsyncioTestCase):
async def test_yield_order(self):
expected_order = [1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 2, 2]
fast_gen = MockAsyncGen(self.loop, 1, 0.1)
slow_gen = MockAsyncGen(self.loop, 2, 0.2)
fast_gen = MockAsyncGen(self.loop, 1, 0.2)
slow_gen = MockAsyncGen(self.loop, 2, 0.4)
await self._test_junction(expected_order, fast_gen, slow_gen)
self.assertEqual(fast_gen.called_close, True)
self.assertEqual(slow_gen.called_close, True)
@unittest.SkipTest
async def test_one_stopped_first(self):
expected_order = [1, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]
fast_gen = MockAsyncGen(self.loop, 1, 0.101, 5)
slow_gen = MockAsyncGen(self.loop, 2, 0.201)
fast_gen = MockAsyncGen(self.loop, 1, 0.2, 5)
slow_gen = MockAsyncGen(self.loop, 2, 0.4)
await self._test_junction(expected_order, fast_gen, slow_gen)
self.assertEqual(fast_gen.called_close, True)
self.assertEqual(slow_gen.called_close, True)
@ -62,16 +64,16 @@ class TestAsyncGeneratorJunction(AsyncioTestCase):
for i in range(10):
if i == 5:
return
await asyncio.sleep(0.101)
await asyncio.sleep(0.2)
yield 1
slow_gen = MockAsyncGen(self.loop, 2, 0.201)
slow_gen = MockAsyncGen(self.loop, 2, 0.4)
await self._test_junction(expected_order, fast_gen(), slow_gen)
self.assertEqual(slow_gen.called_close, True)
async def test_stop_when_encapsulating_task_cancelled(self):
fast_gen = MockAsyncGen(self.loop, 1, 0.1)
slow_gen = MockAsyncGen(self.loop, 2, 0.2)
fast_gen = MockAsyncGen(self.loop, 1, 0.2)
slow_gen = MockAsyncGen(self.loop, 2, 0.4)
async def _task():
async with AsyncGeneratorJunction(self.loop) as junction:
@ -81,7 +83,7 @@ class TestAsyncGeneratorJunction(AsyncioTestCase):
pass
task = self.loop.create_task(_task())
self.loop.call_later(0.5, task.cancel)
self.loop.call_later(1.0, task.cancel)
with self.assertRaises(asyncio.CancelledError):
await task
self.assertEqual(fast_gen.called_close, True)