2019-01-22 18:52:32 +01:00
|
|
|
import asyncio
|
2019-08-16 21:52:02 +02:00
|
|
|
import time
|
2019-01-22 18:52:32 +01:00
|
|
|
import logging
|
|
|
|
import typing
|
2019-01-30 02:47:02 +01:00
|
|
|
import binascii
|
2019-08-16 21:52:02 +02:00
|
|
|
from typing import Optional
|
2019-06-21 02:55:47 +02:00
|
|
|
from lbry.error import InvalidBlobHashError, InvalidDataError
|
|
|
|
from lbry.blob_exchange.serialization import BlobResponse, BlobRequest
|
|
|
|
from lbry.utils import cache_concurrent
|
2019-01-22 18:52:32 +01:00
|
|
|
if typing.TYPE_CHECKING:
|
2019-06-21 02:55:47 +02:00
|
|
|
from lbry.blob.blob_file import AbstractBlob
|
|
|
|
from lbry.blob.writer import HashBlobWriter
|
|
|
|
from lbry.connection_manager import ConnectionManager
|
2019-01-22 18:52:32 +01:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class BlobExchangeClientProtocol(asyncio.Protocol):
    """Client side of the blob exchange protocol.

    Connects to a peer over TCP, sends a serialized ``BlobRequest`` for a
    single blob hash, and streams the returned blob bytes into a
    ``HashBlobWriter``. One in-flight request at a time: state for the current
    blob (``self.blob``, ``self.writer``, ``self._response_fut``) is set up in
    :meth:`download_blob` and torn down in :meth:`close`.
    """

    def __init__(self, loop: asyncio.AbstractEventLoop, peer_timeout: typing.Optional[float] = 10,
                 connection_manager: typing.Optional['ConnectionManager'] = None):
        """
        :param loop: event loop the protocol runs on
        :param peer_timeout: seconds to wait for the peer's response and for the writer to finish
        :param connection_manager: optional tracker notified of bytes sent/received and connection events
        """
        self.loop = loop
        # peer address/port are filled in by connection_made (or lazily from the transport)
        self.peer_port: typing.Optional[int] = None
        self.peer_address: typing.Optional[str] = None
        self.transport: typing.Optional[asyncio.Transport] = None
        self.peer_timeout = peer_timeout
        self.connection_manager = connection_manager
        # writer/blob for the request currently in flight; None when idle
        self.writer: typing.Optional['HashBlobWriter'] = None
        self.blob: typing.Optional['AbstractBlob'] = None
        # count of blob payload bytes written so far for the current request
        self._blob_bytes_received = 0
        # resolved with the peer's BlobResponse (or an exception) by data_received/_write
        self._response_fut: typing.Optional[asyncio.Future] = None
        # buffer for partial response bytes until a full BlobResponse can be deserialized
        self.buf = b''
        # this is here to handle the race when the downloader is closed right as response_fut gets a result
        self.closed = asyncio.Event()

    def data_received(self, data: bytes):
        """Route incoming bytes: response headers resolve ``_response_fut``, blob payload goes to the writer."""
        if self.connection_manager:
            if not self.peer_address:
                addr_info = self.transport.get_extra_info('peername')
                self.peer_address, self.peer_port = addr_info
            # assert self.peer_address is not None
            self.connection_manager.received_data(f"{self.peer_address}:{self.peer_port}", len(data))
        # bytes arriving on a closing transport are dropped; cancel any pending request
        if not self.transport or self.transport.is_closing():
            log.warning("transport closing, but got more bytes from %s:%i\n%s", self.peer_address, self.peer_port,
                        binascii.hexlify(data))
            if self._response_fut and not self._response_fut.done():
                self._response_fut.cancel()
            return
        # data before any request was made: probable race on keep alive, drop the connection
        if not self._response_fut:
            log.warning("Protocol received data before expected, probable race on keep alive. Closing transport.")
            return self.close()
        # once blob bytes have started flowing, everything further is payload for the writer
        if self._blob_bytes_received and not self.writer.closed():
            return self._write(data)

        response = BlobResponse.deserialize(self.buf + data)
        if not response.responses and not self._response_fut.done():
            # incomplete response header: keep buffering until it deserializes
            self.buf += data
            return
        else:
            self.buf = b''

        if response.responses and self.blob:
            blob_response = response.get_blob_response()
            if blob_response and not blob_response.error and blob_response.blob_hash == self.blob.blob_hash:
                # set the expected length for the incoming blob if we didn't know it
                self.blob.set_length(blob_response.length)
            elif blob_response and not blob_response.error and self.blob.blob_hash != blob_response.blob_hash:
                # the server started sending a blob we didn't request
                log.warning("%s started sending blob we didn't request %s instead of %s", self.peer_address,
                            blob_response.blob_hash, self.blob.blob_hash)
                return
        if response.responses:
            log.debug("got response from %s:%i <- %s", self.peer_address, self.peer_port, response.to_dict())
            # fire the Future with the response to our request
            self._response_fut.set_result(response)
        if response.blob_data and self.writer and not self.writer.closed():
            # log.debug("got %i blob bytes from %s:%i", len(response.blob_data), self.peer_address, self.peer_port)
            # write blob bytes if we're writing a blob and have blob bytes to write
            self._write(response.blob_data)

    def _write(self, data: bytes):
        """Write blob payload bytes to the current writer, truncating anything past the expected length.

        Errors from the writer are propagated to ``_response_fut`` (if still pending)
        rather than raised, so the awaiting downloader sees them.
        """
        if len(data) > (self.blob.get_length() - self._blob_bytes_received):
            data = data[:(self.blob.get_length() - self._blob_bytes_received)]
            log.warning("got more than asked from %s:%d, probable sendfile bug", self.peer_address, self.peer_port)
        self._blob_bytes_received += len(data)
        try:
            self.writer.write(data)
        except OSError as err:
            log.error("error downloading blob from %s:%i: %s", self.peer_address, self.peer_port, err)
            if self._response_fut and not self._response_fut.done():
                self._response_fut.set_exception(err)
        except asyncio.TimeoutError as err:
            log.error("%s downloading blob from %s:%i", str(err), self.peer_address, self.peer_port)
            if self._response_fut and not self._response_fut.done():
                self._response_fut.set_exception(err)

    async def _download_blob(self) -> typing.Tuple[int, Optional['BlobExchangeClientProtocol']]:  # pylint: disable=too-many-return-statements
        """
        :return: download success (bool), connected protocol (BlobExchangeClientProtocol)

        Sends the blob request, awaits the peer's response (bounded by
        ``peer_timeout``), validates the availability/price/blob responses,
        then awaits the writer finishing and the blob verifying. On any
        rejection or error path this returns
        ``(self._blob_bytes_received, self.close())`` — close() returns None,
        so the second element is None and the connection is torn down.
        """
        start_time = time.perf_counter()
        request = BlobRequest.make_request_for_blob_hash(self.blob.blob_hash)
        # keep a local copy: self.blob is cleared by close() and may be gone by the time we log
        blob_hash = self.blob.blob_hash
        if not self.peer_address:
            addr_info = self.transport.get_extra_info('peername')
            self.peer_address, self.peer_port = addr_info
        try:
            msg = request.serialize()
            log.debug("send request to %s:%i -> %s", self.peer_address, self.peer_port, msg.decode())
            self.transport.write(msg)
            if self.connection_manager:
                self.connection_manager.sent_data(f"{self.peer_address}:{self.peer_port}", len(msg))
            response: BlobResponse = await asyncio.wait_for(self._response_fut, self.peer_timeout)
            availability_response = response.get_availability_response()
            price_response = response.get_price_response()
            blob_response = response.get_blob_response()
            # handle the race: downloader closed right as the response future resolved
            if self.closed.is_set():
                msg = f"cancelled blob request for {blob_hash} immediately after we got a response"
                log.warning(msg)
                raise asyncio.CancelledError(msg)
            if (not blob_response or blob_response.error) and\
                    (not availability_response or not availability_response.available_blobs):
                # peer doesn't have the blob
                log.warning("%s not in availability response from %s:%i", self.blob.blob_hash, self.peer_address,
                            self.peer_port)
                log.warning(response.to_dict())
                return self._blob_bytes_received, self.close()
            elif availability_response and availability_response.available_blobs and \
                    availability_response.available_blobs != [self.blob.blob_hash]:
                # peer answered for a different blob than the one we asked for
                log.warning("blob availability response doesn't match our request from %s:%i",
                            self.peer_address, self.peer_port)
                return self._blob_bytes_received, self.close()
            elif not availability_response:
                log.warning("response from %s:%i did not include an availability response (we requested %s)",
                            self.peer_address, self.peer_port, blob_hash)
                return self._blob_bytes_received, self.close()

            if not price_response or price_response.blob_data_payment_rate != 'RATE_ACCEPTED':
                log.warning("data rate rejected by %s:%i", self.peer_address, self.peer_port)
                return self._blob_bytes_received, self.close()
            if not blob_response or blob_response.error:
                log.warning("blob can't be downloaded from %s:%i", self.peer_address, self.peer_port)
                return self._blob_bytes_received, self.close()
            if not blob_response.error and blob_response.blob_hash != self.blob.blob_hash:
                log.warning("incoming blob hash mismatch from %s:%i", self.peer_address, self.peer_port)
                return self._blob_bytes_received, self.close()
            if self.blob.length is not None and self.blob.length != blob_response.length:
                log.warning("incoming blob unexpected length from %s:%i", self.peer_address, self.peer_port)
                return self._blob_bytes_received, self.close()
            msg = f"downloading {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}," \
                  f" timeout in {self.peer_timeout}"
            log.debug(msg)
            msg = f"downloaded {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}"
            # remaining payload arrives via data_received/_write; wait for the writer to complete
            await asyncio.wait_for(self.writer.finished, self.peer_timeout)
            # wait for the io to finish
            await self.blob.verified.wait()
            log.info("%s at %fMB/s", msg,
                     round((float(self._blob_bytes_received) /
                            float(time.perf_counter() - start_time)) / 1000000.0, 2))
            # await self.blob.finished_writing.wait() not necessary, but a dangerous change. TODO: is it needed?
            return self._blob_bytes_received, self
        except asyncio.TimeoutError:
            return self._blob_bytes_received, self.close()
        except (InvalidBlobHashError, InvalidDataError):
            log.warning("invalid blob from %s:%i", self.peer_address, self.peer_port)
            return self._blob_bytes_received, self.close()

    def close(self):
        """Tear down per-request state and the transport; always returns None.

        Sets ``self.closed`` first so a concurrently-resolving ``_download_blob``
        can detect the race, then cancels the pending response future, closes the
        writer handle, and closes the transport.
        """
        self.closed.set()
        if self._response_fut and not self._response_fut.done():
            self._response_fut.cancel()
        if self.writer and not self.writer.closed():
            self.writer.close_handle()
        self._response_fut = None
        self.writer = None
        self.blob = None
        if self.transport:
            self.transport.close()
        self.transport = None
        self.buf = b''

    async def download_blob(self, blob: 'AbstractBlob') -> typing.Tuple[int, Optional['BlobExchangeClientProtocol']]:
        """Download *blob* from the connected peer.

        :param blob: blob to download; skipped (returns ``(0, self)``) if already
            verified or not writeable
        :return: ``(bytes received, protocol if still connected else None)``
        :raises OSError: re-raised after logging (known unresolved race)
        :raises asyncio.CancelledError: re-raised after closing the connection
        """
        self.closed.clear()
        blob_hash = blob.blob_hash
        if blob.get_is_verified() or not blob.is_writeable():
            return 0, self
        try:
            self._blob_bytes_received = 0
            self.blob, self.writer = blob, blob.get_blob_writer(self.peer_address, self.peer_port)
            self._response_fut = asyncio.Future()
            return await self._download_blob()
        except OSError:
            # i'm not sure how to fix this race condition - jack
            log.warning("race happened downloading %s from %s:%s", blob_hash, self.peer_address, self.peer_port)
            # return self._blob_bytes_received, self.transport
            raise
        except asyncio.TimeoutError:
            if self._response_fut and not self._response_fut.done():
                self._response_fut.cancel()
            self.close()
            return self._blob_bytes_received, None
        except asyncio.CancelledError:
            self.close()
            raise
        finally:
            # always release the writer handle, whether we succeeded or not
            if self.writer and not self.writer.closed():
                self.writer.close_handle()
                self.writer = None

    def connection_made(self, transport: asyncio.Transport):
        """Record the peer's address and transport; notify the connection manager."""
        addr = transport.get_extra_info('peername')
        self.peer_address, self.peer_port = addr[0], addr[1]
        self.transport = transport
        if self.connection_manager:
            self.connection_manager.connection_made(f"{self.peer_address}:{self.peer_port}")
        log.debug("connection made to %s:%i", self.peer_address, self.peer_port)

    def connection_lost(self, exc):
        """Notify the connection manager and clean up via :meth:`close`."""
        if self.connection_manager:
            self.connection_manager.outgoing_connection_lost(f"{self.peer_address}:{self.peer_port}")
        log.debug("connection lost to %s:%i (reason: %s, %s)", self.peer_address, self.peer_port, str(exc),
                  str(type(exc)))
        self.close()
|
2019-01-22 18:52:32 +01:00
|
|
|
|
|
|
|
|
2019-05-03 20:52:06 +02:00
|
|
|
@cache_concurrent
async def request_blob(loop: asyncio.AbstractEventLoop, blob: Optional['AbstractBlob'], address: str,
                       tcp_port: int, peer_connect_timeout: float, blob_download_timeout: float,
                       connected_protocol: Optional['BlobExchangeClientProtocol'] = None,
                       connection_id: int = 0, connection_manager: Optional['ConnectionManager'] = None)\
        -> typing.Tuple[int, Optional['BlobExchangeClientProtocol']]:
    """
    Returns [<amount of bytes received>, <client protocol if connected>]
    """
    # a previously connected protocol is only reusable while its transport is alive
    reusable = bool(connected_protocol and connected_protocol.transport
                    and not connected_protocol.transport.is_closing())
    if reusable:
        log.debug("reusing connection for %s:%d", address, tcp_port)
        protocol = connected_protocol
    else:
        connected_protocol = None
        protocol = BlobExchangeClientProtocol(loop, blob_download_timeout, connection_manager)
    try:
        if connected_protocol is None:
            # establish a fresh connection, bounded by the connect timeout
            await asyncio.wait_for(loop.create_connection(lambda: protocol, address, tcp_port),
                                   peer_connect_timeout)
            connected_protocol = protocol
        if blob is None or blob.get_is_verified() or not blob.is_writeable():
            # blob is None happens when we are just opening a connection
            # file exists but not verified means someone is writing right now, give it time, come back later
            return 0, connected_protocol
        return await connected_protocol.download_blob(blob)
    except (asyncio.TimeoutError, ConnectionRefusedError, ConnectionAbortedError, OSError):
        return 0, None
|