lbry-sdk/lbrynet/blob_exchange/client.py

225 lines
12 KiB
Python
Raw Normal View History

2019-01-22 12:52:32 -05:00
import asyncio
import logging
import typing
2019-01-29 20:47:02 -05:00
import binascii
2019-02-04 16:03:22 -05:00
from lbrynet.error import InvalidBlobHashError, InvalidDataError
2019-01-22 12:52:32 -05:00
from lbrynet.blob_exchange.serialization import BlobResponse, BlobRequest
2019-05-03 14:52:06 -04:00
from lbrynet.utils import cache_concurrent
2019-01-22 12:52:32 -05:00
if typing.TYPE_CHECKING:
from lbrynet.blob.blob_file import AbstractBlob
2019-01-22 12:52:32 -05:00
from lbrynet.blob.writer import HashBlobWriter
log = logging.getLogger(__name__)
class BlobExchangeClientProtocol(asyncio.Protocol):
def __init__(self, loop: asyncio.BaseEventLoop, peer_timeout: typing.Optional[float] = 10):
self.loop = loop
self.peer_port: typing.Optional[int] = None
self.peer_address: typing.Optional[str] = None
self.peer_timeout = peer_timeout
self.transport: typing.Optional[asyncio.Transport] = None
2019-01-22 12:52:32 -05:00
self.writer: typing.Optional['HashBlobWriter'] = None
self.blob: typing.Optional['AbstractBlob'] = None
2019-01-22 12:52:32 -05:00
self._blob_bytes_received = 0
self._response_fut: typing.Optional[asyncio.Future] = None
2019-02-12 03:08:34 -03:00
self.buf = b''
2019-01-22 12:52:32 -05:00
# this is here to handle the race when the downloader is closed right as response_fut gets a result
self.closed = asyncio.Event(loop=self.loop)
2019-01-28 17:29:08 -05:00
def data_received(self, data: bytes):
2019-02-12 03:08:34 -03:00
log.debug("%s:%d -- got %s bytes -- %s bytes on buffer -- %s blob bytes received",
self.peer_address, self.peer_port, len(data), len(self.buf), self._blob_bytes_received)
2019-01-29 20:47:02 -05:00
if not self.transport or self.transport.is_closing():
log.warning("transport closing, but got more bytes from %s:%i\n%s", self.peer_address, self.peer_port,
binascii.hexlify(data))
2019-01-28 17:29:08 -05:00
if self._response_fut and not self._response_fut.done():
2019-01-22 12:52:32 -05:00
self._response_fut.cancel()
return
if not self._response_fut:
log.warning("Protocol received data before expected, probable race on keep alive. Closing transport.")
return self.close()
2019-02-07 20:20:39 -03:00
if self._blob_bytes_received and not self.writer.closed():
return self._write(data)
2019-01-22 12:52:32 -05:00
2019-02-12 03:08:34 -03:00
response = BlobResponse.deserialize(self.buf + data)
if not response.responses and not self._response_fut.done():
self.buf += data
return
else:
self.buf = b''
2019-01-22 12:52:32 -05:00
if response.responses and self.blob:
blob_response = response.get_blob_response()
if blob_response and not blob_response.error and blob_response.blob_hash == self.blob.blob_hash:
2019-01-28 17:29:08 -05:00
# set the expected length for the incoming blob if we didn't know it
2019-01-22 12:52:32 -05:00
self.blob.set_length(blob_response.length)
elif blob_response and not blob_response.error and self.blob.blob_hash != blob_response.blob_hash:
2019-01-28 17:29:08 -05:00
# the server started sending a blob we didn't request
2019-01-22 12:52:32 -05:00
log.warning("mismatch with self.blob %s", self.blob.blob_hash)
return
if response.responses:
2019-01-28 17:29:08 -05:00
log.debug("got response from %s:%i <- %s", self.peer_address, self.peer_port, response.to_dict())
# fire the Future with the response to our request
2019-01-22 12:52:32 -05:00
self._response_fut.set_result(response)
if response.blob_data and self.writer and not self.writer.closed():
# log.debug("got %i blob bytes from %s:%i", len(response.blob_data), self.peer_address, self.peer_port)
2019-01-28 17:29:08 -05:00
# write blob bytes if we're writing a blob and have blob bytes to write
2019-02-07 20:20:39 -03:00
self._write(response.blob_data)
def _write(self, data: bytes):
2019-02-07 20:20:39 -03:00
if len(data) > (self.blob.get_length() - self._blob_bytes_received):
data = data[:(self.blob.get_length() - self._blob_bytes_received)]
log.warning("got more than asked from %s:%d, probable sendfile bug", self.peer_address, self.peer_port)
else:
data = data
self._blob_bytes_received += len(data)
try:
self.writer.write(data)
except IOError as err:
log.error("error downloading blob from %s:%i: %s", self.peer_address, self.peer_port, err)
if self._response_fut and not self._response_fut.done():
self._response_fut.set_exception(err)
2019-02-08 01:03:13 -03:00
except (asyncio.TimeoutError) as err: # TODO: is this needed?
2019-02-07 20:20:39 -03:00
log.error("%s downloading blob from %s:%i", str(err), self.peer_address, self.peer_port)
if self._response_fut and not self._response_fut.done():
self._response_fut.set_exception(err)
2019-01-22 12:52:32 -05:00
2019-02-08 03:05:53 -03:00
async def _download_blob(self) -> typing.Tuple[int, typing.Optional[asyncio.Transport]]:
2019-01-28 17:29:08 -05:00
"""
:return: download success (bool), keep connection (bool)
"""
2019-01-22 12:52:32 -05:00
request = BlobRequest.make_request_for_blob_hash(self.blob.blob_hash)
blob_hash = self.blob.blob_hash
2019-01-22 12:52:32 -05:00
try:
2019-01-28 17:24:55 -05:00
msg = request.serialize()
log.debug("send request to %s:%i -> %s", self.peer_address, self.peer_port, msg.decode())
self.transport.write(msg)
2019-01-22 12:52:32 -05:00
response: BlobResponse = await asyncio.wait_for(self._response_fut, self.peer_timeout, loop=self.loop)
availability_response = response.get_availability_response()
price_response = response.get_price_response()
blob_response = response.get_blob_response()
if self.closed.is_set():
msg = f"cancelled blob request for {blob_hash} immediately after we got a response"
log.warning(msg)
raise asyncio.CancelledError(msg)
2019-01-22 12:52:32 -05:00
if (not blob_response or blob_response.error) and\
(not availability_response or not availability_response.available_blobs):
2019-01-31 12:28:59 -05:00
log.warning("%s not in availability response from %s:%i", self.blob.blob_hash, self.peer_address,
self.peer_port)
log.warning(response.to_dict())
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
elif availability_response.available_blobs and \
availability_response.available_blobs != [self.blob.blob_hash]:
2019-01-28 17:24:55 -05:00
log.warning("blob availability response doesn't match our request from %s:%i",
self.peer_address, self.peer_port)
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
if not price_response or price_response.blob_data_payment_rate != 'RATE_ACCEPTED':
2019-01-28 17:24:55 -05:00
log.warning("data rate rejected by %s:%i", self.peer_address, self.peer_port)
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
if not blob_response or blob_response.error:
2019-01-28 17:24:55 -05:00
log.warning("blob cant be downloaded from %s:%i", self.peer_address, self.peer_port)
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
if not blob_response.error and blob_response.blob_hash != self.blob.blob_hash:
2019-01-28 17:24:55 -05:00
log.warning("incoming blob hash mismatch from %s:%i", self.peer_address, self.peer_port)
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
if self.blob.length is not None and self.blob.length != blob_response.length:
2019-01-28 17:24:55 -05:00
log.warning("incoming blob unexpected length from %s:%i", self.peer_address, self.peer_port)
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
msg = f"downloading {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}," \
f" timeout in {self.peer_timeout}"
2019-01-28 17:24:55 -05:00
log.debug(msg)
2019-01-28 22:38:17 -05:00
msg = f"downloaded {self.blob.blob_hash[:8]} from {self.peer_address}:{self.peer_port}"
2019-01-22 12:52:32 -05:00
await asyncio.wait_for(self.writer.finished, self.peer_timeout, loop=self.loop)
log.info(msg)
# await self.blob.finished_writing.wait() not necessary, but a dangerous change. TODO: is it needed?
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.transport
2019-01-22 12:52:32 -05:00
except asyncio.TimeoutError:
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-02-04 16:03:22 -05:00
except (InvalidBlobHashError, InvalidDataError):
log.warning("invalid blob from %s:%i", self.peer_address, self.peer_port)
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, self.close()
2019-01-22 12:52:32 -05:00
def close(self):
self.closed.set()
2019-01-22 12:52:32 -05:00
if self._response_fut and not self._response_fut.done():
self._response_fut.cancel()
if self.writer and not self.writer.closed():
self.writer.close_handle()
self._response_fut = None
self.writer = None
self.blob = None
if self.transport:
self.transport.close()
self.transport = None
2019-02-12 03:08:34 -03:00
self.buf = b''
2019-01-22 12:52:32 -05:00
async def download_blob(self, blob: 'AbstractBlob') -> typing.Tuple[int, typing.Optional[asyncio.Transport]]:
self.closed.clear()
blob_hash = blob.blob_hash
if blob.get_is_verified() or not blob.is_writeable():
2019-02-08 03:05:53 -03:00
return 0, self.transport
2019-01-31 20:19:50 -05:00
try:
self._blob_bytes_received = 0
self.blob, self.writer = blob, blob.get_blob_writer(self.peer_address, self.peer_port)
2019-01-31 20:19:50 -05:00
self._response_fut = asyncio.Future(loop=self.loop)
return await self._download_blob()
except OSError as e:
2019-01-31 20:19:50 -05:00
# i'm not sure how to fix this race condition - jack
2019-05-03 14:52:06 -04:00
log.warning("race happened downloading %s from %s:%i", blob_hash, self.peer_address, self.peer_port)
# return self._blob_bytes_received, self.transport
raise
2019-01-31 20:19:50 -05:00
except asyncio.TimeoutError:
if self._response_fut and not self._response_fut.done():
self._response_fut.cancel()
self.close()
2019-02-08 03:05:53 -03:00
return self._blob_bytes_received, None
2019-01-31 20:19:50 -05:00
except asyncio.CancelledError:
self.close()
2019-02-08 01:03:13 -03:00
raise
2019-05-02 12:20:49 -03:00
finally:
if self.writer and not self.writer.closed():
self.writer.close_handle()
self.writer = None
2019-01-22 12:52:32 -05:00
def connection_made(self, transport: asyncio.Transport):
self.transport = transport
self.peer_address, self.peer_port = self.transport.get_extra_info('peername')
2019-01-28 17:24:55 -05:00
log.debug("connection made to %s:%i", self.peer_address, self.peer_port)
2019-01-22 12:52:32 -05:00
def connection_lost(self, reason):
2019-01-28 17:24:55 -05:00
log.debug("connection lost to %s:%i (reason: %s, %s)", self.peer_address, self.peer_port, str(reason),
str(type(reason)))
self.close()
2019-01-22 12:52:32 -05:00
2019-05-03 14:52:06 -04:00
@cache_concurrent
async def request_blob(loop: asyncio.BaseEventLoop, blob: 'AbstractBlob', address: str, tcp_port: int,
peer_connect_timeout: float, blob_download_timeout: float,
2019-05-05 20:22:10 -04:00
connected_transport: asyncio.Transport = None, connection_id: int = 0)\
2019-02-08 03:05:53 -03:00
-> typing.Tuple[int, typing.Optional[asyncio.Transport]]:
2019-01-22 12:52:32 -05:00
"""
Returns [<downloaded blob>, <keep connection>]
"""
protocol = BlobExchangeClientProtocol(loop, blob_download_timeout)
if connected_transport and not connected_transport.is_closing():
connected_transport.set_protocol(protocol)
protocol.connection_made(connected_transport)
log.debug("reusing connection for %s:%d", address, tcp_port)
else:
connected_transport = None
2019-01-22 12:52:32 -05:00
try:
if not connected_transport:
await asyncio.wait_for(loop.create_connection(lambda: protocol, address, tcp_port),
peer_connect_timeout, loop=loop)
if blob.get_is_verified() or not blob.is_writeable():
# file exists but not verified means someone is writing right now, give it time, come back later
return 0, connected_transport
2019-01-22 12:52:32 -05:00
return await protocol.download_blob(blob)
2019-02-08 01:03:13 -03:00
except (asyncio.TimeoutError, ConnectionRefusedError, ConnectionAbortedError, OSError):
2019-02-08 03:05:53 -03:00
return 0, None