2018-05-23 23:37:20 +02:00
|
|
|
import logging
|
2018-07-03 06:51:25 +02:00
|
|
|
from twisted.internet import defer
|
2018-11-09 20:02:03 +01:00
|
|
|
from lbrynet.dht.distance import Distance
|
|
|
|
from lbrynet.dht.error import TimeoutError
|
|
|
|
from lbrynet.dht import constants
|
2018-05-23 23:37:20 +02:00
|
|
|
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
def get_contact(contact_list, node_id, address, port):
    """Return the first contact in ``contact_list`` matching id, address and port.

    Raises:
        IndexError: with ``node_id`` as its argument when no contact matches.
    """
    not_found = object()
    match = next(
        (
            candidate for candidate in contact_list
            if candidate.id == node_id and candidate.address == address and candidate.port == port
        ),
        not_found,
    )
    if match is not_found:
        raise IndexError(node_id)
    return match
|
|
|
|
|
|
|
|
|
2018-07-06 02:38:52 +02:00
|
|
|
def expand_peer(compact_peer_info):
    """Unpack a compact peer record into a ``(peer_node_id, host, port)`` tuple.

    The first four items are rendered as a dotted-quad host string, the next two
    bytes are read as a big-endian port number, and everything after that is
    returned unchanged as the peer's node id.
    """
    address_part = compact_peer_info[:4]
    port_part = compact_peer_info[4:6]
    node_id_part = compact_peer_info[6:]
    dotted_quad = "{}.{}.{}.{}".format(*address_part)
    return node_id_part, dotted_quad, int.from_bytes(port_part, 'big')
|
|
|
|
|
|
|
|
|
2018-07-22 00:34:59 +02:00
|
|
|
class _IterativeFind:
    """Drive one iterative ``findNode`` / ``findValue`` lookup for ``key``.

    Each iteration probes at most ``constants.alpha`` not-yet-contacted contacts
    from the shortlist.  Contacts that reply are recorded in ``active_contacts``
    and the contacts they return are appended to the shortlist.  The outcome is
    delivered through ``finished_deferred``: the find-value result dict for a
    ``findValue`` request, otherwise up to ``constants.k`` active contacts
    sorted by distance to ``key``.
    """
    # TODO: use polymorphism to search for a value or node
    # instead of using a find_value flag

    def __init__(self, node, shortlist, key, rpc, exclude=None):
        """Set up the lookup state.

        Args:
            node: local node; this class uses its ``contact_manager``, ``node_id``,
                ``_protocol`` and ``reactor_callLater`` attributes.
            shortlist: list of contact objects to start from; it is kept by
                reference and modified in place during the search.
            key: the search key.
            rpc: name of the contact method to call ("findNode" or "findValue").
            exclude: optional iterable of ``(host, port)`` pairs to leave out of
                find-value results.
        """
        self.exclude = set(exclude or [])
        self.node = node
        self.finished_deferred = defer.Deferred()
        # all distance operations in this class only care about the distance
        # to self.key, so this makes it easier to calculate those
        self.distance = Distance(key)
        # The closest known and active node yet found
        self.closest_node = shortlist[0] if shortlist else None
        self.prev_closest_node = None
        # Shortlist of contact objects (the k closest known contacts to the key from the routing table)
        self.shortlist = shortlist
        # The search key
        self.key = key
        # The rpc method name (findValue or findNode)
        self.rpc = rpc
        # List of active queries; len() indicates number of active probes
        self.active_probes = []
        # Contact objects that have already been queried, includes contacts that didn't reply
        self.already_contacted = []
        # A list of found and known-to-be-active remote nodes (Contact objects)
        self.active_contacts = []
        # Ensure only one searchIteration call is running at a time
        self._search_iteration_semaphore = defer.DeferredSemaphore(1)
        self._iteration_count = 0
        self.find_value_result = {}
        # Cancel functions for the scheduled-but-not-yet-run iterations
        self.pending_iteration_calls = []

    @property
    def is_find_node_request(self):
        """True when this lookup uses the "findNode" rpc."""
        return self.rpc == "findNode"

    @property
    def is_find_value_request(self):
        """True when this lookup uses the "findValue" rpc."""
        return self.rpc == "findValue"

    def is_closer(self, contact):
        """Return True if ``contact`` is closer to the key than ``closest_node`` (or none is set)."""
        if not self.closest_node:
            return True
        return self.distance.is_closer(contact.id, self.closest_node.id)

    def getContactTriples(self, result):
        """Extract and validate the contact triples carried by an rpc response.

        For a find-value request the triples are read from ``result[b'contacts']``;
        otherwise ``result`` itself is the sequence of triples.

        Returns:
            A new list of ``[node_id, address, port]`` lists with ``address`` as ``str``.
            The response passed in is not modified.

        Raises:
            ValueError: if an entry is not a 3-item list/tuple, or its address is
                neither ``bytes`` nor ``str`` (or cannot be decoded).
        """
        raw_triples = result[b'contacts'] if self.is_find_value_request else result
        contact_triples = []
        for contact_tup in raw_triples:
            if not isinstance(contact_tup, (list, tuple)) or len(contact_tup) != 3:
                raise ValueError("invalid contact triple")
            node_id, address, port = contact_tup
            if isinstance(address, bytes):
                address = address.decode()  # ips are strings
            elif not isinstance(address, str):
                raise ValueError("invalid contact triple")
            # build a fresh list: tuples cannot be assigned into, and mutating the
            # caller's response would break a second pass over the same data
            contact_triples.append([node_id, address, port])
        return contact_triples

    def sortByDistance(self, contact_list):
        """Sort the list of contacts in order by distance from key"""
        contact_list.sort(key=lambda c: self.distance(c.id))

    def extendShortlist(self, contact, result):
        """Process the response ``result`` that ``contact`` gave to a probe.

        Marks ``contact`` as active, then either collects the found value (firing
        ``finished_deferred`` when any non-excluded peer was found) or extends the
        shortlist with the contacts returned, and fires ``finished_deferred`` if
        ``should_stop()`` says so.

        Returns:
            ``contact.id``

        Raises:
            ValueError: if ``contact`` is ignored by the contact manager, or the
                response contains an invalid contact triple.
        """
        originAddress = (contact.address, contact.port)
        if self.finished_deferred.called:
            return contact.id
        if self.node.contact_manager.is_ignored(originAddress):
            raise ValueError("contact is ignored")
        if contact.id == self.node.node_id:
            return contact.id

        if contact not in self.active_contacts:
            self.active_contacts.append(contact)
        if contact not in self.shortlist:
            self.shortlist.append(contact)

        # Now extend the (unverified) shortlist with the returned contacts
        # TODO: some validation on the result (for guarding against attacks)
        # If we are looking for a value, first see if this result is the value
        # we are looking for before treating it as a list of contact triples
        if self.is_find_value_request and self.key in result:
            # We have found the value
            for peer in result[self.key]:
                node_id, host, port = expand_peer(peer)
                if (host, port) not in self.exclude:
                    self.find_value_result.setdefault(self.key, []).append((node_id, host, port))
            if self.find_value_result:
                self.finished_deferred.callback(self.find_value_result)
        else:
            if self.is_find_value_request:
                # We are looking for a value, and the remote node didn't have it
                # - mark it as the closest "empty" node, if it is
                # TODO: store to this peer after finding the value as per the kademlia spec
                if b'closestNodeNoValue' not in self.find_value_result or self.is_closer(contact):
                    self.find_value_result[b'closestNodeNoValue'] = contact

            # built once instead of once per returned triple
            already_contacted_addresses = {(c.address, c.port) for c in self.already_contacted}
            for triple_id, triple_address, triple_port in self.getContactTriples(result):
                triple_endpoint = (triple_address, triple_port)
                if triple_endpoint in already_contacted_addresses:
                    continue
                if self.node.contact_manager.is_ignored(triple_endpoint):
                    continue
                found_contact = self.node.contact_manager.make_contact(triple_id, triple_address,
                                                                       triple_port, self.node._protocol)
                if found_contact not in self.shortlist:
                    self.shortlist.append(found_contact)

            if not self.finished_deferred.called and self.should_stop():
                self.sortByDistance(self.active_contacts)
                self.finished_deferred.callback(self.active_contacts[:constants.k])

        return contact.id

    @defer.inlineCallbacks
    def probeContact(self, contact):
        """Call the rpc on ``contact`` and feed the response to ``extendShortlist``.

        The deferred result is ``contact.id``, both on success and when the probe
        fails with a timeout, a cancellation, ``ValueError`` or ``IndexError``.
        """
        fn = getattr(contact, self.rpc)
        try:
            response = yield fn(self.key)
            result = self.extendShortlist(contact, response)
            defer.returnValue(result)
        except (TimeoutError, defer.CancelledError, ValueError, IndexError):
            defer.returnValue(contact.id)

    def should_stop(self):
        """Return True when a find-node lookup has nothing more to gain from probing."""
        if self.is_find_value_request:
            # search stops when it finds a value, let it run
            return False
        if self.prev_closest_node and self.closest_node and self.distance.is_closer(self.prev_closest_node.id,
                                                                                    self.closest_node.id):
            # we're getting further away
            return True
        if len(self.active_contacts) >= constants.k:
            # we have enough results
            return True
        return False

    def _searchIteration(self):
        """Send parallel, asynchronous rpcs to the next uncontacted shortlist entries.

        Finishes the lookup when no probe could be sent and nothing is in flight
        (or ``should_stop()`` is true); otherwise schedules the next iteration.
        """
        # Sort the discovered active nodes from closest to furthest
        if self.active_contacts:
            self.sortByDistance(self.active_contacts)
            self.prev_closest_node = self.closest_node
            self.closest_node = self.active_contacts[0]

        # Sort the current shortList before contacting other nodes
        self.sortByDistance(self.shortlist)
        probes = []
        already_contacted_addresses = {(c.address, c.port) for c in self.already_contacted}
        to_remove = []
        for contact in self.shortlist:
            if self.node.contact_manager.is_ignored((contact.address, contact.port)):
                to_remove.append(contact)  # a contact became bad during iteration
                continue
            if (contact.address, contact.port) not in already_contacted_addresses:
                self.already_contacted.append(contact)
                to_remove.append(contact)
                probe = self.probeContact(contact)
                probes.append(probe)
                self.active_probes.append(probe)
            if len(probes) == constants.alpha:
                break
        for contact in to_remove:  # these contacts will be re-added to the shortlist when they reply successfully
            self.shortlist.remove(contact)

        # run the probes
        if probes:
            # Schedule the next iteration if there are any active
            # calls (Kademlia uses loose parallelism)
            self.searchIteration()

            d = defer.DeferredList(probes, consumeErrors=True)

            def _remove_probes(results):
                for probe in probes:
                    self.active_probes.remove(probe)
                return results

            d.addCallback(_remove_probes)

        elif not self.finished_deferred.called and (not self.active_probes or self.should_stop()):
            # The parentheses matter: without them `should_stop()` alone could fire
            # finished_deferred a second time, which raises AlreadyCalledError.
            # If no probes were sent, there will not be any improvement, so we're done
            if self.is_find_value_request:
                self.finished_deferred.callback(self.find_value_result)
            else:
                self.sortByDistance(self.active_contacts)
                self.finished_deferred.callback(self.active_contacts[:constants.k])
        elif not self.finished_deferred.called:
            # Force the next iteration
            self.searchIteration()

    def searchIteration(self, delay=constants.iterativeLookupDelay):
        """Schedule ``_searchIteration`` to run after ``delay`` under the iteration semaphore.

        Also hooks ``finished_deferred`` so that every still-pending scheduled
        iteration is cancelled once the lookup finishes.
        """
        def _cancel_pending_iterations(result):
            while self.pending_iteration_calls:
                canceller = self.pending_iteration_calls.pop()
                canceller()
            return result

        self.finished_deferred.addBoth(_cancel_pending_iterations)
        self._iteration_count += 1
        # reactor_callLater yields a (call, cancel) pair; only the cancel function is kept
        _, cancel = self.node.reactor_callLater(delay, self._search_iteration_semaphore.run, self._searchIteration)
        self.pending_iteration_calls.append(cancel)
|
|
|
|
|
|
|
|
|
2018-07-06 02:38:52 +02:00
|
|
|
def iterativeFind(node, shortlist, key, rpc, exclude=None):
    """Start an iterative lookup and return the deferred that fires with its result.

    Builds an ``_IterativeFind`` from the arguments, schedules its first search
    iteration with a delay of 0 and hands back its ``finished_deferred``.
    """
    finder = _IterativeFind(node, shortlist, key, rpc, exclude)
    finder.searchIteration(0)
    return finder.finished_deferred
|