4f3b5cd802
Timeout errors are common on the DHT, so log those at debug; other errors (potentially) need to receive more attention
927 lines
38 KiB
Python
927 lines
38 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# This library is free software, distributed under the terms of
|
|
# the GNU Lesser General Public License Version 3, or any later version.
|
|
# See the COPYING file included in this archive
|
|
#
|
|
# The docstrings in this module contain epytext markup; API documentation
|
|
# may be created by processing this file with epydoc: http://epydoc.sf.net
|
|
import argparse
|
|
import binascii
|
|
import hashlib
|
|
import operator
|
|
import random
|
|
import struct
|
|
import time
|
|
|
|
from twisted.internet import defer, error
|
|
|
|
import constants
|
|
import routingtable
|
|
import datastore
|
|
import protocol
|
|
import twisted.internet.reactor
|
|
import twisted.internet.threads
|
|
import twisted.python.log
|
|
from contact import Contact
|
|
from hashwatcher import HashWatcher
|
|
import logging
|
|
|
|
|
|
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
|
|
|
|
|
|
def rpcmethod(func):
    """ Decorator that flags a Node method as a remote procedure call

    Use this on methods of the Node class (or of a subclass) that should be
    callable remotely through the DHT's RPC mechanism. The function is not
    wrapped: it is tagged with an C{rpcmethod} attribute set to C{True} and
    the very same function object is handed back.

    @param func: the function to expose
    @return: C{func} itself, now carrying the C{rpcmethod} marker
    """
    setattr(func, 'rpcmethod', True)
    return func
|
|
|
|
|
|
class Node(object):
    """ Local node in the Kademlia network

    This class represents a single local node in a Kademlia network; in other
    words, this class encapsulates an Entangled-using application's "presence"
    in a Kademlia network.

    In Entangled, all interactions with the Kademlia network by a client
    application are performed via this class (or a subclass).
    """
    def __init__(self, id=None, udpPort=4000, dataStore=None,
                 routingTableClass=None, networkProtocol=None, lbryid=None,
                 externalIP=None):
        """
        @param id: The ID of this node; when C{None} a pseudo-random one is
                   generated with C{_generateID}.
        @param udpPort: The UDP port to listen on when joining the network;
                        C{None} means that no port is opened.
        @param dataStore: The data store to use. This must be class inheriting
                          from the C{DataStore} interface (or providing the
                          same API). How the data store manages its data
                          internally is up to the implementation of that data
                          store.
        @type dataStore: entangled.kademlia.datastore.DataStore
        @param routingTableClass: The routing table class to use. Since there
                             exists some ambiguity as to how the routing table
                             should be implemented in Kademlia, a different
                             routing table may be used, as long as the
                             appropriate API is exposed. This should be a
                             class, not an object, in order to allow the Node
                             to pass an auto-generated node ID to the
                             routingtable object upon instantiation (if
                             necessary).
        @type routingTableClass: entangled.kademlia.routingtable.RoutingTable
        @param networkProtocol: The network protocol to use. This can be
                                overridden from the default to (for example)
                                change the format of the physical RPC messages
                                being transmitted.
        @type networkProtocol: entangled.kademlia.protocol.KademliaProtocol
        @param lbryid: Identifier announced together with this node's blobs
                       and used to filter this node out of peer lists.
        @param externalIP: This node's externally visible IP address; when
                           set, the node may store its own announcements.
        """
        if id is not None:
            self.id = id
        else:
            self.id = self._generateID()
        self.lbryid = lbryid
        self.port = udpPort
        # Object implementing Twisted IListeningPort (set by joinNetwork)
        self._listeningPort = None
        # This will contain a deferred created when joining the network, to
        # enable publishing/retrieving information from the DHT as soon as
        # the node is part of the network (add callbacks to this deferred if
        # scheduling such operations before the node has finished joining
        # the network)
        self._joinDeferred = None
        self.next_refresh_call = None
        self.next_change_token_call = None
        # Create k-buckets (for storing contacts)
        if routingTableClass is None:
            self._routingTable = routingtable.OptimizedTreeRoutingTable(self.id)
        else:
            self._routingTable = routingTableClass(self.id)

        # Initialize this node's network access mechanisms
        if networkProtocol is None:
            self._protocol = protocol.KademliaProtocol(self)
        else:
            self._protocol = networkProtocol
        # Initialize the data storage mechanism used by this node.
        # change_token() rotates the secret once right away, so that
        # old_token_secret is never None when tokens are verified.
        self.token_secret = self._generateID()
        self.old_token_secret = None
        self.change_token()
        if dataStore is None:
            self._dataStore = datastore.DictDataStore()
        else:
            self._dataStore = dataStore
            # Try to restore the node's state...
            if 'nodeState' in self._dataStore:
                state = self._dataStore['nodeState']
                # NOTE(review): the routing table above was built with the
                # previous self.id; confirm it should not be rebuilt here.
                self.id = state['id']
                for contactTriple in state['closestNodes']:
                    contact = Contact(
                        contactTriple[0], contactTriple[1], contactTriple[2], self._protocol)
                    self._routingTable.addContact(contact)
        self.externalIP = externalIP
        self.hash_watcher = HashWatcher()

    def __del__(self):
        # getattr() guards against a partially constructed instance, where
        # __init__ failed before _listeningPort was assigned.
        listening_port = getattr(self, '_listeningPort', None)
        if listening_port is not None:
            listening_port.stopListening()

    def stop(self):
        """ Cancel the scheduled calls, stop listening and stop the hash watcher """
        # cancel callLaters:
        if self.next_refresh_call is not None:
            self.next_refresh_call.cancel()
            self.next_refresh_call = None
        if self.next_change_token_call is not None:
            self.next_change_token_call.cancel()
            self.next_change_token_call = None
        if self._listeningPort is not None:
            self._listeningPort.stopListening()
        self.hash_watcher.stop()

    def joinNetwork(self, knownNodeAddresses=None):
        """ Causes the Node to join the Kademlia network; normally, this
        should be called before any other DHT operations.

        @param knownNodeAddresses: A sequence of tuples containing IP address
                                   information for existing nodes on the
                                   Kademlia network, in the format:
                                   C{(<ip address>, <udp port>)}
        @type knownNodeAddresses: tuple

        @return: the deferred of the lookup for this node's own ID
        @rtype: twisted.internet.defer.Deferred

        @raise ValueError: if the UDP port cannot be bound
        """
        # Prepare the underlying Kademlia protocol
        if self.port is not None:
            try:
                self._listeningPort = twisted.internet.reactor.listenUDP(
                    self.port, self._protocol)
            except error.CannotListenError as e:
                import traceback
                log.error("Couldn't bind to port %d. %s", self.port, traceback.format_exc())
                raise ValueError("%s lbrynet may already be running." % str(e))
        # Create temporary contact information for the list of addresses of known nodes
        if knownNodeAddresses is not None:
            bootstrapContacts = []
            for address, port in knownNodeAddresses:
                # The real IDs of the bootstrap nodes are unknown, so random ones are used
                contact = Contact(self._generateID(), address, port, self._protocol)
                bootstrapContacts.append(contact)
        else:
            bootstrapContacts = None
        # Initiate the Kademlia joining sequence - perform a search for this node's own ID
        self._joinDeferred = self._iterativeFind(self.id, bootstrapContacts)
        # TODO: Refresh all k-buckets further away than this node's closest neighbour
        # Start refreshing k-buckets periodically, if necessary
        self.next_refresh_call = twisted.internet.reactor.callLater(
            constants.checkRefreshInterval, self._refreshNode)
        self.hash_watcher.tick()
        return self._joinDeferred

    def printContacts(self, *args):
        """ Print every contact of every k-bucket to stdout (debugging aid) """
        # Single-argument print(...) behaves the same as the Python 2 print statement
        print('\n\nNODE CONTACTS\n===============')
        for bucket in self._routingTable._buckets:
            for contact in bucket._contacts:
                print(contact)
        print('==================================')

    def getApproximateTotalDHTNodes(self):
        """ Return a rough estimate of the number of nodes in the DHT """
        # get the deepest bucket and the number of contacts in that bucket and multiply it
        # by the number of equivalently deep buckets in the whole DHT to get a really bad
        # estimate!
        bucket = self._routingTable._buckets[self._routingTable._kbucketIndex(self.id)]
        num_in_bucket = len(bucket._contacts)
        factor = (2**constants.key_bits) / (bucket.rangeMax - bucket.rangeMin)
        return num_in_bucket * factor

    def getApproximateTotalHashes(self):
        """ Return a rough estimate of the number of hashes stored in the DHT """
        # Divide the number of hashes we know about by k to get a really, really, really
        # bad estimate of the average number of hashes per node, then multiply by the
        # approximate number of nodes to get a horrendous estimate of the total number
        # of hashes in the DHT
        # NOTE(review): the divisor is the literal 8, not constants.k - confirm they match
        num_in_data_store = len(self._dataStore._dict)
        if num_in_data_store == 0:
            return 0
        return num_in_data_store * self.getApproximateTotalDHTNodes() / 8

    def announceHaveBlob(self, key, port):
        """ Announce that this node serves the blob C{key} on C{port}

        @rtype: twisted.internet.defer.Deferred
        """
        return self.iterativeAnnounceHaveBlob(key, {'port': port, 'lbryid': self.lbryid})

    def getPeersForBlob(self, blob_hash):
        """ Look up the peers that announced C{blob_hash}

        @return: a deferred firing with a list of C{(host, port)} tuples;
                 the list is empty when nothing was found or the lookup failed
        @rtype: twisted.internet.defer.Deferred
        """

        def expand_and_filter(result):
            expanded_peers = []
            if isinstance(result, dict) and blob_hash in result:
                for peer in result[blob_hash]:
                    # Compact peer layout: 4 bytes IP, 2 bytes port, then the lbryid.
                    # Entries carrying our own lbryid are skipped.
                    if self.lbryid != peer[6:]:
                        host = ".".join([str(ord(d)) for d in peer[:4]])
                        # A loopback address is replaced by the address of the
                        # node that returned it, unless that node is ourselves
                        if (host == "127.0.0.1" and "from_peer" in result
                                and result["from_peer"] != "self"):
                            host = result["from_peer"]
                        port, = struct.unpack('>H', peer[4:6])
                        expanded_peers.append((host, port))
            return expanded_peers

        def find_failed(err):
            # Best effort: report "no peers", but leave a trace of the failure
            log.warning(
                "Failed to find peers for blob_hash %s: %s",
                binascii.hexlify(blob_hash), err.getErrorMessage())
            return []

        d = self.iterativeFindValue(blob_hash)
        d.addCallbacks(expand_and_filter, find_failed)
        return d

    def get_most_popular_hashes(self, num_to_return):
        """ Delegate to the hash watcher's C{most_popular_hashes} """
        return self.hash_watcher.most_popular_hashes(num_to_return)

    def iterativeAnnounceHaveBlob(self, blob_hash, value):
        """ Find the nodes closest to C{blob_hash} and store C{value} on them

        Each of those nodes is first asked for the value (FIND_VALUE) in order
        to obtain a token, which is then sent along with the STORE request.

        @param blob_hash: the key to announce
        @param value: dictionary with the C{'port'} and C{'lbryid'} entries;
                      a C{'token'} entry is added to it for each store request
        @type value: dict

        @rtype: twisted.internet.defer.Deferred
        """
        known_nodes = {}

        def log_error(err, n):
            if err.check(protocol.TimeoutError):
                log.debug(
                    "Timeout while storing blob_hash %s at %s",
                    binascii.hexlify(blob_hash), n)
            else:
                log.error(
                    "Unexpected error while storing blob_hash %s at %s: %s",
                    binascii.hexlify(blob_hash), n, err.getErrorMessage())

        def log_success(res):
            log.debug("Response to store request: %s", str(res))
            return res

        def announce_to_peer(responseTuple):
            """ @type responseMsg: kademlia.msgtypes.ResponseMessage """
            # The "raw response" tuple contains the response message,
            # and the originating address info
            responseMsg = responseTuple[0]
            # Make sure the responding node is valid, and abort the operation if it isn't
            if responseMsg.nodeID not in known_nodes:
                return responseMsg.nodeID

            n = known_nodes[responseMsg.nodeID]

            result = responseMsg.response
            if 'token' in result:
                value['token'] = result['token']
                # The trailing 0 lands in the remote store()'s self_store parameter
                d = n.store(blob_hash, value, self.id, 0)
                d.addCallback(log_success)
                d.addErrback(log_error, n)
            else:
                d = defer.succeed(False)
            return d

        def requestPeers(contacts):
            if self.externalIP is not None and len(contacts) >= constants.k:
                # With k or more candidates, this node only stores the value
                # itself when it is closer to the key than the last contact in
                # the list, which it then replaces
                is_closer = Distance(blob_hash).is_closer(self.id, contacts[-1].id)
                if is_closer:
                    contacts.pop()
                    self.store(blob_hash, value, self_store=True, originalPublisherID=self.id)
            elif self.externalIP is not None:
                self.store(blob_hash, value, self_store=True, originalPublisherID=self.id)
            ds = []
            for contact in contacts:
                known_nodes[contact.id] = contact
                df = contact.findValue(blob_hash, rawResponse=True)
                df.addCallback(announce_to_peer)
                df.addErrback(log_error, contact)
                ds.append(df)
            return defer.DeferredList(ds)

        d = self.iterativeFindNode(blob_hash)
        d.addCallback(requestPeers)
        return d

    def change_token(self):
        """ Rotate the token secret and schedule the next rotation

        The previous secret is kept in C{old_token_secret}, so that tokens
        made with it still verify.
        """
        self.old_token_secret = self.token_secret
        self.token_secret = self._generateID()
        self.next_change_token_call = twisted.internet.reactor.callLater(
            constants.tokenSecretChangeInterval, self.change_token)

    def make_token(self, compact_ip):
        """ Return the SHA-384 digest of the current token secret + C{compact_ip} """
        h = hashlib.new('sha384')
        h.update(self.token_secret + compact_ip)
        return h.digest()

    def verify_token(self, token, compact_ip):
        """ Check C{token} against the current and the previous token secret

        @return: C{True} if the token matches either secret, else C{False}
        @rtype: bool
        """
        # The previous secret is only consulted when the current one does not match
        for secret in (self.token_secret, self.old_token_secret):
            h = hashlib.new('sha384')
            h.update(secret + compact_ip)
            if token == h.digest():
                return True
        return False

    def iterativeFindNode(self, key):
        """ The basic Kademlia node lookup operation

        Call this to find a remote node in the P2P overlay network.

        @param key: the n-bit key (i.e. the node or value ID) to search for
        @type key: str

        @return: This immediately returns a deferred object, which will return
                 a list of k "closest" contacts (C{kademlia.contact.Contact}
                 objects) to the specified key as soon as the operation is
                 finished.
        @rtype: twisted.internet.defer.Deferred
        """
        return self._iterativeFind(key)

    def iterativeFindValue(self, key):
        """ The Kademlia search operation (deterministic)

        Call this to retrieve data from the DHT.

        @param key: the n-bit key (i.e. the value ID) to search for
        @type key: str

        @return: This immediately returns a deferred object, which will return
                 either one of two things:
                     - If the value was found, it will return a Python
                     dictionary containing the searched-for key (the C{key}
                     parameter passed to this method), and its associated
                     value, in the format:
                     C{<str>key: <str>data_value}
                     - If the value was not found, it will return a list of k
                     "closest" contacts (C{kademlia.contact.Contact} objects)
                     to the specified key
        @rtype: twisted.internet.defer.Deferred
        """
        # Prepare a callback for this operation
        outerDf = defer.Deferred()

        def checkResult(result):
            if isinstance(result, dict):
                # We have found the value
                outerDf.callback(result)
            else:
                # The value wasn't found, but a list of contacts was returned
                # Now, see if we have the value (it might seem wasteful to search on the network
                # first, but it ensures that all values are properly propagated through the
                # network
                if self._dataStore.hasPeersForBlob(key):
                    # Ok, we have the value locally, so use that
                    peers = self._dataStore.getPeersForBlob(key)
                    outerDf.callback({key: peers, "from_peer": 'self'})
                else:
                    # Ok, value does not exist in DHT at all
                    outerDf.callback(result)

        # Execute the search
        df = self._iterativeFind(key, rpc='findValue')
        df.addCallback(checkResult)
        # Hand failures over to the caller instead of leaving outerDf pending forever
        df.addErrback(outerDf.errback)
        return outerDf

    def addContact(self, contact):
        """ Add/update the given contact; simple wrapper for the same method
        in this object's RoutingTable object

        @param contact: The contact to add to this node's k-buckets
        @type contact: kademlia.contact.Contact
        """
        self._routingTable.addContact(contact)

    def removeContact(self, contactID):
        """ Remove the contact with the specified node ID from this node's
        table of known nodes. This is a simple wrapper for the same method
        in this object's RoutingTable object

        @param contactID: The node ID of the contact to remove
        @type contactID: str
        """
        self._routingTable.removeContact(contactID)

    def findContact(self, contactID):
        """ Find a entangled.kademlia.contact.Contact object for the specified
        contact ID

        @param contactID: The contact ID of the required Contact object
        @type contactID: str

        @return: A deferred firing with the Contact object of the remote node
                 with the specified node ID, or with None if the contact was
                 not found
        @rtype: twisted.internet.defer.Deferred
        """
        try:
            contact = self._routingTable.getContact(contactID)
        except ValueError:
            # Not in the routing table: look the node up on the network
            def parseResults(nodes):
                if contactID in nodes:
                    return nodes[nodes.index(contactID)]
                return None
            df = self.iterativeFindNode(contactID)
            df.addCallback(parseResults)
        else:
            df = defer.succeed(contact)
        return df

    @rpcmethod
    def ping(self):
        """ Used to verify contact between two Kademlia nodes

        @rtype: str
        """
        return 'pong'

    @rpcmethod
    def store(self, key, value, originalPublisherID=None, self_store=False, **kwargs):
        """ Store the received data in this node's local hash table

        @param key: The hashtable key of the data
        @type key: str
        @param value: The announcement; must contain the C{'port'} and
                      C{'lbryid'} entries, and a valid C{'token'} entry
                      unless this is a self-store
        @type value: dict
        @param originalPublisherID: The node ID of the node that is the
                                    B{original} publisher of the data; when
                                    omitted, the C{_rpcNodeID} keyword
                                    argument is used
        @type originalPublisherID: str
        @param self_store: C{True} only when this node stores its own
                           announcement. Any other value is treated as a
                           remote request, which needs a valid token.
        @type self_store: bool

        @return: C{'OK'} once stored, C{'Not OK'} if no contact information
                 is available
        @rtype: str

        @raise TypeError: if the publisher ID, the port or the lbryid is
                          missing, or the port is out of range
        @raise ValueError: if the token is missing or invalid, or the lbryid
                           is too long

        @todo: Since the data (value) may be large, passing it around as a buffer
               (which is the case currently) might not be a good idea... will have
               to fix this (perhaps use a stream from the Protocol class?)
        """
        # Get the sender's ID (if any)
        if originalPublisherID is None:
            if '_rpcNodeID' in kwargs:
                originalPublisherID = kwargs['_rpcNodeID']
            else:
                raise TypeError("No NodeID given. Therefore we can't store this node")

        if self_store is True and self.externalIP:
            contact = Contact(self.id, self.externalIP, self.port, None, None)
            compact_ip = contact.compact_ip()
        elif '_rpcNodeContact' in kwargs:
            contact = kwargs['_rpcNodeContact']
            compact_ip = contact.compact_ip()
        else:
            return 'Not OK'

        # Only a genuine self-store may skip the token. Testing
        # "self_store is False" would let any other value (e.g. the integer 0
        # sent by announcing peers) bypass the verification.
        if self_store is not True:
            if 'token' not in value or not self.verify_token(value['token'], compact_ip):
                raise ValueError('Invalid or missing token')

        if 'port' in value:
            port = int(value['port'])
            # '>H' is an unsigned 16 bit integer, so 65535 is the largest valid port
            if 0 <= port <= 65535:
                compact_port = str(struct.pack('>H', port))
            else:
                raise TypeError('Invalid port')
        else:
            raise TypeError('No port available')

        if 'lbryid' in value:
            # NOTE(review): this compares a length (presumably in bytes) with
            # key_bits (presumably a number of bits) - confirm the intended limit
            if len(value['lbryid']) > constants.key_bits:
                raise ValueError('Invalid lbryid')
            else:
                compact_address = compact_ip + compact_port + value['lbryid']
        else:
            raise TypeError('No lbryid given')

        now = int(time.time())
        originallyPublished = now
        self._dataStore.addPeerToBlob(
            key, compact_address, now, originallyPublished, originalPublisherID)
        return 'OK'

    @rpcmethod
    def findNode(self, key, **kwargs):
        """ Finds a number of known nodes closest to the node/value with the
        specified key.

        @param key: the n-bit key (i.e. the node or value ID) to search for
        @type key: str

        @return: A list of contact triples closest to the specified key.
                 This method will return C{k} contacts if at all possible;
                 it will only return fewer if the node is returning all of
                 the contacts that it knows of.
        @rtype: list
        """
        # Get the sender's ID (if any)
        if '_rpcNodeID' in kwargs:
            rpcSenderID = kwargs['_rpcNodeID']
        else:
            rpcSenderID = None
        contacts = self._routingTable.findCloseNodes(key, constants.k, rpcSenderID)
        contactTriples = []
        for contact in contacts:
            contactTriples.append((contact.id, contact.address, contact.port))
        return contactTriples

    @rpcmethod
    def findValue(self, key, **kwargs):
        """ Return the value associated with the specified key if present in
        this node's data, otherwise execute FIND_NODE for the key

        @param key: The hashtable key of the data to return
        @type key: str

        @return: A dictionary containing either the requested key/value pair
                 or, under C{'contacts'}, a list of contact triples closest
                 to the requested key. When the caller's contact is known, a
                 C{'token'} entry is included as well.
        @rtype: dict
        """
        if self._dataStore.hasPeersForBlob(key):
            rval = {key: self._dataStore.getPeersForBlob(key)}
        else:
            contactTriples = self.findNode(key, **kwargs)
            rval = {'contacts': contactTriples}
        if '_rpcNodeContact' in kwargs:
            contact = kwargs['_rpcNodeContact']
            compact_ip = contact.compact_ip()
            # The caller has to present this token in a later store request
            rval['token'] = self.make_token(compact_ip)
            self.hash_watcher.add_requested_hash(key, compact_ip)
        return rval

    def _generateID(self):
        """ Generates an n-bit pseudo-random identifier

        @return: The SHA-384 digest of a pseudo-random 255 bit number
        @rtype: str
        """
        # NOTE(review): this also produces the token secrets, and `random` is
        # not a cryptographically secure source - consider os.urandom
        digest = hashlib.sha384()
        digest.update(str(random.getrandbits(255)))
        return digest.digest()

    def _iterativeFind(self, key, startupShortlist=None, rpc='findNode'):
        """ The basic Kademlia iterative lookup operation (for nodes/values)

        This builds a list of k "closest" contacts through iterative use of
        the "FIND_NODE" RPC, or if C{rpc} is not C{'findNode'}, using the
        given RPC, in which case the value (if found) may be returned
        instead of a list of contacts

        @param key: the n-bit key (i.e. the node or value ID) to search for
        @type key: str
        @param startupShortlist: A list of contacts to use as the starting
                                 shortlist for this search; this is normally
                                 only used when the node joins the network
        @type startupShortlist: list
        @param rpc: The name of the RPC to issue to remote nodes during the
                    Kademlia lookup operation (e.g. this sets whether this
                    algorithm should search for a data value (if
                    rpc='findValue') or not. It can thus be used to perform
                    other operations that piggy-back on the basic Kademlia
                    lookup operation (Entangled's "delete" RPC, for instance).
        @type rpc: str

        @return: When searching for a value, the algorithm will stop as soon
                 as a data value for C{key} is found, and return a dictionary
                 containing the key and the found value. Otherwise, it will
                 return a list of the k closest nodes to the specified key
        @rtype: twisted.internet.defer.Deferred
        """
        findValue = rpc != 'findNode'
        if startupShortlist is None:
            shortlist = self._routingTable.findCloseNodes(key, constants.alpha)
            if key != self.id:
                # Update the "last accessed" timestamp for the appropriate k-bucket
                self._routingTable.touchKBucket(key)
            if len(shortlist) == 0:
                # This node doesn't know of any other nodes
                return defer.succeed([])
        else:
            # This is used during the bootstrap process; node ID's are most probably fake
            shortlist = startupShortlist

        outerDf = defer.Deferred()

        helper = _IterativeFindHelper(self, outerDf, shortlist, key, findValue, rpc)
        # Start the iterations
        helper.searchIteration()
        return outerDf

    def _refreshNode(self):
        """ Periodically called to perform k-bucket refreshes and data
        replication/republishing as necessary """
        df = self._refreshRoutingTable()
        df.addCallback(self._removeExpiredPeers)
        df.addCallback(self._scheduleNextNodeRefresh)

    def _refreshRoutingTable(self):
        """ Look up, one after the other, every ID of the routing table's refresh list

        @return: a deferred firing with C{None} once all lookups are done
        @rtype: twisted.internet.defer.Deferred
        """
        nodeIDs = self._routingTable.getRefreshList(0, False)
        outerDf = defer.Deferred()

        def searchForNextNodeID(dfResult=None):
            if len(nodeIDs) > 0:
                searchID = nodeIDs.pop()
                df = self.iterativeFindNode(searchID)
                df.addCallback(searchForNextNodeID)
            else:
                # If this is reached, we have finished refreshing the routing table
                outerDf.callback(None)
        # Start the refreshing cycle
        searchForNextNodeID()
        return outerDf

    def _scheduleNextNodeRefresh(self, *args):
        """ Schedule the next call of C{_refreshNode}; C{args} are ignored """
        self.next_refresh_call = twisted.internet.reactor.callLater(
            constants.checkRefreshInterval, self._refreshNode)

    # args put here because _refreshRoutingTable does outerDf.callback(None)
    def _removeExpiredPeers(self, *args):
        """ Run the data store's C{removeExpiredPeers} in a thread

        @rtype: twisted.internet.defer.Deferred
        """
        df = twisted.internet.threads.deferToThread(self._dataStore.removeExpiredPeers)
        return df
|
|
|
|
|
|
# This was originally a set of nested methods in _iterativeFind
|
|
# but they have been moved into this helper class in order to
|
|
# have better scoping and readability
|
|
class _IterativeFindHelper(object):
|
|
# TODO: use polymorphism to search for a value or node
|
|
# instead of using a find_value flag
|
|
def __init__(self, node, outer_d, shortlist, key, find_value, rpc):
|
|
self.node = node
|
|
self.outer_d = outer_d
|
|
self.shortlist = shortlist
|
|
self.key = key
|
|
self.find_value = find_value
|
|
self.rpc = rpc
|
|
# all distance operations in this class only care about the distance
|
|
# to self.key, so this makes it easier to calculate those
|
|
self.distance = Distance(key)
|
|
# List of active queries; len() indicates number of active probes
|
|
#
|
|
# n.b: using lists for these variables, because Python doesn't
|
|
# allow binding a new value to a name in an enclosing
|
|
# (non-global) scope
|
|
self.active_probes = []
|
|
# List of contact IDs that have already been queried
|
|
self.already_contacted = []
|
|
# Probes that were active during the previous iteration
|
|
# A list of found and known-to-be-active remote nodes
|
|
self.active_contacts = []
|
|
# This should only contain one entry; the next scheduled iteration call
|
|
self.pending_iteration_calls = []
|
|
self.prev_closest_node = [None]
|
|
self.find_value_result = {}
|
|
self.slow_node_count = [0]
|
|
|
|
def extendShortlist(self, responseTuple):
|
|
""" @type responseMsg: kademlia.msgtypes.ResponseMessage """
|
|
# The "raw response" tuple contains the response message,
|
|
# and the originating address info
|
|
responseMsg = responseTuple[0]
|
|
originAddress = responseTuple[1] # tuple: (ip adress, udp port)
|
|
# Make sure the responding node is valid, and abort the operation if it isn't
|
|
if responseMsg.nodeID in self.active_contacts or responseMsg.nodeID == self.node.id:
|
|
return responseMsg.nodeID
|
|
|
|
# Mark this node as active
|
|
aContact = self._getActiveContact(responseMsg, originAddress)
|
|
self.active_contacts.append(aContact)
|
|
|
|
# This makes sure "bootstrap"-nodes with "fake" IDs don't get queried twice
|
|
if responseMsg.nodeID not in self.already_contacted:
|
|
self.already_contacted.append(responseMsg.nodeID)
|
|
# Now grow extend the (unverified) shortlist with the returned contacts
|
|
result = responseMsg.response
|
|
#TODO: some validation on the result (for guarding against attacks)
|
|
# If we are looking for a value, first see if this result is the value
|
|
# we are looking for before treating it as a list of contact triples
|
|
if self.find_value is True and self.key in result and not 'contacts' in result:
|
|
# We have found the value
|
|
self.find_value_result[self.key] = result[self.key]
|
|
self.find_value_result['from_peer'] = aContact.address
|
|
else:
|
|
if self.find_value is True:
|
|
self._setClosestNodeValue(responseMsg, aContact)
|
|
self._keepSearching(result)
|
|
return responseMsg.nodeID
|
|
|
|
def _getActiveContact(self, responseMsg, originAddress):
|
|
if responseMsg.nodeID in self.shortlist:
|
|
# Get the contact information from the shortlist...
|
|
return self.shortlist[self.shortlist.index(responseMsg.nodeID)]
|
|
else:
|
|
# If it's not in the shortlist; we probably used a fake ID to reach it
|
|
# - reconstruct the contact, using the real node ID this time
|
|
return Contact(
|
|
responseMsg.nodeID, originAddress[0], originAddress[1], self.node._protocol)
|
|
|
|
def _keepSearching(self, result):
|
|
contactTriples = self._getContactTriples(result)
|
|
for contactTriple in contactTriples:
|
|
self._addIfValid(contactTriple)
|
|
|
|
def _getContactTriples(self, result):
|
|
if self.find_value is True:
|
|
return result['contacts']
|
|
else:
|
|
return result
|
|
|
|
def _setClosestNodeValue(self, responseMsg, aContact):
|
|
# We are looking for a value, and the remote node didn't have it
|
|
# - mark it as the closest "empty" node, if it is
|
|
if 'closestNodeNoValue' in self.find_value_result:
|
|
if self._is_closer(responseMsg):
|
|
self.find_value_result['closestNodeNoValue'] = aContact
|
|
else:
|
|
self.find_value_result['closestNodeNoValue'] = aContact
|
|
|
|
def _is_closer(self, responseMsg):
|
|
return self.distance.is_closer(responseMsg.nodeID, self.active_contacts[0].id)
|
|
|
|
def _addIfValid(self, contactTriple):
|
|
if isinstance(contactTriple, (list, tuple)) and len(contactTriple) == 3:
|
|
testContact = Contact(
|
|
contactTriple[0], contactTriple[1], contactTriple[2], self.node._protocol)
|
|
if testContact not in self.shortlist:
|
|
self.shortlist.append(testContact)
|
|
|
|
def removeFromShortlist(self, failure):
|
|
""" @type failure: twisted.python.failure.Failure """
|
|
failure.trap(protocol.TimeoutError)
|
|
deadContactID = failure.getErrorMessage()
|
|
if deadContactID in self.shortlist:
|
|
self.shortlist.remove(deadContactID)
|
|
return deadContactID
|
|
|
|
def cancelActiveProbe(self, contactID):
|
|
self.active_probes.pop()
|
|
if len(self.active_probes) <= constants.alpha/2 and len(self.pending_iteration_calls):
|
|
# Force the iteration
|
|
self.pending_iteration_calls[0].cancel()
|
|
del self.pending_iteration_calls[0]
|
|
self.searchIteration()
|
|
|
|
def sortByDistance(self, contact_list):
|
|
"""Sort the list of contacts in order by distance from key"""
|
|
ExpensiveSort(contact_list, self.distance.to_contact).sort()
|
|
|
|
# Send parallel, asynchronous FIND_NODE RPCs to the shortlist of contacts
|
|
def searchIteration(self):
|
|
self.slow_node_count[0] = len(self.active_probes)
|
|
# Sort the discovered active nodes from closest to furthest
|
|
self.sortByDistance(self.active_contacts)
|
|
# This makes sure a returning probe doesn't force calling this function by mistake
|
|
while len(self.pending_iteration_calls):
|
|
del self.pending_iteration_calls[0]
|
|
# See if should continue the search
|
|
if self.key in self.find_value_result:
|
|
self.outer_d.callback(self.find_value_result)
|
|
return
|
|
elif len(self.active_contacts) and self.find_value == False:
|
|
if self._is_all_done():
|
|
# TODO: Re-send the FIND_NODEs to all of the k closest nodes not already queried
|
|
#
|
|
# Ok, we're done; either we have accumulated k active
|
|
# contacts or no improvement in closestNode has been
|
|
# noted
|
|
self.outer_d.callback(self.active_contacts)
|
|
return
|
|
# The search continues...
|
|
if len(self.active_contacts):
|
|
self.prev_closest_node[0] = self.active_contacts[0]
|
|
contactedNow = 0
|
|
self.sortByDistance(self.shortlist)
|
|
# Store the current shortList length before contacting other nodes
|
|
prevShortlistLength = len(self.shortlist)
|
|
for contact in self.shortlist:
|
|
if contact.id not in self.already_contacted:
|
|
self._probeContact(contact)
|
|
contactedNow += 1
|
|
if contactedNow == constants.alpha:
|
|
break
|
|
if self._should_lookup_active_calls():
|
|
# Schedule the next iteration if there are any active
|
|
# calls (Kademlia uses loose parallelism)
|
|
call = twisted.internet.reactor.callLater(
|
|
constants.iterativeLookupDelay, self.searchIteration) #IGNORE:E1101
|
|
self.pending_iteration_calls.append(call)
|
|
# Check for a quick contact response that made an update to the shortList
|
|
elif prevShortlistLength < len(self.shortlist):
|
|
# Ensure that the closest contacts are taken from the updated shortList
|
|
self.searchIteration()
|
|
else:
|
|
# If no probes were sent, there will not be any improvement, so we're done
|
|
self.outer_d.callback(self.active_contacts)
|
|
|
|
def _probeContact(self, contact):
|
|
self.active_probes.append(contact.id)
|
|
rpcMethod = getattr(contact, self.rpc)
|
|
df = rpcMethod(self.key, rawResponse=True)
|
|
df.addCallback(self.extendShortlist)
|
|
df.addErrback(self.removeFromShortlist)
|
|
df.addCallback(self.cancelActiveProbe)
|
|
df.addErrback(log.fail(), 'Failed to contact %s', contact)
|
|
self.already_contacted.append(contact.id)
|
|
|
|
def _should_lookup_active_calls(self):
|
|
return (
|
|
len(self.active_probes) > self.slow_node_count[0] or
|
|
(
|
|
len(self.shortlist) < constants.k and
|
|
len(self.active_contacts) < len(self.shortlist) and
|
|
len(self.active_probes) > 0
|
|
)
|
|
)
|
|
|
|
def _is_all_done(self):
|
|
return (
|
|
len(self.active_contacts) >= constants.k or
|
|
(
|
|
self.active_contacts[0] == self.prev_closest_node[0] and
|
|
len(self.active_probes) == self.slow_node_count[0]
|
|
)
|
|
)
|
|
|
|
|
|
class Distance(object):
    """Calculate the XOR result between two string variables.

    Frequently we re-use one of the points so as an optimization
    we pre-calculate the long value of that point.
    """
    def __init__(self, key):
        """Remember `key` and pre-calculate its integer value."""
        self.key = key
        self.val_key_one = self._to_int(key)

    @staticmethod
    def _to_int(key):
        """Interpret the byte string `key` as a big-endian integer.

        `int(binascii.hexlify(key), 16)` yields the same value as the
        Python-2-only `long(key.encode('hex'), 16)`, but also works with
        Python 3 byte strings.
        """
        return int(binascii.hexlify(key), 16)

    def __call__(self, key_two):
        """Return the XOR distance between the stored key and `key_two`."""
        return self.val_key_one ^ self._to_int(key_two)

    def is_closer(self, a, b):
        """Returns true if `a` is closer to `key` than `b` is"""
        return self(a) < self(b)

    def to_contact(self, contact):
        """A convenience function for calculating the distance to a contact"""
        return self(contact.id)
|
|
|
|
|
|
class ExpensiveSort(object):
    """Sort a list in place.

    The result of `key(item)` is computed only once for each item in the
    `to_sort` list. This can be useful when `key` is expensive.

    The caching is left to `list.sort`, which calculates the key of every
    item exactly once. Nothing is written onto the items themselves, so
    items that do not accept new attributes can be sorted too, and no
    cached attribute is left behind when `key` raises.

    Attributes:
        to_sort: a list of items to sort
        key: callable, like `key` in normal python sort
        attr: kept for backward compatibility with existing callers; it
            used to name the attribute caching the value on each item and
            is not used for sorting any more.
    """
    def __init__(self, to_sort, key, attr='__value'):
        self.to_sort = to_sort
        self.key = key
        self.attr = attr

    def sort(self):
        """Sort `to_sort` in place, in ascending order of `key(item)`."""
        self.to_sort.sort(key=self.key)
|
|
|
|
|
|
def main():
    """Command line entry point: launch a DHT node and run the reactor.

    The node listens on the given UDP port. When the IP of a known node is
    given, that node (with the given port, 4000 by default) is used to
    bootstrap into the network; otherwise the node joins with an empty
    list of known nodes.
    """
    parser = argparse.ArgumentParser(description="Launch a dht node")
    parser.add_argument(
        "udp_port", type=int,
        help="The UDP port on which the node will listen")
    parser.add_argument(
        "known_node_ip", nargs='?',
        help="The IP of a known node to be used to bootstrap into the network")
    parser.add_argument(
        "known_node_port", nargs='?', default=4000, type=int,
        help="The port of a known node to be used to bootstrap into the network")
    options = parser.parse_args()

    # The bootstrap list stays empty unless a known node was named
    known_nodes = []
    if options.known_node_ip:
        known_nodes.append((options.known_node_ip, options.known_node_port))

    node = Node(udpPort=options.udp_port)
    node.joinNetwork(known_nodes)
    twisted.internet.reactor.run()


if __name__ == '__main__':
    main()
|