lbry-sdk/lbry/dht/protocol/routing_table.py

import asyncio
import random
import logging
import typing
import itertools

from prometheus_client import Gauge

from lbry import utils
from lbry.dht import constants
from lbry.dht.protocol.distance import Distance
if typing.TYPE_CHECKING:
    from lbry.dht.peer import KademliaPeer, PeerManager

log = logging.getLogger(__name__)


class KBucket:
    """
    Kademlia K-bucket implementation.

    A bucket holds at most C{constants.K} peers whose distance from the local node id (as computed by
    C{Distance(node_id)}) lies in the half-open interval [range_min, range_max). Peers that are added
    again are moved to the end of the list, so the head of the list is the entry that was
    added/refreshed the longest time ago.
    """
    peer_in_routing_table_metric = Gauge(
        "peers_in_routing_table", "Number of peers on routing table", namespace="dht_node",
        labelnames=("scope",)
    )
    peer_with_x_bit_colliding_metric = Gauge(
        "peer_x_bit_colliding", "Number of peers with at least X bits colliding with this node id",
        namespace="dht_node", labelnames=("amount",)
    )

    def __init__(self, peer_manager: 'PeerManager', range_min: int, range_max: int, node_id: bytes):
        """
        @param peer_manager: Used to decide whether a contact is known to be good
        @param range_min: The lower boundary (inclusive) for the range in the n-bit ID
                          space covered by this k-bucket
        @param range_max: The upper boundary (exclusive) for the range in the ID space
                          covered by this k-bucket
        @param node_id: The id of the local node; bucket ranges are expressed as distances to it
        """
        self._peer_manager = peer_manager
        self.last_accessed = 0
        self.range_min = range_min
        self.range_max = range_max
        self.peers: typing.List['KademliaPeer'] = []
        self._node_id = node_id
        self._distance_to_self = Distance(node_id)

    def add_peer(self, peer: 'KademliaPeer') -> bool:
        """ Add a contact to the bucket, or refresh it if it is already present.

        A contact that is already in the bucket (or one that shares its node id) is replaced by
        the given object and moved to the end of the list.

        @param peer: The contact to add

        @return: C{True} if the contact was added or refreshed, C{False} if the bucket is full
                 and the contact is not in it already (no exception is raised in that case)
        @rtype: bool
        """
        if peer in self.peers:
            # Move the existing contact to the end of the list
            # - using the new contact to allow add-on data
            #   (e.g. optimization-specific stuff) to be updated as well
            self.peers.remove(peer)
            self.peers.append(peer)
            return True
        for local_peer in self.peers:
            if local_peer.node_id == peer.node_id:
                # Same node id known under a different contact: swap in the new one.
                # Returning right away keeps the mutation-while-iterating safe.
                self.peers.remove(local_peer)
                self.peers.append(peer)
                return True
        if len(self.peers) >= constants.K:
            return False
        self.peers.append(peer)
        # Metrics are only touched for genuinely new entries, mirroring remove_peer
        self.peer_in_routing_table_metric.labels("global").inc()
        bits_colliding = utils.get_colliding_prefix_bits(peer.node_id, self._node_id)
        self.peer_with_x_bit_colliding_metric.labels(amount=bits_colliding).inc()
        return True

    def get_peer(self, node_id: bytes) -> 'KademliaPeer':
        """ Return the contact with the given node id.

        @raise IndexError: If no contact in this bucket has that node id
        """
        for peer in self.peers:
            if peer.node_id == node_id:
                return peer
        raise IndexError(node_id)

    def get_peers(self, count=-1, exclude_contact=None, sort_distance_to=None) -> typing.List['KademliaPeer']:
        """ Returns a list containing up to the first count number of contacts

        @param count: The amount of contacts to return (if 0 or less, return
                      all contacts); never more than C{constants.K} are returned
        @type count: int
        @param exclude_contact: A node_id to exclude; a contact with this node_id
                                is discarded before sorting and truncating
        @type exclude_contact: bytes

        @param sort_distance_to: Sort distance to the node_id, defaulting to the parent node node_id. If False don't
                                 sort the contacts

        @return: Return up to the first count number of contacts in a list
                 If no contacts are present an empty list is returned
        @rtype: list
        """
        peers = [peer for peer in self.peers if peer.node_id != exclude_contact]
        if not peers:
            return peers

        limit = len(peers) if count <= 0 else count
        limit = min(limit, constants.K, len(peers))

        if sort_distance_to is not False:
            # Build the distance function once instead of once per compared element
            distance = Distance(sort_distance_to or self._node_id)
            peers.sort(key=lambda c: distance(c.node_id))

        return peers[:limit]

    def get_bad_or_unknown_peers(self) -> typing.List['KademliaPeer']:
        """ Return the contacts, in bucket order, that the peer manager does not report as good. """
        candidates = self.get_peers(sort_distance_to=False)
        return [
            peer for peer in candidates
            if self._peer_manager.contact_triple_is_good(peer.node_id, peer.address, peer.udp_port) is not True
        ]

    def remove_peer(self, peer: 'KademliaPeer') -> None:
        """ Remove a contact from the bucket and update the metrics.

        @raise ValueError: If the contact is not in this bucket (raised by C{list.remove}
                           before any metric is touched)
        """
        self.peers.remove(peer)
        self.peer_in_routing_table_metric.labels("global").dec()
        bits_colliding = utils.get_colliding_prefix_bits(peer.node_id, self._node_id)
        self.peer_with_x_bit_colliding_metric.labels(amount=bits_colliding).dec()

    def key_in_range(self, key: bytes) -> bool:
        """ Tests whether the specified key (i.e. node ID) is in the range
        of the n-bit ID space covered by this k-bucket (in other words, it
        returns whether or not the specified key should be placed in this
        k-bucket). The comparison is made on the distance between the key
        and the local node id, not on the key itself.

        @param key: The key to test
        @type key: bytes

        @return: C{True} if the key is in this k-bucket's range, or C{False}
                 if not.
        @rtype: bool
        """
        return self.range_min <= self._distance_to_self(key) < self.range_max

    def __len__(self) -> int:
        return len(self.peers)

    def __contains__(self, item) -> bool:
        return item in self.peers


class TreeRoutingTable:
    """ This class implements a routing table used by a Node class.

    The Kademlia routing table is a binary tree whose leaves are k-buckets,
    where each k-bucket contains nodes with some common prefix of their IDs.
    This prefix is the k-bucket's position in the binary tree; it therefore
    covers some range of ID values, and together all of the k-buckets cover
    the entire n-bit ID (or key) space (with no overlap).

    The buckets are kept in a list ordered by range; each covers the half-open
    distance interval [range_min, range_max) and adjacent buckets share a boundary.

    @note: In this implementation, nodes in the tree (the k-buckets) are
    added dynamically, as needed; this technique is described in the 13-page
    version of the Kademlia paper, in section 2.4. It does, however, use the
    ping RPC-based k-bucket eviction algorithm described in section 2.2 of
    that paper.
    """
    bucket_in_routing_table_metric = Gauge(
        "buckets_in_routing_table", "Number of buckets on routing table", namespace="dht_node",
        labelnames=("scope",)
    )

    def __init__(self, loop: asyncio.AbstractEventLoop, peer_manager: 'PeerManager', parent_node_id: bytes,
                 split_buckets_under_index: int = constants.SPLIT_BUCKETS_UNDER_INDEX):
        """
        @param loop: Event loop whose clock is used for refresh and reply-age checks
        @param peer_manager: Source of contact health information
        @param parent_node_id: The id of the local node
        @param split_buckets_under_index: Buckets with an index below this value are always split when full
        """
        self._loop = loop
        self._peer_manager = peer_manager
        self._parent_node_id = parent_node_id
        self._split_buckets_under_index = split_buckets_under_index
        # Start with a single bucket covering the whole distance space
        self.buckets: typing.List[KBucket] = [
            KBucket(
                self._peer_manager, range_min=0, range_max=2 ** constants.HASH_BITS, node_id=self._parent_node_id
            )
        ]

    def get_peers(self) -> typing.List['KademliaPeer']:
        """ Return a new list with every contact of every bucket, in bucket order. """
        return list(itertools.chain.from_iterable(bucket.peers for bucket in self.buckets))

    def should_split(self, bucket_index: int, to_add: bytes) -> bool:
        """ Decide whether a full bucket should be split to make room for C{to_add}.

        Buckets below the configured index are always split; otherwise the bucket is split only
        if the new id is closer to the local node than the k-th closest known contact
        (or the farthest one when fewer than k are known).
        """
        #  https://stackoverflow.com/questions/32129978/highly-unbalanced-kademlia-routing-table/32187456#32187456
        if bucket_index < self._split_buckets_under_index:
            return True
        contacts = self.get_peers()
        distance = Distance(self._parent_node_id)
        contacts.sort(key=lambda c: distance(c.node_id))
        kth_contact = contacts[-1] if len(contacts) < constants.K else contacts[constants.K - 1]
        return distance(to_add) < distance(kth_contact.node_id)

    def find_close_peers(self, key: bytes, count: typing.Optional[int] = None,
                         sender_node_id: typing.Optional[bytes] = None) -> typing.List['KademliaPeer']:
        """ Return up to C{count} known contacts (default C{constants.K}) sorted by distance to C{key}.

        The local node id and, if given, C{sender_node_id} are never part of the result.
        """
        exclude = [self._parent_node_id]
        if sender_node_id:
            exclude.append(sender_node_id)
        count = count or constants.K
        distance = Distance(key)
        candidates = [c for c in self.get_peers() if c.node_id not in exclude]
        return sorted(candidates, key=lambda c: distance(c.node_id))[:count]

    def get_peer(self, contact_id: bytes) -> 'KademliaPeer':
        """
        @raise IndexError: No contact with the specified contact ID is known
                           by this node
        """
        return self.buckets[self.kbucket_index(contact_id)].get_peer(contact_id)

    def get_refresh_list(self, start_index: int = 0, force: bool = False) -> typing.List[bytes]:
        """ Return one id (the midpoint of the bucket range) for every bucket that needs a refresh.

        @param start_index: Index of the first bucket to consider
        @param force: If true, every bucket from C{start_index} on is included regardless of
                      when it was last accessed
        """
        now = int(self._loop.time())
        return [
            self.midpoint_id_in_bucket_range(bucket_index)
            for bucket_index, bucket in enumerate(self.buckets[start_index:], start_index)
            if force or now - bucket.last_accessed >= constants.REFRESH_INTERVAL
        ]

    def remove_peer(self, peer: 'KademliaPeer') -> None:
        """ Remove a contact from the table, merging away buckets left empty. Unknown contacts are ignored. """
        if not peer.node_id:
            return
        bucket_index = self.kbucket_index(peer.node_id)
        try:
            self.buckets[bucket_index].remove_peer(peer)
            self._join_buckets()
        except ValueError:
            # The contact was not in its bucket; nothing to do
            return

    def kbucket_index(self, key: bytes) -> int:
        """ Return the index of the bucket whose range contains C{key}.

        If no bucket claims the key, C{len(self.buckets)} is returned, which is not a valid index.
        """
        for index, bucket in enumerate(self.buckets):
            if bucket.key_in_range(key):
                return index
        return len(self.buckets)

    def random_id_in_bucket_range(self, bucket_index: int) -> bytes:
        """ Return an id whose distance to the local node is a random value within the bucket's range. """
        bucket = self.buckets[bucket_index]
        random_distance = int(random.randrange(bucket.range_min, bucket.range_max))
        # Bucket ranges are distances, so map the chosen distance back to an id
        return Distance(
            self._parent_node_id
        )(random_distance.to_bytes(constants.HASH_LENGTH, 'big')).to_bytes(constants.HASH_LENGTH, 'big')

    def midpoint_id_in_bucket_range(self, bucket_index: int) -> bytes:
        """ Return the id whose distance to the local node is the midpoint of the bucket's range. """
        bucket = self.buckets[bucket_index]
        half = int((bucket.range_max - bucket.range_min) // 2)
        return Distance(self._parent_node_id)(
            int(bucket.range_min + half).to_bytes(constants.HASH_LENGTH, 'big')
        ).to_bytes(constants.HASH_LENGTH, 'big')

    def split_bucket(self, old_bucket_index: int) -> None:
        """ Splits the specified k-bucket into two new buckets which together
        cover the same range in the key/ID space

        @param old_bucket_index: The index of k-bucket to split (in this table's
                                 list of k-buckets)
        @type old_bucket_index: int
        """
        # Resize the range of the current (old) k-bucket
        old_bucket = self.buckets[old_bucket_index]
        split_point = old_bucket.range_max - (old_bucket.range_max - old_bucket.range_min) // 2
        # Create a new k-bucket to cover the range split off from the old bucket
        new_bucket = KBucket(self._peer_manager, split_point, old_bucket.range_max, self._parent_node_id)
        old_bucket.range_max = split_point
        # Now, add the new bucket into the routing table tree
        self.buckets.insert(old_bucket_index + 1, new_bucket)
        # Finally, copy all nodes that belong to the new k-bucket into it...
        for contact in old_bucket.peers:
            if new_bucket.key_in_range(contact.node_id):
                new_bucket.add_peer(contact)
        # ...and remove them from the old bucket
        for contact in new_bucket.peers:
            old_bucket.remove_peer(contact)
        self.bucket_in_routing_table_metric.labels("global").set(len(self.buckets))

    def _join_buckets(self):
        """ Remove empty buckets, handing their range over to the neighbouring buckets.

        An empty bucket with two neighbours has its range divided between them at the midpoint;
        an empty bucket at either end of the list is absorbed by its only neighbour. The last
        remaining bucket is never removed.
        """
        while len(self.buckets) > 1:
            empty_indexes = [i for i, bucket in enumerate(self.buckets) if len(bucket) == 0]
            if not empty_indexes:
                return
            log.info("join buckets %i", len(empty_indexes))
            index = empty_indexes[0]
            bucket = self.buckets[index]
            has_lower = index - 1 >= 0
            has_higher = index + 1 < len(self.buckets)
            if has_lower and has_higher:
                midpoint = ((bucket.range_max - bucket.range_min) // 2) + bucket.range_min
                # Ranges are half-open, so both neighbours must meet at the very same value;
                # ending the lower one at midpoint - 1 would leave that distance in no bucket.
                self.buckets[index - 1].range_max = midpoint
                self.buckets[index + 1].range_min = midpoint
            elif has_lower:
                self.buckets[index - 1].range_max = bucket.range_max
            else:
                # With more than one bucket, a bucket without a lower neighbour has a higher one
                self.buckets[index + 1].range_min = bucket.range_min
            self.buckets.remove(bucket)
            self.bucket_in_routing_table_metric.labels("global").set(len(self.buckets))

    def buckets_with_contacts(self) -> int:
        """ Return the number of buckets that hold at least one contact. """
        return sum(1 for bucket in self.buckets if len(bucket) > 0)

    async def add_peer(self, peer: 'KademliaPeer',
                       probe: typing.Callable[['KademliaPeer'], typing.Awaitable]) -> bool:
        """ Try to add a contact to the routing table.

        @param peer: The contact to add
        @param probe: Awaitable callable used to ping an existing contact; it is expected to raise
                      C{asyncio.TimeoutError} when the contact does not answer

        @return: C{True} if the contact was added or refreshed, C{False} if it was rejected
        @rtype: bool
        """
        if not peer.node_id:
            log.warning("Tried adding a peer with no node id!")
            return False
        # An address can only belong to one node id: drop contacts that claim the same
        # address and port under a different id
        for my_peer in self.get_peers():
            if (my_peer.address, my_peer.udp_port) == (peer.address, peer.udp_port) and my_peer.node_id != peer.node_id:
                self.remove_peer(my_peer)
                self._join_buckets()
        bucket_index = self.kbucket_index(peer.node_id)
        if self.buckets[bucket_index].add_peer(peer):
            return True

        # The bucket is full; see if it can be split (by checking if its range includes the host node's node_id)
        if self.should_split(bucket_index, peer.node_id):
            self.split_bucket(bucket_index)
            # Retry the insertion attempt
            result = await self.add_peer(peer, probe)
            self._join_buckets()
            return result

        # We can't split the k-bucket
        #
        # The 13 page kademlia paper specifies that the least recently contacted node in the bucket
        # shall be pinged. If it fails to reply it is replaced with the new contact. If the ping is successful
        # the new contact is ignored and not added to the bucket (sections 2.2 and 2.4).
        #
        # A reasonable extension to this is BEP 0005, which extends the above:
        #
        #    Not all nodes that we learn about are equal. Some are "good" and some are not.
        #    Many nodes using the DHT are able to send queries and receive responses,
        #    but are not able to respond to queries from other nodes. It is important that
        #    each node's routing table must contain only known good nodes. A good node is
        #    a node has responded to one of our queries within the last 15 minutes. A node
        #    is also good if it has ever responded to one of our queries and has sent us a
        #    query within the last 15 minutes. After 15 minutes of inactivity, a node becomes
        #    questionable. Nodes become bad when they fail to respond to multiple queries
        #    in a row. Nodes that we know are good are given priority over nodes with unknown status.
        #
        # When there are bad or questionable nodes in the bucket, the least recent is selected for
        # potential replacement (BEP 0005). When all nodes in the bucket are fresh, the head (least recent)
        # contact is selected as described in section 2.2 of the kademlia paper. In both cases the new contact
        # is ignored if the pinged node replies.

        full_bucket = self.buckets[bucket_index]
        not_recently_replied = []
        for my_peer in full_bucket.get_bad_or_unknown_peers():
            last_replied = self._peer_manager.get_last_replied(my_peer.address, my_peer.udp_port)
            if not last_replied or last_replied + 60 < self._loop.time():
                not_recently_replied.append(my_peer)
        if not_recently_replied:
            to_replace = not_recently_replied[0]
        else:
            to_replace = full_bucket.peers[0]
            last_replied = self._peer_manager.get_last_replied(to_replace.address, to_replace.udp_port)
            if last_replied and last_replied + 60 > self._loop.time():
                # The head contact replied within the last minute: keep it without pinging
                return False
        log.debug("pinging %s:%s", to_replace.address, to_replace.udp_port)
        try:
            await probe(to_replace)
            return False
        except asyncio.TimeoutError:
            log.debug("Replacing dead contact in bucket %i: %s:%i with %s:%i ", bucket_index,
                      to_replace.address, to_replace.udp_port, peer.address, peer.udp_port)
            # Buckets may have been split or merged while the probe was awaited, so the index
            # computed before the await can be stale; look the bucket up again.
            current_bucket = self.buckets[self.kbucket_index(to_replace.node_id)]
            if to_replace in current_bucket:
                current_bucket.remove_peer(to_replace)
            return await self.add_peer(peer, probe)
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`import asyncio`
			`import random`
			`import logging`
			`import typing`
			`import itertools`

change colliding bits metric to gauge 2021-12-16 20:50:14 +01:00			`from prometheus_client import Gauge`
prometheus: move blobs_stored and peers to SDK. add buckets_in_routing_table 2021-12-10 06:44:51 +01:00
replace duplicated code 2022-02-22 19:51:36 +01:00			`from lbry import utils`
moved lbrynet -> lbry 2019-06-21 02:55:47 +02:00			`from lbry.dht import constants`
			`from lbry.dht.protocol.distance import Distance`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`if typing.TYPE_CHECKING:`
moved lbrynet -> lbry 2019-06-21 02:55:47 +02:00			`from lbry.dht.peer import KademliaPeer, PeerManager`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`log = logging.getLogger(__name__)`


			`class KBucket:`
			`"""`
prometheus: move blobs_stored and peers to SDK. add buckets_in_routing_table 2021-12-10 06:44:51 +01:00			`Kademlia K-bucket implementation.`
			`"""`
add request received 2021-12-13 08:29:29 +01:00			`peer_in_routing_table_metric = Gauge(`
prometheus: move blobs_stored and peers to SDK. add buckets_in_routing_table 2021-12-10 06:44:51 +01:00			`"peers_in_routing_table", "Number of peers on routing table", namespace="dht_node",`
			`labelnames=("scope",)`
			`)`
change colliding bits metric to gauge 2021-12-16 20:50:14 +01:00			`peer_with_x_bit_colliding_metric = Gauge(`
count bit collisions between 8 and 16 2021-12-15 08:36:24 +01:00			`"peer_x_bit_colliding", "Number of peers with at least X bits colliding with this node id",`
add counter for peers with colliding bytes 2021-12-13 08:56:25 +01:00			`namespace="dht_node", labelnames=("amount",)`
			`)`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`def __init__(self, peer_manager: 'PeerManager', range_min: int, range_max: int, node_id: bytes):`
			`"""`
			`@param range_min: The lower boundary for the range in the n-bit ID`
			`space covered by this k-bucket`
			`@param range_max: The upper boundary for the range in the ID space`
			`covered by this k-bucket`
			`"""`
			`self._peer_manager = peer_manager`
			`self.last_accessed = 0`
			`self.range_min = range_min`
			`self.range_max = range_max`
			`self.peers: typing.List['KademliaPeer'] = []`
			`self._node_id = node_id`
fix KBucket.key_in_range The range_min and range_max of a KBucket where mistakenly bounding the node id of a peer to be added rather than the distance from the local node id to the peer node id 2019-02-19 19:57:11 +01:00			`self._distance_to_self = Distance(node_id)`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`def add_peer(self, peer: 'KademliaPeer') -> bool:`
			`""" Add contact to _contact list in the right order. This will move the`
			`contact to the end of the k-bucket if it is already present.`

			`@raise kademlia.kbucket.BucketFull: Raised when the bucket is full and`
			`the contact isn't in the bucket`
			`already`

			`@param peer: The contact to add`
			`@type peer: dht.contact._Contact`
			`"""`
			`if peer in self.peers:`
			`# Move the existing contact to the end of the list`
			`# - using the new contact to allow add-on data`
			`# (e.g. optimization-specific stuff) to pe updated as well`
			`self.peers.remove(peer)`
			`self.peers.append(peer)`
			`return True`
fix KBucket.add_peer 2019-02-21 01:24:44 +01:00			`else:`
upgrade pylint and fix lint errors 2021-08-21 04:36:35 +02:00			`for i, _ in enumerate(self.peers):`
lint: finish dht parts 2020-01-03 05:31:28 +01:00			`local_peer = self.peers[i]`
			`if local_peer.node_id == peer.node_id:`
			`self.peers.remove(local_peer)`
fix KBucket.add_peer 2019-02-21 01:24:44 +01:00			`self.peers.append(peer)`
			`return True`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`if len(self.peers) < constants.K:`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`self.peers.append(peer)`
add request received 2021-12-13 08:29:29 +01:00			`self.peer_in_routing_table_metric.labels("global").inc()`
simplify, genaralize to any size and fix tests 2022-02-23 02:38:04 +01:00			`bits_colliding = utils.get_colliding_prefix_bits(peer.node_id, self._node_id)`
replace duplicated code 2022-02-22 19:51:36 +01:00			`self.peer_with_x_bit_colliding_metric.labels(amount=bits_colliding).inc()`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`return True`
			`else:`
			`return False`
			`# raise BucketFull("No space in bucket to insert contact")`

			`def get_peer(self, node_id: bytes) -> 'KademliaPeer':`
			`for peer in self.peers:`
			`if peer.node_id == node_id:`
			`return peer`
			`raise IndexError(node_id)`

			`def get_peers(self, count=-1, exclude_contact=None, sort_distance_to=None) -> typing.List['KademliaPeer']:`
			`""" Returns a list containing up to the first count number of contacts`

			`@param count: The amount of contacts to return (if 0 or less, return`
			`all contacts)`
			`@type count: int`
			`@param exclude_contact: A node node_id to exclude; if this contact is in`
			`the list of returned values, it will be`
			`discarded before returning. If a C{str} is`
			`passed as this argument, it must be the`
			`contact's ID.`
			`@type exclude_contact: str`

			`@param sort_distance_to: Sort distance to the node_id, defaulting to the parent node node_id. If False don't`
			`sort the contacts`

			`@raise IndexError: If the number of requested contacts is too large`

			`@return: Return up to the first count number of contacts in a list`
			`If no contacts are present an empty is returned`
			`@rtype: list`
			`"""`
			`peers = [peer for peer in self.peers if peer.node_id != exclude_contact]`

			`# Return all contacts in bucket`
			`if count <= 0:`
			`count = len(peers)`

			`# Get current contact number`
			`current_len = len(peers)`

			`# If count greater than k - return only k contacts`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`if count > constants.K:`
			`count = constants.K`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`if not current_len:`
			`return peers`

			`if sort_distance_to is False:`
			`pass`
			`else:`
			`sort_distance_to = sort_distance_to or self._node_id`
			`peers.sort(key=lambda c: Distance(sort_distance_to)(c.node_id))`

			`return peers[:min(current_len, count)]`

			`def get_bad_or_unknown_peers(self) -> typing.List['KademliaPeer']:`
			`peer = self.get_peers(sort_distance_to=False)`
			`return [`
			`peer for peer in peer`
			`if self._peer_manager.contact_triple_is_good(peer.node_id, peer.address, peer.udp_port) is not True`
			`]`

			`def remove_peer(self, peer: 'KademliaPeer') -> None:`
			`self.peers.remove(peer)`
add request received 2021-12-13 08:29:29 +01:00			`self.peer_in_routing_table_metric.labels("global").dec()`
simplify, genaralize to any size and fix tests 2022-02-23 02:38:04 +01:00			`bits_colliding = utils.get_colliding_prefix_bits(peer.node_id, self._node_id)`
replace duplicated code 2022-02-22 19:51:36 +01:00			`self.peer_with_x_bit_colliding_metric.labels(amount=bits_colliding).dec()`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`def key_in_range(self, key: bytes) -> bool:`
			`""" Tests whether the specified key (i.e. node ID) is in the range`
			`of the n-bit ID space covered by this k-bucket (in otherwords, it`
			`returns whether or not the specified key should be placed in this`
			`k-bucket)`

			`@param key: The key to test`
			`@type key: str or int`

			`@return: C{True} if the key is in this k-bucket's range, or C{False}`
			`if not.`
			`@rtype: bool`
			`"""`
fix KBucket.key_in_range The range_min and range_max of a KBucket where mistakenly bounding the node id of a peer to be added rather than the distance from the local node id to the peer node id 2019-02-19 19:57:11 +01:00			`return self.range_min <= self._distance_to_self(key) < self.range_max`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			`def __len__(self) -> int:`
			`return len(self.peers)`

			`def __contains__(self, item) -> bool:`
			`return item in self.peers`


			`class TreeRoutingTable:`
			`""" This class implements a routing table used by a Node class.`

			`The Kademlia routing table is a binary tree whose leaves are k-buckets,`
			`where each k-bucket contains nodes with some common prefix of their IDs.`
			`This prefix is the k-bucket's position in the binary tree; it therefore`
			`covers some range of ID values, and together all of the k-buckets cover`
			`the entire n-bit ID (or key) space (with no overlap).`

			`@note: In this implementation, nodes in the tree (the k-buckets) are`
			`added dynamically, as needed; this technique is described in the 13-page`
			`version of the Kademlia paper, in section 2.4. It does, however, use the`
			`ping RPC-based k-bucket eviction algorithm described in section 2.2 of`
			`that paper.`
			`"""`
add request received 2021-12-13 08:29:29 +01:00			`bucket_in_routing_table_metric = Gauge(`
prometheus: move blobs_stored and peers to SDK. add buckets_in_routing_table 2021-12-10 06:44:51 +01:00			`"buckets_in_routing_table", "Number of buckets on routing table", namespace="dht_node",`
			`labelnames=("scope",)`
			`)`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
typing 2019-08-02 19:14:41 +02:00			`def __init__(self, loop: asyncio.AbstractEventLoop, peer_manager: 'PeerManager', parent_node_id: bytes,`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`split_buckets_under_index: int = constants.SPLIT_BUCKETS_UNDER_INDEX):`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`self._loop = loop`
			`self._peer_manager = peer_manager`
			`self._parent_node_id = parent_node_id`
add split_buckets_under_index config setting for seed nodes 2019-02-20 17:22:55 +01:00			`self._split_buckets_under_index = split_buckets_under_index`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`self.buckets: typing.List[KBucket] = [`
			`KBucket(`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`self._peer_manager, range_min=0, range_max=2 ** constants.HASH_BITS, node_id=self._parent_node_id`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`)`
			`]`

			`def get_peers(self) -> typing.List['KademliaPeer']:`
			`return list(itertools.chain.from_iterable(map(lambda bucket: bucket.peers, self.buckets)))`

			`def should_split(self, bucket_index: int, to_add: bytes) -> bool:`
			`# https://stackoverflow.com/questions/32129978/highly-unbalanced-kademlia-routing-table/32187456#32187456`
add split_buckets_under_index config setting for seed nodes 2019-02-20 17:22:55 +01:00			`if bucket_index < self._split_buckets_under_index:`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`return True`
			`contacts = self.get_peers()`
			`distance = Distance(self._parent_node_id)`
			`contacts.sort(key=lambda c: distance(c.node_id))`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`kth_contact = contacts[-1] if len(contacts) < constants.K else contacts[constants.K - 1]`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`return distance(to_add) < distance(kth_contact.node_id)`

			`def find_close_peers(self, key: bytes, count: typing.Optional[int] = None,`
			`sender_node_id: typing.Optional[bytes] = None) -> typing.List['KademliaPeer']:`
			`exclude = [self._parent_node_id]`
			`if sender_node_id:`
			`exclude.append(sender_node_id)`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`count = count or constants.K`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`distance = Distance(key)`
			`contacts = self.get_peers()`
			`contacts = [c for c in contacts if c.node_id not in exclude]`
			`if contacts:`
			`contacts.sort(key=lambda c: distance(c.node_id))`
			`return contacts[:min(count, len(contacts))]`
			`return []`

			`def get_peer(self, contact_id: bytes) -> 'KademliaPeer':`
			`"""`
			`@raise IndexError: No contact with the specified contact ID is known`
			`by this node`
			`"""`
			`return self.buckets[self.kbucket_index(contact_id)].get_peer(contact_id)`

			`def get_refresh_list(self, start_index: int = 0, force: bool = False) -> typing.List[bytes]:`
			`bucket_index = start_index`
			`refresh_ids = []`
			`now = int(self._loop.time())`
			`for bucket in self.buckets[start_index:]:`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`if force or now - bucket.last_accessed >= constants.REFRESH_INTERVAL:`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`to_search = self.midpoint_id_in_bucket_range(bucket_index)`
			`refresh_ids.append(to_search)`
			`bucket_index += 1`
			`return refresh_ids`

			`def remove_peer(self, peer: 'KademliaPeer') -> None:`
			`if not peer.node_id:`
			`return`
			`bucket_index = self.kbucket_index(peer.node_id)`
			`try:`
			`self.buckets[bucket_index].remove_peer(peer)`
move add peer logic to routing table 2022-07-11 07:38:55 +02:00			`self._join_buckets()`
async lbrynet.dht 2019-01-22 18:49:43 +01:00			`except ValueError:`
			`return`

			`def kbucket_index(self, key: bytes) -> int:`
			`i = 0`
			`for bucket in self.buckets:`
			`if bucket.key_in_range(key):`
			`return i`
			`else:`
			`i += 1`
			`return i`

			`def random_id_in_bucket_range(self, bucket_index: int) -> bytes:`
			`random_id = int(random.randrange(self.buckets[bucket_index].range_min, self.buckets[bucket_index].range_max))`
fix KBucket.key_in_range The range_min and range_max of a KBucket where mistakenly bounding the node id of a peer to be added rather than the distance from the local node id to the peer node id 2019-02-19 19:57:11 +01:00			`return Distance(`
			`self._parent_node_id`
dht constants -> CONSTANTS (linting) 2020-01-03 04:57:28 +01:00			`)(random_id.to_bytes(constants.HASH_LENGTH, 'big')).to_bytes(constants.HASH_LENGTH, 'big')`
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			def midpoint_id_in_bucket_range(self, bucket_index: int) -> bytes:
			    """
			    Produce the id that sits in the middle of the range of the given bucket.

			    The midpoint range_min + (range_max - range_min) // 2 is passed through
			    Distance(self._parent_node_id) to turn it into an id (presumably an XOR
			    against this node's id - confirm in Distance).

			    @param bucket_index: index of the bucket in self.buckets
			    @return: the id, constants.HASH_LENGTH bytes, big endian
			    """
			    bucket = self.buckets[bucket_index]
			    half_width = int((bucket.range_max - bucket.range_min) // 2)
			    midpoint = int(bucket.range_min + half_width)
			    from_parent = Distance(self._parent_node_id)
			    as_int = from_parent(midpoint.to_bytes(constants.HASH_LENGTH, 'big'))
			    return as_int.to_bytes(constants.HASH_LENGTH, 'big')
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			def split_bucket(self, old_bucket_index: int) -> None:
			    """ Divide one k-bucket into two buckets that together span the range the
			    original bucket covered.

			    The existing bucket keeps the lower part of the range; a new bucket for the
			    upper part is inserted right after it and receives every peer that falls
			    into its range.

			    @param old_bucket_index: position (in this table's list of k-buckets) of
			                             the bucket to split
			    @type old_bucket_index: int
			    """
			    lower = self.buckets[old_bucket_index]
			    upper_bound = lower.range_max
			    split_point = upper_bound - (upper_bound - lower.range_min) // 2
			    # The new bucket takes over [split_point, upper_bound) ...
			    upper = KBucket(self._peer_manager, split_point, upper_bound, self._parent_node_id)
			    # ... and the old one is shrunk so that it ends where the new one begins
			    lower.range_max = split_point
			    self.buckets.insert(old_bucket_index + 1, upper)
			    # Hand over the peers that now belong to the new bucket
			    for peer in lower.peers:
			        if upper.key_in_range(peer.node_id):
			            upper.add_peer(peer)
			    # Whatever made it into the new bucket must leave the old one
			    for peer in upper.peers:
			        lower.remove_peer(peer)
			    self.bucket_in_routing_table_metric.labels("global").set(len(self.buckets))
async lbrynet.dht 2019-01-22 18:49:43 +01:00
def _join_buckets(self):
    """
    Drop empty buckets and hand their ranges over to the neighbouring buckets.

    Nothing happens while only one bucket exists. Otherwise the first empty bucket
    is removed and its range is absorbed by its neighbours; this repeats until no
    empty bucket is left or a single bucket remains. The bucket count metric is
    updated after every removal.

    An empty bucket with neighbours on both sides is divided at its midpoint
    between them; an empty bucket at either end of the list is absorbed entirely
    by its only neighbour.
    """
    while len(self.buckets) > 1:
        empty_indexes = [i for i, bucket in enumerate(self.buckets) if len(bucket) == 0]
        if not empty_indexes:
            return
        log.info("join buckets %i", len(empty_indexes))
        index = empty_indexes[0]
        bucket = self.buckets[index]
        has_lower = index > 0
        has_higher = index + 1 < len(self.buckets)
        if has_lower and has_higher:
            midpoint = bucket.range_min + (bucket.range_max - bucket.range_min) // 2
            # Neighbouring buckets share their boundary (split_bucket sets the old
            # bucket's range_max to the new bucket's range_min), so both neighbours
            # must meet exactly at the midpoint. Ending the lower one at midpoint - 1
            # would leave that single value outside of every bucket.
            self.buckets[index - 1].range_max = midpoint
            self.buckets[index + 1].range_min = midpoint
        elif has_lower:
            self.buckets[index - 1].range_max = bucket.range_max
        else:
            # More than one bucket exists and there is none below, so there is one above
            self.buckets[index + 1].range_min = bucket.range_min
        # Delete by position: the index is already known and this does not rely on
        # equality comparisons between buckets
        del self.buckets[index]
        self.bucket_in_routing_table_metric.labels("global").set(len(self.buckets))
async lbrynet.dht 2019-01-22 18:49:43 +01:00
			def buckets_with_contacts(self) -> int:
			    """
			    Count the buckets that currently hold at least one peer.

			    @return: number of non-empty buckets
			    """
			    return sum(1 for bucket in self.buckets if len(bucket) > 0)
move add peer logic to routing table 2022-07-11 07:38:55 +02:00
			async def add_peer(self, peer: 'KademliaPeer', probe: typing.Callable[['KademliaPeer'], typing.Awaitable]):
			    """
			    Try to insert a peer into the routing table.

			    Known peers that share the new peer's address and UDP port but carry a
			    different node id are removed first. The peer is then offered to the bucket
			    responsible for its node id. When that bucket refuses it, the bucket is split
			    (if should_split allows it) and the insertion retried; otherwise one existing
			    peer of the bucket is probed and replaced only if the probe times out.

			    @param peer: the peer to add
			    @param probe: awaitable callable used to check whether an existing peer is
			                  still alive; an asyncio.TimeoutError from it marks the probed
			                  peer as dead
			    @return: True when the peer was added, False otherwise
			    """
			    if not peer.node_id:
			        log.warning("Tried adding a peer with no node id!")
			        return False

			    # Evict entries that use the same endpoint under another node id
			    endpoint = (peer.address, peer.udp_port)
			    for known in self.get_peers():
			        if (known.address, known.udp_port) == endpoint and known.node_id != peer.node_id:
			            self.remove_peer(known)
			            self._join_buckets()

			    bucket_index = self.kbucket_index(peer.node_id)
			    if self.buckets[bucket_index].add_peer(peer):
			        return True

			    # The bucket did not take the peer (it is full); split it when that is allowed
			    # and retry the insertion
			    if self.should_split(bucket_index, peer.node_id):
			        self.split_bucket(bucket_index)
			        added = await self.add_peer(peer, probe)
			        self._join_buckets()
			        return added

			    # The bucket can't be split.
			    #
			    # The Kademlia paper (sections 2.2 and 2.4) says to ping the least recently
			    # contacted node of the bucket: if it does not reply it is replaced by the new
			    # contact, if it replies the new contact is dropped.
			    #
			    # BEP 0005 refines this by ranking nodes. A node is "good" when it answered one
			    # of our queries within the last 15 minutes, or when it has ever answered us
			    # and sent us a query within the last 15 minutes. After 15 minutes of
			    # inactivity a node becomes questionable, and it becomes bad once it fails to
			    # answer several queries in a row. Good nodes take priority over nodes of
			    # unknown status.
			    #
			    # So: when the bucket holds bad or questionable nodes, the least recent of them
			    # is the replacement candidate; when every node is fresh, the head (least
			    # recent) of the bucket is. Either way the new contact is ignored when the
			    # pinged node replies.
			    full_bucket = self.buckets[bucket_index]
			    stale = []
			    for candidate in full_bucket.get_bad_or_unknown_peers():
			        replied_at = self._peer_manager.get_last_replied(candidate.address, candidate.udp_port)
			        if not replied_at or replied_at + 60 < self._loop.time():
			            stale.append(candidate)

			    if stale:
			        to_replace = stale[0]
			    else:
			        to_replace = full_bucket.peers[0]
			        replied_at = self._peer_manager.get_last_replied(to_replace.address, to_replace.udp_port)
			        # A head that replied within the last 60 units of loop time is kept
			        # without probing it
			        if replied_at and replied_at + 60 > self._loop.time():
			            return False

			    log.debug("pinging %s:%s", to_replace.address, to_replace.udp_port)
			    try:
			        await probe(to_replace)
			    except asyncio.TimeoutError:
			        log.debug("Replacing dead contact in bucket %i: %s:%i with %s:%i ", bucket_index,
			                  to_replace.address, to_replace.udp_port, peer.address, peer.udp_port)
			        # The bucket is looked up again: the table may have changed while waiting
			        if to_replace in self.buckets[bucket_index]:
			            self.buckets[bucket_index].remove_peer(to_replace)
			        return await self.add_peer(peer, probe)
			    # The probed peer answered, so the new peer is not added
			    return False