lbry.go/dht/routing_table.go

472 lines
12 KiB
Go
Raw Normal View History

2018-03-07 02:15:44 +01:00
package dht
import (
"encoding/json"
"fmt"
"net"
2018-03-07 02:15:44 +01:00
"sort"
"strconv"
"strings"
"sync"
2018-05-13 22:02:46 +02:00
"time"
2018-07-10 23:30:47 +02:00
"github.com/davecgh/go-spew/spew"
"github.com/lbryio/lbry.go/errors"
"github.com/lbryio/lbry.go/stop"
2018-06-14 17:48:02 +02:00
"github.com/lbryio/reflector.go/dht/bits"
2018-03-07 02:15:44 +01:00
)
2018-05-22 18:16:01 +02:00
// TODO: if routing table is ever empty (aka the node is isolated), it should re-bootstrap
// TODO: use a tree with bucket splitting instead of a fixed bucket list. include jack's optimization (see link in commit mesg)
// https://github.com/lbryio/lbry/pull/1211/commits/341b27b6d21ac027671d42458826d02735aaae41
2018-05-13 22:02:46 +02:00
// peer is a contact with extra freshness information
type peer struct {
Contact Contact
LastActivity time.Time
2018-06-28 00:09:10 +02:00
// LastReplied time.Time
// LastRequested time.Time
// LastFailure time.Time
// SecondLastFailure time.Time
2018-07-10 23:30:47 +02:00
NumFailures int
2018-06-28 00:09:10 +02:00
2018-05-13 22:02:46 +02:00
//<lastPublished>,
//<originallyPublished>
// <originalPublisherID>
2018-03-07 02:15:44 +01:00
}
2018-05-13 22:02:46 +02:00
func (p *peer) Touch() {
p.LastActivity = time.Now()
p.NumFailures = 0
2018-03-07 02:15:44 +01:00
}
2018-05-13 22:02:46 +02:00
// ActiveSince returns whether a peer has responded in the last `d` duration
// this is used to check if the peer is "good", meaning that we believe the peer will respond to our requests
func (p *peer) ActiveInLast(d time.Duration) bool {
return time.Since(p.LastActivity) < d
2018-05-13 22:02:46 +02:00
}
2018-05-13 22:02:46 +02:00
// IsBad returns whether a peer is "bad", meaning that it has failed to respond to multiple pings in a row
func (p *peer) IsBad(maxFalures int) bool {
return p.NumFailures >= maxFalures
2018-05-13 22:02:46 +02:00
}
// Fail marks a peer as having failed to respond. It returns whether or not the peer should be removed from the routing table
func (p *peer) Fail() {
p.NumFailures++
2018-05-13 22:02:46 +02:00
}
type bucket struct {
2018-07-10 23:30:47 +02:00
lock *sync.RWMutex
peers []peer
lastUpdate time.Time
Range bits.Range // capitalized because `range` is a keyword
}
func newBucket(r bits.Range) *bucket {
return &bucket{
peers: make([]peer, 0, bucketSize),
lock: &sync.RWMutex{},
Range: r,
}
2018-05-13 22:02:46 +02:00
}
// Len returns the number of peers in the bucket
func (b bucket) Len() int {
b.lock.RLock()
defer b.lock.RUnlock()
return len(b.peers)
2018-05-13 22:02:46 +02:00
}
2018-07-10 23:30:47 +02:00
// Contains reports whether the bucket holds a peer whose contact equals c,
// as decided by Contact.Equals(c, true).
//
// The receiver is a pointer so that b.peers is read only after the read lock
// is held; a value receiver would copy the slice header before locking and
// race with concurrent writers.
func (b *bucket) Contains(c Contact) bool {
	b.lock.RLock()
	defer b.lock.RUnlock()

	for i := range b.peers {
		if b.peers[i].Contact.Equals(c, true) {
			return true
		}
	}
	return false
}
2018-05-13 22:02:46 +02:00
// Contacts returns a slice of the bucket's contacts
func (b bucket) Contacts() []Contact {
b.lock.RLock()
defer b.lock.RUnlock()
contacts := make([]Contact, len(b.peers))
for i := range b.peers {
contacts[i] = b.peers[i].Contact
}
2018-05-13 22:02:46 +02:00
return contacts
}
2018-05-13 22:02:46 +02:00
// UpdateContact marks a contact as having been successfully contacted. if insertIfNew and the contact is does not exist yet, it is inserted
func (b *bucket) UpdateContact(c Contact, insertIfNew bool) {
b.lock.Lock()
defer b.lock.Unlock()
2018-07-10 23:30:47 +02:00
fmt.Printf("updating contact %s\n", c.ID)
2018-05-13 22:02:46 +02:00
// TODO: verify the peer is in the bucket key range
peerIndex := find(c.ID, b.peers)
if peerIndex >= 0 {
2018-07-10 23:30:47 +02:00
fmt.Println("exists, moving to back")
2018-05-13 22:02:46 +02:00
b.lastUpdate = time.Now()
b.peers[peerIndex].Touch()
moveToBack(b.peers, peerIndex)
2018-05-13 22:02:46 +02:00
} else if insertIfNew {
2018-07-10 23:30:47 +02:00
fmt.Println("inserting new")
2018-05-13 22:02:46 +02:00
hasRoom := true
if len(b.peers) >= bucketSize {
2018-07-10 23:30:47 +02:00
fmt.Println("no room")
2018-05-13 22:02:46 +02:00
hasRoom = false
for i := range b.peers {
if b.peers[i].IsBad(maxPeerFails) {
2018-07-10 23:30:47 +02:00
fmt.Println("dropping bad peer to make room")
2018-05-13 22:02:46 +02:00
// TODO: Ping contact first. Only remove if it does not respond
b.peers = append(b.peers[:i], b.peers[i+1:]...)
2018-05-13 22:02:46 +02:00
hasRoom = true
break
}
}
}
if hasRoom {
2018-07-10 23:30:47 +02:00
fmt.Println("actually adding")
2018-05-13 22:02:46 +02:00
b.lastUpdate = time.Now()
peer := peer{Contact: c}
2018-05-13 22:02:46 +02:00
peer.Touch()
b.peers = append(b.peers, peer)
2018-07-10 23:30:47 +02:00
} else {
fmt.Println("no room, dropping")
}
}
2018-05-13 22:02:46 +02:00
}
2018-05-13 22:02:46 +02:00
// FailContact marks a contact as having failed, and removes it if it failed too many times
2018-06-14 17:48:02 +02:00
func (b *bucket) FailContact(id bits.Bitmap) {
2018-05-13 22:02:46 +02:00
b.lock.Lock()
defer b.lock.Unlock()
i := find(id, b.peers)
if i >= 0 {
2018-05-13 22:02:46 +02:00
// BEP5 says not to remove the contact until the bucket is full and you try to insert
b.peers[i].Fail()
}
}
2018-05-13 22:02:46 +02:00
// find returns the contact in the bucket, or nil if the bucket does not contain the contact
2018-06-14 17:48:02 +02:00
func find(id bits.Bitmap, peers []peer) int {
for i := range peers {
if peers[i].Contact.ID.Equals(id) {
return i
2018-03-07 02:15:44 +01:00
}
2018-04-25 00:12:17 +02:00
}
return -1
2018-04-25 00:12:17 +02:00
}
2018-05-13 22:02:46 +02:00
// NeedsRefresh returns true if bucket has not been updated in the last `refreshInterval`, false otherwise
func (b *bucket) NeedsRefresh(refreshInterval time.Duration) bool {
b.lock.RLock()
defer b.lock.RUnlock()
return time.Since(b.lastUpdate) > refreshInterval
2018-05-13 22:02:46 +02:00
}
2018-07-10 23:30:47 +02:00
// Split divides the bucket into two new buckets and distributes its peers
// between them. The receiver itself is left unmodified; both new buckets
// inherit its lastUpdate.
//
// The two ranges come from b.Range.IntervalP(1, 2) and b.Range.IntervalP(2, 2)
// (presumably the lower and upper halves of the range; confirm against the
// bits package). If every peer lands on one side, the populated side is split
// again, recursively, and the outer edge of the result is stretched back to
// the edge of b's range so that the two returned buckets together still cover
// all of b.Range.
//
// NOTE(review): peers are assigned by testing the raw Contact.ID against the
// range, whereas bucketNumFor picks a bucket by testing the XOR distance from
// the table's id. Confirm these are meant to agree.
func (b *bucket) Split() (*bucket, *bucket) {
	b.lock.Lock()
	defer b.lock.Unlock()

	left := newBucket(b.Range.IntervalP(1, 2))
	right := newBucket(b.Range.IntervalP(2, 2))
	left.lastUpdate, right.lastUpdate = b.lastUpdate, b.lastUpdate

	for i := range b.peers {
		p := b.peers[i]
		dst := right
		if left.Range.Contains(p.Contact.ID) {
			dst = left
		}
		dst.peers = append(dst.peers, p)
	}

	switch {
	case len(left.peers) == 0:
		// everything went right: split the right side further and extend the
		// new left bucket down to where b started
		left, right = right.Split()
		left.Range.Start = b.Range.Start
	case len(right.peers) == 0:
		// everything went left: split the left side further and extend the
		// new right bucket up to where b ended
		left, right = left.Split()
		right.Range.End = b.Range.End
	}

	return left, right
}
type routingTable struct {
2018-06-14 17:48:02 +02:00
id bits.Bitmap
2018-07-10 23:30:47 +02:00
buckets []*bucket
mu *sync.RWMutex // this mutex is write-locked only when CHANGING THE NUMBER OF BUCKETS in the table
2018-05-13 22:02:46 +02:00
}
2018-06-14 17:48:02 +02:00
func newRoutingTable(id bits.Bitmap) *routingTable {
2018-07-10 23:30:47 +02:00
rt := routingTable{
id: id,
mu: &sync.RWMutex{},
}
2018-06-19 20:06:35 +02:00
rt.reset()
return &rt
}
func (rt *routingTable) reset() {
2018-07-10 23:30:47 +02:00
rt.mu.Lock()
defer rt.mu.Unlock()
rt.buckets = []*bucket{newBucket(bits.MaxRange())}
2018-03-07 02:15:44 +01:00
}
func (rt *routingTable) BucketInfo() string {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
2018-05-13 22:02:46 +02:00
var bucketInfo []string
for i, b := range rt.buckets {
if b.Len() > 0 {
contacts := b.Contacts()
s := make([]string, len(contacts))
for j, c := range contacts {
s[j] = c.ID.HexShort()
2018-05-13 22:02:46 +02:00
}
2018-06-28 00:09:10 +02:00
bucketInfo = append(bucketInfo, fmt.Sprintf("bucket %d: (%d) %s", i, len(contacts), strings.Join(s, ", ")))
2018-05-13 22:02:46 +02:00
}
}
2018-05-13 22:02:46 +02:00
if len(bucketInfo) == 0 {
return "buckets are empty"
}
return strings.Join(bucketInfo, "\n")
}
2018-05-13 22:02:46 +02:00
// Update inserts or refreshes a contact
func (rt *routingTable) Update(c Contact) {
2018-07-10 23:30:47 +02:00
rt.mu.Lock() // write lock, because updates may cause bucket splits
defer rt.mu.Unlock()
if rt.shouldSplit(c) {
spew.Dump("splitting")
i := rt.bucketNumFor(c.ID)
left, right := rt.buckets[i].Split()
rt.buckets = append(rt.buckets[:i], append([]*bucket{left, right}, rt.buckets[i+1:]...)...)
} else {
spew.Dump("no split")
}
rt.buckets[rt.bucketNumFor(c.ID)].UpdateContact(c, true)
2018-05-13 22:02:46 +02:00
}
// Fresh refreshes a contact if its already in the routing table
func (rt *routingTable) Fresh(c Contact) {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
rt.bucketFor(c.ID).UpdateContact(c, false)
2018-05-13 22:02:46 +02:00
}
// FailContact marks a contact as having failed, and removes it if it failed too many times
func (rt *routingTable) Fail(c Contact) {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
rt.bucketFor(c.ID).FailContact(c.ID)
2018-05-13 22:02:46 +02:00
}
2018-07-10 23:30:47 +02:00
// GetClosest returns the closest `limit` contacts from the routing table.
// This is a locking wrapper around getClosest()
func (rt *routingTable) GetClosest(target bits.Bitmap, limit int) []Contact {
rt.mu.RLock()
defer rt.mu.RUnlock()
return rt.getClosest(target, limit)
2018-06-29 21:47:00 +02:00
}
2018-07-10 23:30:47 +02:00
// getClosest returns the closest `limit` contacts from the routing table
func (rt *routingTable) getClosest(target bits.Bitmap, limit int) []Contact {
var toSort []sortedContact
for _, b := range rt.buckets {
2018-07-10 23:30:47 +02:00
for _, c := range b.Contacts() {
toSort = append(toSort, sortedContact{c, c.ID.Xor(target)})
}
2018-03-07 02:15:44 +01:00
}
sort.Sort(byXorDistance(toSort))
2018-07-10 23:30:47 +02:00
var contacts []Contact
2018-04-28 02:16:12 +02:00
for _, sorted := range toSort {
contacts = append(contacts, sorted.contact)
if len(contacts) >= limit {
break
}
2018-03-07 02:15:44 +01:00
}
2018-04-28 02:16:12 +02:00
return contacts
2018-03-07 02:15:44 +01:00
}
2018-04-28 02:16:12 +02:00
// Count returns the number of contacts in the routing table
func (rt *routingTable) Count() int {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
count := 0
for _, bucket := range rt.buckets {
2018-06-21 21:05:45 +02:00
count += bucket.Len()
}
return count
}
2018-06-29 21:47:00 +02:00
// Len returns the number of buckets in the routing table
func (rt *routingTable) Len() int {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
2018-06-29 21:47:00 +02:00
return len(rt.buckets)
}
// BucketRanges returns a slice of ranges, where the `start` of each range is the smallest id that can
// go in that bucket, and the `end` is the largest id
func (rt *routingTable) BucketRanges() []bits.Range {
2018-07-10 23:30:47 +02:00
rt.mu.RLock()
defer rt.mu.RUnlock()
ranges := make([]bits.Range, len(rt.buckets))
for i, b := range rt.buckets {
2018-07-10 23:30:47 +02:00
ranges[i] = b.Range
}
return ranges
}
2018-06-14 17:48:02 +02:00
func (rt *routingTable) bucketNumFor(target bits.Bitmap) int {
2018-05-01 22:18:38 +02:00
if rt.id.Equals(target) {
panic("routing table does not have a bucket for its own id")
}
distance := target.Xor(rt.id)
for i, b := range rt.buckets {
2018-07-10 23:30:47 +02:00
if b.Range.Contains(distance) {
return i
}
}
panic("target value overflows the key space")
2018-05-01 22:18:38 +02:00
}
2018-06-14 17:48:02 +02:00
func (rt *routingTable) bucketFor(target bits.Bitmap) *bucket {
2018-07-10 23:30:47 +02:00
return rt.buckets[rt.bucketNumFor(target)]
2018-05-13 22:02:46 +02:00
}
2018-07-10 23:30:47 +02:00
func (rt *routingTable) shouldSplit(c Contact) bool {
b := rt.bucketFor(c.ID)
if b.Contains(c) {
return false
}
2018-06-29 21:47:00 +02:00
if b.Len() >= bucketSize {
2018-07-10 23:30:47 +02:00
if b.Range.Start.Equals(bits.Bitmap{}) { // this is the bucket covering our node id
return true
}
2018-07-10 23:30:47 +02:00
kClosest := rt.getClosest(rt.id, bucketSize)
kthClosest := kClosest[len(kClosest)-1]
if rt.id.Closer(c.ID, kthClosest.ID) {
return true
}
}
return false
}
2018-06-28 00:09:10 +02:00
func (rt *routingTable) printBucketInfo() {
for i, b := range rt.buckets {
2018-07-10 23:30:47 +02:00
fmt.Printf("bucket %d, %d contacts\n", i+1, len(b.peers))
fmt.Printf(" start : %s\n", b.Range.Start.String())
fmt.Printf(" stop : %s\n", b.Range.End.String())
2018-06-28 00:09:10 +02:00
fmt.Println("")
}
}
2018-06-14 17:48:02 +02:00
func (rt *routingTable) GetIDsForRefresh(refreshInterval time.Duration) []bits.Bitmap {
var bitmaps []bits.Bitmap
2018-05-13 22:02:46 +02:00
for i, bucket := range rt.buckets {
if bucket.NeedsRefresh(refreshInterval) {
2018-06-14 17:48:02 +02:00
bitmaps = append(bitmaps, bits.Rand().Prefix(i, false))
2018-03-07 02:15:44 +01:00
}
}
2018-05-13 22:02:46 +02:00
return bitmaps
2018-03-07 02:15:44 +01:00
}
// rtContactSep separates the id, IP and port inside one saved contact string.
const rtContactSep = "-"

// rtSave is the JSON representation of a routing table.
type rtSave struct {
	// ID is the hex encoding of the table's own id.
	ID string `json:"id"`
	// Contacts holds one "<hex id>-<ip>-<port>" string per contact.
	Contacts []string `json:"contacts"`
}
// MarshalJSON encodes the table as an rtSave: the table's id in hex plus one
// string per contact made of the contact's hex id, IP and port joined with
// rtContactSep. Contacts are emitted in bucket order. It does not take the
// table lock itself.
func (rt *routingTable) MarshalJSON() ([]byte, error) {
	out := rtSave{ID: rt.id.Hex()}

	for _, b := range rt.buckets {
		for _, c := range b.Contacts() {
			fields := []string{c.ID.Hex(), c.IP.String(), strconv.Itoa(c.Port)}
			out.Contacts = append(out.Contacts, strings.Join(fields, rtContactSep))
		}
	}

	return json.Marshal(out)
}
func (rt *routingTable) UnmarshalJSON(b []byte) error {
var data rtSave
err := json.Unmarshal(b, &data)
if err != nil {
return err
}
2018-06-14 17:48:02 +02:00
rt.id, err = bits.FromHex(data.ID)
if err != nil {
return errors.Prefix("decoding ID", err)
}
2018-06-19 20:06:35 +02:00
rt.reset()
for _, s := range data.Contacts {
parts := strings.Split(s, rtContactSep)
if len(parts) != 3 {
return errors.Err("decoding contact %s: wrong number of parts", s)
}
var c Contact
2018-06-14 17:48:02 +02:00
c.ID, err = bits.FromHex(parts[0])
if err != nil {
return errors.Err("decoding contact %s: invalid ID: %s", s, err)
}
c.IP = net.ParseIP(parts[1])
if c.IP == nil {
return errors.Err("decoding contact %s: invalid IP", s)
}
c.Port, err = strconv.Atoi(parts[2])
if err != nil {
return errors.Err("decoding contact %s: invalid port: %s", s, err)
}
rt.Update(c)
}
return nil
}
2018-05-13 22:02:46 +02:00
// RoutingTableRefresh refreshes any buckets that need to be refreshed
func RoutingTableRefresh(n *Node, refreshInterval time.Duration, parentGrp *stop.Group) {
done := stop.New()
2018-05-13 22:02:46 +02:00
for _, id := range n.rt.GetIDsForRefresh(refreshInterval) {
2018-06-13 18:45:47 +02:00
done.Add(1)
2018-06-14 17:48:02 +02:00
go func(id bits.Bitmap) {
2018-06-13 18:45:47 +02:00
defer done.Done()
_, _, err := FindContacts(n, id, false, parentGrp)
2018-06-13 18:45:47 +02:00
if err != nil {
log.Error("error finding contact during routing table refresh - ", err)
}
2018-05-13 22:02:46 +02:00
}(id)
}
2018-06-13 18:45:47 +02:00
done.Wait()
done.Stop()
}
// moveToBack moves peers[index] to the end of the slice, in place, shifting
// the elements after it one position towards the front. It does nothing when
// index is negative, already the last position, or past the end.
func moveToBack(peers []peer, index int) {
	last := len(peers) - 1
	if index < 0 || index >= last {
		return
	}

	moved := peers[index]
	copy(peers[index:], peers[index+1:]) // copy handles the overlapping ranges
	peers[last] = moved
}