lbry.go/dht/dht.go

359 lines
9.2 KiB
Go
Raw Normal View History

2018-03-07 02:15:44 +01:00
package dht
import (
2018-04-25 00:12:17 +02:00
"fmt"
2018-03-07 02:15:44 +01:00
"net"
2018-04-05 17:35:57 +02:00
"strings"
2018-03-29 03:05:27 +02:00
"sync"
2018-03-07 02:15:44 +01:00
"time"
"github.com/lbryio/lbry.go/errors"
2018-03-29 03:05:27 +02:00
"github.com/lbryio/lbry.go/stopOnce"
2018-04-28 02:16:12 +02:00
"github.com/spf13/cast"
2018-03-07 02:15:44 +01:00
log "github.com/sirupsen/logrus"
)
2018-04-03 18:14:04 +02:00
// init is currently a no-op. The commented-out lines below are logrus settings
// (forced colors and debug-level output) that can be re-enabled while debugging.
func init() {
	//log.SetFormatter(&log.TextFormatter{ForceColors: true})
	//log.SetLevel(log.DebugLevel)
}
2018-05-01 22:18:38 +02:00
const (
	// network is the network name handed to the net package when listening and resolving addresses.
	network = "udp4"

	// TODO: all these constants should be defaults, and should be used to set values in the standard Config. then the code should use values in the config
	// TODO: alternatively, have a global Config for constants. at least that way tests can modify the values

	// alpha is the constant alpha in the spec.
	alpha = 3
	// bucketSize is the constant k in the spec.
	bucketSize = 8
	// nodeIDLength is the constant B in the spec, in bytes.
	nodeIDLength = 48
	// nodeIDBits is the number of bits in a node ID.
	nodeIDBits = nodeIDLength * 8
	// messageIDLength is in bytes.
	messageIDLength = 20

	udpRetry   = 3
	udpTimeout = 5 * time.Second
	// udpMaxMessageLength is in bytes. The longest message is believed to be ~676 bytes, so this is rounded up.
	udpMaxMessageLength = 1024

	// maxPeerFails is the number of failures after which a peer is considered bad and will be
	// removed from the routing table.
	maxPeerFails = 3

	// the time after which a key/value pair expires; this is a time-to-live (TTL) from the original publication date
	//tExpire = 60 * time.Minute

	// tReannounce is the time after which the original publisher must republish a key/value pair.
	tReannounce = 50 * time.Minute
	// tRefresh is the time after which an otherwise unaccessed bucket must be refreshed.
	tRefresh = 1 * time.Hour

	// the interval between Kademlia replication events, when a node is required to publish its entire database
	//tReplicate = 1 * time.Hour
	// the time after which a good node becomes questionable if it has not messaged us
	//tNodeRefresh = 15 * time.Minute

	// compactNodeInfoLength is nodeID + 4 bytes for the IP + 2 bytes for the port.
	compactNodeInfoLength = nodeIDLength + 6

	// tokenSecretRotationInterval is how often the token-generating secret is rotated.
	tokenSecretRotationInterval = 5 * time.Minute
)
2018-03-07 02:15:44 +01:00
// Config holds the settings used to construct a DHT.
type Config struct {
	// Address is the address of this node, in `ip:port` format.
	Address string
	// SeedNodes lists the addresses of the nodes through which this node joins the dht network.
	SeedNodes []string
	// NodeID is the hex-encoded id of this node. When it is empty, a random id is generated.
	NodeID string
	// PrintState is the interval at which the state of the dht is printed.
	PrintState time.Duration
}
// NewStandardConfig returns a Config pointer with default values.
func NewStandardConfig() *Config {
return &Config{
2018-04-24 23:20:03 +02:00
Address: "0.0.0.0:4444",
2018-03-07 02:15:44 +01:00
SeedNodes: []string{
"lbrynet1.lbry.io:4444",
"lbrynet2.lbry.io:4444",
"lbrynet3.lbry.io:4444",
},
}
}
// DHT represents a DHT node.
type DHT struct {
2018-04-05 17:35:57 +02:00
// config
conf *Config
2018-04-28 02:16:12 +02:00
// local contact
contact Contact
// node
2018-04-05 17:35:57 +02:00
node *Node
// stopper to shut down DHT
stop *stopOnce.Stopper
// channel is closed when DHT joins network
joined chan struct{}
2018-05-22 18:16:01 +02:00
// lock for announced list
lock *sync.RWMutex
// list of bitmaps that need to be reannounced periodically
announced map[Bitmap]bool
2018-03-07 02:15:44 +01:00
}
// New returns a DHT pointer. If config is nil, then config will be set to the default config.
func New(config *Config) (*DHT, error) {
2018-03-07 02:15:44 +01:00
if config == nil {
config = NewStandardConfig()
}
2018-04-28 02:16:12 +02:00
contact, err := getContact(config.NodeID, config.Address)
if err != nil {
2018-04-28 02:16:12 +02:00
return nil, err
}
2018-03-09 22:43:30 +01:00
d := &DHT{
2018-05-22 18:16:01 +02:00
conf: config,
contact: contact,
node: NewNode(contact.ID),
stop: stopOnce.New(),
joined: make(chan struct{}),
lock: &sync.RWMutex{},
announced: make(map[Bitmap]bool),
2018-03-07 02:15:44 +01:00
}
return d, nil
2018-03-07 02:15:44 +01:00
}
// join makes current node join the dht network.
func (dht *DHT) join() {
2018-04-25 00:12:17 +02:00
defer close(dht.joined) // if anyone's waiting for join to finish, they'll know its done
2018-04-03 18:14:04 +02:00
log.Debugf("[%s] joining network", dht.node.id.HexShort())
2018-04-25 00:12:17 +02:00
// ping nodes, which gets their real node IDs and adds them to the routing table
atLeastOneNodeResponded := false
2018-03-07 02:15:44 +01:00
for _, addr := range dht.conf.SeedNodes {
2018-04-25 00:12:17 +02:00
err := dht.Ping(addr)
2018-03-07 02:15:44 +01:00
if err != nil {
2018-04-25 00:12:17 +02:00
log.Error(errors.Prefix(fmt.Sprintf("[%s] join", dht.node.id.HexShort()), err))
} else {
atLeastOneNodeResponded = true
2018-03-07 02:15:44 +01:00
}
2018-04-25 00:12:17 +02:00
}
2018-03-07 02:15:44 +01:00
2018-04-25 00:12:17 +02:00
if !atLeastOneNodeResponded {
log.Errorf("[%s] join: no nodes responded to initial ping", dht.node.id.HexShort())
return
2018-03-29 03:05:27 +02:00
}
2018-03-07 02:15:44 +01:00
2018-03-29 03:05:27 +02:00
// now call iterativeFind on yourself
2018-05-13 22:02:46 +02:00
nf := newContactFinder(dht.node, dht.node.id, false)
_, err := nf.Find()
2018-03-29 03:05:27 +02:00
if err != nil {
2018-04-03 18:14:04 +02:00
log.Errorf("[%s] join: %s", dht.node.id.HexShort(), err.Error())
2018-03-07 02:15:44 +01:00
}
2018-05-24 19:05:05 +02:00
// TODO: after joining, refresh all the buckets all buckets further away than our closest neighbor
// http://xlattice.sourceforge.net/components/protocol/kademlia/specs.html#join
2018-03-07 02:15:44 +01:00
}
2018-03-29 03:05:27 +02:00
// Start starts the dht
2018-04-25 00:12:17 +02:00
func (dht *DHT) Start() error {
2018-04-28 02:16:12 +02:00
listener, err := net.ListenPacket(network, dht.conf.Address)
2018-03-07 02:15:44 +01:00
if err != nil {
2018-04-28 02:16:12 +02:00
return errors.Err(err)
2018-03-07 02:15:44 +01:00
}
2018-04-28 02:16:12 +02:00
conn := listener.(*net.UDPConn)
2018-03-07 02:15:44 +01:00
2018-04-28 02:16:12 +02:00
err = dht.node.Connect(conn)
if err != nil {
return err
}
2018-03-29 03:05:27 +02:00
dht.join()
2018-05-22 18:16:01 +02:00
dht.startReannouncer()
2018-04-28 02:16:12 +02:00
log.Debugf("[%s] DHT ready on %s (%d nodes found during join)", dht.node.id.HexShort(), dht.contact.Addr().String(), dht.node.rt.Count())
2018-04-25 00:12:17 +02:00
return nil
2018-04-05 17:35:57 +02:00
}
// WaitUntilJoined blocks until the node joins the network.
2018-04-05 17:35:57 +02:00
func (dht *DHT) WaitUntilJoined() {
if dht.joined == nil {
panic("dht not initialized")
}
<-dht.joined
2018-03-07 02:15:44 +01:00
}
2018-03-29 03:05:27 +02:00
// Shutdown shuts down the dht
func (dht *DHT) Shutdown() {
2018-04-03 18:14:04 +02:00
log.Debugf("[%s] DHT shutting down", dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
dht.stop.Stop()
2018-05-24 19:05:05 +02:00
dht.stop.Wait()
2018-04-28 02:16:12 +02:00
dht.node.Shutdown()
2018-04-05 17:35:57 +02:00
log.Debugf("[%s] DHT stopped", dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
}
// Ping pings a given address, creates a temporary contact for sending a message, and returns an error if communication
// fails.
2018-04-25 00:12:17 +02:00
func (dht *DHT) Ping(addr string) error {
raddr, err := net.ResolveUDPAddr(network, addr)
if err != nil {
return err
}
tmpNode := Contact{ID: RandomBitmapP(), IP: raddr.IP, Port: raddr.Port}
2018-04-28 02:16:12 +02:00
res := dht.node.Send(tmpNode, Request{Method: pingMethod})
2018-04-25 00:12:17 +02:00
if res == nil {
return errors.Err("no response from node %s", addr)
}
return nil
}
// Get returns the list of nodes that have the blob for the given hash
2018-04-28 02:16:12 +02:00
func (dht *DHT) Get(hash Bitmap) ([]Contact, error) {
nf := newContactFinder(dht.node, hash, true)
2018-03-29 03:05:27 +02:00
res, err := nf.Find()
if err != nil {
return nil, err
2018-03-29 03:05:27 +02:00
}
if res.Found {
2018-04-28 02:16:12 +02:00
return res.Contacts, nil
}
return nil, nil
2018-03-29 03:05:27 +02:00
}
2018-04-04 17:43:27 +02:00
// Announce announces to the DHT that this node has the blob for the given hash
func (dht *DHT) Announce(hash Bitmap) error {
2018-04-28 02:16:12 +02:00
nf := newContactFinder(dht.node, hash, false)
2018-03-29 03:05:27 +02:00
res, err := nf.Find()
if err != nil {
2018-04-03 19:38:01 +02:00
return err
2018-03-29 03:05:27 +02:00
}
2018-04-03 19:38:01 +02:00
2018-05-22 18:27:49 +02:00
// if we found less than K contacts, or current node is closer than farthest contact
if len(res.Contacts) < bucketSize || dht.node.id.Xor(hash).Less(res.Contacts[bucketSize-1].ID.Xor(hash)) {
// pop last contact, and self-store instead
res.Contacts[bucketSize-1] = dht.contact
}
wg := &sync.WaitGroup{}
for _, c := range res.Contacts {
wg.Add(1)
go func(c Contact) {
dht.storeOnNode(hash, c)
wg.Done()
}(c)
2018-04-03 19:38:01 +02:00
}
wg.Wait()
2018-05-22 18:16:01 +02:00
dht.lock.Lock()
dht.announced[hash] = true
dht.lock.Unlock()
2018-04-03 19:38:01 +02:00
return nil
2018-03-29 03:05:27 +02:00
}
2018-05-22 18:16:01 +02:00
func (dht *DHT) startReannouncer() {
2018-05-24 19:05:05 +02:00
dht.stop.Add(1)
defer dht.stop.Done()
2018-05-22 18:16:01 +02:00
tick := time.NewTicker(tReannounce)
for {
select {
2018-05-24 19:05:05 +02:00
case <-dht.stop.Ch():
2018-05-22 18:16:01 +02:00
return
case <-tick.C:
dht.lock.RLock()
for h := range dht.announced {
go func(bm Bitmap) {
if err := dht.Announce(bm); err != nil {
log.Error("error re-announcing bitmap - ", err)
}
}(h)
2018-05-22 18:16:01 +02:00
}
dht.lock.RUnlock()
}
}
}
func (dht *DHT) storeOnNode(hash Bitmap, c Contact) {
2018-05-24 19:05:05 +02:00
dht.stop.Add(1)
defer dht.stop.Done()
2018-05-22 18:27:49 +02:00
// self-store
if dht.contact.Equals(c) {
dht.node.Store(hash, c)
return
}
resCh, cancel := dht.node.SendCancelable(c, Request{
Method: findValueMethod,
Arg: &hash,
})
2018-05-13 22:02:46 +02:00
var res *Response
select {
case res = <-resCh:
2018-05-24 19:05:05 +02:00
case <-dht.stop.Ch():
2018-05-13 22:02:46 +02:00
cancel()
return
}
if res == nil {
return // request timed out
}
resCh, cancel = dht.node.SendCancelable(c, Request{
Method: storeMethod,
StoreArgs: &storeArgs{
BlobHash: hash,
Value: storeArgsValue{
Token: res.Token,
LbryID: dht.contact.ID,
Port: dht.contact.Port,
},
},
})
2018-05-13 22:02:46 +02:00
go func() {
select {
case <-resCh:
2018-05-24 19:05:05 +02:00
case <-dht.stop.Ch():
2018-05-13 22:02:46 +02:00
cancel()
}
}()
}
// PrintState prints the current state of the DHT including address, nr outstanding transactions, stored hashes as well
// as current bucket information.
2018-04-05 17:35:57 +02:00
func (dht *DHT) PrintState() {
2018-04-28 02:16:12 +02:00
log.Printf("DHT node %s at %s", dht.contact.String(), time.Now().Format(time.RFC822Z))
log.Printf("Outstanding transactions: %d", dht.node.CountActiveTransactions())
log.Printf("Stored hashes: %d", dht.node.store.CountStoredHashes())
2018-04-05 17:35:57 +02:00
log.Printf("Buckets:")
2018-04-28 02:16:12 +02:00
for _, line := range strings.Split(dht.node.rt.BucketInfo(), "\n") {
2018-04-05 17:35:57 +02:00
log.Println(line)
}
}
2018-04-28 02:16:12 +02:00
func getContact(nodeID, addr string) (Contact, error) {
var c Contact
if nodeID == "" {
c.ID = RandomBitmapP()
2018-04-28 02:16:12 +02:00
} else {
c.ID = BitmapFromHexP(nodeID)
2018-04-28 02:16:12 +02:00
}
ip, port, err := net.SplitHostPort(addr)
if err != nil {
return c, errors.Err(err)
} else if ip == "" {
return c, errors.Err("address does not contain an IP")
} else if port == "" {
return c, errors.Err("address does not contain a port")
}
c.IP = net.ParseIP(ip)
if c.IP == nil {
2018-04-28 02:16:12 +02:00
return c, errors.Err("invalid ip")
}
c.Port, err = cast.ToIntE(port)
2018-04-28 02:16:12 +02:00
if err != nil {
return c, errors.Err(err)
}
return c, nil
}