lbry.go/dht/dht.go

486 lines
12 KiB
Go
Raw Normal View History

2018-03-07 02:15:44 +01:00
package dht
import (
2018-03-29 03:05:27 +02:00
"context"
2018-03-07 02:15:44 +01:00
"net"
2018-03-29 03:05:27 +02:00
"sync"
2018-03-07 02:15:44 +01:00
"time"
"github.com/lbryio/errors.go"
2018-03-29 03:05:27 +02:00
"github.com/lbryio/lbry.go/stopOnce"
2018-03-07 02:15:44 +01:00
log "github.com/sirupsen/logrus"
"github.com/spf13/cast"
)
2018-04-03 18:14:04 +02:00
// init is currently a no-op. The commented-out lines below, if re-enabled,
// would switch the logger to a forced-color text formatter and debug level.
func init() {
//log.SetFormatter(&log.TextFormatter{ForceColors: true})
//log.SetLevel(log.DebugLevel)
}
2018-03-07 02:15:44 +01:00
// Protocol and timing constants for the DHT.
const (
	network = "udp4" // network name passed to net.ListenPacket and net.ResolveUDPAddr

	alpha           = 3  // this is the constant alpha in the spec
	nodeIDLength    = 48 // bytes. this is the constant B in the spec
	messageIDLength = 20 // bytes.
	bucketSize      = 8  // this is the constant k in the spec

	udpRetry   = 3
	udpTimeout = 10 * time.Second

	tExpire    = 86400 * time.Second // the time after which a key/value pair expires; this is a time-to-live (TTL) from the original publication date
	tRefresh   = 3600 * time.Second  // the time after which an otherwise unaccessed bucket must be refreshed
	tReplicate = 3600 * time.Second  // the interval between Kademlia replication events, when a node is required to publish its entire database
	tRepublish = 86400 * time.Second // the time after which the original publisher must republish a key/value pair

	numBuckets            = nodeIDLength * 8 // number of bits in a node ID
	compactNodeInfoLength = nodeIDLength + 6 // node ID plus 6 bytes; presumably a 4-byte IPv4 address and a 2-byte port (TODO confirm)
)
2018-03-07 02:15:44 +01:00
// packet holds one datagram received over UDP together with the address of
// the peer that sent it.
type packet struct {
// data is the datagram payload
data []byte
// raddr is the remote address the datagram was read from
raddr *net.UDPAddr
}
// Config holds the configuration of a DHT node.
type Config struct {
	// this node's address. format is `ip:port`
	Address string
	// the seed nodes through which we can join the dht network
	SeedNodes []string
	// the hex-encoded node id for this node. if string is empty, a random id will be generated
	NodeID string
	// print the state of the dht every minute
	PrintState bool
}
// NewStandardConfig returns a Config pointer with default values.
func NewStandardConfig() *Config {
return &Config{
2018-03-09 22:43:30 +01:00
Address: "127.0.0.1:4444",
2018-03-07 02:15:44 +01:00
SeedNodes: []string{
"lbrynet1.lbry.io:4444",
"lbrynet2.lbry.io:4444",
"lbrynet3.lbry.io:4444",
},
}
}
// UDPConn allows using a mocked connection for testing sending/receiving data.
// The method set is a subset of *net.UDPConn, so a real UDP socket satisfies it.
type UDPConn interface {
	ReadFromUDP([]byte) (int, *net.UDPAddr, error)
	WriteToUDP([]byte, *net.UDPAddr) (int, error)
	SetReadDeadline(time.Time) error
	SetWriteDeadline(time.Time) error
	Close() error
}
2018-03-07 02:15:44 +01:00
// DHT represents a DHT node.
type DHT struct {
conf *Config
conn UDPConn
node *Node
rt *RoutingTable
packets chan packet
store *peerStore
tm *transactionManager
2018-03-29 03:05:27 +02:00
stop *stopOnce.Stopper
2018-04-03 18:14:04 +02:00
stopWG *sync.WaitGroup
2018-03-07 02:15:44 +01:00
}
// New returns a DHT pointer. If config is nil, then config will be set to the default config.
func New(config *Config) (*DHT, error) {
2018-03-07 02:15:44 +01:00
if config == nil {
config = NewStandardConfig()
}
var id bitmap
if config.NodeID == "" {
id = newRandomBitmap()
} else {
id = newBitmapFromHex(config.NodeID)
}
ip, port, err := net.SplitHostPort(config.Address)
if err != nil {
return nil, errors.Err(err)
2018-03-09 22:43:30 +01:00
} else if ip == "" {
return nil, errors.Err("address does not contain an IP")
2018-03-09 22:43:30 +01:00
} else if port == "" {
return nil, errors.Err("address does not contain a port")
}
2018-03-09 22:43:30 +01:00
portInt, err := cast.ToIntE(port)
if err != nil {
return nil, errors.Err(err)
}
2018-03-09 22:43:30 +01:00
node := &Node{id: id, ip: net.ParseIP(ip), port: portInt}
if node.ip == nil {
return nil, errors.Err("invalid ip")
2018-03-09 22:43:30 +01:00
}
d := &DHT{
conf: config,
node: node,
rt: newRoutingTable(node),
packets: make(chan packet),
store: newPeerStore(),
2018-03-29 03:05:27 +02:00
stop: stopOnce.New(),
2018-04-03 18:14:04 +02:00
stopWG: &sync.WaitGroup{},
2018-03-07 02:15:44 +01:00
}
d.tm = newTransactionManager(d)
return d, nil
2018-03-07 02:15:44 +01:00
}
// init initializes global variables.
func (dht *DHT) init() error {
2018-04-03 18:14:04 +02:00
log.Debugf("Initializing DHT on %s (node id %s)", dht.conf.Address, dht.node.id.HexShort())
2018-03-07 02:15:44 +01:00
listener, err := net.ListenPacket(network, dht.conf.Address)
if err != nil {
return errors.Err(err)
2018-03-07 02:15:44 +01:00
}
dht.conn = listener.(*net.UDPConn)
if dht.conf.PrintState {
go printState(dht)
}
return nil
2018-03-07 02:15:44 +01:00
}
// listen receives message from udp.
func (dht *DHT) listen() {
2018-04-03 18:14:04 +02:00
dht.stopWG.Add(1)
defer dht.stopWG.Done()
2018-03-29 03:05:27 +02:00
buf := make([]byte, 8192)
2018-04-03 18:14:04 +02:00
2018-03-29 03:05:27 +02:00
for {
select {
case <-dht.stop.Chan():
return
default:
}
2018-04-03 18:14:04 +02:00
dht.conn.SetReadDeadline(time.Now().Add(1 * time.Second)) // need this to periodically check shutdown chan
2018-03-29 03:05:27 +02:00
n, raddr, err := dht.conn.ReadFromUDP(buf)
if err != nil {
if e, ok := err.(net.Error); !ok || !e.Timeout() {
2018-03-07 02:15:44 +01:00
log.Errorf("udp read error: %v", err)
}
2018-03-29 03:05:27 +02:00
continue
} else if raddr == nil {
log.Errorf("udp read with no raddr")
continue
2018-03-07 02:15:44 +01:00
}
2018-03-29 03:05:27 +02:00
2018-04-03 18:14:04 +02:00
data := make([]byte, n)
copy(data, buf[:n]) // slices use the same underlying array, so we need a new one for each packet
dht.packets <- packet{data: data, raddr: raddr}
2018-03-29 03:05:27 +02:00
}
2018-03-07 02:15:44 +01:00
}
// join makes current node join the dht network.
func (dht *DHT) join() {
2018-04-03 18:14:04 +02:00
log.Debugf("[%s] joining network", dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
// get real node IDs and add them to the routing table
2018-03-07 02:15:44 +01:00
for _, addr := range dht.conf.SeedNodes {
raddr, err := net.ResolveUDPAddr(network, addr)
if err != nil {
2018-03-29 03:05:27 +02:00
log.Errorln(err)
2018-03-07 02:15:44 +01:00
continue
}
2018-03-29 03:05:27 +02:00
tmpNode := Node{id: newRandomBitmap(), ip: raddr.IP, port: raddr.Port}
res := dht.tm.Send(tmpNode, &Request{Method: pingMethod})
if res == nil {
log.Errorf("[%s] join: no response from seed node %s", dht.node.id.HexShort(), addr)
}
}
2018-03-07 02:15:44 +01:00
2018-03-29 03:05:27 +02:00
// now call iterativeFind on yourself
_, err := dht.FindNodes(dht.node.id)
if err != nil {
2018-04-03 18:14:04 +02:00
log.Errorf("[%s] join: %s", dht.node.id.HexShort(), err.Error())
2018-03-07 02:15:44 +01:00
}
}
func (dht *DHT) runHandler() {
2018-04-03 18:14:04 +02:00
dht.stopWG.Add(1)
defer dht.stopWG.Done()
2018-03-07 02:15:44 +01:00
var pkt packet
for {
select {
case pkt = <-dht.packets:
handlePacket(dht, pkt)
2018-03-29 03:05:27 +02:00
case <-dht.stop.Chan():
return
2018-03-07 02:15:44 +01:00
}
}
}
2018-03-29 03:05:27 +02:00
// Start starts the dht
2018-04-03 18:14:04 +02:00
func (dht *DHT) Start() {
err := dht.init()
2018-03-07 02:15:44 +01:00
if err != nil {
2018-04-03 18:14:04 +02:00
log.Error(err)
return
2018-03-07 02:15:44 +01:00
}
2018-03-29 03:05:27 +02:00
go dht.listen()
go dht.runHandler()
dht.join()
2018-03-29 03:05:27 +02:00
log.Infof("[%s] DHT ready", dht.node.id.HexShort())
2018-03-07 02:15:44 +01:00
}
2018-03-29 03:05:27 +02:00
// Shutdown shuts down the dht
func (dht *DHT) Shutdown() {
2018-04-03 18:14:04 +02:00
log.Debugf("[%s] DHT shutting down", dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
dht.stop.Stop()
2018-04-03 18:14:04 +02:00
dht.stopWG.Wait()
dht.conn.Close()
log.Infof("[%s] DHT stopped", dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
}
func printState(dht *DHT) {
t := time.NewTicker(60 * time.Second)
for {
log.Printf("DHT state at %s", time.Now().Format(time.RFC822Z))
log.Printf("Outstanding transactions: %d", dht.tm.Count())
log.Printf("Known nodes: %d", dht.store.CountKnownNodes())
log.Printf("Buckets: \n%s", dht.rt.BucketInfo())
<-t.C
2018-03-07 02:15:44 +01:00
}
}
2018-03-29 03:05:27 +02:00
// FindNodes runs an iterative node lookup for hash and returns the nodes the
// finder reports. If the lookup fails, the nodes are nil and the error is
// passed through.
func (dht *DHT) FindNodes(hash bitmap) ([]Node, error) {
	found, err := newNodeFinder(dht, hash, false).Find()
	if err == nil {
		return found.Nodes, nil
	}
	return nil, err
}
// FindValue runs an iterative value lookup for hash. It returns the nodes the
// finder reports and whether the value was found. If the lookup fails, it
// returns nil, false and the error.
func (dht *DHT) FindValue(hash bitmap) ([]Node, bool, error) {
	found, err := newNodeFinder(dht, hash, true).Find()
	if err == nil {
		return found.Nodes, found.Found, nil
	}
	return nil, false, err
}
type nodeFinder struct {
findValue bool // true if we're using findValue
target bitmap
dht *DHT
done *stopOnce.Stopper
findValueMutex *sync.Mutex
findValueResult []Node
activeNodesMutex *sync.Mutex
activeNodes []Node
2018-04-03 18:14:04 +02:00
shortlistContactedMutex *sync.Mutex
shortlist []Node
contacted map[bitmap]bool
2018-03-29 03:05:27 +02:00
}
// findNodeResponse is the result returned by nodeFinder.Find.
type findNodeResponse struct {
// Found is set by Find only when a findValue search collected a non-empty result
Found bool
// Nodes holds the find-value result when Found is true; otherwise the active nodes, capped at bucketSize
Nodes []Node
}
func newNodeFinder(dht *DHT, target bitmap, findValue bool) *nodeFinder {
return &nodeFinder{
2018-04-03 18:14:04 +02:00
dht: dht,
target: target,
findValue: findValue,
findValueMutex: &sync.Mutex{},
activeNodesMutex: &sync.Mutex{},
shortlistContactedMutex: &sync.Mutex{},
contacted: make(map[bitmap]bool),
done: stopOnce.New(),
2018-03-29 03:05:27 +02:00
}
}
// Find performs the iterative lookup. It seeds the shortlist with the alpha
// closest nodes from the routing table, runs alpha workers until they all
// return, and then assembles the response. For a find-value search that
// produced a result, Found is true and Nodes is that result; otherwise Nodes
// holds at most bucketSize active nodes. An error is returned only when the
// routing table supplies no nodes to start from.
func (nf *nodeFinder) Find() (findNodeResponse, error) {
	log.Debugf("[%s] starting an iterative Find() for %s (findValue is %t)", nf.dht.node.id.HexShort(), nf.target.HexShort(), nf.findValue)

	seeds := nf.dht.rt.GetClosest(nf.target, alpha)
	nf.appendNewToShortlist(seeds)
	if len(nf.shortlist) == 0 {
		return findNodeResponse{}, errors.Err("no nodes in routing table")
	}

	// workers are numbered from 1 for logging purposes
	var workers sync.WaitGroup
	for num := 1; num <= alpha; num++ {
		workers.Add(1)
		go func(workerNum int) {
			defer workers.Done()
			nf.iterationWorker(workerNum)
		}(num)
	}
	workers.Wait()

	// TODO: what to do if we have less than K active nodes, shortlist is empty, but we
	// TODO: have other nodes in our routing table whom we have not contacted. prolly contact them?

	if nf.findValue && len(nf.findValueResult) > 0 {
		return findNodeResponse{Found: true, Nodes: nf.findValueResult}, nil
	}

	closest := nf.activeNodes
	if len(closest) > bucketSize {
		closest = closest[:bucketSize]
	}
	return findNodeResponse{Nodes: closest}, nil
}
func (nf *nodeFinder) iterationWorker(num int) {
log.Debugf("[%s] starting worker %d", nf.dht.node.id.HexShort(), num)
defer func() { log.Debugf("[%s] stopping worker %d", nf.dht.node.id.HexShort(), num) }()
for {
maybeNode := nf.popFromShortlist()
if maybeNode == nil {
// TODO: block if there are pending requests out from other workers. there may be more shortlist values coming
2018-04-03 18:14:04 +02:00
log.Debugf("[%s] no more nodes in shortlist", nf.dht.node.id.HexShort())
2018-03-29 03:05:27 +02:00
return
}
node := *maybeNode
if node.id.Equals(nf.dht.node.id) {
continue // cannot contact self
}
req := &Request{Args: []string{nf.target.RawString()}}
if nf.findValue {
req.Method = findValueMethod
} else {
req.Method = findNodeMethod
}
log.Debugf("[%s] contacting %s", nf.dht.node.id.HexShort(), node.id.HexShort())
var res *Response
ctx, cancel := context.WithCancel(context.Background())
resCh := nf.dht.tm.SendAsync(ctx, node, req)
select {
case res = <-resCh:
case <-nf.done.Chan():
log.Debugf("[%s] worker %d: canceled", nf.dht.node.id.HexShort(), num)
cancel()
return
}
if res == nil {
// nothing to do, response timed out
} else if nf.findValue && res.FindValueKey != "" {
log.Debugf("[%s] worker %d: got value", nf.dht.node.id.HexShort(), num)
nf.findValueMutex.Lock()
nf.findValueResult = res.FindNodeData
nf.findValueMutex.Unlock()
nf.done.Stop()
return
} else {
log.Debugf("[%s] worker %d: got more contacts", nf.dht.node.id.HexShort(), num)
nf.insertIntoActiveList(node)
nf.appendNewToShortlist(res.FindNodeData)
}
if nf.isSearchFinished() {
log.Debugf("[%s] worker %d: search is finished", nf.dht.node.id.HexShort(), num)
nf.done.Stop()
return
}
}
}
2018-04-03 18:14:04 +02:00
func (nf *nodeFinder) appendNewToShortlist(nodes []Node) {
nf.shortlistContactedMutex.Lock()
defer nf.shortlistContactedMutex.Unlock()
notContacted := []Node{}
2018-03-29 03:05:27 +02:00
for _, n := range nodes {
2018-04-03 18:14:04 +02:00
if _, ok := nf.contacted[n.id]; !ok {
notContacted = append(notContacted, n)
2018-03-29 03:05:27 +02:00
}
}
2018-04-03 18:14:04 +02:00
nf.shortlist = append(nf.shortlist, notContacted...)
2018-03-29 03:05:27 +02:00
sortNodesInPlace(nf.shortlist, nf.target)
}
func (nf *nodeFinder) popFromShortlist() *Node {
2018-04-03 18:14:04 +02:00
nf.shortlistContactedMutex.Lock()
defer nf.shortlistContactedMutex.Unlock()
2018-03-29 03:05:27 +02:00
if len(nf.shortlist) == 0 {
return nil
}
2018-04-03 18:14:04 +02:00
2018-03-29 03:05:27 +02:00
first := nf.shortlist[0]
nf.shortlist = nf.shortlist[1:]
2018-04-03 18:14:04 +02:00
nf.contacted[first.id] = true
2018-03-29 03:05:27 +02:00
return &first
}
// insertIntoActiveList inserts node into activeNodes, keeping the list ordered
// by XOR distance to the target (closest first). The node is placed before the
// first entry that is farther away than it, or appended if there is none.
func (nf *nodeFinder) insertIntoActiveList(node Node) {
	nf.activeNodesMutex.Lock()
	defer nf.activeNodesMutex.Unlock()

	dist := node.id.Xor(nf.target)
	for i, n := range nf.activeNodes {
		if dist.Less(n.id.Xor(nf.target)) {
			// Stop after the first insertion: carrying on with the loop would
			// insert the same node again before every later, farther entry.
			nf.activeNodes = append(nf.activeNodes[:i], append([]Node{node}, nf.activeNodes[i:]...)...)
			return
		}
	}

	nf.activeNodes = append(nf.activeNodes, node)
}
func (nf *nodeFinder) isSearchFinished() bool {
if nf.findValue && len(nf.findValueResult) > 0 {
return true
}
select {
case <-nf.done.Chan():
return true
default:
}
2018-04-03 18:14:04 +02:00
nf.shortlistContactedMutex.Lock()
defer nf.shortlistContactedMutex.Unlock()
2018-03-29 03:05:27 +02:00
if len(nf.shortlist) == 0 {
return true
}
nf.activeNodesMutex.Lock()
defer nf.activeNodesMutex.Unlock()
if len(nf.activeNodes) >= bucketSize && nf.activeNodes[bucketSize-1].id.Xor(nf.target).Less(nf.shortlist[0].id.Xor(nf.target)) {
// we have at least K active nodes, and we don't have any closer nodes yet to contact
return true
}
return false
}