From e480fa146f67bb9c0409470ca719fb70f90d8ebc Mon Sep 17 00:00:00 2001 From: Alex Grintsvayg Date: Thu, 21 Jun 2018 11:26:48 -0400 Subject: [PATCH] fixed a few channel lockups, fixed announced port in dht, successfully announced and served a blob --- cluster/cluster.go | 13 +++++----- cmd/peer.go | 2 +- cmd/reflector.go | 2 +- cmd/start.go | 62 ++++++++++++++++++++++++++++----------------- dht/dht.go | 17 +++++++++---- dht/node.go | 6 ++++- dht/node_finder.go | 26 ++++++++++--------- peer/server.go | 2 +- prism/prism.go | 60 +++++++++++++++++++++++++------------------ reflector/server.go | 2 +- 10 files changed, 115 insertions(+), 77 deletions(-) diff --git a/cluster/cluster.go b/cluster/cluster.go index 8e78f61..23837f6 100644 --- a/cluster/cluster.go +++ b/cluster/cluster.go @@ -15,8 +15,7 @@ import ( ) const ( - // DefaultClusterPort is the default port used when starting up a Cluster. - DefaultClusterPort = 17946 + DefaultPort = 17946 MembershipChangeBufferWindow = 1 * time.Second ) @@ -103,11 +102,11 @@ func (c *Cluster) listen() { case event := <-c.eventCh: switch event.EventType() { case serf.EventMemberJoin, serf.EventMemberFailed, serf.EventMemberLeave: - memberEvent := event.(serf.MemberEvent) - if event.EventType() == serf.EventMemberJoin && len(memberEvent.Members) == 1 && memberEvent.Members[0].Name == c.name { - // ignore event from my own joining of the cluster - continue - } + // // ignore event from my own joining of the cluster + //memberEvent := event.(serf.MemberEvent) + //if event.EventType() == serf.EventMemberJoin && len(memberEvent.Members) == 1 && memberEvent.Members[0].Name == c.name { + // continue + //} if timerCh == nil { timer.Reset(MembershipChangeBufferWindow) diff --git a/cmd/peer.go b/cmd/peer.go index 3f2a094..07fd71e 100644 --- a/cmd/peer.go +++ b/cmd/peer.go @@ -32,7 +32,7 @@ func peerCmd(cmd *cobra.Command, args []string) { combo := store.NewDBBackedS3Store(s3, db) peerServer := peer.NewServer(combo) - err = peerServer.Start("localhost:" + strconv.Itoa(peer.DefaultPort)) + err = peerServer.Start(":" + strconv.Itoa(peer.DefaultPort)) if err != nil { log.Fatal(err) } diff --git a/cmd/reflector.go b/cmd/reflector.go index e35ec1f..1b50fb2 100644 --- a/cmd/reflector.go +++ b/cmd/reflector.go @@ -31,7 +31,7 @@ func reflectorCmd(cmd *cobra.Command, args []string) { s3 := store.NewS3BlobStore(globalConfig.AwsID, globalConfig.AwsSecret, globalConfig.BucketRegion, globalConfig.BucketName) combo := store.NewDBBackedS3Store(s3, db) reflectorServer := reflector.NewServer(combo) - err = reflectorServer.Start("localhost:" + strconv.Itoa(reflector.DefaultPort)) + err = reflectorServer.Start(":" + strconv.Itoa(reflector.DefaultPort)) if err != nil { log.Fatal(err) } diff --git a/cmd/start.go b/cmd/start.go index 5a65730..58c941a 100644 --- a/cmd/start.go +++ b/cmd/start.go @@ -4,9 +4,13 @@ import ( "os" "os/signal" "strconv" + "strings" "syscall" + "github.com/lbryio/reflector.go/cluster" "github.com/lbryio/reflector.go/db" + "github.com/lbryio/reflector.go/dht" + "github.com/lbryio/reflector.go/dht/bits" "github.com/lbryio/reflector.go/peer" "github.com/lbryio/reflector.go/prism" "github.com/lbryio/reflector.go/reflector" @@ -16,28 +20,33 @@ import ( "github.com/spf13/cobra" ) +const ( + startNewCluster = "new" +) + var ( - startPeerPort int - startReflectorPort int - startDhtPort int - startDhtSeedPort int - startClusterPort int - startClusterSeedPort int + startClusterPort int + startPeerPort int + startReflectorPort int + startDhtPort int + startDhtSeeds []string 
+ startHashRange string ) func init() { var cmd = &cobra.Command{ - Use: "start [cluster-address]", - Short: "Runs prism application with cluster, dht, peer server, and reflector server.", + Use: `start [cluster-address|"new"]`, + Short: "Runs full prism application with cluster, dht, peer server, and reflector server.", Run: startCmd, - Args: cobra.RangeArgs(0, 1), + Args: cobra.ExactArgs(1), } - cmd.PersistentFlags().IntVar(&startDhtPort, "dht-port", 4570, "Port to start DHT on") - cmd.PersistentFlags().IntVar(&startDhtSeedPort, "dht-seed-port", 4567, "Port to connect to DHT bootstrap node on") - cmd.PersistentFlags().IntVar(&startClusterPort, "cluster-port", 5678, "Port to start DHT on") - cmd.PersistentFlags().IntVar(&startClusterSeedPort, "cluster-seed-port", 0, "Port to start DHT on") + cmd.PersistentFlags().IntVar(&startClusterPort, "cluster-port", cluster.DefaultPort, "Port that cluster listens on") cmd.PersistentFlags().IntVar(&startPeerPort, "peer-port", peer.DefaultPort, "Port to start peer protocol on") cmd.PersistentFlags().IntVar(&startReflectorPort, "reflector-port", reflector.DefaultPort, "Port to start reflector protocol on") + cmd.PersistentFlags().IntVar(&startDhtPort, "dht-port", dht.DefaultPort, "Port that dht will listen on") + cmd.PersistentFlags().StringSliceVar(&startDhtSeeds, "dht-seeds", []string{}, "Comma-separated list of dht seed nodes (addr:port,addr:port,...)") + + cmd.PersistentFlags().StringVar(&startHashRange, "hash-range", "", "Limit on range of hashes to announce (start-end)") rootCmd.AddCommand(cmd) } @@ -49,25 +58,32 @@ func startCmd(cmd *cobra.Command, args []string) { s3 := store.NewS3BlobStore(globalConfig.AwsID, globalConfig.AwsSecret, globalConfig.BucketRegion, globalConfig.BucketName) comboStore := store.NewDBBackedS3Store(s3, db) + conf := prism.DefaultConf() + // TODO: args we need: - // clusterAddr - to connect to cluster (or start new cluster if empty) // minNodes - minimum number of nodes before announcing starts. otherwise first node will try to announce all the blobs in the db // or maybe we should do maxHashesPerNode? 
// in either case, this should not kill the cluster, but should only limit announces (and notify when some hashes are being left unannounced) - //clusterAddr := "" - //if len(args) > 0 { - // clusterAddr = args[0] - //} + if args[0] != startNewCluster { + conf.ClusterSeedAddr = args[0] + } - conf := prism.DefaultConf() conf.DB = db conf.Blobs = comboStore - conf.DhtAddress = "127.0.0.1:" + strconv.Itoa(startDhtPort) - conf.DhtSeedNodes = []string{"127.0.0.1:" + strconv.Itoa(startDhtSeedPort)} + conf.DhtAddress = "0.0.0.0:" + strconv.Itoa(startDhtPort) + conf.DhtSeedNodes = startDhtSeeds conf.ClusterPort = startClusterPort - if startClusterSeedPort > 0 { - conf.ClusterSeedAddr = "127.0.0.1:" + strconv.Itoa(startClusterSeedPort) + conf.PeerPort = startPeerPort + conf.ReflectorPort = startReflectorPort + + if startHashRange != "" { + hashRange := strings.Split(startHashRange, "-") + if len(hashRange) != 2 { + log.Fatal("invalid hash range") + } + r := bits.Range{Start: bits.FromShortHexP(hashRange[0]), End: bits.FromShortHexP(hashRange[1])} + conf.HashRange = &r } p := prism.New(conf) diff --git a/dht/dht.go b/dht/dht.go index 0df919d..eb7a9be 100644 --- a/dht/dht.go +++ b/dht/dht.go @@ -3,16 +3,19 @@ package dht import ( "fmt" "net" + "strconv" "strings" "sync" "time" + "github.com/lbryio/reflector.go/dht/bits" + peerproto "github.com/lbryio/reflector.go/peer" + "github.com/lbryio/lbry.go/errors" "github.com/lbryio/lbry.go/stopOnce" - "github.com/lbryio/reflector.go/dht/bits" - "github.com/spf13/cast" log "github.com/sirupsen/logrus" + "github.com/spf13/cast" ) func init() { @@ -21,7 +24,8 @@ func init() { } const ( - Network = "udp4" + Network = "udp4" + DefaultPort = 4444 // TODO: all these constants should be defaults, and should be used to set values in the standard Config. then the code should use values in the config // TODO: alternatively, have a global Config for constants. at least that way tests can modify the values @@ -57,17 +61,20 @@ type Config struct { NodeID string // print the state of the dht every X time PrintState time.Duration + // the port that clients can use to download blobs using the LBRY peer protocol + PeerProtocolPort int } // NewStandardConfig returns a Config pointer with default values. 
func NewStandardConfig() *Config { return &Config{ - Address: "0.0.0.0:4444", + Address: "0.0.0.0:" + strconv.Itoa(DefaultPort), SeedNodes: []string{ "lbrynet1.lbry.io:4444", "lbrynet2.lbry.io:4444", "lbrynet3.lbry.io:4444", }, + PeerProtocolPort: peerproto.DefaultPort, } } @@ -330,7 +337,7 @@ func (dht *DHT) storeOnNode(hash bits.Bitmap, c Contact) { Value: storeArgsValue{ Token: res.Token, LbryID: dht.contact.ID, - Port: dht.contact.Port, + Port: dht.conf.PeerProtocolPort, }, }, }) diff --git a/dht/node.go b/dht/node.go index 3be10d5..9607d1e 100644 --- a/dht/node.go +++ b/dht/node.go @@ -299,7 +299,11 @@ func (n *Node) handleRequest(addr *net.UDPAddr, request Request) { func (n *Node) handleResponse(addr *net.UDPAddr, response Response) { tx := n.txFind(response.ID, Contact{ID: response.NodeID, IP: addr.IP, Port: addr.Port}) if tx != nil { - tx.res <- response + select { + case tx.res <- response: + default: + log.Errorf("[%s] query %s: response received but tx has no listener", n.id.HexShort(), response.ID.HexShort()) + } } n.rt.Update(Contact{ID: response.NodeID, IP: addr.IP, Port: addr.Port}) diff --git a/dht/node_finder.go b/dht/node_finder.go index 3780281..cc57d1a 100644 --- a/dht/node_finder.go +++ b/dht/node_finder.go @@ -69,13 +69,13 @@ func (cf *contactFinder) Stop() { func (cf *contactFinder) Find() ([]Contact, bool, error) { if cf.findValue { - log.Debugf("[%s] starting an iterative Find for the value %s", cf.node.id.HexShort(), cf.target.HexShort()) + log.Debugf("[%s] find %s: starting iterativeFindValue", cf.node.id.HexShort(), cf.target.HexShort()) } else { - log.Debugf("[%s] starting an iterative Find for contacts near %s", cf.node.id.HexShort(), cf.target.HexShort()) + log.Debugf("[%s] find %s: starting iterativeFindNode", cf.node.id.HexShort(), cf.target.HexShort()) } cf.appendNewToShortlist(cf.node.rt.GetClosest(cf.target, alpha)) if len(cf.shortlist) == 0 { - return nil, false, errors.Err("no contacts in routing table") + return nil, false, errors.Err("[%s] find %s: no contacts in routing table", cf.node.id.HexShort(), cf.target.HexShort()) } for i := 0; i < alpha; i++ { @@ -108,14 +108,16 @@ func (cf *contactFinder) Find() ([]Contact, bool, error) { } func (cf *contactFinder) iterationWorker(num int) { - log.Debugf("[%s] starting worker %d", cf.node.id.HexShort(), num) - defer func() { log.Debugf("[%s] stopping worker %d", cf.node.id.HexShort(), num) }() + log.Debugf("[%s] find %s: starting worker %d", cf.node.id.HexShort(), cf.target.HexShort(), num) + defer func() { + log.Debugf("[%s] find %s: stopping worker %d", cf.node.id.HexShort(), cf.target.HexShort(), num) + }() for { maybeContact := cf.popFromShortlist() if maybeContact == nil { // TODO: block if there are pending requests out from other workers. 
there may be more shortlist values coming - log.Debugf("[%s] worker %d: no contacts in shortlist, waiting...", cf.node.id.HexShort(), num) + //log.Debugf("[%s] worker %d: no contacts in shortlist, waiting...", cf.node.id.HexShort(), num) time.Sleep(100 * time.Millisecond) } else { contact := *maybeContact @@ -131,7 +133,7 @@ func (cf *contactFinder) iterationWorker(num int) { req.Method = findNodeMethod } - log.Debugf("[%s] worker %d: contacting %s", cf.node.id.HexShort(), num, contact.ID.HexShort()) + log.Debugf("[%s] find %s: worker %d: contacting %s", cf.node.id.HexShort(), cf.target.HexShort(), num, contact.ID.HexShort()) cf.incrementOutstanding() @@ -140,23 +142,23 @@ func (cf *contactFinder) iterationWorker(num int) { select { case res = <-resCh: case <-cf.stop.Ch(): - log.Debugf("[%s] worker %d: canceled", cf.node.id.HexShort(), num) + log.Debugf("[%s] find %s: worker %d: canceled", cf.node.id.HexShort(), cf.target.HexShort(), num) cancel() return } if res == nil { // nothing to do, response timed out - log.Debugf("[%s] worker %d: search canceled or timed out waiting for %s", cf.node.id.HexShort(), num, contact.ID.HexShort()) + log.Debugf("[%s] find %s: worker %d: search canceled or timed out waiting for %s", cf.node.id.HexShort(), cf.target.HexShort(), num, contact.ID.HexShort()) } else if cf.findValue && res.FindValueKey != "" { - log.Debugf("[%s] worker %d: got value", cf.node.id.HexShort(), num) + log.Debugf("[%s] find %s: worker %d: got value", cf.node.id.HexShort(), cf.target.HexShort(), num) cf.findValueMutex.Lock() cf.findValueResult = res.Contacts cf.findValueMutex.Unlock() cf.stop.Stop() return } else { - log.Debugf("[%s] worker %d: got contacts", cf.node.id.HexShort(), num) + log.Debugf("[%s] find %s: worker %d: got contacts", cf.node.id.HexShort(), cf.target.HexShort(), num) cf.insertIntoActiveList(contact) cf.appendNewToShortlist(res.Contacts) } @@ -165,7 +167,7 @@ func (cf *contactFinder) iterationWorker(num int) { } if cf.isSearchFinished() { - log.Debugf("[%s] worker %d: search is finished", cf.node.id.HexShort(), num) + log.Debugf("[%s] find %s: worker %d: search is finished", cf.node.id.HexShort(), cf.target.HexShort(), num) cf.stop.Stop() return } diff --git a/peer/server.go b/peer/server.go index ad23246..1869109 100644 --- a/peer/server.go +++ b/peer/server.go @@ -51,7 +51,7 @@ func (s *Server) Shutdown() { // Start starts the server listener to handle connections. func (s *Server) Start(address string) error { log.Println("peer listening on " + address) - l, err := net.Listen("tcp", address) + l, err := net.Listen("tcp4", address) if err != nil { return err } diff --git a/prism/prism.go b/prism/prism.go index e62b916..ea76472 100644 --- a/prism/prism.go +++ b/prism/prism.go @@ -29,6 +29,9 @@ type Config struct { ClusterPort int ClusterSeedAddr string + // limit the range of hashes to announce. 
useful for testing + HashRange *bits.Range + DB *db.SQL Blobs store.BlobStore } @@ -36,7 +39,7 @@ type Config struct { // DefaultConf returns a default config func DefaultConf() *Config { return &Config{ - ClusterPort: cluster.DefaultClusterPort, + ClusterPort: cluster.DefaultPort, } } @@ -61,7 +64,10 @@ func New(conf *Config) *Prism { dhtConf := dht.NewStandardConfig() dhtConf.Address = conf.DhtAddress - dhtConf.SeedNodes = conf.DhtSeedNodes + dhtConf.PeerProtocolPort = conf.PeerPort + if len(conf.DhtSeedNodes) > 0 { + dhtConf.SeedNodes = conf.DhtSeedNodes + } d := dht.New(dhtConf) c := cluster.New(conf.ClusterPort, conf.ClusterSeedAddr) @@ -91,6 +97,8 @@ func New(conf *Config) *Prism { // Start starts the components of the application. func (p *Prism) Start() error { + var err error + if p.conf.DB == nil { return errors.Err("db required in conf") } @@ -99,7 +107,17 @@ func (p *Prism) Start() error { return errors.Err("blobs required in conf") } - err := p.dht.Start() + err = p.peer.Start(":" + strconv.Itoa(p.conf.PeerPort)) + if err != nil { + return err + } + + err = p.reflector.Start(":" + strconv.Itoa(p.conf.ReflectorPort)) + if err != nil { + return err + } + + err = p.dht.Start() if err != nil { return err } @@ -109,28 +127,16 @@ func (p *Prism) Start() error { return err } - // TODO: should not be localhost forever. should prolly be 0.0.0.0, or configurable - err = p.peer.Start("localhost:" + strconv.Itoa(p.conf.PeerPort)) - if err != nil { - return err - } - - // TODO: should not be localhost forever. should prolly be 0.0.0.0, or configurable - err = p.reflector.Start("localhost:" + strconv.Itoa(p.conf.ReflectorPort)) - if err != nil { - return err - } - return nil } // Shutdown gracefully shuts down the different prism components before exiting. func (p *Prism) Shutdown() { p.stop.StopAndWait() - p.reflector.Shutdown() - p.peer.Shutdown() p.cluster.Shutdown() p.dht.Shutdown() + p.reflector.Shutdown() + p.peer.Shutdown() } // AnnounceRange announces the `n`th interval of hashes, out of a total of `total` intervals @@ -143,15 +149,19 @@ func (p *Prism) AnnounceRange(n, total int) { return } - //r := bits.MaxRange().IntervalP(n, total) - - // TODO: this is temporary. it lets me test with a small number of hashes. use the full range in production - min, max, err := p.db.GetHashRange() - if err != nil { - log.Errorf("%s: error getting hash range: %s", p.dht.ID().HexShort(), err.Error()) - return + var r bits.Range + if p.conf.HashRange != nil { + r = *p.conf.HashRange + } else { + //r := bits.MaxRange().IntervalP(n, total) + // TODO: this is temporary. it lets me test with a small number of hashes. use the full range in production + min, max, err := p.db.GetHashRange() + if err != nil { + log.Errorf("%s: error getting hash range: %s", p.dht.ID().HexShort(), err.Error()) + return + } + r = (bits.Range{Start: bits.FromHexP(min), End: bits.FromHexP(max)}).IntervalP(n, total) } - r := (bits.Range{Start: bits.FromHexP(min), End: bits.FromHexP(max)}).IntervalP(n, total) log.Infof("%s: hash range is now %s to %s", p.dht.ID().HexShort(), r.Start, r.End) diff --git a/reflector/server.go b/reflector/server.go index 38e6b45..a5d0105 100644 --- a/reflector/server.go +++ b/reflector/server.go @@ -41,7 +41,7 @@ func (s *Server) Shutdown() { func (s *Server) Start(address string) error { //ToDo - We should make this DRY as it is the same code in both servers. 
log.Println("reflector listening on " + address) - l, err := net.Listen("tcp", address) + l, err := net.Listen("tcp4", address) if err != nil { return err }