server: Improve the persistent peer retry logic.

This fixes an issue introduced during the peer refactor where persistent
peers that failed the initial connection are not retried as intended.

It also improves the retry logic as follows:

- Make the retry handler goroutine simply use a for loop instead of
  launching a new goroutine for each backoff attempt.  Even though
  goroutines are fairly cheap to create, it is much more efficient to
  simply loop
- Change the retry handler to accept a flag if it is the initial attempt
- Rather than dividing the const interval by 2 everywhere and passing
  the retry duration in, just half the constant and set the initial
  duration to it in the retry handler

Finally, include the address of the peer in the error message when a new
outbound peer can't be created.
This commit is contained in:
Dave Collins 2015-11-22 04:04:22 -06:00
parent 1217e00d39
commit f41ff545be

View file

@ -48,7 +48,7 @@ const (
// connectionRetryInterval is the base amount of time to wait in between // connectionRetryInterval is the base amount of time to wait in between
// retries when connecting to persistent peers. It is adjusted by the // retries when connecting to persistent peers. It is adjusted by the
// number of retries such that there is a retry backoff. // number of retries such that there is a retry backoff.
connectionRetryInterval = time.Second * 10 connectionRetryInterval = time.Second * 5
// maxConnectionRetryInterval is the max amount of time retrying of a // maxConnectionRetryInterval is the max amount of time retrying of a
// persistent peer is allowed to grow to. This is necessary since the // persistent peer is allowed to grow to. This is necessary since the
@ -1124,7 +1124,7 @@ func (s *server) handleDonePeerMsg(state *peerState, sp *serverPeer) {
// Retry peer // Retry peer
sp = s.newOutboundPeer(sp.Addr(), sp.persistent) sp = s.newOutboundPeer(sp.Addr(), sp.persistent)
if sp != nil { if sp != nil {
go s.retryConn(sp, connectionRetryInterval/2) go s.retryConn(sp, false)
} }
list[sp.ID()] = sp list[sp.ID()] = sp
return return
@ -1480,7 +1480,7 @@ func (s *server) newOutboundPeer(addr string, persistent bool) *serverPeer {
sp := newServerPeer(s, persistent) sp := newServerPeer(s, persistent)
p, err := peer.NewOutboundPeer(newPeerConfig(sp), addr) p, err := peer.NewOutboundPeer(newPeerConfig(sp), addr)
if err != nil { if err != nil {
srvrLog.Errorf("Cannot create outbound peer: %v", err) srvrLog.Errorf("Cannot create outbound peer %s: %v", addr, err)
return nil return nil
} }
sp.Peer = p sp.Peer = p
@ -1525,22 +1525,36 @@ func (s *server) establishConn(sp *serverPeer) error {
return nil return nil
} }
// retryConn retries connection to the peer after the given duration. // retryConn retries connection to the peer after the given duration. It must
func (s *server) retryConn(sp *serverPeer, retryDuration time.Duration) { // be run as a goroutine.
srvrLog.Debugf("Retrying connection to %s in %s", sp.Addr(), retryDuration) func (s *server) retryConn(sp *serverPeer, initialAttempt bool) {
retryDuration := connectionRetryInterval
for {
if initialAttempt {
retryDuration = 0
initialAttempt = false
} else {
srvrLog.Debugf("Retrying connection to %s in %s", sp.Addr(),
retryDuration)
}
select { select {
case <-time.After(retryDuration): case <-time.After(retryDuration):
err := s.establishConn(sp) err := s.establishConn(sp)
if err != nil { if err != nil {
retryDuration += connectionRetryInterval / 2 retryDuration += connectionRetryInterval
if retryDuration > maxConnectionRetryInterval { if retryDuration > maxConnectionRetryInterval {
retryDuration = maxConnectionRetryInterval retryDuration = maxConnectionRetryInterval
} }
go s.retryConn(sp, retryDuration) continue
}
return
case <-sp.quit:
return
case <-s.quit:
return return
} }
case <-sp.quit:
case <-s.quit:
} }
} }
@ -1581,7 +1595,7 @@ func (s *server) peerHandler() {
for _, addr := range permanentPeers { for _, addr := range permanentPeers {
sp := s.newOutboundPeer(addr, true) sp := s.newOutboundPeer(addr, true)
if sp != nil { if sp != nil {
go s.peerConnHandler(sp) go s.retryConn(sp, true)
} }
} }