lbcd/connmgr/connmanager.go
Conner Fromknecht b26daffac9 connmgr: adds cancellation of pending requests
This commit extends the work started by roasbeef in the
 previous commit to bring full cancellation of pending
 connection requests. It also adds minor refactors to
 channel send/receives to help cleanup potentially
 lingering go routines.
2018-05-23 16:46:15 -07:00

569 lines
15 KiB
Go

// Copyright (c) 2016 The btcsuite developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package connmgr
import (
"errors"
"fmt"
"net"
"sync"
"sync/atomic"
"time"
)
// maxFailedAttempts is the maximum number of successive failed connection
// attempts after which network failure is assumed and new connections will
// be delayed by the configured retry duration.
const maxFailedAttempts = 25
var (
//ErrDialNil is used to indicate that Dial cannot be nil in the configuration.
ErrDialNil = errors.New("Config: Dial cannot be nil")
// maxRetryDuration is the max duration of time retrying of a persistent
// connection is allowed to grow to. This is necessary since the retry
// logic uses a backoff mechanism which increases the interval base times
// the number of retries that have been done.
maxRetryDuration = time.Minute * 5
// defaultRetryDuration is the default duration of time for retrying
// persistent connections.
defaultRetryDuration = time.Second * 5
// defaultTargetOutbound is the default number of outbound connections to
// maintain.
defaultTargetOutbound = uint32(8)
)
// ConnState represents the state of the requested connection.
type ConnState uint8
// ConnState can be either pending, established, disconnected or failed. When
// a new connection is requested, it is attempted and categorized as
// established or failed depending on the connection result. An established
// connection which was disconnected is categorized as disconnected.
const (
ConnPending ConnState = iota
ConnFailing
ConnCanceled
ConnEstablished
ConnDisconnected
)
// ConnReq is the connection request to a network address. If permanent, the
// connection will be retried on disconnection.
type ConnReq struct {
// The following variables must only be used atomically.
id uint64
Addr net.Addr
Permanent bool
conn net.Conn
state ConnState
stateMtx sync.RWMutex
retryCount uint32
}
// updateState updates the state of the connection request.
func (c *ConnReq) updateState(state ConnState) {
c.stateMtx.Lock()
c.state = state
c.stateMtx.Unlock()
}
// ID returns a unique identifier for the connection request.
func (c *ConnReq) ID() uint64 {
return atomic.LoadUint64(&c.id)
}
// State is the connection state of the requested connection.
func (c *ConnReq) State() ConnState {
c.stateMtx.RLock()
state := c.state
c.stateMtx.RUnlock()
return state
}
// String returns a human-readable string for the connection request.
func (c *ConnReq) String() string {
if c.Addr.String() == "" {
return fmt.Sprintf("reqid %d", atomic.LoadUint64(&c.id))
}
return fmt.Sprintf("%s (reqid %d)", c.Addr, atomic.LoadUint64(&c.id))
}
// Config holds the configuration options related to the connection manager.
type Config struct {
// Listeners defines a slice of listeners for which the connection
// manager will take ownership of and accept connections. When a
// connection is accepted, the OnAccept handler will be invoked with the
// connection. Since the connection manager takes ownership of these
// listeners, they will be closed when the connection manager is
// stopped.
//
// This field will not have any effect if the OnAccept field is not
// also specified. It may be nil if the caller does not wish to listen
// for incoming connections.
Listeners []net.Listener
// OnAccept is a callback that is fired when an inbound connection is
// accepted. It is the caller's responsibility to close the connection.
// Failure to close the connection will result in the connection manager
// believing the connection is still active and thus have undesirable
// side effects such as still counting toward maximum connection limits.
//
// This field will not have any effect if the Listeners field is not
// also specified since there couldn't possibly be any accepted
// connections in that case.
OnAccept func(net.Conn)
// TargetOutbound is the number of outbound network connections to
// maintain. Defaults to 8.
TargetOutbound uint32
// RetryDuration is the duration to wait before retrying connection
// requests. Defaults to 5s.
RetryDuration time.Duration
// OnConnection is a callback that is fired when a new outbound
// connection is established.
OnConnection func(*ConnReq, net.Conn)
// OnDisconnection is a callback that is fired when an outbound
// connection is disconnected.
OnDisconnection func(*ConnReq)
// GetNewAddress is a way to get an address to make a network connection
// to. If nil, no new connections will be made automatically.
GetNewAddress func() (net.Addr, error)
// Dial connects to the address on the named network. It cannot be nil.
Dial func(net.Addr) (net.Conn, error)
}
// registerPending is used to register a pending connection attempt. By
// registering pending connection attempts we allow callers to cancel pending
// connection attempts before their successful or in the case they're not
// longer wanted.
type registerPending struct {
c *ConnReq
done chan struct{}
}
// handleConnected is used to queue a successful connection.
type handleConnected struct {
c *ConnReq
conn net.Conn
}
// handleDisconnected is used to remove a connection.
type handleDisconnected struct {
id uint64
retry bool
}
// handleFailed is used to remove a pending connection.
type handleFailed struct {
c *ConnReq
err error
}
// ConnManager provides a manager to handle network connections.
type ConnManager struct {
// The following variables must only be used atomically.
connReqCount uint64
start int32
stop int32
cfg Config
wg sync.WaitGroup
failedAttempts uint64
requests chan interface{}
quit chan struct{}
}
// handleFailedConn handles a connection failed due to a disconnect or any
// other failure. If permanent, it retries the connection after the configured
// retry duration. Otherwise, if required, it makes a new connection request.
// After maxFailedConnectionAttempts new connections will be retried after the
// configured retry duration.
func (cm *ConnManager) handleFailedConn(c *ConnReq) {
if atomic.LoadInt32(&cm.stop) != 0 {
return
}
if c.Permanent {
c.retryCount++
d := time.Duration(c.retryCount) * cm.cfg.RetryDuration
if d > maxRetryDuration {
d = maxRetryDuration
}
log.Debugf("Retrying connection to %v in %v", c, d)
time.AfterFunc(d, func() {
cm.Connect(c)
})
} else if cm.cfg.GetNewAddress != nil {
cm.failedAttempts++
if cm.failedAttempts >= maxFailedAttempts {
log.Debugf("Max failed connection attempts reached: [%d] "+
"-- retrying connection in: %v", maxFailedAttempts,
cm.cfg.RetryDuration)
time.AfterFunc(cm.cfg.RetryDuration, func() {
cm.NewConnReq()
})
} else {
go cm.NewConnReq()
}
}
}
// connHandler handles all connection related requests. It must be run as a
// goroutine.
//
// The connection handler makes sure that we maintain a pool of active outbound
// connections so that we remain connected to the network. Connection requests
// are processed and mapped by their assigned ids.
func (cm *ConnManager) connHandler() {
var (
// pending holds all registered conn requests that have yet to
// succeed.
pending = make(map[uint64]*ConnReq)
// conns represents the set of all actively connected peers.
conns = make(map[uint64]*ConnReq, cm.cfg.TargetOutbound)
)
out:
for {
select {
case req := <-cm.requests:
switch msg := req.(type) {
case registerPending:
connReq := msg.c
connReq.updateState(ConnPending)
pending[msg.c.id] = connReq
close(msg.done)
case handleConnected:
connReq := msg.c
if _, ok := pending[connReq.id]; !ok {
if msg.conn != nil {
msg.conn.Close()
}
log.Debugf("Ignoring connection for "+
"canceled connreq=%v", connReq)
continue
}
connReq.updateState(ConnEstablished)
connReq.conn = msg.conn
conns[connReq.id] = connReq
log.Debugf("Connected to %v", connReq)
connReq.retryCount = 0
cm.failedAttempts = 0
delete(pending, connReq.id)
if cm.cfg.OnConnection != nil {
go cm.cfg.OnConnection(connReq, msg.conn)
}
case handleDisconnected:
connReq, ok := conns[msg.id]
if !ok {
connReq, ok = pending[msg.id]
if !ok {
log.Errorf("Unknown connid=%d",
msg.id)
continue
}
// Pending connection was found, remove
// it from pending map if we should
// ignore a later, successful
// connection.
connReq.updateState(ConnCanceled)
log.Debugf("Canceling: %v", connReq)
delete(pending, msg.id)
continue
}
// An existing connection was located, mark as
// disconnected and execute disconnection
// callback.
log.Debugf("Disconnected from %v", connReq)
delete(conns, msg.id)
if connReq.conn != nil {
connReq.conn.Close()
}
if cm.cfg.OnDisconnection != nil {
go cm.cfg.OnDisconnection(connReq)
}
// All internal state has been cleaned up, if
// this connection is being removed, we will
// make no further attempts with this request.
if !msg.retry {
connReq.updateState(ConnDisconnected)
continue
}
// Otherwise, we will attempt a reconnection if
// we do not have enough peers, or if this is a
// persistent peer. The connection request is
// re added to the pending map, so that
// subsequent processing of connections and
// failures do not ignore the request.
if uint32(len(conns)) < cm.cfg.TargetOutbound ||
connReq.Permanent {
connReq.updateState(ConnPending)
log.Debugf("Reconnecting to %v",
connReq)
pending[msg.id] = connReq
cm.handleFailedConn(connReq)
}
case handleFailed:
connReq := msg.c
if _, ok := pending[connReq.id]; !ok {
log.Debugf("Ignoring connection for "+
"canceled conn req: %v", connReq)
continue
}
connReq.updateState(ConnFailing)
log.Debugf("Failed to connect to %v: %v",
connReq, msg.err)
cm.handleFailedConn(connReq)
}
case <-cm.quit:
break out
}
}
cm.wg.Done()
log.Trace("Connection handler done")
}
// NewConnReq creates a new connection request and connects to the
// corresponding address.
func (cm *ConnManager) NewConnReq() {
if atomic.LoadInt32(&cm.stop) != 0 {
return
}
if cm.cfg.GetNewAddress == nil {
return
}
c := &ConnReq{}
atomic.StoreUint64(&c.id, atomic.AddUint64(&cm.connReqCount, 1))
// Submit a request of a pending connection attempt to the connection
// manager. By registering the id before the connection is even
// established, we'll be able to later cancel the connection via the
// Remove method.
done := make(chan struct{})
select {
case cm.requests <- registerPending{c, done}:
case <-cm.quit:
return
}
// Wait for the registration to successfully add the pending conn req to
// the conn manager's internal state.
select {
case <-done:
case <-cm.quit:
return
}
addr, err := cm.cfg.GetNewAddress()
if err != nil {
select {
case cm.requests <- handleFailed{c, err}:
case <-cm.quit:
}
return
}
c.Addr = addr
cm.Connect(c)
}
// Connect assigns an id and dials a connection to the address of the
// connection request.
func (cm *ConnManager) Connect(c *ConnReq) {
if atomic.LoadInt32(&cm.stop) != 0 {
return
}
if atomic.LoadUint64(&c.id) == 0 {
atomic.StoreUint64(&c.id, atomic.AddUint64(&cm.connReqCount, 1))
// Submit a request of a pending connection attempt to the
// connection manager. By registering the id before the
// connection is even established, we'll be able to later
// cancel the connection via the Remove method.
done := make(chan struct{})
select {
case cm.requests <- registerPending{c, done}:
case <-cm.quit:
return
}
// Wait for the registration to successfully add the pending
// conn req to the conn manager's internal state.
select {
case <-done:
case <-cm.quit:
return
}
}
log.Debugf("Attempting to connect to %v", c)
conn, err := cm.cfg.Dial(c.Addr)
if err != nil {
select {
case cm.requests <- handleFailed{c, err}:
case <-cm.quit:
}
return
}
select {
case cm.requests <- handleConnected{c, conn}:
case <-cm.quit:
}
}
// Disconnect disconnects the connection corresponding to the given connection
// id. If permanent, the connection will be retried with an increasing backoff
// duration.
func (cm *ConnManager) Disconnect(id uint64) {
if atomic.LoadInt32(&cm.stop) != 0 {
return
}
select {
case cm.requests <- handleDisconnected{id, true}:
case <-cm.quit:
}
}
// Remove removes the connection corresponding to the given connection id from
// known connections.
//
// NOTE: This method can also be used to cancel a lingering connection attempt
// that hasn't yet succeeded.
func (cm *ConnManager) Remove(id uint64) {
if atomic.LoadInt32(&cm.stop) != 0 {
return
}
select {
case cm.requests <- handleDisconnected{id, false}:
case <-cm.quit:
}
}
// listenHandler accepts incoming connections on a given listener. It must be
// run as a goroutine.
func (cm *ConnManager) listenHandler(listener net.Listener) {
log.Infof("Server listening on %s", listener.Addr())
for atomic.LoadInt32(&cm.stop) == 0 {
conn, err := listener.Accept()
if err != nil {
// Only log the error if not forcibly shutting down.
if atomic.LoadInt32(&cm.stop) == 0 {
log.Errorf("Can't accept connection: %v", err)
}
continue
}
go cm.cfg.OnAccept(conn)
}
cm.wg.Done()
log.Tracef("Listener handler done for %s", listener.Addr())
}
// Start launches the connection manager and begins connecting to the network.
func (cm *ConnManager) Start() {
// Already started?
if atomic.AddInt32(&cm.start, 1) != 1 {
return
}
log.Trace("Connection manager started")
cm.wg.Add(1)
go cm.connHandler()
// Start all the listeners so long as the caller requested them and
// provided a callback to be invoked when connections are accepted.
if cm.cfg.OnAccept != nil {
for _, listner := range cm.cfg.Listeners {
cm.wg.Add(1)
go cm.listenHandler(listner)
}
}
for i := atomic.LoadUint64(&cm.connReqCount); i < uint64(cm.cfg.TargetOutbound); i++ {
go cm.NewConnReq()
}
}
// Wait blocks until the connection manager halts gracefully.
func (cm *ConnManager) Wait() {
cm.wg.Wait()
}
// Stop gracefully shuts down the connection manager.
func (cm *ConnManager) Stop() {
if atomic.AddInt32(&cm.stop, 1) != 1 {
log.Warnf("Connection manager already stopped")
return
}
// Stop all the listeners. There will not be any listeners if
// listening is disabled.
for _, listener := range cm.cfg.Listeners {
// Ignore the error since this is shutdown and there is no way
// to recover anyways.
_ = listener.Close()
}
close(cm.quit)
log.Trace("Connection manager stopped")
}
// New returns a new connection manager.
// Use Start to start connecting to the network.
func New(cfg *Config) (*ConnManager, error) {
if cfg.Dial == nil {
return nil, ErrDialNil
}
// Default to sane values
if cfg.RetryDuration <= 0 {
cfg.RetryDuration = defaultRetryDuration
}
if cfg.TargetOutbound == 0 {
cfg.TargetOutbound = defaultTargetOutbound
}
cm := ConnManager{
cfg: *cfg, // Copy so caller can't mutate
requests: make(chan interface{}),
quit: make(chan struct{}),
}
return &cm, nil
}