Fix RPC ping/pong deadlock and timeout issue.
This is a backport of decred/dcrwallet#612. This change moves the wait for the session RPC response (used as a pong) to a new goroutine that does not run directly in the queue handler. By moving this out to a new goroutine, the handler can continue enqueuing and dequeueing notifications while waiting for the session response. Previously, if a notification was sent after the session RPC was called and before the response was received, the rpcclient main loop would block due to being unable to enqueue the notification.
This commit is contained in:
parent
01d52770d2
commit
521b2123de
1 changed file with 28 additions and 22 deletions
|
@ -317,6 +317,7 @@ func (c *RPCClient) handler() {
|
|||
var dequeue chan interface{}
|
||||
var next interface{}
|
||||
pingChan := time.After(time.Minute)
|
||||
pingChanReset := make(chan (<-chan time.Time))
|
||||
out:
|
||||
for {
|
||||
select {
|
||||
|
@ -360,16 +361,19 @@ out:
|
|||
}
|
||||
|
||||
case <-pingChan:
|
||||
// No notifications were received in the last 60s.
|
||||
// Ensure the connection is still active by making a new
|
||||
// request to the server.
|
||||
// TODO: A minute timeout is used to prevent the handler
|
||||
// loop from blocking here forever, but this is much larger
|
||||
// than it needs to be due to btcd processing websocket
|
||||
// requests synchronously (see
|
||||
// https://github.com/roasbeef/btcd/issues/504). Decrease
|
||||
// this to something saner like 3s when the above issue is
|
||||
// fixed.
|
||||
// No notifications were received in the last 60s. Ensure the
|
||||
// connection is still active by making a new request to the server.
|
||||
//
|
||||
// This MUST wait for the response in a new goroutine so as to not
|
||||
// block channel sends enqueueing more notifications. Doing so
|
||||
// would cause a deadlock and after the timeout expires, the client
|
||||
// would be shut down.
|
||||
//
|
||||
// TODO: A minute timeout is used to prevent the handler loop from
|
||||
// blocking here forever, but this is much larger than it needs to
|
||||
// be due to dcrd processing websocket requests synchronously (see
|
||||
// https://github.com/btcsuite/btcd/issues/504). Decrease this to
|
||||
// something saner like 3s when the above issue is fixed.
|
||||
type sessionResult struct {
|
||||
err error
|
||||
}
|
||||
|
@ -378,22 +382,24 @@ out:
|
|||
_, err := c.Session()
|
||||
sessionResponse <- sessionResult{err}
|
||||
}()
|
||||
|
||||
go func() {
|
||||
select {
|
||||
case resp := <-sessionResponse:
|
||||
if resp.err != nil {
|
||||
log.Errorf("Failed to receive session "+
|
||||
"result: %v", resp.err)
|
||||
c.Stop()
|
||||
break out
|
||||
}
|
||||
pingChan = time.After(time.Minute)
|
||||
pingChanReset <- time.After(time.Minute)
|
||||
|
||||
case <-time.After(time.Minute):
|
||||
log.Errorf("Timeout waiting for session RPC")
|
||||
c.Stop()
|
||||
break out
|
||||
}
|
||||
}()
|
||||
|
||||
case ch := <-pingChanReset:
|
||||
pingChan = ch
|
||||
|
||||
case c.currentBlock <- bs:
|
||||
|
||||
|
|
Loading…
Reference in a new issue