Fix RPC ping/pong deadlock and timeout issue.

This is a backport of decred/dcrwallet#612.

This change moves the wait for the session RPC response (used as a
pong) to a new goroutine that does not run directly in the queue
handler.  By moving this out to a new goroutine, the handler can
continue enqueuing and dequeuing notifications while waiting for the
session response.  Previously, if a notification was sent after the
session RPC was called and before the response was received, the
rpcclient main loop would block due to being unable to enqueue the
notification.
This commit is contained in:
Olaoluwa Osuntokun 2017-04-21 13:48:18 -07:00
parent 01d52770d2
commit 521b2123de

View file

@ -317,6 +317,7 @@ func (c *RPCClient) handler() {
var dequeue chan interface{}
var next interface{}
pingChan := time.After(time.Minute)
pingChanReset := make(chan (<-chan time.Time))
out:
for {
select {
@ -360,16 +361,19 @@ out:
}
case <-pingChan:
// No notifications were received in the last 60s.
// Ensure the connection is still active by making a new
// request to the server.
// TODO: A minute timeout is used to prevent the handler
// loop from blocking here forever, but this is much larger
// than it needs to be due to btcd processing websocket
// requests synchronously (see
// https://github.com/roasbeef/btcd/issues/504). Decrease
// this to something saner like 3s when the above issue is
// fixed.
// No notifications were received in the last 60s. Ensure the
// connection is still active by making a new request to the server.
//
// This MUST wait for the response in a new goroutine so as to not
// block channel sends enqueueing more notifications. Blocking them
// would cause a deadlock and, after the timeout expires, the client
// would be shut down.
//
// TODO: A minute timeout is used to prevent the handler loop from
// blocking here forever, but this is much larger than it needs to
// be due to btcd processing websocket requests synchronously (see
// https://github.com/btcsuite/btcd/issues/504). Decrease this to
// something saner like 3s when the above issue is fixed.
type sessionResult struct {
err error
}
@ -378,22 +382,24 @@ out:
_, err := c.Session()
sessionResponse <- sessionResult{err}
}()
go func() {
select {
case resp := <-sessionResponse:
if resp.err != nil {
log.Errorf("Failed to receive session "+
"result: %v", resp.err)
c.Stop()
}
pingChanReset <- time.After(time.Minute)
select {
case resp := <-sessionResponse:
if resp.err != nil {
log.Errorf("Failed to receive session "+
"result: %v", resp.err)
case <-time.After(time.Minute):
log.Errorf("Timeout waiting for session RPC")
c.Stop()
break out
}
pingChan = time.After(time.Minute)
}()
case <-time.After(time.Minute):
log.Errorf("Timeout waiting for session RPC")
c.Stop()
break out
}
case ch := <-pingChanReset:
pingChan = ch
case c.currentBlock <- bs: