Fix RPC ping/pong deadlock and timeout issue.

This is a backport of decred/dcrwallet#612.

This change moves the wait for the session RPC response (used as a
pong) to a new goroutine that does not run directly in the queue
handler.  By moving this out to a new goroutine, the handler can
continue enqueuing and dequeuing notifications while waiting for the
session response.  Previously, if a notification was sent after the
session RPC was called and before the response was received, the
rpcclient main loop would block due to being unable to enqueue the
notification.
This commit is contained in:
Olaoluwa Osuntokun 2017-04-21 13:48:18 -07:00
parent 01d52770d2
commit 521b2123de

View file

@ -317,6 +317,7 @@ func (c *RPCClient) handler() {
var dequeue chan interface{} var dequeue chan interface{}
var next interface{} var next interface{}
pingChan := time.After(time.Minute) pingChan := time.After(time.Minute)
pingChanReset := make(chan (<-chan time.Time))
out: out:
for { for {
select { select {
@ -360,16 +361,19 @@ out:
} }
case <-pingChan: case <-pingChan:
// No notifications were received in the last 60s. // No notifications were received in the last 60s. Ensure the
// Ensure the connection is still active by making a new // connection is still active by making a new request to the server.
// request to the server. //
// TODO: A minute timeout is used to prevent the handler // This MUST wait for the response in a new goroutine so as to not
// loop from blocking here forever, but this is much larger // block channel sends enqueueing more notifications. Doing so
// than it needs to be due to btcd processing websocket // would cause a deadlock and after the timeout expires, the client
// requests synchronously (see // would be shut down.
// https://github.com/roasbeef/btcd/issues/504). Decrease //
// this to something saner like 3s when the above issue is // TODO: A minute timeout is used to prevent the handler loop from
// fixed. // blocking here forever, but this is much larger than it needs to
// be due to btcd processing websocket requests synchronously (see
// https://github.com/btcsuite/btcd/issues/504). Decrease this to
// something saner like 3s when the above issue is fixed.
type sessionResult struct { type sessionResult struct {
err error err error
} }
@ -378,22 +382,24 @@ out:
_, err := c.Session() _, err := c.Session()
sessionResponse <- sessionResult{err} sessionResponse <- sessionResult{err}
}() }()
go func() {
select { select {
case resp := <-sessionResponse: case resp := <-sessionResponse:
if resp.err != nil { if resp.err != nil {
log.Errorf("Failed to receive session "+ log.Errorf("Failed to receive session "+
"result: %v", resp.err) "result: %v", resp.err)
c.Stop() c.Stop()
break out
} }
pingChan = time.After(time.Minute) pingChanReset <- time.After(time.Minute)
case <-time.After(time.Minute): case <-time.After(time.Minute):
log.Errorf("Timeout waiting for session RPC") log.Errorf("Timeout waiting for session RPC")
c.Stop() c.Stop()
break out
} }
}()
case ch := <-pingChanReset:
pingChan = ch
case c.currentBlock <- bs: case c.currentBlock <- bs: