Fix RPC ping/pong deadlock and timeout issue.
This is a backport of decred/dcrwallet#612. This change moves the wait for the session RPC response (used as a pong) to a new goroutine that does not run directly in the queue handler. By moving this out to a new goroutine, the handler can continue enqueuing and dequeuing notifications while waiting for the session response. Previously, if a notification was sent after the session RPC was called and before the response was received, the rpcclient main loop would block due to being unable to enqueue the notification.
This commit is contained in:
parent
01d52770d2
commit
521b2123de
1 changed file with 28 additions and 22 deletions
|
@ -317,6 +317,7 @@ func (c *RPCClient) handler() {
|
||||||
var dequeue chan interface{}
|
var dequeue chan interface{}
|
||||||
var next interface{}
|
var next interface{}
|
||||||
pingChan := time.After(time.Minute)
|
pingChan := time.After(time.Minute)
|
||||||
|
pingChanReset := make(chan (<-chan time.Time))
|
||||||
out:
|
out:
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
|
@ -360,16 +361,19 @@ out:
|
||||||
}
|
}
|
||||||
|
|
||||||
case <-pingChan:
|
case <-pingChan:
|
||||||
// No notifications were received in the last 60s.
|
// No notifications were received in the last 60s. Ensure the
|
||||||
// Ensure the connection is still active by making a new
|
// connection is still active by making a new request to the server.
|
||||||
// request to the server.
|
//
|
||||||
// TODO: A minute timeout is used to prevent the handler
|
// This MUST wait for the response in a new goroutine so as to not
|
||||||
// loop from blocking here forever, but this is much larger
|
// block channel sends enqueueing more notifications. Doing so
|
||||||
// than it needs to be due to btcd processing websocket
|
// would cause a deadlock and after the timeout expires, the client
|
||||||
// requests synchronously (see
|
// would be shut down.
|
||||||
// https://github.com/roasbeef/btcd/issues/504). Decrease
|
//
|
||||||
// this to something saner like 3s when the above issue is
|
// TODO: A minute timeout is used to prevent the handler loop from
|
||||||
// fixed.
|
// blocking here forever, but this is much larger than it needs to
|
||||||
|
// be due to dcrd processing websocket requests synchronously (see
|
||||||
|
// https://github.com/btcsuite/btcd/issues/504). Decrease this to
|
||||||
|
// something saner like 3s when the above issue is fixed.
|
||||||
type sessionResult struct {
|
type sessionResult struct {
|
||||||
err error
|
err error
|
||||||
}
|
}
|
||||||
|
@ -378,22 +382,24 @@ out:
|
||||||
_, err := c.Session()
|
_, err := c.Session()
|
||||||
sessionResponse <- sessionResult{err}
|
sessionResponse <- sessionResult{err}
|
||||||
}()
|
}()
|
||||||
|
go func() {
|
||||||
select {
|
select {
|
||||||
case resp := <-sessionResponse:
|
case resp := <-sessionResponse:
|
||||||
if resp.err != nil {
|
if resp.err != nil {
|
||||||
log.Errorf("Failed to receive session "+
|
log.Errorf("Failed to receive session "+
|
||||||
"result: %v", resp.err)
|
"result: %v", resp.err)
|
||||||
c.Stop()
|
c.Stop()
|
||||||
break out
|
|
||||||
}
|
}
|
||||||
pingChan = time.After(time.Minute)
|
pingChanReset <- time.After(time.Minute)
|
||||||
|
|
||||||
case <-time.After(time.Minute):
|
case <-time.After(time.Minute):
|
||||||
log.Errorf("Timeout waiting for session RPC")
|
log.Errorf("Timeout waiting for session RPC")
|
||||||
c.Stop()
|
c.Stop()
|
||||||
break out
|
|
||||||
}
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
case ch := <-pingChanReset:
|
||||||
|
pingChan = ch
|
||||||
|
|
||||||
case c.currentBlock <- bs:
|
case c.currentBlock <- bs:
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue