use labels for different error types
This commit is contained in:
parent
50089481fb
commit
5d8a2d697c
3 changed files with 60 additions and 48 deletions
|
@ -10,6 +10,8 @@ import (
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/lbryio/reflector.go/reflector"
|
||||||
|
|
||||||
ee "github.com/lbryio/lbry.go/v2/extras/errors"
|
ee "github.com/lbryio/lbry.go/v2/extras/errors"
|
||||||
"github.com/lbryio/lbry.go/v2/extras/stop"
|
"github.com/lbryio/lbry.go/v2/extras/stop"
|
||||||
|
|
||||||
|
@ -57,12 +59,31 @@ func (s *Server) Shutdown() {
|
||||||
s.stop.StopAndWait()
|
s.stop.StopAndWait()
|
||||||
}
|
}
|
||||||
|
|
||||||
const ns = "reflector"
|
const (
|
||||||
|
ns = "reflector"
|
||||||
|
|
||||||
|
labelDirection = "direction"
|
||||||
|
labelErrorType = "error_type"
|
||||||
|
|
||||||
|
DirectionUpload = "upload" // to reflector
|
||||||
|
DirectionDownload = "download" // from reflector
|
||||||
|
|
||||||
|
errConnReset = "conn_reset"
|
||||||
|
errReadConnReset = "read_conn_reset"
|
||||||
|
errWriteConnReset = "write_conn_reset"
|
||||||
|
errReadConnTimedOut = "read_conn_timed_out"
|
||||||
|
errWriteBrokenPipe = "write_broken_pipe"
|
||||||
|
errIOTimeout = "io_timeout"
|
||||||
|
errUnexpectedEOF = "unexpected_eof"
|
||||||
|
errJSONSyntax = "json_syntax"
|
||||||
|
errBlobTooBig = "blob_too_big"
|
||||||
|
errOther = "other"
|
||||||
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
BlobDownloadCount = promauto.NewCounter(prometheus.CounterOpts{
|
BlobDownloadCount = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
Namespace: ns,
|
Namespace: ns,
|
||||||
Name: "download_total",
|
Name: "blob_download_total",
|
||||||
Help: "Total number of blobs downloaded from reflector",
|
Help: "Total number of blobs downloaded from reflector",
|
||||||
})
|
})
|
||||||
BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{
|
BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
|
@ -75,74 +96,64 @@ var (
|
||||||
Name: "sdblob_upload_total",
|
Name: "sdblob_upload_total",
|
||||||
Help: "Total number of SD blobs (and therefore streams) uploaded to reflector",
|
Help: "Total number of SD blobs (and therefore streams) uploaded to reflector",
|
||||||
})
|
})
|
||||||
ErrorCount = promauto.NewCounter(prometheus.CounterOpts{
|
ErrorCount = promauto.NewCounterVec(prometheus.CounterOpts{
|
||||||
Namespace: ns,
|
Namespace: ns,
|
||||||
Name: "error_total",
|
Name: "error_total",
|
||||||
Help: "Total number of errors",
|
Help: "Total number of errors",
|
||||||
})
|
}, []string{labelDirection, labelErrorType})
|
||||||
IOTimeoutCount = promauto.NewCounter(prometheus.CounterOpts{
|
|
||||||
Namespace: ns,
|
|
||||||
Name: "error_io_timeout_total",
|
|
||||||
Help: "Total number of 'i/o timeout' errors",
|
|
||||||
})
|
|
||||||
ReadConnResetCount = promauto.NewCounter(prometheus.CounterOpts{
|
|
||||||
Namespace: ns,
|
|
||||||
Name: "error_read_conn_reset_total",
|
|
||||||
Help: "Total number of 'read: connection reset by peer' errors",
|
|
||||||
})
|
|
||||||
UnexpectedEOFCount = promauto.NewCounter(prometheus.CounterOpts{
|
|
||||||
Namespace: ns,
|
|
||||||
Name: "error_unexpected_eof_total",
|
|
||||||
Help: "Total number of 'unexpected EOF' errors",
|
|
||||||
})
|
|
||||||
BrokenPipeCount = promauto.NewCounter(prometheus.CounterOpts{
|
|
||||||
Namespace: ns,
|
|
||||||
Name: "error_broken_pipe_total",
|
|
||||||
Help: "Total number of 'write: broken pipe' errors",
|
|
||||||
})
|
|
||||||
JSONSyntaxErrorCount = promauto.NewCounter(prometheus.CounterOpts{
|
|
||||||
Namespace: ns,
|
|
||||||
Name: "error_json_syntax_total",
|
|
||||||
Help: "Total number of JSON syntax errors",
|
|
||||||
})
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TrackError(e error) (shouldLog bool) { // shouldLog is a hack, but whatever
|
func TrackError(direction string, e error) (shouldLog bool) { // shouldLog is a hack, but whatever
|
||||||
if e == nil {
|
if e == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorCount.Inc()
|
|
||||||
|
|
||||||
err := ee.Wrap(e, 0)
|
err := ee.Wrap(e, 0)
|
||||||
|
errType := errOther
|
||||||
//name := err.TypeName()
|
//name := err.TypeName()
|
||||||
if errors.Is(e, context.DeadlineExceeded) {
|
if errors.Is(e, context.DeadlineExceeded) {
|
||||||
IOTimeoutCount.Inc()
|
errType = errIOTimeout
|
||||||
} else if strings.Contains(err.Error(), "i/o timeout") { // hit a read or write deadline
|
} else if strings.Contains(err.Error(), "i/o timeout") { // hit a read or write deadline
|
||||||
log.Warnln("i/o timeout is not the same as context.DeadlineExceeded")
|
log.Warnln("i/o timeout is not the same as context.DeadlineExceeded")
|
||||||
IOTimeoutCount.Inc()
|
errType = errIOTimeout
|
||||||
} else if errors.Is(e, syscall.ECONNRESET) {
|
} else if errors.Is(e, syscall.ECONNRESET) {
|
||||||
ReadConnResetCount.Inc()
|
errType = errConnReset
|
||||||
} else if strings.Contains(err.Error(), "read: connection reset by peer") { // the other side closed the connection using TCP reset
|
} else if strings.Contains(err.Error(), "read: connection reset by peer") { // the other side closed the connection using TCP reset
|
||||||
log.Warnln("conn reset by peer is not the same as ECONNRESET")
|
log.Warnln("read conn reset by peer is not the same as ECONNRESET")
|
||||||
ReadConnResetCount.Inc()
|
errType = errReadConnReset
|
||||||
|
} else if strings.Contains(err.Error(), "write: connection reset by peer") { // the other side closed the connection using TCP reset
|
||||||
|
log.Warnln("write conn reset by peer is not the same as ECONNRESET")
|
||||||
|
errType = errWriteConnReset
|
||||||
|
} else if errors.Is(e, syscall.ETIMEDOUT) {
|
||||||
|
errType = errReadConnTimedOut
|
||||||
|
} else if strings.Contains(err.Error(), "read: connection timed out") { // a read on the connection timed out (string-match fallback for ETIMEDOUT)
|
||||||
|
log.Warnln("read conn timed out is not the same as ETIMEDOUT")
|
||||||
|
errType = errReadConnTimedOut
|
||||||
} else if errors.Is(e, io.ErrUnexpectedEOF) {
|
} else if errors.Is(e, io.ErrUnexpectedEOF) {
|
||||||
UnexpectedEOFCount.Inc()
|
errType = errUnexpectedEOF
|
||||||
} else if strings.Contains(err.Error(), "unexpected EOF") { // tried to read from closed pipe or socket
|
} else if strings.Contains(err.Error(), "unexpected EOF") { // tried to read from closed pipe or socket
|
||||||
log.Warnln("unexpected eof is not the same as io.ErrUnexpectedEOF")
|
log.Warnln("unexpected eof is not the same as io.ErrUnexpectedEOF")
|
||||||
UnexpectedEOFCount.Inc()
|
errType = errUnexpectedEOF
|
||||||
} else if errors.Is(e, syscall.EPIPE) {
|
} else if errors.Is(e, syscall.EPIPE) {
|
||||||
BrokenPipeCount.Inc()
|
errType = errWriteBrokenPipe
|
||||||
} else if strings.Contains(err.Error(), "write: broken pipe") { // tried to write to a pipe or socket that was closed by the peer
|
} else if strings.Contains(err.Error(), "write: broken pipe") { // tried to write to a pipe or socket that was closed by the peer
|
||||||
log.Warnln("broken pipe is not the same as EPIPE")
|
log.Warnln("broken pipe is not the same as EPIPE")
|
||||||
BrokenPipeCount.Inc()
|
errType = errWriteBrokenPipe
|
||||||
|
} else if errors.Is(e, reflector.ErrBlobTooBig) {
|
||||||
|
errType = errBlobTooBig
|
||||||
|
} else if strings.Contains(err.Error(), "blob must be at most") {
|
||||||
|
log.Warnln("blob must be at most X bytes is not the same as ErrBlobTooBig")
|
||||||
|
errType = errBlobTooBig
|
||||||
|
} else if _, ok := e.(*json.SyntaxError); ok {
|
||||||
|
errType = errJSONSyntax
|
||||||
} else {
|
} else {
|
||||||
shouldLog = true
|
shouldLog = true
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := e.(*json.SyntaxError); ok {
|
ErrorCount.With(map[string]string{
|
||||||
JSONSyntaxErrorCount.Inc()
|
labelDirection: direction,
|
||||||
}
|
labelErrorType: errType,
|
||||||
|
}).Inc()
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -283,7 +283,7 @@ func (s *Server) logError(e error) {
|
||||||
if e == nil {
|
if e == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
shouldLog := metrics.TrackError(e)
|
shouldLog := metrics.TrackError(metrics.DirectionDownload, e)
|
||||||
if shouldLog {
|
if shouldLog {
|
||||||
log.Errorln(errors.FullTrace(e))
|
log.Errorln(errors.FullTrace(e))
|
||||||
}
|
}
|
||||||
|
|
|
@ -8,7 +8,6 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net"
|
"net"
|
||||||
"strconv"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/lbryio/reflector.go/internal/metrics"
|
"github.com/lbryio/reflector.go/internal/metrics"
|
||||||
|
@ -33,6 +32,8 @@ const (
|
||||||
maxBlobSize = stream.MaxBlobSize
|
maxBlobSize = stream.MaxBlobSize
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var ErrBlobTooBig = errors.Base("blob must be at most %d bytes", maxBlobSize)
|
||||||
|
|
||||||
// Server is an instance of the reflector server. It houses the blob store and listener.
|
// Server is an instance of the reflector server. It houses the blob store and listener.
|
||||||
type Server struct {
|
type Server struct {
|
||||||
Timeout time.Duration // timeout to read or write next message
|
Timeout time.Duration // timeout to read or write next message
|
||||||
|
@ -167,7 +168,7 @@ func (s *Server) doError(conn net.Conn, err error) error {
|
||||||
if err == nil {
|
if err == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
shouldLog := metrics.TrackError(err)
|
shouldLog := metrics.TrackError(metrics.DirectionUpload, err)
|
||||||
if shouldLog {
|
if shouldLog {
|
||||||
log.Errorln(errors.FullTrace(err))
|
log.Errorln(errors.FullTrace(err))
|
||||||
}
|
}
|
||||||
|
@ -305,7 +306,7 @@ func (s *Server) readBlobRequest(conn net.Conn) (int, string, bool, error) {
|
||||||
return blobSize, blobHash, isSdBlob, errors.Err("blob hash is empty")
|
return blobSize, blobHash, isSdBlob, errors.Err("blob hash is empty")
|
||||||
}
|
}
|
||||||
if blobSize > maxBlobSize {
|
if blobSize > maxBlobSize {
|
||||||
return blobSize, blobHash, isSdBlob, errors.Err("blob must be at most " + strconv.Itoa(maxBlobSize) + " bytes")
|
return blobSize, blobHash, isSdBlob, errors.Err(ErrBlobTooBig)
|
||||||
}
|
}
|
||||||
if blobSize == 0 {
|
if blobSize == 0 {
|
||||||
return blobSize, blobHash, isSdBlob, errors.Err("0-byte blob received")
|
return blobSize, blobHash, isSdBlob, errors.Err("0-byte blob received")
|
||||||
|
|
Loading…
Reference in a new issue