use labels for different error types

This commit is contained in:
Alex Grintsvayg 2019-12-29 11:57:43 -05:00
parent 50089481fb
commit 5d8a2d697c
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
3 changed files with 60 additions and 48 deletions

View file

@ -10,6 +10,8 @@ import (
"syscall"
"time"
"github.com/lbryio/reflector.go/reflector"
ee "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/stop"
@ -57,12 +59,31 @@ func (s *Server) Shutdown() {
s.stop.StopAndWait()
}
const ns = "reflector"
const (
// ns is the Namespace applied to the metrics declared in this file.
ns = "reflector"
// Names of the labels attached to ErrorCount.
labelDirection = "direction"
labelErrorType = "error_type"
// Values for the direction label.
DirectionUpload = "upload" // to reflector
DirectionDownload = "download" // from reflector
// Values for the error_type label; TrackError picks one per error.
errConnReset = "conn_reset"
errReadConnReset = "read_conn_reset"
errWriteConnReset = "write_conn_reset"
errReadConnTimedOut = "read_conn_timed_out"
errWriteBrokenPipe = "write_broken_pipe"
errIOTimeout = "io_timeout"
errUnexpectedEOF = "unexpected_eof"
errJSONSyntax = "json_syntax"
errBlobTooBig = "blob_too_big"
errOther = "other" // fallback used when no other category matches
)
var (
BlobDownloadCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "download_total",
Name: "blob_download_total",
Help: "Total number of blobs downloaded from reflector",
})
BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{
@ -75,74 +96,64 @@ var (
Name: "sdblob_upload_total",
Help: "Total number of SD blobs (and therefore streams) uploaded to reflector",
})
ErrorCount = promauto.NewCounter(prometheus.CounterOpts{
ErrorCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Name: "error_total",
Help: "Total number of errors",
})
IOTimeoutCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_io_timeout_total",
Help: "Total number of 'i/o timeout' errors",
})
ReadConnResetCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_read_conn_reset_total",
Help: "Total number of 'read: connection reset by peer' errors",
})
UnexpectedEOFCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_unexpected_eof_total",
Help: "Total number of 'unexpected EOF' errors",
})
BrokenPipeCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_broken_pipe_total",
Help: "Total number of 'write: broken pipe' errors",
})
JSONSyntaxErrorCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_json_syntax_total",
Help: "Total number of JSON syntax errors",
})
}, []string{labelDirection, labelErrorType})
)
// TrackError classifies e into one of the error_type label values and
// increments ErrorCount with the given direction and the chosen error type.
//
// direction is used verbatim as the value of the "direction" label
// (DirectionUpload and DirectionDownload are the values defined for it).
//
// A nil e is ignored and returns false. shouldLog is true only when e matched
// none of the known categories and was therefore counted as "other".
func TrackError(e error) (shouldLog bool) { // shouldLog is a hack, but whatever
func TrackError(direction string, e error) (shouldLog bool) { // shouldLog is a hack, but whatever
if e == nil {
return
}
ErrorCount.Inc()
err := ee.Wrap(e, 0)
errType := errOther
//name := err.TypeName()
// Each category is tested twice: first with errors.Is against a sentinel,
// then by substring match on the error message. The substring branches log
// a warning so that cases the sentinel check missed are visible in the logs.
if errors.Is(e, context.DeadlineExceeded) {
IOTimeoutCount.Inc()
errType = errIOTimeout
} else if strings.Contains(err.Error(), "i/o timeout") { // hit a read or write deadline
log.Warnln("i/o timeout is not the same as context.DeadlineExceeded")
IOTimeoutCount.Inc()
errType = errIOTimeout
} else if errors.Is(e, syscall.ECONNRESET) {
ReadConnResetCount.Inc()
errType = errConnReset
} else if strings.Contains(err.Error(), "read: connection reset by peer") { // the other side closed the connection using TCP reset
log.Warnln("conn reset by peer is not the same as ECONNRESET")
ReadConnResetCount.Inc()
log.Warnln("read conn reset by peer is not the same as ECONNRESET")
errType = errReadConnReset
} else if strings.Contains(err.Error(), "write: connection reset by peer") { // the other side closed the connection using TCP reset
log.Warnln("write conn reset by peer is not the same as ECONNRESET")
errType = errWriteConnReset
} else if errors.Is(e, syscall.ETIMEDOUT) {
// NOTE(review): ETIMEDOUT is labelled as a *read* timeout here — confirm it cannot come from a write.
errType = errReadConnTimedOut
} else if strings.Contains(err.Error(), "read: connection timed out") { // a read on the connection timed out
log.Warnln("read conn timed out is not the same as ETIMEDOUT")
errType = errReadConnTimedOut
} else if errors.Is(e, io.ErrUnexpectedEOF) {
UnexpectedEOFCount.Inc()
errType = errUnexpectedEOF
} else if strings.Contains(err.Error(), "unexpected EOF") { // tried to read from closed pipe or socket
log.Warnln("unexpected eof is not the same as io.ErrUnexpectedEOF")
UnexpectedEOFCount.Inc()
errType = errUnexpectedEOF
} else if errors.Is(e, syscall.EPIPE) {
BrokenPipeCount.Inc()
errType = errWriteBrokenPipe
} else if strings.Contains(err.Error(), "write: broken pipe") { // tried to write to a pipe or socket that was closed by the peer
log.Warnln("broken pipe is not the same as EPIPE")
BrokenPipeCount.Inc()
errType = errWriteBrokenPipe
} else if errors.Is(e, reflector.ErrBlobTooBig) {
// NOTE(review): this file imports the reflector package, and the reflector
// server file imports metrics — confirm this does not create an import cycle.
errType = errBlobTooBig
} else if strings.Contains(err.Error(), "blob must be at most") {
log.Warnln("blob must be at most X bytes is not the same as ErrBlobTooBig")
errType = errBlobTooBig
} else if _, ok := e.(*json.SyntaxError); ok {
// NOTE(review): a plain type assertion does not look through wrapped
// errors — confirm whether errors.As is wanted here.
errType = errJSONSyntax
} else {
// Unrecognised error: keep the "other" type and ask the caller to log it.
shouldLog = true
}
if _, ok := e.(*json.SyntaxError); ok {
JSONSyntaxErrorCount.Inc()
}
// Record exactly one increment per tracked error, keyed by direction and type.
ErrorCount.With(map[string]string{
labelDirection: direction,
labelErrorType: errType,
}).Inc()
return
}

View file

@ -283,7 +283,7 @@ func (s *Server) logError(e error) {
if e == nil {
return
}
shouldLog := metrics.TrackError(e)
shouldLog := metrics.TrackError(metrics.DirectionDownload, e)
if shouldLog {
log.Errorln(errors.FullTrace(e))
}

View file

@ -8,7 +8,6 @@ import (
"io"
"io/ioutil"
"net"
"strconv"
"time"
"github.com/lbryio/reflector.go/internal/metrics"
@ -33,6 +32,8 @@ const (
maxBlobSize = stream.MaxBlobSize
)
var ErrBlobTooBig = errors.Base("blob must be at most %d bytes", maxBlobSize)
// Server is an instance of the reflector server. It houses the blob store and listener.
type Server struct {
Timeout time.Duration // timeout to read or write next message
@ -167,7 +168,7 @@ func (s *Server) doError(conn net.Conn, err error) error {
if err == nil {
return nil
}
shouldLog := metrics.TrackError(err)
shouldLog := metrics.TrackError(metrics.DirectionUpload, err)
if shouldLog {
log.Errorln(errors.FullTrace(err))
}
@ -305,7 +306,7 @@ func (s *Server) readBlobRequest(conn net.Conn) (int, string, bool, error) {
return blobSize, blobHash, isSdBlob, errors.Err("blob hash is empty")
}
if blobSize > maxBlobSize {
return blobSize, blobHash, isSdBlob, errors.Err("blob must be at most " + strconv.Itoa(maxBlobSize) + " bytes")
return blobSize, blobHash, isSdBlob, errors.Err(ErrBlobTooBig)
}
if blobSize == 0 {
return blobSize, blobHash, isSdBlob, errors.Err("0-byte blob received")