use labels for different error types

This commit is contained in:
Alex Grintsvayg 2019-12-29 11:57:43 -05:00
parent 50089481fb
commit 5d8a2d697c
No known key found for this signature in database
GPG key ID: AEB3F089F86A22B5
3 changed files with 60 additions and 48 deletions

View file

@ -10,6 +10,8 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/lbryio/reflector.go/reflector"
ee "github.com/lbryio/lbry.go/v2/extras/errors" ee "github.com/lbryio/lbry.go/v2/extras/errors"
"github.com/lbryio/lbry.go/v2/extras/stop" "github.com/lbryio/lbry.go/v2/extras/stop"
@ -57,12 +59,31 @@ func (s *Server) Shutdown() {
s.stop.StopAndWait() s.stop.StopAndWait()
} }
const ns = "reflector" const (
ns = "reflector"
labelDirection = "direction"
labelErrorType = "error_type"
DirectionUpload = "upload" // to reflector
DirectionDownload = "download" // from reflector
errConnReset = "conn_reset"
errReadConnReset = "read_conn_reset"
errWriteConnReset = "write_conn_reset"
errReadConnTimedOut = "read_conn_timed_out"
errWriteBrokenPipe = "write_broken_pipe"
errIOTimeout = "io_timeout"
errUnexpectedEOF = "unexpected_eof"
errJSONSyntax = "json_syntax"
errBlobTooBig = "blob_too_big"
errOther = "other"
)
var ( var (
BlobDownloadCount = promauto.NewCounter(prometheus.CounterOpts{ BlobDownloadCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns, Namespace: ns,
Name: "download_total", Name: "blob_download_total",
Help: "Total number of blobs downloaded from reflector", Help: "Total number of blobs downloaded from reflector",
}) })
BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{ BlobUploadCount = promauto.NewCounter(prometheus.CounterOpts{
@ -75,74 +96,64 @@ var (
Name: "sdblob_upload_total", Name: "sdblob_upload_total",
Help: "Total number of SD blobs (and therefore streams) uploaded to reflector", Help: "Total number of SD blobs (and therefore streams) uploaded to reflector",
}) })
ErrorCount = promauto.NewCounter(prometheus.CounterOpts{ ErrorCount = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: ns, Namespace: ns,
Name: "error_total", Name: "error_total",
Help: "Total number of errors", Help: "Total number of errors",
}) }, []string{labelDirection, labelErrorType})
IOTimeoutCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_io_timeout_total",
Help: "Total number of 'i/o timeout' errors",
})
ReadConnResetCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_read_conn_reset_total",
Help: "Total number of 'read: connection reset by peer' errors",
})
UnexpectedEOFCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_unexpected_eof_total",
Help: "Total number of 'unexpected EOF' errors",
})
BrokenPipeCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_broken_pipe_total",
Help: "Total number of 'write: broken pipe' errors",
})
JSONSyntaxErrorCount = promauto.NewCounter(prometheus.CounterOpts{
Namespace: ns,
Name: "error_json_syntax_total",
Help: "Total number of JSON syntax errors",
})
) )
func TrackError(e error) (shouldLog bool) { // shouldLog is a hack, but whatever func TrackError(direction string, e error) (shouldLog bool) { // shouldLog is a hack, but whatever
if e == nil { if e == nil {
return return
} }
ErrorCount.Inc()
err := ee.Wrap(e, 0) err := ee.Wrap(e, 0)
errType := errOther
//name := err.TypeName() //name := err.TypeName()
if errors.Is(e, context.DeadlineExceeded) { if errors.Is(e, context.DeadlineExceeded) {
IOTimeoutCount.Inc() errType = errIOTimeout
} else if strings.Contains(err.Error(), "i/o timeout") { // hit a read or write deadline } else if strings.Contains(err.Error(), "i/o timeout") { // hit a read or write deadline
log.Warnln("i/o timeout is not the same as context.DeadlineExceeded") log.Warnln("i/o timeout is not the same as context.DeadlineExceeded")
IOTimeoutCount.Inc() errType = errIOTimeout
} else if errors.Is(e, syscall.ECONNRESET) { } else if errors.Is(e, syscall.ECONNRESET) {
ReadConnResetCount.Inc() errType = errConnReset
} else if strings.Contains(err.Error(), "read: connection reset by peer") { // the other side closed the connection using TCP reset } else if strings.Contains(err.Error(), "read: connection reset by peer") { // the other side closed the connection using TCP reset
log.Warnln("conn reset by peer is not the same as ECONNRESET") log.Warnln("read conn reset by peer is not the same as ECONNRESET")
ReadConnResetCount.Inc() errType = errReadConnReset
} else if strings.Contains(err.Error(), "write: connection reset by peer") { // the other side closed the connection using TCP reset
log.Warnln("write conn reset by peer is not the same as ECONNRESET")
errType = errWriteConnReset
} else if errors.Is(e, syscall.ETIMEDOUT) {
errType = errReadConnTimedOut
} else if strings.Contains(err.Error(), "read: connection timed out") { // the connection timed out while reading (this is a timeout, not a TCP reset)
log.Warnln("read conn timed out is not the same as ETIMEDOUT")
errType = errReadConnTimedOut
} else if errors.Is(e, io.ErrUnexpectedEOF) { } else if errors.Is(e, io.ErrUnexpectedEOF) {
UnexpectedEOFCount.Inc() errType = errUnexpectedEOF
} else if strings.Contains(err.Error(), "unexpected EOF") { // tried to read from closed pipe or socket } else if strings.Contains(err.Error(), "unexpected EOF") { // tried to read from closed pipe or socket
log.Warnln("unexpected eof is not the same as io.ErrUnexpectedEOF") log.Warnln("unexpected eof is not the same as io.ErrUnexpectedEOF")
UnexpectedEOFCount.Inc() errType = errUnexpectedEOF
} else if errors.Is(e, syscall.EPIPE) { } else if errors.Is(e, syscall.EPIPE) {
BrokenPipeCount.Inc() errType = errWriteBrokenPipe
} else if strings.Contains(err.Error(), "write: broken pipe") { // tried to write to a pipe or socket that was closed by the peer } else if strings.Contains(err.Error(), "write: broken pipe") { // tried to write to a pipe or socket that was closed by the peer
log.Warnln("broken pipe is not the same as EPIPE") log.Warnln("broken pipe is not the same as EPIPE")
BrokenPipeCount.Inc() errType = errWriteBrokenPipe
} else if errors.Is(e, reflector.ErrBlobTooBig) {
errType = errBlobTooBig
} else if strings.Contains(err.Error(), "blob must be at most") {
log.Warnln("blob must be at most X bytes is not the same as ErrBlobTooBig")
errType = errBlobTooBig
} else if _, ok := e.(*json.SyntaxError); ok {
errType = errJSONSyntax
} else { } else {
shouldLog = true shouldLog = true
} }
if _, ok := e.(*json.SyntaxError); ok { ErrorCount.With(map[string]string{
JSONSyntaxErrorCount.Inc() labelDirection: direction,
} labelErrorType: errType,
}).Inc()
return return
} }

View file

@ -283,7 +283,7 @@ func (s *Server) logError(e error) {
if e == nil { if e == nil {
return return
} }
shouldLog := metrics.TrackError(e) shouldLog := metrics.TrackError(metrics.DirectionDownload, e)
if shouldLog { if shouldLog {
log.Errorln(errors.FullTrace(e)) log.Errorln(errors.FullTrace(e))
} }

View file

@ -8,7 +8,6 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
"net" "net"
"strconv"
"time" "time"
"github.com/lbryio/reflector.go/internal/metrics" "github.com/lbryio/reflector.go/internal/metrics"
@ -33,6 +32,8 @@ const (
maxBlobSize = stream.MaxBlobSize maxBlobSize = stream.MaxBlobSize
) )
var ErrBlobTooBig = errors.Base("blob must be at most %d bytes", maxBlobSize)
// Server is an instance of the reflector server. It houses the blob store and listener. // Server is an instance of the reflector server. It houses the blob store and listener.
type Server struct { type Server struct {
Timeout time.Duration // timeout to read or write next message Timeout time.Duration // timeout to read or write next message
@ -167,7 +168,7 @@ func (s *Server) doError(conn net.Conn, err error) error {
if err == nil { if err == nil {
return nil return nil
} }
shouldLog := metrics.TrackError(err) shouldLog := metrics.TrackError(metrics.DirectionUpload, err)
if shouldLog { if shouldLog {
log.Errorln(errors.FullTrace(err)) log.Errorln(errors.FullTrace(err))
} }
@ -305,7 +306,7 @@ func (s *Server) readBlobRequest(conn net.Conn) (int, string, bool, error) {
return blobSize, blobHash, isSdBlob, errors.Err("blob hash is empty") return blobSize, blobHash, isSdBlob, errors.Err("blob hash is empty")
} }
if blobSize > maxBlobSize { if blobSize > maxBlobSize {
return blobSize, blobHash, isSdBlob, errors.Err("blob must be at most " + strconv.Itoa(maxBlobSize) + " bytes") return blobSize, blobHash, isSdBlob, errors.Err(ErrBlobTooBig)
} }
if blobSize == 0 { if blobSize == 0 {
return blobSize, blobHash, isSdBlob, errors.Err("0-byte blob received") return blobSize, blobHash, isSdBlob, errors.Err("0-byte blob received")