tracker/dist/prometheus/chihaya.rules

79 lines
5.8 KiB
Text

# Chihaya recording rules
#
# Recording rules are queries that will be evaluated by prometheus periodically.
# They can be used to rename, aggregate, transform or decorate any of the time series available.
# After evaluation, their results are saved and they can be treated like any other time series.
# The rules provided below are entirely optional, but have proven to be very useful for monitoring instances of Chihaya.
#
# Set the global configuration fields scrape_interval and evaluation_interval to 5s for smooth evaluations.
# All rules can (and should) be filtered or aggregated further.
# They usually have at least the protocol, job, instance, and address_family labels.
# Look at the chihaya:announce:rate1m rule for an example.
#################################
# Request/error rates
# UDP request rate, no errors (good requests).
# Per-second rate of the UDP response-duration `_count` series, restricted to
# series with an empty `error` label (error=""), over a 1m and a 5m window.
# The rule's label set attaches protocol="udp" to every recorded series.
chihaya:udp:requests:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error=""}[1m]))
chihaya:udp:requests:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error=""}[5m]))
# UDP error rate.
# Same as above, but restricted to series with a non-empty `error` label (error!="").
chihaya:udp:errors:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[1m]))
chihaya:udp:errors:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[5m]))
# HTTP request rate, no errors (good requests).
# HTTP counterpart of the UDP request rate; recorded series carry protocol="http".
chihaya:http:requests:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error=""}[1m]))
chihaya:http:requests:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error=""}[5m]))
# HTTP error rate.
# HTTP counterpart of the UDP error rate (error!="").
chihaya:http:errors:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error!=""}[1m]))
chihaya:http:errors:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error!=""}[5m]))
# Total request rate, no errors (good requests).
# Sums the per-protocol good-request rates down to the
# (job, instance, address_family, protocol) labels and joins the UDP and HTTP
# results with `or`.
chihaya:requests:rate1m =
    sum by (job, instance, address_family, protocol) (chihaya:udp:requests:rate1m)
  or
    sum by (job, instance, address_family, protocol) (chihaya:http:requests:rate1m)
chihaya:requests:rate5m =
    sum by (job, instance, address_family, protocol) (chihaya:udp:requests:rate5m)
  or
    sum by (job, instance, address_family, protocol) (chihaya:http:requests:rate5m)
# Total error rate.
# Same aggregation as above, applied to the per-protocol error rates.
chihaya:errors:rate1m =
    sum by (job, instance, address_family, protocol) (chihaya:udp:errors:rate1m)
  or
    sum by (job, instance, address_family, protocol) (chihaya:http:errors:rate1m)
chihaya:errors:rate5m =
    sum by (job, instance, address_family, protocol) (chihaya:udp:errors:rate5m)
  or
    sum by (job, instance, address_family, protocol) (chihaya:http:errors:rate5m)
# Total announce rate, no errors (good announces).
# Union of the UDP and HTTP good-request rates whose `action` label is "announce".
# The result is not aggregated, so each series keeps its own labels.
# Aggregate these further as needed, for example:
# sum(chihaya:announce:rate1m) without (protocol)
chihaya:announce:rate1m = chihaya:udp:requests:rate1m{action="announce"} or chihaya:http:requests:rate1m{action="announce"}
chihaya:announce:rate5m = chihaya:udp:requests:rate5m{action="announce"} or chihaya:http:requests:rate5m{action="announce"}
# Total scrape rate, no errors (good scrapes).
# Union of the UDP and HTTP good-request rates whose `action` label is "scrape".
# Aggregate these further as needed, for example:
# sum(chihaya:scrape:rate1m) without (protocol)
chihaya:scrape:rate1m = chihaya:udp:requests:rate1m{action="scrape"} or chihaya:http:requests:rate1m{action="scrape"}
chihaya:scrape:rate5m = chihaya:udp:requests:rate5m{action="scrape"} or chihaya:http:requests:rate5m{action="scrape"}
#################################
# Latencies
# These only work with request timing enabled.
# UDP request latency, no errors (good requests).
# Average response duration over the window: rate of the `_sum` series divided
# by rate of the `_count` series, both restricted to error="".
# Presumably in milliseconds, going by the metric name.
chihaya:udp:requests:latency:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error=""}[1m])/rate(chihaya_udp_response_duration_milliseconds_count{error=""}[1m]))
chihaya:udp:requests:latency:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error=""}[5m])/rate(chihaya_udp_response_duration_milliseconds_count{error=""}[5m]))
# UDP error latency.
# Same computation, restricted to series with a non-empty `error` label.
chihaya:udp:errors:latency:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error!=""}[1m])/rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[1m]))
chihaya:udp:errors:latency:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error!=""}[5m])/rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[5m]))
# UDP latency of everything.
# Union of the good-request and error latency series for the same window.
chihaya:udp:latency:rate1m = chihaya:udp:requests:latency:rate1m or chihaya:udp:errors:latency:rate1m
chihaya:udp:latency:rate5m = chihaya:udp:requests:latency:rate5m or chihaya:udp:errors:latency:rate5m
# HTTP request latency, no errors (good requests).
# Average response duration over the window: rate of the `_sum` series divided
# by rate of the `_count` series, both restricted to error="".
# Presumably in milliseconds, going by the metric name.
chihaya:http:requests:latency:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error=""}[1m])/rate(chihaya_http_response_duration_milliseconds_count{error=""}[1m]))
chihaya:http:requests:latency:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error=""}[5m])/rate(chihaya_http_response_duration_milliseconds_count{error=""}[5m]))
# HTTP error latency.
# Same computation, restricted to series with a non-empty `error` label.
chihaya:http:errors:latency:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error!=""}[1m])/rate(chihaya_http_response_duration_milliseconds_count{error!=""}[1m]))
chihaya:http:errors:latency:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error!=""}[5m])/rate(chihaya_http_response_duration_milliseconds_count{error!=""}[5m]))
# HTTP latency of everything (good requests and errors).
# Union of the good-request and error latency series for the same window.
# Each rule is built from inputs of its own window: the 1m rule from the
# rate1m series and the 5m rule from the rate5m series, matching the UDP
# "latency of everything" rules.
chihaya:http:latency:rate1m = chihaya:http:requests:latency:rate1m or chihaya:http:errors:latency:rate1m
chihaya:http:latency:rate5m = chihaya:http:requests:latency:rate5m or chihaya:http:errors:latency:rate5m