From 7fabf4ba61b8999b0b862c76690d527caf48a974 Mon Sep 17 00:00:00 2001 From: Leo Balduf Date: Fri, 2 Jun 2017 16:22:53 +0200 Subject: [PATCH] dist/prometheus: add prometheus config and rules --- dist/prometheus/chihaya.rules | 79 +++++++++++++++++++++++++++++++++ dist/prometheus/prometheus.yaml | 14 ++++++ 2 files changed, 93 insertions(+) create mode 100644 dist/prometheus/chihaya.rules create mode 100644 dist/prometheus/prometheus.yaml diff --git a/dist/prometheus/chihaya.rules b/dist/prometheus/chihaya.rules new file mode 100644 index 0000000..36f7bac --- /dev/null +++ b/dist/prometheus/chihaya.rules @@ -0,0 +1,79 @@ +# Chihaya recording rules +# +# Recording rules are queries that will be evaluated by prometheus periodically. +# They can be used to rename, aggregate, transform or decorate any of the time series available. +# After evaluation, their results are saved and they can be treated like any other time series. +# The rules provided below are entirely optional, but proven to be very useful for monitoring instances of Chihaya. +# +# Set the global configuration fields scrape_interval and evaluation_interval to 5s for smooth evaluations. +# All rules can (and should) be filtered or aggregated further. +# They usually at least have the protocol, job, instance, and address_family fields. +# Look at the chihaya:announce:rate1m rule for an example. 
+
+#################################
+# Request and error rates
+
+# UDP: per-second rate of good requests (series whose error label is empty)
+chihaya:udp:requests:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error=""}[1m]))
+chihaya:udp:requests:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error=""}[5m]))
+
+# UDP: per-second rate of failed requests (series whose error label is non-empty)
+chihaya:udp:errors:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[1m]))
+chihaya:udp:errors:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[5m]))
+
+# HTTP: per-second rate of good requests (series whose error label is empty)
+chihaya:http:requests:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error=""}[1m]))
+chihaya:http:requests:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error=""}[5m]))
+
+# HTTP: per-second rate of failed requests (series whose error label is non-empty)
+chihaya:http:errors:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error!=""}[1m]))
+chihaya:http:errors:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_count{error!=""}[5m]))
+
+# Both protocols: good-request rate, summed per job, instance, address_family and protocol
+chihaya:requests:rate1m = sum(chihaya:udp:requests:rate1m) by (job, instance, address_family, protocol) or sum(chihaya:http:requests:rate1m) by (job, instance, address_family, protocol)
+chihaya:requests:rate5m = sum(chihaya:udp:requests:rate5m) by (job, instance, address_family, protocol) or sum(chihaya:http:requests:rate5m) by (job, instance, address_family, protocol)
+
+# Both protocols: error rate, summed per job, instance, address_family and protocol
+chihaya:errors:rate1m = sum(chihaya:udp:errors:rate1m) by (job, instance, address_family, protocol) or sum(chihaya:http:errors:rate1m) by (job, instance, address_family, protocol)
+chihaya:errors:rate5m = sum(chihaya:udp:errors:rate5m) by (job, instance, address_family, protocol) or sum(chihaya:http:errors:rate5m) by (job, instance, address_family, protocol)
+
+# Both protocols: rate of good announces (good-request series with action="announce")
+# These can be aggregated further, for example:
+#     sum(chihaya:announce:rate1m) without (protocol)
+chihaya:announce:rate1m = chihaya:udp:requests:rate1m{action="announce"} or chihaya:http:requests:rate1m{action="announce"}
+chihaya:announce:rate5m = chihaya:udp:requests:rate5m{action="announce"} or chihaya:http:requests:rate5m{action="announce"}
+
+# Both protocols: rate of good scrapes (good-request series with action="scrape")
+# These can be aggregated further, for example:
+#     sum(chihaya:scrape:rate1m) without (protocol)
+chihaya:scrape:rate1m = chihaya:udp:requests:rate1m{action="scrape"} or chihaya:http:requests:rate1m{action="scrape"}
+chihaya:scrape:rate5m = chihaya:udp:requests:rate5m{action="scrape"} or chihaya:http:requests:rate5m{action="scrape"}
+
+#################################
+# Latencies
+# These rules only yield data when request timing is enabled.
+
+# UDP: mean response duration (ms) of good requests, i.e. rate(_sum) / rate(_count)
+chihaya:udp:requests:latency:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error=""}[1m])/rate(chihaya_udp_response_duration_milliseconds_count{error=""}[1m]))
+chihaya:udp:requests:latency:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error=""}[5m])/rate(chihaya_udp_response_duration_milliseconds_count{error=""}[5m]))
+
+# UDP: mean response duration (ms) of failed requests
+chihaya:udp:errors:latency:rate1m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error!=""}[1m])/rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[1m]))
+chihaya:udp:errors:latency:rate5m{protocol="udp"} = drop_common_labels(rate(chihaya_udp_response_duration_milliseconds_sum{error!=""}[5m])/rate(chihaya_udp_response_duration_milliseconds_count{error!=""}[5m]))
+
+# UDP: latency of everything (union of the good-request and error series of the same window)
+chihaya:udp:latency:rate1m = chihaya:udp:requests:latency:rate1m or chihaya:udp:errors:latency:rate1m
+chihaya:udp:latency:rate5m = chihaya:udp:requests:latency:rate5m or chihaya:udp:errors:latency:rate5m
+
+# HTTP: mean response duration (ms) of good requests, i.e. rate(_sum) / rate(_count)
+chihaya:http:requests:latency:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error=""}[1m])/rate(chihaya_http_response_duration_milliseconds_count{error=""}[1m]))
+chihaya:http:requests:latency:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error=""}[5m])/rate(chihaya_http_response_duration_milliseconds_count{error=""}[5m]))
+
+# HTTP: mean response duration (ms) of failed requests
+chihaya:http:errors:latency:rate1m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error!=""}[1m])/rate(chihaya_http_response_duration_milliseconds_count{error!=""}[1m]))
+chihaya:http:errors:latency:rate5m{protocol="http"} = drop_common_labels(rate(chihaya_http_response_duration_milliseconds_sum{error!=""}[5m])/rate(chihaya_http_response_duration_milliseconds_count{error!=""}[5m]))
+
+# HTTP: latency of everything (union of the good-request and error series of the same window)
+chihaya:http:latency:rate1m = chihaya:http:requests:latency:rate1m or chihaya:http:errors:latency:rate1m
+chihaya:http:latency:rate5m = chihaya:http:requests:latency:rate5m or chihaya:http:errors:latency:rate5m
+
diff --git a/dist/prometheus/prometheus.yaml b/dist/prometheus/prometheus.yaml
new file mode 100644
index 0000000..3b05544
--- /dev/null
+++ b/dist/prometheus/prometheus.yaml
@@ -0,0 +1,14 @@
+global:
+  scrape_interval: 5s
+  evaluation_interval: 5s
+
+rule_files:
+  - chihaya.rules
+
+# A scrape configuration containing exactly one endpoint to scrape:
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'local-chihaya' # you can name this however you want
+    scrape_interval: 5s # optionally override the global scrape_interval
+    static_configs:
+      - targets: ['localhost:6881'] # provide the address of chihaya's prometheus endpoint