From 35e106822feedfdc860ba974e9943fa70fe5ea59 Mon Sep 17 00:00:00 2001 From: Zach Leslie Date: Wed, 1 Dec 2021 20:59:05 +0000 Subject: [PATCH 1/3] Update mixin to use new backend metric --- operations/tempo-mixin/dashboards.libsonnet | 20 +++++++++---------- operations/tempo-mixin/tempo-operational.json | 8 ++++---- .../tempo-mixin/yamls/tempo-operational.json | 8 ++++---- operations/tempo-mixin/yamls/tempo-reads.json | 8 ++++---- .../tempo-mixin/yamls/tempo-writes.json | 16 +++++++-------- 5 files changed, 30 insertions(+), 30 deletions(-) diff --git a/operations/tempo-mixin/dashboards.libsonnet b/operations/tempo-mixin/dashboards.libsonnet index 34c0b8677ef..1003606a709 100644 --- a/operations/tempo-mixin/dashboards.libsonnet +++ b/operations/tempo-mixin/dashboards.libsonnet @@ -64,14 +64,14 @@ dashboard_utils { ) ) .addRow( - g.row('GCS') + g.row('Backend') .addPanel( $.panel('QPS') + - $.qpsPanel('tempodb_gcs_request_duration_seconds_count{%s,operation="GET"}' % $.jobMatcher($._config.jobs.querier)) + $.qpsPanel('tempodb_backend_request_duration_seconds_count{%s,operation="GET"}' % $.jobMatcher($._config.jobs.querier)) ) .addPanel( $.panel('Latency') + - $.latencyPanel('tempodb_gcs_request_duration_seconds', '{%s,operation="GET"}' % $.jobMatcher($._config.jobs.querier)) + $.latencyPanel('tempodb_backend_request_duration_seconds', '{%s,operation="GET"}' % $.jobMatcher($._config.jobs.querier)) ) ) .addRow( @@ -192,14 +192,14 @@ dashboard_utils { ) ) .addRow( - g.row('GCS - Ingester') + g.row('Backend - Ingester') .addPanel( $.panel('QPS') + - $.qpsPanel('tempodb_gcs_request_duration_seconds_count{%s,operation="POST"}' % $.jobMatcher($._config.jobs.ingester)) + $.qpsPanel('tempodb_backend_request_duration_seconds_count{%s,operation="POST"}' % $.jobMatcher($._config.jobs.ingester)) ) .addPanel( $.panel('Latency') + - $.latencyPanel('tempodb_gcs_request_duration_seconds', '{%s,operation="POST"}' % $.jobMatcher($._config.jobs.ingester)) + $.latencyPanel('tempodb_backend_request_duration_seconds', '{%s,operation="POST"}' % $.jobMatcher($._config.jobs.ingester)) ) ) .addRow( @@ -214,14 +214,14 @@ dashboard_utils { ) ) .addRow( - g.row('GCS - Compactor') + g.row('Backend - Compactor') .addPanel( $.panel('QPS') + - $.qpsPanel('tempodb_gcs_request_duration_seconds_count{%s,operation="POST"}' % $.jobMatcher($._config.jobs.compactor)) + $.qpsPanel('tempodb_backend_request_duration_seconds_count{%s,operation="POST"}' % $.jobMatcher($._config.jobs.compactor)) ) .addPanel( $.panel('Latency') + - $.latencyPanel('tempodb_gcs_request_duration_seconds', '{%s,operation="POST"}' % $.jobMatcher($._config.jobs.compactor)) + $.latencyPanel('tempodb_backend_request_duration_seconds', '{%s,operation="POST"}' % $.jobMatcher($._config.jobs.compactor)) ) ), 'tempo-resources.json': @@ -298,6 +298,6 @@ dashboard_utils { .addPanel( $.goHeapInUsePanel('Memory (go heap inuse)', $.jobMatcher($._config.jobs.compactor)), ) - ) + ), }, } diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index 57d91f7d1d4..564b53385ca 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -4261,7 +4261,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempodb_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, operation)", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, operation)", "interval": "", "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -4357,17 +4357,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".5-{{operation}}", "refId": "C" } diff --git a/operations/tempo-mixin/yamls/tempo-operational.json b/operations/tempo-mixin/yamls/tempo-operational.json index 10e43565ad4..590ae97b934 100644 --- a/operations/tempo-mixin/yamls/tempo-operational.json +++ b/operations/tempo-mixin/yamls/tempo-operational.json @@ -4715,7 +4715,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(rate(tempodb_gcs_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, operation)", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (status_code, operation)", "interval": "", "legendFormat": "{{status_code}}-{{operation}}", "refId": "A" @@ -4827,17 +4827,17 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(.99, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".99-{{operation}}", "refId": "A" }, { - "expr": "histogram_quantile(.9, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.9, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".9-{{operation}}", "refId": "B" }, { - "expr": "histogram_quantile(.5, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", + "expr": "histogram_quantile(.5, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}[$__rate_interval])) by (operation, le))", "legendFormat": ".5-{{operation}}", "refId": "C" } diff --git a/operations/tempo-mixin/yamls/tempo-reads.json b/operations/tempo-mixin/yamls/tempo-reads.json index 07f02956dd4..2d83bcac6c5 100644 --- a/operations/tempo-mixin/yamls/tempo-reads.json +++ b/operations/tempo-mixin/yamls/tempo-reads.json @@ -1122,7 +1122,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1209,7 +1209,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1218,7 +1218,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1227,7 +1227,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_gcs_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/querier\",operation=\"GET\"}[$__interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, diff --git a/operations/tempo-mixin/yamls/tempo-writes.json b/operations/tempo-mixin/yamls/tempo-writes.json index f5803c8265f..581672555ab 100644 --- a/operations/tempo-mixin/yamls/tempo-writes.json +++ b/operations/tempo-mixin/yamls/tempo-writes.json @@ -1038,7 +1038,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1125,7 +1125,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1134,7 +1134,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1143,7 +1143,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_gcs_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/ingester\",operation=\"POST\"}[$__interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1458,7 +1458,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", + "expr": "sum by (status) (\n label_replace(label_replace(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__rate_interval]),\n \"status\", \"${1}xx\", \"status_code\", \"([0-9])..\"),\n \"status\", \"${1}\", \"status_code\", \"([a-z]+)\"))\n", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1545,7 +1545,7 @@ "steppedLine": false, "targets": [ { - "expr": "histogram_quantile(0.99, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.99, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1554,7 +1554,7 @@ "step": 10 }, { - "expr": "histogram_quantile(0.50, sum(rate(tempodb_gcs_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", + "expr": "histogram_quantile(0.50, sum(rate(tempodb_backend_request_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by (le,)) * 1e3", "format": "time_series", "interval": "1m", "intervalFactor": 2, @@ -1563,7 +1563,7 @@ "step": 10 }, { - "expr": "sum(rate(tempodb_gcs_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_gcs_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by ()", + "expr": "sum(rate(tempodb_backend_request_duration_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by () * 1e3 / sum(rate(tempodb_backend_request_duration_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/compactor\",operation=\"POST\"}[$__interval])) by ()", "format": "time_series", "interval": "1m", "intervalFactor": 2, From 1de77157f2352104dd419f0c59bc200de2158f08 Mon Sep 17 00:00:00 2001 From: Zach Leslie Date: Wed, 1 Dec 2021 21:00:54 +0000 Subject: [PATCH 2/3] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 933b2e4f2cd..fbdcd214a26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * [ENHANCEMENT] Reduce compactionCycle to improve performance in large multitenant environments [#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) * [ENHANCEMENT] Added max_compaction_cycle to allow for independently configuring polling and compaction cycle. [#1145](https://github.com/grafana/tempo/pull/1145) (@joe-elliott) * [ENHANCEMENT] Add `tempodb_compaction_outstanding_blocks` metric to measure compaction load [#1143](https://github.com/grafana/tempo/pull/1143) (@mapno) +* [ENHANCEMENT] Update mixin to use new backend metric [#1151](https://github.com/grafana/tempo/pull/1151) (@zalegrala) * [BUGFIX] Fix defaults for MaxBytesPerTrace (ingester.max-bytes-per-trace) and MaxSearchBytesPerTrace (ingester.max-search-bytes-per-trace) (@bitprocessor) * [BUGFIX] Ignore empty objects during compaction [#1113](https://github.com/grafana/tempo/pull/1113) (@mdisibio) * [BUGFIX] Add process name to vulture traces to work around display issues [#1127](https://github.com/grafana/tempo/pull/1127) (@mdisibio) From aa3c403f5252254626c8219ca4d21f4d25d76404 Mon Sep 17 00:00:00 2001 From: Zach Leslie Date: Thu, 2 Dec 2021 17:55:12 +0000 Subject: [PATCH 3/3] Update GCS reference in yamls --- operations/tempo-mixin/tempo-operational.json | 4 ++-- operations/tempo-mixin/yamls/tempo-operational.json | 2 +- operations/tempo-mixin/yamls/tempo-reads.json | 2 +- operations/tempo-mixin/yamls/tempo-writes.json | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index 564b53385ca..a404d1fcf61 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -4414,7 +4414,7 @@ } } ], - "title": "GCS", + "title": "Backend", "type": "row" }, { @@ -5534,4 +5534,4 @@ "title": "Tempo Operational", "uid": "a6175b9cc7ec20591890117c39580030", "version": 1 -} \ No newline at end of file +} diff --git a/operations/tempo-mixin/yamls/tempo-operational.json b/operations/tempo-mixin/yamls/tempo-operational.json index 590ae97b934..0ebf2f3aecc 100644 --- a/operations/tempo-mixin/yamls/tempo-operational.json +++ b/operations/tempo-mixin/yamls/tempo-operational.json @@ -4890,7 +4890,7 @@ } } ], - "title": "GCS", + "title": "Backend", "type": "row" }, { diff --git a/operations/tempo-mixin/yamls/tempo-reads.json b/operations/tempo-mixin/yamls/tempo-reads.json index 2d83bcac6c5..411ca9a460f 100644 --- a/operations/tempo-mixin/yamls/tempo-reads.json +++ b/operations/tempo-mixin/yamls/tempo-reads.json @@ -1281,7 +1281,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "GCS", + "title": "Backend", "titleSize": "h6" }, { diff --git a/operations/tempo-mixin/yamls/tempo-writes.json b/operations/tempo-mixin/yamls/tempo-writes.json index 581672555ab..4e568c88e77 100644 --- a/operations/tempo-mixin/yamls/tempo-writes.json +++ b/operations/tempo-mixin/yamls/tempo-writes.json @@ -1197,7 +1197,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "GCS - Ingester", + "title": "Backend - Ingester", "titleSize": "h6" }, { @@ -1617,7 +1617,7 @@ "repeatIteration": null, "repeatRowId": null, "showTitle": true, - "title": "GCS - Compactor", + "title": "Backend - Compactor", "titleSize": "h6" } ],