Fix units for metric taskrun_pod_latency
Prior to this commit, the taskrun_pod_latency metric was calculated
from pod timestamps and the resulting duration was cast directly to
float64. Because Go's time.Duration is an integer nanosecond count,
the metric incorrectly reported nanoseconds when it was intended to
have units of milliseconds.

This commit fixes the duration conversion and adds units to the metric
name, in line with Prometheus best practices.
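
The root cause in one small sketch (illustrative, not taken from the commit):
Go's time.Duration is an int64 count of nanoseconds, so converting it straight
to float64 exposes that count rather than performing a unit conversion.

    package main

    import (
    	"fmt"
    	"time"
    )

    func main() {
    	latency := 1500 * time.Millisecond

    	// A direct cast exposes the internal nanosecond count: prints 1.5e+09.
    	fmt.Println(float64(latency))

    	// Milliseconds() performs the intended conversion: prints 1500.
    	fmt.Println(latency.Milliseconds())
    }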
lbernick authored and tekton-robot committed Jul 10, 2023
1 parent 75dc883 commit 29ddf85
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion docs/metrics.md
@@ -22,7 +22,7 @@ We expose several kinds of exporters, including Prometheus, Google Stackdriver,
| `tekton_pipelines_controller_running_taskruns_count` | Gauge | | experimental |
| `tekton_pipelines_controller_running_taskruns_throttled_by_quota_count` | Gauge | | experimental |
| `tekton_pipelines_controller_running_taskruns_throttled_by_node_count` | Gauge | | experimental |
- | `tekton_pipelines_controller_taskruns_pod_latency` | Gauge | `namespace`=&lt;taskruns-namespace&gt; <br> `pod`=&lt;taskrun_pod_name&gt; <br> `*task`=&lt;task_name&gt; <br> `*taskrun`=&lt;taskrun_name&gt;<br> | experimental |
+ | `tekton_pipelines_controller_taskruns_pod_latency_milliseconds` | Gauge | `namespace`=&lt;taskruns-namespace&gt; <br> `pod`=&lt;taskrun_pod_name&gt; <br> `*task`=&lt;task_name&gt; <br> `*taskrun`=&lt;taskrun_name&gt;<br> | experimental |
| `tekton_pipelines_controller_client_latency_[bucket, sum, count]` | Histogram | | experimental |

The Labels/Tag marked as "*" are optional. And there's a choice between Histogram and LastValue(Gauge) for pipelinerun and taskrun duration metrics.
4 changes: 2 additions & 2 deletions pkg/taskrunmetrics/metrics.go
@@ -86,7 +86,7 @@ var (
"Number of taskruns executing currently, but whose underlying Pods or Containers are suspended by k8s because of Node level constraints. Such suspensions can occur as part of initial scheduling of the Pod, or scheduling of any of the subsequent Container(s) in the Pod after the first Container is started",
stats.UnitDimensionless)

- podLatency = stats.Float64("taskruns_pod_latency",
+ podLatency = stats.Float64("taskruns_pod_latency_milliseconds",
"scheduling latency for the taskruns pods",
stats.UnitMilliseconds)
)
@@ -438,7 +438,7 @@ func (r *Recorder) RecordPodLatency(ctx context.Context, pod *corev1.Pod, tr *v1
return err
}

- metrics.Record(ctx, podLatency.M(float64(latency)))
+ metrics.Record(ctx, podLatency.M(float64(latency.Milliseconds())))

return nil
}
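
A note on the OpenCensus API in this hunk: the unit passed to stats.Float64
(stats.UnitMilliseconds above) is exporter metadata only and performs no
conversion, so the recorded value must already be in milliseconds; hence the
latency.Milliseconds() fix. A simplified, self-contained sketch of the pattern,
with plain OpenCensus calls standing in for Tekton's metrics wrapper:

    package main

    import (
    	"context"
    	"log"
    	"time"

    	"go.opencensus.io/stats"
    	"go.opencensus.io/stats/view"
    )

    // Illustrative measure mirroring the one in metrics.go; the declared
    // unit is metadata for exporters and does not convert recorded values.
    var podLatency = stats.Float64("taskruns_pod_latency_milliseconds",
    	"scheduling latency for the taskruns pods",
    	stats.UnitMilliseconds)

    func main() {
    	// A LastValue aggregation matches the Gauge listed in docs/metrics.md.
    	if err := view.Register(&view.View{
    		Measure:     podLatency,
    		Aggregation: view.LastValue(),
    	}); err != nil {
    		log.Fatal(err)
    	}

    	latency := 4 * time.Second

    	// The caller converts explicitly; float64(latency) would record nanoseconds.
    	stats.Record(context.Background(), podLatency.M(float64(latency.Milliseconds())))
    }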
6 changes: 3 additions & 3 deletions pkg/taskrunmetrics/metrics_test.go
@@ -531,7 +531,7 @@ func TestRecordPodLatency(t *testing.T) {
"taskrun": "test-taskrun",
"namespace": "foo",
},
- expectedValue: 4e+09,
+ expectedValue: 4000,
}, {
name: "for non scheduled pod",
pod: &corev1.Pod{
@@ -559,7 +559,7 @@ func TestRecordPodLatency(t *testing.T) {
if err != nil {
t.Errorf("RecordPodLatency: %v", err)
}
- metricstest.CheckLastValueData(t, "taskruns_pod_latency", td.expectedTags, td.expectedValue)
+ metricstest.CheckLastValueData(t, "taskruns_pod_latency_milliseconds", td.expectedTags, td.expectedValue)
}
})
}
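
The updated expectation follows from the same unit fix: the test pod's
scheduling delay is 4 seconds, so the old code recorded
float64(4 * time.Second) = 4e+09 (nanoseconds), while the fixed code records
(4 * time.Second).Milliseconds() = 4000.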
@@ -610,7 +610,7 @@ func TestTaskRunIsOfPipelinerun(t *testing.T) {
}

func unregisterMetrics() {
- metricstest.Unregister("taskrun_duration_seconds", "pipelinerun_taskrun_duration_seconds", "taskrun_count", "running_taskruns_count", "running_taskruns_throttled_by_quota_count", "running_taskruns_throttled_by_node_count", "taskruns_pod_latency")
+ metricstest.Unregister("taskrun_duration_seconds", "pipelinerun_taskrun_duration_seconds", "taskrun_count", "running_taskruns_count", "running_taskruns_throttled_by_quota_count", "running_taskruns_throttled_by_node_count", "taskruns_pod_latency_milliseconds")

// Allow the recorder singleton to be recreated.
once = sync.Once{}
