From 66c3e08f3cd7a5c94c06d08a5d42502b8d800b8e Mon Sep 17 00:00:00 2001 From: Zhenya Tikhonov Date: Sun, 22 Dec 2024 20:12:22 +0400 Subject: [PATCH] feat: add debug metrics --- venona/VERSION | 2 +- venona/pkg/agent/agent.go | 7 ++++++- venona/pkg/metrics/metrics.go | 18 ++++++++++++++---- venonactl/VERSION | 2 +- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/venona/VERSION b/venona/VERSION index e9307ca5..50ffc5aa 100644 --- a/venona/VERSION +++ b/venona/VERSION @@ -1 +1 @@ -2.0.2 +2.0.3 diff --git a/venona/pkg/agent/agent.go b/venona/pkg/agent/agent.go index 6956b877..066bffca 100644 --- a/venona/pkg/agent/agent.go +++ b/venona/pkg/agent/agent.go @@ -262,6 +262,7 @@ func (a *Agent) reportTaskStatus(ctx context.Context, taskDef task.Task, err err } func (a *Agent) getTasks(ctx context.Context) (task.Tasks, []*workflow.Workflow) { + metrics.IncGetTasksRequests() tasks := a.pullTasks(ctx) return a.splitTasks(tasks) } @@ -269,7 +270,11 @@ func (a *Agent) getTasks(ctx context.Context) (task.Tasks, []*workflow.Workflow) func (a *Agent) pullTasks(ctx context.Context) task.Tasks { start := time.Now() tasks, err := a.cf.Tasks(ctx) - metrics.ObserveGetTasks(start) + status := "success" + if err != nil { + status = "error" + } + metrics.ObserveGetTasks(start, status) if err != nil { a.log.Error("Failed pulling tasks", "error", err) diff --git a/venona/pkg/metrics/metrics.go b/venona/pkg/metrics/metrics.go index f14acdbc..ba56c004 100644 --- a/venona/pkg/metrics/metrics.go +++ b/venona/pkg/metrics/metrics.go @@ -56,11 +56,16 @@ var ( Name: "queue_size", Help: "Current number of waiting tasks", }) - getTasksDuration = prometheus.NewHistogram(prometheus.HistogramOpts{ + getTasksDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: runnerNamespace, Name: "get_tasks_duration_sec", Help: "How long each GetTasks request takes (seconds)", - Buckets: []float64{0.25, 0.5, 1, 2, 3, 6}, + Buckets: []float64{0.25, 0.5, 1, 2, 3, 6, 12, 30, 60}, + }, []string{"status"}) + getTasksRequests = prometheus.NewCounter(prometheus.CounterOpts{ + Namespace: runnerNamespace, + Name: "get_tasks_requests", + Help: "Number of GetTasks requests", }) handlingTimeSinceCreation = prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: runnerNamespace, @@ -136,10 +141,15 @@ func IncWorkflowRetries(podName string) { wfTaskRetries.With(labels).Inc() } -func ObserveGetTasks(start time.Time) { +func ObserveGetTasks(start time.Time, status string) { end := time.Now() diff := end.Sub(start) - getTasksDuration.Observe(diff.Seconds()) + labels := prometheus.Labels{"status": status} + getTasksDuration.With(labels).Observe(diff.Seconds()) +} + +func IncGetTasksRequests() { + getTasksRequests.Inc() } func ObserveAgentTaskMetrics(agentType string, sinceCreation, inRunner, processed time.Duration) { diff --git a/venonactl/VERSION b/venonactl/VERSION index e9307ca5..50ffc5aa 100644 --- a/venonactl/VERSION +++ b/venonactl/VERSION @@ -1 +1 @@ -2.0.2 +2.0.3