From 76e6363b09222aa341c62f7310fd86cdc2ede61e Mon Sep 17 00:00:00 2001 From: Richard LT Date: Tue, 21 Feb 2023 15:05:48 +0100 Subject: [PATCH] fix(hatchery:swarm): unregister worker cpu/memory metrics (#6470) Signed-off-by: richardlt --- engine/hatchery/swarm/monitoring.go | 19 ++++++++++++------- engine/hatchery/swarm/types.go | 13 +++++++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/engine/hatchery/swarm/monitoring.go b/engine/hatchery/swarm/monitoring.go index 732842d9b4..72c8e0cb1e 100644 --- a/engine/hatchery/swarm/monitoring.go +++ b/engine/hatchery/swarm/monitoring.go @@ -9,6 +9,7 @@ import ( "time" "go.opencensus.io/stats" + "go.opencensus.io/stats/view" "go.opencensus.io/tag" "golang.org/x/net/context" @@ -41,12 +42,12 @@ func (h *HatcherySwarm) InitWorkersMetrics(ctx context.Context) error { telemetry.MustNewKey(TagResourceName), } - return telemetry.RegisterView(ctx, - telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu", h.workerMetrics.CPU, tags), - telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu_request", h.workerMetrics.CPURequest, tags), - telemetry.NewViewLast("cds/hatchery/worker_memory", h.workerMetrics.Memory, tags), - telemetry.NewViewLast("cds/hatchery/worker_memory_request", h.workerMetrics.MemoryRequest, tags), - ) + h.workerMetrics.CPUView = telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu", h.workerMetrics.CPU, tags) + h.workerMetrics.CPURequestView = telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu_request", h.workerMetrics.CPURequest, tags) + h.workerMetrics.MemoryView = telemetry.NewViewLast("cds/hatchery/worker_memory", h.workerMetrics.Memory, tags) + h.workerMetrics.MemoryRequestView = telemetry.NewViewLast("cds/hatchery/worker_memory_request", h.workerMetrics.MemoryRequest, tags) + + return telemetry.RegisterView(ctx, h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView) } func (h *HatcherySwarm) StartWorkerMetricsRoutine(ctx context.Context, delay int64) { @@ -57,6 +58,10 @@ func (h *HatcherySwarm) StartWorkerMetricsRoutine(ctx context.Context, delay int select { case <-ticker.C: h.GoRoutines.Exec(ctx, "compute-worker-metrics", func(ctx context.Context) { + // Re-register view to drop ended workers metrics + view.Unregister(h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView) + view.Register(h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView) + ms, err := h.WorkersMetrics(ctx) if err != nil { log.ErrorWithStackTrace(ctx, err) @@ -127,7 +132,7 @@ func (h *HatcherySwarm) WorkersMetrics(ctx context.Context) ([]WorkerMetricsReso } var stats types.Stats if err := json.Unmarshal(v, &stats); err != nil { - log.ErrorWithStackTrace(ctx, sdk.WrapError(err, "unable to get unmarshal stats for container %s/%s", host, c.ID)) + log.ErrorWithStackTrace(ctx, sdk.WrapError(err, "unable to unmarshal stats for container %s/%s", host, c.ID)) return } diff --git a/engine/hatchery/swarm/types.go b/engine/hatchery/swarm/types.go index 8274a0ff41..0cc7d8816f 100644 --- a/engine/hatchery/swarm/types.go +++ b/engine/hatchery/swarm/types.go @@ -3,6 +3,7 @@ package swarm import ( docker "github.com/docker/docker/client" "go.opencensus.io/stats" + "go.opencensus.io/stats/view" hatcheryCommon "github.com/ovh/cds/engine/hatchery" "github.com/ovh/cds/engine/service" @@ -51,10 +52,14 @@ type HatcherySwarm struct { Config HatcheryConfiguration dockerClients map[string]*dockerClient workerMetrics struct { - CPU *stats.Float64Measure - CPURequest *stats.Float64Measure - Memory *stats.Int64Measure - MemoryRequest *stats.Int64Measure + CPU *stats.Float64Measure + CPURequest *stats.Float64Measure + Memory *stats.Int64Measure + MemoryRequest *stats.Int64Measure + CPUView *view.View + CPURequestView *view.View + MemoryView *view.View + MemoryRequestView *view.View } }