Skip to content

Commit

Permalink
fix(hatchery:swarm): unregister worker cpu/memory metrics (#6470)
Browse files (browse the repository at this point in the history)
Signed-off-by: richardlt <[email protected]>
  • Loading branch information
richardlt authored Feb 21, 2023
1 parent e04cdc7 commit 76e6363
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
19 changes: 12 additions & 7 deletions engine/hatchery/swarm/monitoring.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

"go.opencensus.io/stats"
"go.opencensus.io/stats/view"
"go.opencensus.io/tag"
"golang.org/x/net/context"

Expand Down Expand Up @@ -41,12 +42,12 @@ func (h *HatcherySwarm) InitWorkersMetrics(ctx context.Context) error {
telemetry.MustNewKey(TagResourceName),
}

return telemetry.RegisterView(ctx,
telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu", h.workerMetrics.CPU, tags),
telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu_request", h.workerMetrics.CPURequest, tags),
telemetry.NewViewLast("cds/hatchery/worker_memory", h.workerMetrics.Memory, tags),
telemetry.NewViewLast("cds/hatchery/worker_memory_request", h.workerMetrics.MemoryRequest, tags),
)
h.workerMetrics.CPUView = telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu", h.workerMetrics.CPU, tags)
h.workerMetrics.CPURequestView = telemetry.NewViewLastFloat64("cds/hatchery/worker_cpu_request", h.workerMetrics.CPURequest, tags)
h.workerMetrics.MemoryView = telemetry.NewViewLast("cds/hatchery/worker_memory", h.workerMetrics.Memory, tags)
h.workerMetrics.MemoryRequestView = telemetry.NewViewLast("cds/hatchery/worker_memory_request", h.workerMetrics.MemoryRequest, tags)

return telemetry.RegisterView(ctx, h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView)
}

func (h *HatcherySwarm) StartWorkerMetricsRoutine(ctx context.Context, delay int64) {
Expand All @@ -57,6 +58,10 @@ func (h *HatcherySwarm) StartWorkerMetricsRoutine(ctx context.Context, delay int
select {
case <-ticker.C:
h.GoRoutines.Exec(ctx, "compute-worker-metrics", func(ctx context.Context) {
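// Unregister then re-register the views to drop the metrics of workers that have ended.
// NOTE(review): the error returned by view.Register is discarded here — confirm this is intentional.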
view.Unregister(h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView)
view.Register(h.workerMetrics.CPUView, h.workerMetrics.CPURequestView, h.workerMetrics.MemoryView, h.workerMetrics.MemoryRequestView)

ms, err := h.WorkersMetrics(ctx)
if err != nil {
log.ErrorWithStackTrace(ctx, err)
Expand Down Expand Up @@ -127,7 +132,7 @@ func (h *HatcherySwarm) WorkersMetrics(ctx context.Context) ([]WorkerMetricsReso
}
var stats types.Stats
if err := json.Unmarshal(v, &stats); err != nil {
log.ErrorWithStackTrace(ctx, sdk.WrapError(err, "unable to get unmarshal stats for container %s/%s", host, c.ID))
log.ErrorWithStackTrace(ctx, sdk.WrapError(err, "unable to unmarshal stats for container %s/%s", host, c.ID))
return
}

Expand Down
13 changes: 9 additions & 4 deletions engine/hatchery/swarm/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package swarm
import (
docker "github.com/docker/docker/client"
"go.opencensus.io/stats"
"go.opencensus.io/stats/view"

hatcheryCommon "github.com/ovh/cds/engine/hatchery"
"github.com/ovh/cds/engine/service"
Expand Down Expand Up @@ -51,10 +52,14 @@ type HatcherySwarm struct {
Config HatcheryConfiguration
dockerClients map[string]*dockerClient
workerMetrics struct {
CPU *stats.Float64Measure
CPURequest *stats.Float64Measure
Memory *stats.Int64Measure
MemoryRequest *stats.Int64Measure
CPU *stats.Float64Measure
CPURequest *stats.Float64Measure
Memory *stats.Int64Measure
MemoryRequest *stats.Int64Measure
CPUView *view.View
CPURequestView *view.View
MemoryView *view.View
MemoryRequestView *view.View
}
}

Expand Down

0 comments on commit 76e6363

Please sign in to comment.