diff --git a/engine/hatchery/swarm/swarm_conf.go b/engine/hatchery/swarm/swarm_conf.go index 05faa49c1c..710e8e5dde 100644 --- a/engine/hatchery/swarm/swarm_conf.go +++ b/engine/hatchery/swarm/swarm_conf.go @@ -57,6 +57,7 @@ func (h *HatcherySwarm) ApplyConfiguration(cfg interface{}) error { func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus { m := h.CommonMonitoring() m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Workers", Value: fmt.Sprintf("%d/%d", len(h.WorkersStarted(ctx)), h.Config.Provision.MaxWorker), Status: sdk.MonitoringStatusOK}) + var nbErrorImageList, nbErrorGetContainers int for dockerName, dockerClient := range h.dockerClients { //Check images status := sdk.MonitoringStatusOK @@ -65,7 +66,8 @@ func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus { images, err := dockerClient.ImageList(ctxList, types.ImageListOptions{All: true}) if err != nil { log.Warning(ctx, "hatchery> swarm> %s> Status> Unable to list images on %s: %s", h.Name(), dockerName, err) - status = sdk.MonitoringStatusAlert + status = sdk.MonitoringStatusWarn + nbErrorImageList++ } m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Images-" + dockerName, Value: fmt.Sprintf("%d", len(images)), Status: status}) //Check containers @@ -73,11 +75,24 @@ func (h *HatcherySwarm) Status(ctx context.Context) sdk.MonitoringStatus { cs, err := h.getContainers(dockerClient, types.ContainerListOptions{All: true}) if err != nil { log.Warning(ctx, "hatchery> swarm> %s> Status> Unable to list containers on %s: %s", h.Name(), dockerName, err) - status = sdk.MonitoringStatusAlert + status = sdk.MonitoringStatusWarn + nbErrorGetContainers++ } m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "Containers-" + dockerName, Value: fmt.Sprintf("%d", len(cs)), Status: status}) } + var status = sdk.MonitoringStatusOK + if nbErrorImageList > len(h.dockerClients)/2 { + status = sdk.MonitoringStatusAlert + } + m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "DockerEngines.ListImages", Value: fmt.Sprintf("%d/%d", nbErrorImageList, len(h.dockerClients)), Status: status}) + + status = sdk.MonitoringStatusOK + if nbErrorGetContainers > len(h.dockerClients)/2 { + status = sdk.MonitoringStatusAlert + } + m.Lines = append(m.Lines, sdk.MonitoringStatusLine{Component: "DockerEngines.GetContainers", Value: fmt.Sprintf("%d/%d", nbErrorGetContainers, len(h.dockerClients)), Status: status}) + return m } diff --git a/sdk/status.go b/sdk/status.go index a4cd11f29b..ffe024245f 100644 --- a/sdk/status.go +++ b/sdk/status.go @@ -50,7 +50,7 @@ type MonitoringStatusLine struct { // HTTPStatusCode return the http status code func (m MonitoringStatus) HTTPStatusCode() int { for _, l := range m.Lines { - if l.Status != MonitoringStatusOK { + if l.Status == MonitoringStatusAlert { return http.StatusServiceUnavailable } }