From 4505743af8a9b5c70850da93b44ce8528cc47a65 Mon Sep 17 00:00:00 2001 From: Cam Date: Mon, 22 Feb 2021 15:54:54 -0800 Subject: [PATCH] container stats: ignore context.Canceled errors We often receive context.Canceled errors when a container exits during docker stats collection. There is a benign race condition: if we process the error before noticing that the context is "Done", we log this canceled error as a warning message here: https://github.com/aws/amazon-ecs-agent/blob/5be7aa08bed215a557f48c16d8201ad3db59a9be/agent/stats/container.go#L118-L122 This change ignores these context.Canceled errors so that we don't log them. This will eliminate log messages that look like this when a container exits: level=warn time=2020-12-25T07:51:33Z msg="Error encountered processing metrics stream from docker, this may affect cloudwatch metric accuracy: DockerGoClient: Unable to decode stats for container REDACTED: context canceled" module=container.go --- agent/dockerclient/dockerapi/docker_client.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/agent/dockerclient/dockerapi/docker_client.go b/agent/dockerclient/dockerapi/docker_client.go index 4935d7d8b7c..c84cbc481ef 100644 --- a/agent/dockerclient/dockerapi/docker_client.go +++ b/agent/dockerclient/dockerapi/docker_client.go @@ -1393,6 +1393,10 @@ func (dg *dockerGoClient) Stats(ctx context.Context, id string, inactivityTimeou stream := true resp, err = client.ContainerStats(subCtx, id, stream) if err != nil { + if errors.Is(err, context.Canceled) { + // ignore context cancelled errors, which indicate container has exited already + return + } errC <- fmt.Errorf("DockerGoClient: Unable to retrieve stats for container %s: %v", id, err) return } @@ -1408,6 +1412,10 @@ func (dg *dockerGoClient) Stats(ctx context.Context, id string, inactivityTimeou data := new(types.StatsJSON) for err := decoder.Decode(data); err != io.EOF; err = decoder.Decode(data) { if err != nil { + if errors.Is(err, 
context.Canceled) { + // ignore context cancelled errors, which indicate container has exited already + return + } errC <- fmt.Errorf("DockerGoClient: Unable to decode stats for container %s: %v", id, err) return } @@ -1466,6 +1474,10 @@ func getContainerStatsNotStreamed(client sdkclient.Client, ctx context.Context, response := make(chan statsResponse, 1) go func() { stats, err := client.ContainerStats(ctxWithTimeout, id, false) + if errors.Is(err, context.Canceled) { + // ignore context cancelled errors, which indicate container has exited already + err = nil + } response <- statsResponse{stats, err} }() select {