From 9fb3226f507d952c31d8740c9bd54d8b933c11bc Mon Sep 17 00:00:00 2001 From: Cameron Sparr Date: Fri, 3 May 2019 16:51:52 -0700 Subject: [PATCH] image manager: cleanup 'dead' and 'created' containers (#2015) also cleanup of 'dangling' images that have no tags or names associated with them (ie, they show as in 'docker images') closes #1684 unit tests dont touch dangling images -- for now skip containers that don't have a finished time --- agent/config/config.go | 4 +- agent/engine/docker_image_manager.go | 19 +- agent/engine/docker_image_manager_test.go | 261 ++++++++++++++++++++-- 3 files changed, 261 insertions(+), 23 deletions(-) diff --git a/agent/config/config.go b/agent/config/config.go index eb44e2aabf9..99022cca49d 100644 --- a/agent/config/config.go +++ b/agent/config/config.go @@ -306,7 +306,7 @@ func (cfg *Config) validateAndOverrideBounds() error { // If a value has been set for taskCleanupWaitDuration and the value is less than the minimum allowed cleanup duration, // print a warning and override it if cfg.TaskCleanupWaitDuration < minimumTaskCleanupWaitDuration { - seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration) + seelog.Warnf("Invalid value for ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration) cfg.TaskCleanupWaitDuration = DefaultTaskCleanupWaitDuration } @@ -316,7 +316,7 @@ func (cfg *Config) validateAndOverrideBounds() error { } if cfg.ImageCleanupInterval < minimumImageCleanupInterval { - seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval) + seelog.Warnf("Invalid value for ECS_IMAGE_CLEANUP_INTERVAL, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval) cfg.ImageCleanupInterval = DefaultImageCleanupTimeInterval } diff --git a/agent/engine/docker_image_manager.go b/agent/engine/docker_image_manager.go index 5f465054c85..cf8cce4c610 100644 --- a/agent/engine/docker_image_manager.go +++ b/agent/engine/docker_image_manager.go @@ -312,6 +312,7 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context) var numECSImagesDeleted int imageManager.imageStatesConsideredForDeletion = imageManager.imagesConsiderForDeletion(imageManager.getAllImageStates()) + for i := 0; i < imageManager.numImagesToDelete; i++ { err := imageManager.removeLeastRecentlyUsedImage(ctx) numECSImagesDeleted = i @@ -342,9 +343,17 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte continue } - finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt) + seelog.Debugf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status) + finishedTime, err := time.Parse(time.RFC3339Nano, response.State.FinishedAt) + if err != nil { + seelog.Errorf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err) + continue + } - if response.State.Status == "exited" && time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration { + if (response.State.Status == "exited" || + response.State.Status == "dead" || + response.State.Status == "created") && + time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration { nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id) } } @@ -353,13 +362,13 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete { break } - seelog.Infof("Removing non-ECS container id: %s", id) + seelog.Debugf("Removing non-ECS Container ID %s", id) err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout) if err == nil { - seelog.Infof("Image removed: %s", id) + seelog.Infof("Removed Container ID: %s", id) numNonECSContainerDeleted++ } else { - seelog.Errorf("Error removing Image %s - %v", id, err) + seelog.Errorf("Error Removing Container ID %s - %s", id, err) continue } } diff --git a/agent/engine/docker_image_manager_test.go b/agent/engine/docker_image_manager_test.go index c64d7eb87fc..a37642d8ea1 100644 --- a/agent/engine/docker_image_manager_test.go +++ b/agent/engine/docker_image_manager_test.go @@ -27,7 +27,7 @@ import ( "github.com/aws/amazon-ecs-agent/agent/config" "github.com/aws/amazon-ecs-agent/agent/dockerclient" "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi" - "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks" + mock_dockerapi "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks" "github.com/aws/amazon-ecs-agent/agent/ec2" "github.com/aws/amazon-ecs-agent/agent/engine/dockerstate" "github.com/aws/amazon-ecs-agent/agent/engine/image" @@ -984,12 +984,14 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) { defer ctrl.Finish() client := mock_dockerapi.NewMockDockerClient(ctrl) imageManager := &dockerImageManager{ - client: client, - state: dockerstate.NewTaskEngineState(), - minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, - numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, - imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, - deleteNonECSImagesEnabled: true, + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, } imageManager.SetSaver(statemanager.NewNoopStateManager()) @@ -1002,35 +1004,262 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) { ID: "1", State: &types.ContainerState{ Status: "exited", - FinishedAt: time.Now().AddDate(0, -2, 0).String(), + FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano), }, }, } + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + } + listImagesResponse := dockerapi.ListImagesResponse{ ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, } client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() - client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1) client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() - client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) ctx, cancel := context.WithCancel(context.TODO()) defer cancel() imageManager.removeUnusedImages(ctx) - if len(listContainersResponse.DockerIDs) != 1 { - t.Error("Error removing containers ids") + assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs") + assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids") + assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") +} + +// Dead images should be cleaned up. +func TestNonECSImageAndContainers_RemoveDeadContainer(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, } - if len(inspectContainerResponse.ID) != 1 { - t.Error("Error inspecting containers ids") + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, } - if len(imageManager.imageStates) != 0 { - t.Error("Error removing image state after the image is removed") + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "dead", + FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano), + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) + + assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs") + assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids") + assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") +} + +// Old 'Created' Images should be cleaned up +func TestNonECSImageAndContainersCleanup_RemoveOldCreatedContainer(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "created", + FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano), + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(1) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) + + assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs") + assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids") + assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") +} + +func TestNonECSImageAndContainersCleanup_DontRemoveContainerWithInvalidFinishedTime(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "created", + FinishedAt: "Hello! I am an invalid timestamp!!", + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(0) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) + + assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs") + assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids") + assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") +} + +// New 'Created' Images should NOT be cleaned up. +func TestNonECSImageAndContainersCleanup_DoNotRemoveNewlyCreatedContainer(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + client := mock_dockerapi.NewMockDockerClient(ctrl) + imageManager := &dockerImageManager{ + client: client, + state: dockerstate.NewTaskEngineState(), + minimumAgeBeforeDeletion: config.DefaultImageDeletionAge, + numImagesToDelete: config.DefaultNumImagesToDeletePerCycle, + numNonECSContainersToDelete: 10, + imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval, + deleteNonECSImagesEnabled: true, + nonECSContainerCleanupWaitDuration: time.Hour * 3, + } + imageManager.SetSaver(statemanager.NewNoopStateManager()) + + listContainersResponse := dockerapi.ListContainersResponse{ + DockerIDs: []string{"1"}, + } + + inspectContainerResponse := &types.ContainerJSON{ + ContainerJSONBase: &types.ContainerJSONBase{ + ID: "1", + State: &types.ContainerState{ + Status: "created", + FinishedAt: time.Now().Format(time.RFC3339Nano), + }, + }, + } + + inspectImageResponse := &types.ImageInspect{ + Size: 4096, + } + + listImagesResponse := dockerapi.ListImagesResponse{ + ImageIDs: []string{"sha256:qwerty1"}, + RepoTags: []string{"tester"}, + } + + client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes() + client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes() + client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(0) + client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes() + client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1) + client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1) + + ctx, cancel := context.WithCancel(context.TODO()) + defer cancel() + + imageManager.removeUnusedImages(ctx) + + assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs") + assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids") + assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed") } func TestDeleteImage(t *testing.T) {