Skip to content

Commit

Permalink
image manager: cleanup 'dead' and 'created' containers (#2015)
Browse files Browse the repository at this point in the history
also cleanup of 'dangling' images that have no tags or names associated
with them (ie, they show as <none> in 'docker images')

closes #1684

unit tests

dont touch dangling images -- for now

skip containers that don't have a finished time
  • Loading branch information
sparrc authored May 3, 2019
1 parent b40c4cb commit 9fb3226
Show file tree
Hide file tree
Showing 3 changed files with 261 additions and 23 deletions.
4 changes: 2 additions & 2 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
// If a value has been set for taskCleanupWaitDuration and the value is less than the minimum allowed cleanup duration,
// print a warning and override it
if cfg.TaskCleanupWaitDuration < minimumTaskCleanupWaitDuration {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
seelog.Warnf("Invalid value for ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultTaskCleanupWaitDuration.String(), cfg.TaskCleanupWaitDuration, minimumTaskCleanupWaitDuration)
cfg.TaskCleanupWaitDuration = DefaultTaskCleanupWaitDuration
}

Expand All @@ -316,7 +316,7 @@ func (cfg *Config) validateAndOverrideBounds() error {
}

if cfg.ImageCleanupInterval < minimumImageCleanupInterval {
seelog.Warnf("Invalid value for image cleanup duration, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
seelog.Warnf("Invalid value for ECS_IMAGE_CLEANUP_INTERVAL, will be overridden with the default value: %s. Parsed value: %v, minimum value: %v.", DefaultImageCleanupTimeInterval.String(), cfg.ImageCleanupInterval, minimumImageCleanupInterval)
cfg.ImageCleanupInterval = DefaultImageCleanupTimeInterval
}

Expand Down
19 changes: 14 additions & 5 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,7 @@ func (imageManager *dockerImageManager) removeUnusedImages(ctx context.Context)

var numECSImagesDeleted int
imageManager.imageStatesConsideredForDeletion = imageManager.imagesConsiderForDeletion(imageManager.getAllImageStates())

for i := 0; i < imageManager.numImagesToDelete; i++ {
err := imageManager.removeLeastRecentlyUsedImage(ctx)
numECSImagesDeleted = i
Expand Down Expand Up @@ -342,9 +343,17 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
continue
}

finishedTime, _ := time.Parse(time.Now().String(), response.State.FinishedAt)
seelog.Debugf("Inspecting Non-ECS Container ID [%s] for removal, Finished [%s] Status [%s]", id, response.State.FinishedAt, response.State.Status)
finishedTime, err := time.Parse(time.RFC3339Nano, response.State.FinishedAt)
if err != nil {
seelog.Errorf("Error parsing time string for container. id: %s, time: %s err: %s", id, response.State.FinishedAt, err)
continue
}

if response.State.Status == "exited" && time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
if (response.State.Status == "exited" ||
response.State.Status == "dead" ||
response.State.Status == "created") &&
time.Now().Sub(finishedTime) > imageManager.nonECSContainerCleanupWaitDuration {
nonECSContainerRemoveAvailableIDs = append(nonECSContainerRemoveAvailableIDs, id)
}
}
Expand All @@ -353,13 +362,13 @@ func (imageManager *dockerImageManager) removeNonECSContainers(ctx context.Conte
if numNonECSContainerDeleted == imageManager.numNonECSContainersToDelete {
break
}
seelog.Infof("Removing non-ECS container id: %s", id)
seelog.Debugf("Removing non-ECS Container ID %s", id)
err := imageManager.client.RemoveContainer(ctx, id, dockerclient.RemoveContainerTimeout)
if err == nil {
seelog.Infof("Image removed: %s", id)
seelog.Infof("Removed Container ID: %s", id)
numNonECSContainerDeleted++
} else {
seelog.Errorf("Error removing Image %s - %v", id, err)
seelog.Errorf("Error Removing Container ID %s - %s", id, err)
continue
}
}
Expand Down
261 changes: 245 additions & 16 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
mock_dockerapi "github.com/aws/amazon-ecs-agent/agent/dockerclient/dockerapi/mocks"
"github.com/aws/amazon-ecs-agent/agent/ec2"
"github.com/aws/amazon-ecs-agent/agent/engine/dockerstate"
"github.com/aws/amazon-ecs-agent/agent/engine/image"
Expand Down Expand Up @@ -984,12 +984,14 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

Expand All @@ -1002,35 +1004,262 @@ func TestNonECSImageAndContainersCleanupRemoveImage(t *testing.T) {
ID: "1",
State: &types.ContainerState{
Status: "exited",
FinishedAt: time.Now().AddDate(0, -2, 0).String(),
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.ImageIDs[0], dockerclient.RemoveImageTimeout).Return(nil).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

if len(listContainersResponse.DockerIDs) != 1 {
t.Error("Error removing containers ids")
assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Dead images should be cleaned up.
func TestNonECSImageAndContainers_RemoveDeadContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
if len(inspectContainerResponse.ID) != 1 {
t.Error("Error inspecting containers ids")
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}
if len(imageManager.imageStates) != 0 {
t.Error("Error removing image state after the image is removed")

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "dead",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// Old 'Created' Images should be cleaned up
func TestNonECSImageAndContainersCleanup_RemoveOldCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().AddDate(0, -2, 0).Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(1)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestNonECSImageAndContainersCleanup_DontRemoveContainerWithInvalidFinishedTime(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: "Hello! I am an invalid timestamp!!",
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), "1", gomock.Any()).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

// New 'Created' Images should NOT be cleaned up.
func TestNonECSImageAndContainersCleanup_DoNotRemoveNewlyCreatedContainer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := mock_dockerapi.NewMockDockerClient(ctrl)
imageManager := &dockerImageManager{
client: client,
state: dockerstate.NewTaskEngineState(),
minimumAgeBeforeDeletion: config.DefaultImageDeletionAge,
numImagesToDelete: config.DefaultNumImagesToDeletePerCycle,
numNonECSContainersToDelete: 10,
imageCleanupTimeInterval: config.DefaultImageCleanupTimeInterval,
deleteNonECSImagesEnabled: true,
nonECSContainerCleanupWaitDuration: time.Hour * 3,
}
imageManager.SetSaver(statemanager.NewNoopStateManager())

listContainersResponse := dockerapi.ListContainersResponse{
DockerIDs: []string{"1"},
}

inspectContainerResponse := &types.ContainerJSON{
ContainerJSONBase: &types.ContainerJSONBase{
ID: "1",
State: &types.ContainerState{
Status: "created",
FinishedAt: time.Now().Format(time.RFC3339Nano),
},
},
}

inspectImageResponse := &types.ImageInspect{
Size: 4096,
}

listImagesResponse := dockerapi.ListImagesResponse{
ImageIDs: []string{"sha256:qwerty1"},
RepoTags: []string{"tester"},
}

client.EXPECT().ListContainers(gomock.Any(), gomock.Any(), dockerclient.ListImagesTimeout).Return(listContainersResponse).AnyTimes()
client.EXPECT().InspectContainer(gomock.Any(), gomock.Any(), dockerclient.InspectContainerTimeout).Return(inspectContainerResponse, nil).AnyTimes()
client.EXPECT().RemoveContainer(gomock.Any(), gomock.Any(), dockerclient.RemoveContainerTimeout).Return(nil).Times(0)
client.EXPECT().ListImages(gomock.Any(), dockerclient.ListImagesTimeout).Return(listImagesResponse).AnyTimes()
client.EXPECT().InspectImage(listImagesResponse.RepoTags[0]).Return(inspectImageResponse, nil).Times(1)
client.EXPECT().RemoveImage(gomock.Any(), listImagesResponse.RepoTags[0], dockerclient.RemoveImageTimeout).Return(nil).Times(1)

ctx, cancel := context.WithCancel(context.TODO())
defer cancel()

imageManager.removeUnusedImages(ctx)

assert.Len(t, listContainersResponse.DockerIDs, 1, "error removing container IDs")
assert.Len(t, inspectContainerResponse.ID, 1, "error inspecting containers ids")
assert.Len(t, imageManager.imageStates, 0, "Error removing image state after the image is removed")
}

func TestDeleteImage(t *testing.T) {
Expand Down

0 comments on commit 9fb3226

Please sign in to comment.