Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the deadlock caused by the ImagePullDeleteLock #836

Merged
merged 3 commits into from
Jun 9, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions agent/engine/docker_image_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *
for i, imageState := range imageManager.imageStates {
if imageState.Image.ImageID == imageStateToBeRemoved.Image.ImageID {
// Image State found; hence remove it
seelog.Infof("Removing Image State: %v from Image Manager", imageState.Image.ImageID)
seelog.Infof("Removing Image State: [%s] from Image Manager", imageState.String())
imageManager.imageStates = append(imageManager.imageStates[:i], imageManager.imageStates[i+1:]...)
return
}
Expand All @@ -205,13 +205,14 @@ func (imageManager *dockerImageManager) removeImageState(imageStateToBeRemoved *

func (imageManager *dockerImageManager) getCandidateImagesForDeletion() []*image.ImageState {
if len(imageManager.imageStatesConsideredForDeletion) < 1 {
seelog.Debugf("Image Manager: Empty state!")
// no image states present in image manager
return nil
}
var imagesForDeletion []*image.ImageState
for _, imageState := range imageManager.imageStatesConsideredForDeletion {
if imageManager.isImageOldEnough(imageState) && imageState.HasNoAssociatedContainers() {
seelog.Infof("Candidate image for deletion: %+v", imageState)
seelog.Infof("Candidate image for deletion: [%s]", imageState.String())
imagesForDeletion = append(imagesForDeletion, imageState)
}
}
Expand Down Expand Up @@ -275,7 +276,12 @@ func (imageManager *dockerImageManager) performPeriodicImageCleanup(ctx context.
}

func (imageManager *dockerImageManager) removeUnusedImages() {
seelog.Infof("Begin building map of eligible unused images for deletion")
seelog.Debug("Attempting to obtain ImagePullDeleteLock for removing images")
ImagePullDeleteLock.Lock()
seelog.Debug("Obtained ImagePullDeleteLock for removing images")
defer seelog.Debug("Released ImagePullDeleteLock after removing images")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe group these statements in an anonymous function so it's easier to see the order of execution?

defer func() { ... }

defer ImagePullDeleteLock.Unlock()

imageManager.updateLock.Lock()
defer imageManager.updateLock.Unlock()
imageManager.imageStatesConsideredForDeletion = make(map[string]*image.ImageState)
Expand All @@ -285,18 +291,14 @@ func (imageManager *dockerImageManager) removeUnusedImages() {
for i := 0; i < imageManager.numImagesToDelete; i++ {
err := imageManager.removeLeastRecentlyUsedImage()
if err != nil {
seelog.Infof("End of eligible images for deletion")
seelog.Infof("End of eligible images for deletion: %v; Still have %d image states being managed", err, len(imageManager.getAllImageStates()))
break
}
}
}

func (imageManager *dockerImageManager) removeLeastRecentlyUsedImage() error {
seelog.Debug("Attempting to obtain ImagePullDeleteLock for removing images")
ImagePullDeleteLock.Lock()
seelog.Debug("Obtained ImagePullDeleteLock for removing images")
defer seelog.Debug("Released ImagePullDeleteLock after removing images")
defer ImagePullDeleteLock.Unlock()
seelog.Infof("Begin building map of eligible unused images for deletion")
leastRecentlyUsedImage := imageManager.getUnusedImageForDeletion()
if leastRecentlyUsedImage == nil {
return fmt.Errorf("No more eligible images for deletion")
Expand Down
43 changes: 43 additions & 0 deletions agent/engine/docker_image_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,49 @@ import (
"golang.org/x/net/context"
)

// TestImagePullRemoveDeadlock tests if there's a deadlock when trying to
// pull an image while image clean up is in progress
func TestImagePullRemoveDeadlock(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
client := NewMockDockerClient(ctrl)

cfg := defaultTestConfig()
imageManager := NewImageManager(cfg, client, dockerstate.NewTaskEngineState())
imageManager.SetSaver(statemanager.NewNoopStateManager())

sleepContainer := &api.Container{
Name: "sleep",
Image: "busybox",
}
sleepContainerImageInspected := &docker.Image{
ID: "sha256:qwerty",
}

// Cause a fake delay when recording container reference so that the
// race condition between ImagePullLock and updateLock gets exercised
// If updateLock precedes ImagePullLock, it can cause a deadlock
client.EXPECT().InspectImage(sleepContainer.Image).Do(func(image string) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool! Did we verify that this fails before the patch?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this test doesn't pass without the patch.

time.Sleep(time.Second)
}).Return(sleepContainerImageInspected, nil)

var wg sync.WaitGroup
wg.Add(2)
go func() {
ImagePullDeleteLock.Lock()
defer ImagePullDeleteLock.Unlock()
err := imageManager.RecordContainerReference(sleepContainer)
assert.NoError(t, err)
wg.Done()
}()

go func() {
imageManager.(*dockerImageManager).removeUnusedImages()
wg.Done()
}()
wg.Wait()
}

func TestAddAndRemoveContainerToImageStateReferenceHappyPath(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down
14 changes: 14 additions & 0 deletions agent/engine/image/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package image
import (
"encoding/json"
"fmt"
"strings"
"sync"
"time"

Expand All @@ -29,6 +30,10 @@ type Image struct {
Size int64
}

// String returns a one-line, human-readable summary of the image: its ID
// followed by all of its names joined with ", ".
func (image *Image) String() string {
	joinedNames := strings.Join(image.Names, ", ")
	return fmt.Sprintf("ImageID: %s; Names: %s", image.ImageID, joinedNames)
}

// ImageState represents a docker image
// and its state information such as containers associated with it
type ImageState struct {
Expand Down Expand Up @@ -114,3 +119,12 @@ func (imageState *ImageState) MarshalJSON() ([]byte, error) {
LastUsedAt: imageState.LastUsedAt,
})
}

// String returns a one-line, human-readable summary of the image state:
// the image description (left empty when Image is nil), the number of
// entries in Containers, and the PulledAt and LastUsedAt values.
func (imageState *ImageState) String() string {
	var description string
	if img := imageState.Image; img != nil {
		description = img.String()
	}

	containerCount := len(imageState.Containers)
	pulledAt := imageState.PulledAt.String()
	lastUsedAt := imageState.LastUsedAt.String()

	return fmt.Sprintf("Image: [%s] referenced by %d containers; PulledAt: %s; LastUsedAt: %s",
		description, containerCount, pulledAt, lastUsedAt)
}