Skip to content

Commit

Permalink
Add retries to DockerClient's manifest pull functionality and add uti…
Browse files Browse the repository at this point in the history
…ls/reference package (#4150)
  • Loading branch information
amogh09 authored Apr 23, 2024
1 parent b702281 commit 7e51191
Show file tree
Hide file tree
Showing 8 changed files with 227 additions and 34 deletions.
73 changes: 59 additions & 14 deletions agent/dockerclient/dockerapi/docker_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ type DockerClient interface {

// Given an image reference and registry auth credentials, pulls the image manifest
// of the image from the registry.
PullImageManifest(context.Context, string, *apicontainer.RegistryAuthenticationData) (registry.DistributionInspect, error)
PullImageManifest(context.Context, string, *apicontainer.RegistryAuthenticationData) (registry.DistributionInspect, apierrors.NamedError)

// PullImage pulls an image. authData should contain authentication data provided by the ECS backend.
PullImage(context.Context, string, *apicontainer.RegistryAuthenticationData, time.Duration) DockerContainerMetadata
Expand Down Expand Up @@ -244,6 +244,7 @@ type dockerGoClient struct {
ecrTokenCache async.Cache
config *config.Config
context context.Context
manifestPullBackoff retry.Backoff
imagePullBackoff retry.Backoff
inactivityTimeoutHandler inactivityTimeoutHandlerFunc

Expand All @@ -267,11 +268,12 @@ type ImagePullResponse struct {

func (dg *dockerGoClient) WithVersion(version dockerclient.DockerVersion) (DockerClient, error) {
versionedClient := &dockerGoClient{
sdkClientFactory: dg.sdkClientFactory,
version: version,
auth: dg.auth,
config: dg.config,
context: dg.context,
sdkClientFactory: dg.sdkClientFactory,
version: version,
auth: dg.auth,
config: dg.config,
context: dg.context,
manifestPullBackoff: dg.manifestPullBackoff,
}
// Check if the version is supported
_, err := versionedClient.sdkDockerClient()
Expand Down Expand Up @@ -313,6 +315,8 @@ func NewDockerGoClient(sdkclientFactory sdkclientfactory.Factory,
context: ctx,
imagePullBackoff: retry.NewExponentialBackoff(minimumPullRetryDelay, maximumPullRetryDelay,
pullRetryJitterMultiplier, pullRetryDelayMultiplier),
manifestPullBackoff: retry.NewExponentialBackoff(minimumPullRetryDelay, maximumPullRetryDelay,
pullRetryJitterMultiplier, pullRetryDelayMultiplier),
inactivityTimeoutHandler: handleInactivityTimeout,
}, nil
}
Expand All @@ -337,29 +341,70 @@ func (dg *dockerGoClient) time() ttime.Time {
// Pulls image manifest from the registry
func (dg *dockerGoClient) PullImageManifest(
ctx context.Context, imageRef string, authData *apicontainer.RegistryAuthenticationData,
) (registry.DistributionInspect, error) {
) (registry.DistributionInspect, apierrors.NamedError) {
// Get auth creds
sdkAuthConfig, err := dg.getAuthdata(imageRef, authData)
if err != nil {
return registry.DistributionInspect{}, wrapPullErrorAsNamedError(imageRef, err)
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, err)
}
encodedAuth, err := registry.EncodeAuthConfig(sdkAuthConfig)
if err != nil {
return registry.DistributionInspect{}, wrapPullErrorAsNamedError(imageRef, err)
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, err)
}

// Get an SDK Docker Client and call Distribution API
// Get an SDK Docker Client
client, err := dg.sdkDockerClient()
if err != nil {
return registry.DistributionInspect{}, CannotGetDockerClientError{version: dg.version, err: err}
}
distInspect, err := client.DistributionInspect(ctx, imageRef, encodedAuth)

// Call DistributionInspect API with retries
startTime := time.Now()
var distInspectPtr *registry.DistributionInspect
err = retry.RetryNWithBackoffCtx(ctx, dg.manifestPullBackoff, maximumPullRetries, func() error {
distInspect, err := client.DistributionInspect(ctx, imageRef, encodedAuth)
if err != nil {
return err
}
distInspectPtr = &distInspect
return nil
})

if err != nil {
err = redactEcrUrls(imageRef, err)
return registry.DistributionInspect{}, CannotPullContainerError{err}
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, err)
}
if ctxErr := ctx.Err(); ctxErr != nil {
// Context was done before manifest could be pulled
if errors.Is(ctxErr, context.DeadlineExceeded) {
timeoutErr := &DockerTimeoutError{time.Since(startTime), "MANIFEST_PULLED"}
return registry.DistributionInspect{}, timeoutErr
}
return registry.DistributionInspect{}, wrapManifestPullErrorAsNamedError(imageRef, ctxErr)
}
if distInspectPtr == nil {
// Shouldn't ever happen but to prevent a panic
return registry.DistributionInspect{}, CannotPullImageManifestError{
FromError: errors.New("failed to pull image manifest"),
}
}

return distInspect, nil
return *distInspectPtr, nil
}

// If the provided error is a NamedError then returns it, otherwise wraps the error in
// a CannotPullImageManifestError after redacting sensitive information from the error
// message.
func wrapManifestPullErrorAsNamedError(image string, err error) apierrors.NamedError {
var retErr apierrors.NamedError
if err != nil {
engErr, ok := err.(apierrors.NamedError)
if !ok {
err = redactEcrUrls(image, err)
engErr = CannotPullImageManifestError{err}
}
retErr = engErr
}
return retErr
}

func (dg *dockerGoClient) PullImage(ctx context.Context, image string,
Expand Down
12 changes: 7 additions & 5 deletions agent/dockerclient/dockerapi/docker_client_integ_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@ package dockerapi
import (
"context"
"testing"
"time"

"github.com/aws/amazon-ecs-agent/agent/api/container"
apicontainer "github.com/aws/amazon-ecs-agent/agent/api/container"
"github.com/aws/amazon-ecs-agent/agent/config"
"github.com/aws/amazon-ecs-agent/agent/dockerclient"
"github.com/aws/amazon-ecs-agent/agent/dockerclient/sdkclientfactory"
"github.com/aws/amazon-ecs-agent/ecs-agent/utils/retry"
"github.com/docker/docker/api/types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -52,6 +54,9 @@ func TestImageManifestPullInteg(t *testing.T) {
if err != nil {
t.Skipf("Skipping test due to unsupported Docker version: %v", err)
}
// Make retries very fast
supportedClient.(*dockerGoClient).manifestPullBackoff = retry.NewExponentialBackoff(
time.Nanosecond, time.Nanosecond, 1, 1)

tcs := []struct {
name string
Expand Down Expand Up @@ -99,12 +104,9 @@ func TestImageManifestPullInteg(t *testing.T) {
{
name: "Docker client version too old",
dockerClient: func() DockerClient {
sdkClientFactory := sdkclientfactory.NewFactory(context.Background(), dockerEndpoint)
cfg := &config.Config{}
defaultClient, err := NewDockerGoClient(sdkClientFactory, cfg, context.Background())
require.NoError(t, err)
// Prepare a Docker client with an older API version
version := dockerclient.GetSupportedDockerAPIVersion(dockerclient.Version_1_29)
unsupportedClient, err := defaultClient.WithVersion(version)
unsupportedClient, err := supportedClient.WithVersion(version)
require.NoError(t, err)
return unsupportedClient
}(),
Expand Down
64 changes: 52 additions & 12 deletions agent/dockerclient/dockerapi/docker_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ func TestPullImageManifest(t *testing.T) {
authData *apicontainer.RegistryAuthenticationData
setSDKFactoryExpectations func(f *mock_sdkclientfactory.MockFactory, ctrl *gomock.Controller)
setECRClientExpectations func(*mock_ecr.MockECRClient)
expectedError error
assertError func(t *testing.T, err error)
expectedDistributionInspect registry.DistributionInspect
}
tcs := []testCase{
Expand All @@ -327,7 +327,9 @@ func TestPullImageManifest(t *testing.T) {
setECRClientExpectations: func(me *mock_ecr.MockECRClient) {
me.EXPECT().GetAuthorizationToken("registryId").Return(nil, someErr)
},
expectedError: CannotPullECRContainerError{someErr},
assertError: func(t *testing.T, err error) {
require.Equal(t, CannotPullECRContainerError{someErr}, err)
},
},
{
name: "Failure in getting SDK client",
Expand All @@ -336,7 +338,9 @@ func TestPullImageManifest(t *testing.T) {
setSDKFactoryExpectations: func(f *mock_sdkclientfactory.MockFactory, ctrl *gomock.Controller) {
f.EXPECT().GetDefaultClient().Return(nil, someErr)
},
expectedError: CannotGetDockerClientError{version: "", err: someErr},
assertError: func(t *testing.T, err error) {
require.Equal(t, CannotGetDockerClientError{version: "", err: someErr}, err)
},
},
{
name: "Failure in DistributionInspect API - image URL is redacted",
Expand All @@ -347,12 +351,50 @@ func TestPullImageManifest(t *testing.T) {
client.EXPECT().
DistributionInspect(
gomock.Any(), "image", base64.URLEncoding.EncodeToString([]byte("{}"))).
Times(maximumPullRetries).
Return(
registry.DistributionInspect{},
errors.New("Some error for https://prod-us-east-1-starport-layer-bucket.s3.us-east-1.amazonaws.com"))
f.EXPECT().GetDefaultClient().Return(client, nil)
},
expectedError: CannotPullContainerError{errors.New("Some error for REDACTED ECR URL related to image")},
assertError: func(t *testing.T, err error) {
expectedErr := CannotPullImageManifestError{errors.New("Some error for REDACTED ECR URL related to image")}
require.Equal(t, expectedErr, err)
},
},
{
name: "Error is returned if context is canceled",
ctx: func() context.Context {
c, cancel := context.WithCancel(context.Background())
cancel()
return c
}(),
setSDKFactoryExpectations: func(f *mock_sdkclientfactory.MockFactory, ctrl *gomock.Controller) {
client := mock_sdkclient.NewMockClient(ctrl)
f.EXPECT().GetDefaultClient().Return(client, nil)
},
imageRef: "image",
assertError: func(t *testing.T, err error) {
require.Equal(t,
CannotPullImageManifestError{FromError: errors.New("context canceled")}, err)
},
},
{
name: "Error is returned if context deadline is exceeded",
ctx: func() context.Context {
c, cancel := context.WithTimeout(context.Background(), 0*time.Second)
time.Sleep(2 * time.Millisecond) // Give some time for deadline to be exceeded
cancel()
return c
}(),
setSDKFactoryExpectations: func(f *mock_sdkclientfactory.MockFactory, ctrl *gomock.Controller) {
client := mock_sdkclient.NewMockClient(ctrl)
f.EXPECT().GetDefaultClient().Return(client, nil)
},
imageRef: "image",
assertError: func(t *testing.T, err error) {
require.ErrorContains(t, err, "Could not transition to MANIFEST_PULLED; timed out")
},
},
{
name: "Manifest is returned if there are no errors - no auth data",
Expand Down Expand Up @@ -411,22 +453,20 @@ func TestPullImageManifest(t *testing.T) {
tc.setSDKFactoryExpectations(sdkFactory, ctrl)
}

goClient, ok := client.(*dockerGoClient)
require.True(t, ok)
ecrClientFactory := mock_ecr.NewMockECRFactory(ctrl)
ecrClient := mock_ecr.NewMockECRClient(ctrl)
mockTime := mock_ttime.NewMockTime(ctrl)
goClient.ecrClientFactory = ecrClientFactory
goClient._time = mockTime
client.(*dockerGoClient).ecrClientFactory = ecrClientFactory
client.(*dockerGoClient).manifestPullBackoff = retry.NewExponentialBackoff(
1*time.Nanosecond, 1*time.Nanosecond, 1, 1)

if tc.setECRClientExpectations != nil {
ecrClientFactory.EXPECT().GetClient(tc.authData.ECRAuthData).Return(ecrClient, nil)
tc.setECRClientExpectations(ecrClient)
}

res, err := goClient.PullImageManifest(tc.ctx, tc.imageRef, tc.authData)
if tc.expectedError != nil {
assert.Equal(t, tc.expectedError, err)
res, err := client.PullImageManifest(tc.ctx, tc.imageRef, tc.authData)
if tc.assertError != nil {
tc.assertError(t, err)
} else {
require.Nil(t, err)
assert.Equal(t, tc.expectedDistributionInspect, res)
Expand Down
14 changes: 14 additions & 0 deletions agent/dockerclient/dockerapi/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,20 @@ func (err CannotPullContainerError) ErrorName() string {
return "CannotPullContainerError"
}

// CannotPullImageManifestError indicates any error when trying to pull a container image manifest.
type CannotPullImageManifestError struct {
FromError error
}

func (err CannotPullImageManifestError) Error() string {
return err.FromError.Error()
}

// ErrorName returns the name of CannotPullImageManifestError.
func (err CannotPullImageManifestError) ErrorName() string {
return "CannotPullImageManifestError"
}

// CannotPullECRContainerError indicates any error when trying to pull
// a container image from ECR
type CannotPullECRContainerError struct {
Expand Down
5 changes: 3 additions & 2 deletions agent/dockerclient/dockerapi/mocks/dockerapi_mocks.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion agent/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/containernetworking/cni v1.1.2
github.com/containernetworking/plugins v1.4.1
github.com/deniswernert/udev v0.0.0-20170418162847-a12666f7b5a1
github.com/docker/distribution v2.8.2+incompatible
github.com/docker/docker v24.0.7+incompatible
github.com/docker/go-connections v0.4.0
github.com/docker/go-units v0.5.0
Expand Down Expand Up @@ -42,7 +43,6 @@ require (
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/didip/tollbooth v4.0.2+incompatible // indirect
github.com/docker/distribution v2.8.2+incompatible // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/godbus/dbus/v5 v5.1.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
Expand Down
35 changes: 35 additions & 0 deletions agent/utils/reference/reference.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"). You may
// not use this file except in compliance with the License. A copy of the
// License is located at
//
// http://aws.amazon.com/apache2.0/
//
// or in the "license" file accompanying this file. This file is distributed
// on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
// express or implied. See the License for the specific language governing
// permissions and limitations under the License.

// Utilities for container image reference strings.
package reference

import (
"github.com/docker/distribution/reference"
"github.com/opencontainers/go-digest"
)

// Helper function to parse an image reference and get digest from it if found.
// The caller must check that the returned digest is non-empty before using it.
func GetDigestFromImageRef(imageRef string) digest.Digest {
parsedRef, err := reference.Parse(imageRef)
if err != nil {
return ""
}
switch v := parsedRef.(type) {
case reference.Digested:
return v.Digest()
default:
return ""
}
}
Loading

0 comments on commit 7e51191

Please sign in to comment.