Skip to content

Commit

Permalink
stop container timeout: increase buffer from 30s to 2m
Browse files Browse the repository at this point in the history
  • Loading branch information
sparrc committed Oct 31, 2020
1 parent 9280bcf commit 4f0d1ef
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 12 deletions.
12 changes: 7 additions & 5 deletions agent/dockerclient/dockerapi/docker_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,6 @@ const (
)

// Timelimits for docker operations enforced above docker
// TODO: Make these limits configurable.
const (
// Parameters for caching the docker auth for ECR
tokenCacheSize = 100
Expand All @@ -90,7 +89,12 @@ const (
pollStatsTimeout = 18 * time.Second
)

var ctxTimeoutStopContainer = dockerclient.StopContainerTimeout
// stopContainerTimeoutBuffer is a buffer added to the timeout passed into the docker
// StopContainer api call. The reason for this buffer is that when the regular "stop"
// command fails, the docker api falls back to other kill methods, such as a containerd
// kill and SIGKILL. This buffer adds time onto the context timeout to allow time
// for these backup kill methods to finish.
var stopContainerTimeoutBuffer = 2 * time.Minute

type inactivityTimeoutHandlerFunc func(reader io.ReadCloser, timeout time.Duration, cancelRequest func(), canceled *uint32) (io.ReadCloser, chan<- struct{})

Expand Down Expand Up @@ -659,9 +663,7 @@ func (dg *dockerGoClient) inspectContainer(ctx context.Context, dockerID string)
}

func (dg *dockerGoClient) StopContainer(ctx context.Context, dockerID string, timeout time.Duration) DockerContainerMetadata {
// ctxTimeout is sum of timeout(applied to the StopContainer api call) and a fixed constant dockerclient.StopContainerTimeout
// the context's timeout should be greater than the sigkill timout for the StopContainer call
ctxTimeout := timeout + ctxTimeoutStopContainer
ctxTimeout := timeout + stopContainerTimeoutBuffer
ctx, cancel := context.WithTimeout(ctx, ctxTimeout)
defer cancel()
defer metrics.MetricsEngineGlobal.RecordDockerMetric("STOP_CONTAINER")()
Expand Down
10 changes: 7 additions & 3 deletions agent/dockerclient/dockerapi/docker_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,11 @@ func TestStopContainerTimeout(t *testing.T) {
cfg.DockerStopTimeout = xContainerShortTimeout
mockDockerSDK, client, _, _, _, done := dockerClientSetupWithConfig(t, cfg)
defer done()
ctxTimeoutStopContainer = xContainerShortTimeout
reset := stopContainerTimeoutBuffer
stopContainerTimeoutBuffer = xContainerShortTimeout
defer func() {
stopContainerTimeoutBuffer = reset
}()

wait := &sync.WaitGroup{}
wait.Add(1)
Expand All @@ -488,7 +492,7 @@ func TestStopContainerTimeout(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
metadata := client.StopContainer(ctx, "id", xContainerShortTimeout)
assert.Error(t, metadata.Error, "Expected error for pull timeout")
assert.Error(t, metadata.Error, "Expected error for stop timeout")
assert.Equal(t, "DockerTimeoutError", metadata.Error.(apierrors.NamedError).ErrorName())
wait.Done()
}
Expand All @@ -514,7 +518,7 @@ func TestStopContainer(t *testing.T) {
)
ctx, cancel := context.WithCancel(context.TODO())
defer cancel()
metadata := client.StopContainer(ctx, "id", dockerclient.StopContainerTimeout)
metadata := client.StopContainer(ctx, "id", client.config.DockerStopTimeout)
assert.NoError(t, metadata.Error)
assert.Equal(t, "id", metadata.DockerID)
}
Expand Down
2 changes: 0 additions & 2 deletions agent/dockerclient/timeout.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ const (
ListContainersTimeout = 10 * time.Minute
// InspectContainerTimeout is the timeout for the InspectContainer API.
InspectContainerTimeout = 30 * time.Second
// StopContainerTimeout is the timeout for the StopContainer API.
StopContainerTimeout = 30 * time.Second
// RemoveContainerTimeout is the timeout for the RemoveContainer API.
RemoveContainerTimeout = 5 * time.Minute

Expand Down
4 changes: 2 additions & 2 deletions agent/engine/docker_task_engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,7 +618,7 @@ func TestSteadyStatePoll(t *testing.T) {
dockerapi.DockerContainerMetadata{
DockerID: containerID,
}).AnyTimes()
client.EXPECT().StopContainer(gomock.Any(), containerID, dockerclient.StopContainerTimeout).AnyTimes()
client.EXPECT().StopContainer(gomock.Any(), containerID, 30*time.Second).AnyTimes()

err := taskEngine.Init(ctx) // start the task engine
assert.NoError(t, err)
Expand Down Expand Up @@ -869,7 +869,7 @@ func TestTaskTransitionWhenStopContainerTimesout(t *testing.T) {
containerStopTimeoutError := dockerapi.DockerContainerMetadata{
Error: &dockerapi.DockerTimeoutError{
Transition: "stop",
Duration: dockerclient.StopContainerTimeout,
Duration: 30 * time.Second,
},
}
dockerEventSent := make(chan int)
Expand Down

0 comments on commit 4f0d1ef

Please sign in to comment.