Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge branch 'container-ordering-feature' into dev #1904

Merged
merged 37 commits into from
Feb 28, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
e36e2c8
Model changes for ACS for container ordering
ubhattacharjya Jan 28, 2019
5fbfab0
Adding adapter to have volumes/links use DependsOn field
ubhattacharjya Jan 30, 2019
5c3fa51
Refactor dependency graph code for volume/links dependency resolution
ubhattacharjya Feb 7, 2019
680db40
Merge branch 'dev' into container-ordering feature
ubhattacharjya Feb 15, 2019
2cf4734
Merge pull request #1853 from ubhattacharjya/mergeBranch
ubhattacharjya Feb 16, 2019
79bd517
engine,dockerclient: per container start/stop timeouts
adnxn Feb 6, 2019
3265bf9
statemanager: add container start/stop statemanger
adnxn Feb 15, 2019
f4da625
Merge pull request #1849 from adnxn/dev-granular-timeouts
adnxn Feb 19, 2019
6dcd3e3
Add Dependency conditions of 'Complete', 'Success' and 'Healthy'
ubhattacharjya Feb 13, 2019
4c57056
engine: adding poll function during progressTask
petderek Feb 22, 2019
3bc095b
Merge pull request #1876 from petderek/container-ordering-task-sync
petderek Feb 22, 2019
6f89b0e
dependencygraph: Enforce shutdown order
petderek Feb 18, 2019
fa040f4
functional tests: increase timeout on link/volume
petderek Feb 19, 2019
89bb2e8
dependencygraph: simplify container start logic
petderek Feb 21, 2019
582327c
Merge pull request #1866 from petderek/container-ordering-feature
petderek Feb 22, 2019
01dca6d
Remove the functionality of StartTimeout as Docker API Start Timeout
ubhattacharjya Feb 22, 2019
5bac35e
engine: add ordering integration tests
petderek Feb 22, 2019
af9e1f5
Merge pull request #1881 from petderek/container-ordering-integ-tests
petderek Feb 24, 2019
4ac84e4
Dependency Condition Naming change:
ubhattacharjya Feb 23, 2019
c698919
Validating that the dependency container has not timed out
ubhattacharjya Feb 22, 2019
7ba5947
Merge pull request #1882 from ubhattacharjya/Naming
ubhattacharjya Feb 24, 2019
83c928b
Merge pull request #1880 from ubhattacharjya/ChangeStartTimeout
ubhattacharjya Feb 24, 2019
c065ad3
tests: make windows test run a bit faster
petderek Feb 24, 2019
ee7a419
Merge pull request #1886 from petderek/fast-windows-test
petderek Feb 24, 2019
3c27ece
Checking dependency resolution after timeout and successful exit check
ubhattacharjya Feb 24, 2019
cd2b228
engine: adding timeouts to the other order tests
petderek Feb 24, 2019
314e658
Merge pull request #1884 from ubhattacharjya/bugFix
ubhattacharjya Feb 25, 2019
6ef7266
ACS model change
ubhattacharjya Feb 24, 2019
01b0505
Accomodate ACS model change
ubhattacharjya Feb 24, 2019
0c0a2e8
tests: prefer cached images
petderek Feb 26, 2019
d986f36
Adding shutdown order test
petderek Feb 27, 2019
fc2d930
Merge pull request #1885 from ubhattacharjya/ACS
ubhattacharjya Feb 27, 2019
e34ceb7
Adding Integ Tests for Granular Stop Timeout
ubhattacharjya Feb 26, 2019
bc3f30c
Fix Shutdown Order test
ubhattacharjya Feb 27, 2019
c3b8514
integ tests: allocate more cpu per container
petderek Feb 27, 2019
95de818
Merge pull request #1887 from ubhattacharjya/StopTimeoutTest
ubhattacharjya Feb 27, 2019
4a67ab7
Merge branch 'container-ordering-feature' into dev
ubhattacharjya Feb 28, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ additional details on each available environment variable.
| `ECS_APPARMOR_CAPABLE` | `true` | Whether AppArmor is available on the container instance. | `false` | `false` |
| `ECS_ENGINE_TASK_CLEANUP_WAIT_DURATION` | 10m | Time to wait to delete containers for a stopped task. If set to less than 1 minute, the value is ignored. | 3h | 3h |
| `ECS_CONTAINER_STOP_TIMEOUT` | 10m | Instance scoped configuration for time to wait for the container to exit normally before being forcibly killed. | 30s | 30s |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Instance scoped configuration for timeout before giving up on starting a container. | 3m | 8m |
| `ECS_CONTAINER_START_TIMEOUT` | 10m | Timeout before giving up on starting a container. | 3m | 8m |
| `ECS_ENABLE_TASK_IAM_ROLE` | `true` | Whether to enable IAM Roles for Tasks on the Container Instance | `false` | `false` |
| `ECS_ENABLE_TASK_IAM_ROLE_NETWORK_HOST` | `true` | Whether to enable IAM Roles for Tasks when launched with `host` network mode on the Container Instance | `false` | `false` |
| `ECS_DISABLE_IMAGE_CLEANUP` | `true` | Whether to disable automated image cleanup for the ECS Agent. | `false` | `false` |
Expand Down
34 changes: 28 additions & 6 deletions agent/acs/model/api/api-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@
"AssociationType":{
"type":"string",
"enum":[
"gpu",
"elastic-inference"
"elastic-inference",
"gpu"
]
},
"Associations":{
Expand Down Expand Up @@ -203,7 +203,30 @@
"healthCheckType":{"shape":"HealthCheckType"},
"registryAuthentication":{"shape":"RegistryAuthenticationData"},
"logsAuthStrategy":{"shape":"AuthStrategy"},
"secrets":{"shape":"SecretList"}
"secrets":{"shape":"SecretList"},
"dependsOn":{"shape":"ContainerDependencies"},
"startTimeout":{"shape":"Integer"},
"stopTimeout":{"shape":"Integer"}
}
},
"ContainerCondition":{
"type":"string",
"enum":[
"START",
"COMPLETE",
"SUCCESS",
"HEALTHY"
]
},
"ContainerDependencies":{
"type":"list",
"member":{"shape":"ContainerDependency"}
},
"ContainerDependency":{
"type":"structure",
"members":{
"containerName":{"shape":"String"},
"condition":{"shape":"ContainerCondition"}
}
},
"ContainerList":{
Expand Down Expand Up @@ -479,8 +502,7 @@
"SecretType":{
"type":"string",
"enum":[
"ENVIRONMENT_VARIABLE",
"MOUNT_POINT"
"ENVIRONMENT_VARIABLE"
]
},
"SensitiveString":{
Expand Down Expand Up @@ -592,4 +614,4 @@
]
}
}
}
}
24 changes: 24 additions & 0 deletions agent/acs/model/ecsacs/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ type Container struct {

Cpu *int64 `locationName:"cpu" type:"integer"`

DependsOn []*ContainerDependency `locationName:"dependsOn" type:"list"`

DockerConfig *DockerConfig `locationName:"dockerConfig" type:"structure"`

EntryPoint []*string `locationName:"entryPoint" type:"list"`
Expand Down Expand Up @@ -236,6 +238,10 @@ type Container struct {

Secrets []*Secret `locationName:"secrets" type:"list"`

StartTimeout *int64 `locationName:"startTimeout" type:"integer"`

StopTimeout *int64 `locationName:"stopTimeout" type:"integer"`

VolumesFrom []*VolumeFrom `locationName:"volumesFrom" type:"list"`
}

Expand All @@ -249,6 +255,24 @@ func (s Container) GoString() string {
return s.String()
}

type ContainerDependency struct {
_ struct{} `type:"structure"`

Condition *string `locationName:"condition" type:"string" enum:"ContainerCondition"`

ContainerName *string `locationName:"containerName" type:"string"`
}

// String returns the string representation
func (s ContainerDependency) String() string {
return awsutil.Prettify(s)
}

// GoString returns the string representation
func (s ContainerDependency) GoString() string {
return s.String()
}

type DockerConfig struct {
_ struct{} `type:"structure"`

Expand Down
30 changes: 29 additions & 1 deletion agent/api/container/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ type HealthStatus struct {
type Container struct {
// Name is the name of the container specified in the task definition
Name string
// DependsOn is the field which specifies the ordering for container startup and shutdown.
DependsOn []DependsOn `json:"dependsOn,omitempty"`
// V3EndpointID is a container identifier used to construct v3 metadata endpoint; it's unique among
// all the containers managed by the agent
V3EndpointID string
Expand Down Expand Up @@ -130,14 +132,21 @@ type Container struct {
DockerConfig DockerConfig `json:"dockerConfig"`
// RegistryAuthentication is the auth data used to pull image
RegistryAuthentication *RegistryAuthenticationData `json:"registryAuthentication"`
// HealthCheckType is the mechnism to use for the container health check
// HealthCheckType is the mechanism to use for the container health check
// currently it only supports 'DOCKER'
HealthCheckType string `json:"healthCheckType,omitempty"`
// Health contains the health check information of container health check
Health HealthStatus `json:"-"`
// LogsAuthStrategy specifies how the logs driver for the container will be
// authenticated
LogsAuthStrategy string
// StartTimeout specifies the time value after which if a container has a dependency
// on another container and the dependency conditions are 'SUCCESS', 'COMPLETE', 'HEALTHY',
// then that dependency will not be resolved.
StartTimeout uint
// StopTimeout specifies the time value to be passed as StopContainer api call
StopTimeout uint

// lock is used for fields that are accessed and updated concurrently
lock sync.RWMutex

Expand Down Expand Up @@ -236,6 +245,11 @@ type Container struct {
labels map[string]string
}

type DependsOn struct {
ContainerName string `json:"containerName"`
Condition string `json:"condition"`
}

// DockerContainer is a mapping between containers-as-docker-knows-them and
// containers-as-we-know-them.
// This is primarily used in DockerState, but lives here such that tasks and
Expand Down Expand Up @@ -860,3 +874,17 @@ func (c *Container) HasSecretAsEnv() bool {
}
return false
}

func (c *Container) GetStartTimeout() time.Duration {
c.lock.Lock()
defer c.lock.Unlock()

return time.Duration(c.StartTimeout) * time.Second
}

func (c *Container) GetStopTimeout() time.Duration {
c.lock.Lock()
defer c.lock.Unlock()

return time.Duration(c.StopTimeout) * time.Second
}
15 changes: 14 additions & 1 deletion agent/api/container/container_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func TestSetupExecutionRoleFlag(t *testing.T) {
}
}

func TestSetHealtStatus(t *testing.T) {
func TestSetHealthStatus(t *testing.T) {
container := Container{}

// set the container status to be healthy
Expand Down Expand Up @@ -455,3 +455,16 @@ func TestHasSecretAsEnv(t *testing.T) {
}

}

func TestPerContainerTimeouts(t *testing.T) {
timeout := uint(10)
expectedTimeout := time.Duration(timeout) * time.Second

container := Container{
StartTimeout: timeout,
StopTimeout: timeout,
}

assert.Equal(t, container.GetStartTimeout(), expectedTimeout)
assert.Equal(t, container.GetStopTimeout(), expectedTimeout)
}
52 changes: 51 additions & 1 deletion agent/api/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@ const (
NvidiaVisibleDevicesEnvVar = "NVIDIA_VISIBLE_DEVICES"
GPUAssociationType = "gpu"

ContainerOrderingCreateCondition = "CREATE"
ContainerOrderingStartCondition = "START"

arnResourceSections = 2
arnResourceDelimiter = "/"
// networkModeNone specifies the string used to define the `none` docker networking mode
Expand Down Expand Up @@ -250,6 +253,18 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config,
}
}

err := task.initializeContainerOrderingForVolumes()
if err != nil {
seelog.Errorf("Task [%s]: could not initialize volumes dependency for container: %v", task.Arn, err)
return apierrors.NewResourceInitError(task.Arn, err)
}

err = task.initializeContainerOrderingForLinks()
if err != nil {
seelog.Errorf("Task [%s]: could not initialize links dependency for container: %v", task.Arn, err)
return apierrors.NewResourceInitError(task.Arn, err)
}

if task.requiresASMDockerAuthData() {
task.initializeASMAuthResource(credentialsManager, resourceFields)
}
Expand All @@ -262,7 +277,7 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config,
task.initializeASMSecretResource(credentialsManager, resourceFields)
}

err := task.initializeDockerLocalVolumes(dockerClient, ctx)
err = task.initializeDockerLocalVolumes(dockerClient, ctx)
if err != nil {
return apierrors.NewResourceInitError(task.Arn, err)
}
Expand Down Expand Up @@ -1250,6 +1265,41 @@ func (task *Task) shouldOverrideIPCMode(container *apicontainer.Container, docke
}
}

func (task *Task) initializeContainerOrderingForVolumes() error {
for _, container := range task.Containers {
if len(container.VolumesFrom) > 0 {
for _, volume := range container.VolumesFrom {
if _, ok := task.ContainerByName(volume.SourceContainer); !ok {
return fmt.Errorf("could not find container with name %s", volume.SourceContainer)
}
dependOn := apicontainer.DependsOn{ContainerName: volume.SourceContainer, Condition: ContainerOrderingCreateCondition}
container.DependsOn = append(container.DependsOn, dependOn)
}
}
}
return nil
}

func (task *Task) initializeContainerOrderingForLinks() error {
for _, container := range task.Containers {
if len(container.Links) > 0 {
for _, link := range container.Links {
linkParts := strings.Split(link, ":")
if len(linkParts) > 2 {
return fmt.Errorf("Invalid link format")
}
linkName := linkParts[0]
if _, ok := task.ContainerByName(linkName); !ok {
return fmt.Errorf("could not find container with name %s", linkName)
}
dependOn := apicontainer.DependsOn{ContainerName: linkName, Condition: ContainerOrderingStartCondition}
container.DependsOn = append(container.DependsOn, dependOn)
}
}
}
return nil
}

func (task *Task) dockerLinks(container *apicontainer.Container, dockerContainerMap map[string]*apicontainer.DockerContainer) ([]string, error) {
dockerLinkArr := make([]string, len(container.Links))
for i, link := range container.Links {
Expand Down
Loading