Skip to content

Commit

Permalink
adding increased-task-cpu-limit capability
Browse files Browse the repository at this point in the history
  • Loading branch information
singholt committed May 3, 2022
1 parent 5ca8089 commit be393c8
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ GOFILES:=$(shell go list -f '{{$$p := .}}{{range $$f := .GoFiles}}{{$$p.Dir}}/{{
.PHONY: gocyclo
gocyclo:
# Run gocyclo over all .go files
gocyclo -over 17 ${GOFILES}
gocyclo -over 18 ${GOFILES}

# same as gofiles above, but without the `-f`
.PHONY: govet
Expand Down
6 changes: 6 additions & 0 deletions agent/api/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config,
})
return apierrors.NewResourceInitError(task.Arn, err)
}
} else if task.CPU > 0 || task.Memory > 0 {
// Client-side validation/warning if a task with task-level CPU/memory limits specified somehow lands on an instance
// where agent does not support it. These limits will be ignored.
logger.Warn("Ignoring task-level CPU/memory limits since agent does not support the TaskCPUMemLimits capability", logger.Fields{
field.TaskID: task.GetID(),
})
}

if err := task.initializeContainerOrderingForVolumes(); err != nil {
Expand Down
14 changes: 0 additions & 14 deletions agent/api/task/task_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ import (
)

const (
// With a 100ms CPU period, we can express 0.01 vCPU to 10 vCPUs
maxTaskVCPULimit = 10
// Reference: http://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
minimumCPUShare = 2

Expand Down Expand Up @@ -141,21 +139,9 @@ func (task *Task) BuildLinuxResourceSpec(cGroupCPUPeriod time.Duration) (specs.L
// buildExplicitLinuxCPUSpec builds CPU spec when task CPU limits are
// explicitly requested
func (task *Task) buildExplicitLinuxCPUSpec(cGroupCPUPeriod time.Duration) (specs.LinuxCPU, error) {
if task.CPU > maxTaskVCPULimit {
return specs.LinuxCPU{},
errors.Errorf("task CPU spec builder: unsupported CPU limits, requested=%f, max-supported=%d",
task.CPU, maxTaskVCPULimit)
}
taskCPUPeriod := uint64(cGroupCPUPeriod / time.Microsecond)
taskCPUQuota := int64(task.CPU * float64(taskCPUPeriod))

// TODO: DefaultCPUPeriod only permits 10VCPUs.
// Adaptive calculation of CPUPeriod required for further support
// (samuelkarp) The largest available EC2 instance in terms of CPU count is a x1.32xlarge,
// with 128 vCPUs. If we assume a fixed evaluation period of 100ms (100000us),
// we'd need a quota of 12800000us, which is longer than the maximum of 1000000.
// For 128 vCPUs, we'd probably need something like a 1ms (1000us - the minimum)
// evaluation period, an 128000us quota in order to stay within the min/max limits.
return specs.LinuxCPU{
Quota: &taskCPUQuota,
Period: &taskCPUPeriod,
Expand Down
23 changes: 23 additions & 0 deletions agent/api/task/task_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,29 @@ func TestBuildLinuxResourceSpecCPU(t *testing.T) {
assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec)
}

// TestBuildLinuxResourceSpecIncreasedTaskCPULimit validates the linux resource spec builder
// with increased task CPU limit (>10 vCPUs).
func TestBuildLinuxResourceSpecIncreasedTaskCPULimit(t *testing.T) {
const increasedTaskVCPULimit float64 = 15
task := &Task{
Arn: validTaskArn,
CPU: increasedTaskVCPULimit,
}

linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod)

expectedTaskCPUPeriod := uint64(defaultCPUPeriod / time.Microsecond)
expectedTaskCPUQuota := int64(increasedTaskVCPULimit * float64(expectedTaskCPUPeriod))
expectedLinuxResourceSpec := specs.LinuxResources{
CPU: &specs.LinuxCPU{
Quota: &expectedTaskCPUQuota,
Period: &expectedTaskCPUPeriod,
},
}
assert.NoError(t, err)
assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec)
}

// TestBuildLinuxResourceSpecWithoutTaskCPULimits validates behavior of CPU Shares
func TestBuildLinuxResourceSpecWithoutTaskCPULimits(t *testing.T) {
task := &Task{
Expand Down
13 changes: 13 additions & 0 deletions agent/app/agent_capability.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const (
appMeshAttributeSuffix = "aws-appmesh"
cniPluginVersionSuffix = "cni-plugin-version"
capabilityTaskCPUMemLimit = "task-cpu-mem-limit"
capabilityIncreasedTaskCPULimit = "increased-task-cpu-limit"
capabilityDockerPluginInfix = "docker-plugin."
attributeSeparator = "."
capabilityPrivateRegistryAuthASM = "private-registry-authentication.secretsmanager"
Expand Down Expand Up @@ -210,6 +211,7 @@ func (agent *ecsAgent) capabilities() ([]*ecs.Attribute, error) {
return nil, err
}

capabilities = agent.appendIncreasedTaskCPULimitCapability(capabilities)
capabilities = agent.appendTaskENICapabilities(capabilities)
capabilities = agent.appendENITrunkingCapabilities(capabilities)
capabilities = agent.appendDockerDependentCapabilities(capabilities, supportedVersions)
Expand Down Expand Up @@ -351,6 +353,17 @@ func (agent *ecsAgent) appendTaskCPUMemLimitCapabilities(capabilities []*ecs.Att
return capabilities, nil
}

func (agent *ecsAgent) appendIncreasedTaskCPULimitCapability(capabilities []*ecs.Attribute) []*ecs.Attribute {
if !agent.cfg.TaskCPUMemLimit.Enabled() {
// don't register the "increased-task-cpu-limit" capability if the "task-cpu-mem-limit" capability is disabled.
// "task-cpu-mem-limit" capability may be explicitly disabled or disabled due to unsupported docker version.
seelog.Warn("Increased Task CPU Limit capability is disabled since the Task CPU + Mem Limit capability is disabled.")
} else {
capabilities = appendNameOnlyAttribute(capabilities, attributePrefix+capabilityIncreasedTaskCPULimit)
}
return capabilities
}

func (agent *ecsAgent) appendTaskENICapabilities(capabilities []*ecs.Attribute) []*ecs.Attribute {
if agent.cfg.TaskENIEnabled.Enabled() {
// The assumption here is that all of the dependencies for supporting the
Expand Down
73 changes: 73 additions & 0 deletions agent/app/agent_capability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,79 @@ func TestCapabilitesTaskResourceLimitErrorCase(t *testing.T) {
assert.Error(t, err, "An error should be thrown when TaskCPUMemLimit is explicitly enabled")
}

func TestCapabilitiesIncreasedTaskCPULimit(t *testing.T) {
testCases := []struct {
testName string
taskCPUMemLimitValue config.Conditional
dockerVersion dockerclient.DockerVersion
expectedIncreasedTaskCPULimitEnabled bool
}{
{
testName: "enabled by default",
taskCPUMemLimitValue: config.NotSet,
dockerVersion: dockerclient.Version_1_22,
expectedIncreasedTaskCPULimitEnabled: true,
},
{
testName: "disabled, unsupportedDockerVersion",
taskCPUMemLimitValue: config.NotSet,
dockerVersion: dockerclient.Version_1_19,
expectedIncreasedTaskCPULimitEnabled: false,
},
{
testName: "disabled, taskCPUMemLimit explicitly disabled",
taskCPUMemLimitValue: config.ExplicitlyDisabled,
dockerVersion: dockerclient.Version_1_22,
expectedIncreasedTaskCPULimitEnabled: false,
},
}

for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
conf := &config.Config{
TaskCPUMemLimit: config.BooleanDefaultTrue{Value: tc.taskCPUMemLimitValue},
}

client := mock_dockerapi.NewMockDockerClient(ctrl)
versionList := []dockerclient.DockerVersion{tc.dockerVersion}
mockMobyPlugins := mock_mobypkgwrapper.NewMockPlugins(ctrl)
mockPauseLoader := mock_pause.NewMockLoader(ctrl)
mockPauseLoader.EXPECT().IsLoaded(gomock.Any()).Return(false, nil).AnyTimes()
gomock.InOrder(
client.EXPECT().SupportedVersions().Return(versionList),
client.EXPECT().KnownVersions().Return(versionList),
mockMobyPlugins.EXPECT().Scan().AnyTimes().Return([]string{}, nil),
client.EXPECT().ListPluginsWithFilters(gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).AnyTimes().Return([]string{}, nil),
)
ctx, cancel := context.WithCancel(context.TODO())
// Cancel the context to cancel async routines
defer cancel()
agent := &ecsAgent{
ctx: ctx,
cfg: conf,
dockerClient: client,
pauseLoader: mockPauseLoader,
mobyPlugins: mockMobyPlugins,
}

capability := attributePrefix + capabilityIncreasedTaskCPULimit
capabilities, err := agent.capabilities()
assert.NoError(t, err)

capMap := make(map[string]bool)
for _, capability := range capabilities {
capMap[aws.StringValue(capability.Name)] = true
}

_, ok := capMap[capability]
assert.Equal(t, tc.expectedIncreasedTaskCPULimitEnabled, ok)
})
}
}

func TestCapabilitiesContainerHealth(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down
2 changes: 1 addition & 1 deletion agent/app/agent_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (agent *ecsAgent) cgroupInit() error {
if agent.cfg.TaskCPUMemLimit.Value == config.ExplicitlyEnabled {
return errors.Wrapf(err, "unable to setup '/ecs' cgroup")
}
seelog.Warnf("Disabling TaskCPUMemLimit because agent is unabled to setup '/ecs' cgroup: %v", err)
seelog.Warnf("Disabling TaskCPUMemLimit because agent is unable to setup '/ecs' cgroup: %v", err)
agent.cfg.TaskCPUMemLimit.Value = config.ExplicitlyDisabled
return nil
}
Expand Down

0 comments on commit be393c8

Please sign in to comment.