Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding increased-task-cpu-limit capability #3197

Merged
merged 1 commit into from
May 3, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ GOFILES:=$(shell go list -f '{{$$p := .}}{{range $$f := .GoFiles}}{{$$p.Dir}}/{{
.PHONY: gocyclo
gocyclo:
# Run gocyclo over all .go files
gocyclo -over 17 ${GOFILES}
gocyclo -over 18 ${GOFILES}

# same as gofiles above, but without the `-f`
.PHONY: govet
Expand Down
6 changes: 6 additions & 0 deletions agent/api/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,12 @@ func (task *Task) PostUnmarshalTask(cfg *config.Config,
})
return apierrors.NewResourceInitError(task.Arn, err)
}
} else if task.CPU > 0 || task.Memory > 0 {
// Client-side validation/warning if a task with task-level CPU/memory limits specified somehow lands on an instance
// where agent does not support it. These limits will be ignored.
logger.Warn("Ignoring task-level CPU/memory limits since agent does not support the TaskCPUMemLimits capability", logger.Fields{
field.TaskID: task.GetID(),
})
}

if err := task.initializeContainerOrderingForVolumes(); err != nil {
Expand Down
14 changes: 0 additions & 14 deletions agent/api/task/task_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,6 @@ import (
)

const (
// With a 100ms CPU period, we can express 0.01 vCPU to 10 vCPUs
maxTaskVCPULimit = 10
// Reference: http://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html
minimumCPUShare = 2

Expand Down Expand Up @@ -141,21 +139,9 @@ func (task *Task) BuildLinuxResourceSpec(cGroupCPUPeriod time.Duration) (specs.L
// buildExplicitLinuxCPUSpec builds CPU spec when task CPU limits are
// explicitly requested
func (task *Task) buildExplicitLinuxCPUSpec(cGroupCPUPeriod time.Duration) (specs.LinuxCPU, error) {
if task.CPU > maxTaskVCPULimit {
return specs.LinuxCPU{},
errors.Errorf("task CPU spec builder: unsupported CPU limits, requested=%f, max-supported=%d",
task.CPU, maxTaskVCPULimit)
}
taskCPUPeriod := uint64(cGroupCPUPeriod / time.Microsecond)
taskCPUQuota := int64(task.CPU * float64(taskCPUPeriod))

// TODO: DefaultCPUPeriod only permits 10VCPUs.
// Adaptive calculation of CPUPeriod required for further support
// (samuelkarp) The largest available EC2 instance in terms of CPU count is a x1.32xlarge,
// with 128 vCPUs. If we assume a fixed evaluation period of 100ms (100000us),
// we'd need a quota of 12800000us, which is longer than the maximum of 1000000.
// For 128 vCPUs, we'd probably need something like a 1ms (1000us - the minimum)
// evaluation period, an 128000us quota in order to stay within the min/max limits.
return specs.LinuxCPU{
Quota: &taskCPUQuota,
Period: &taskCPUPeriod,
Expand Down
23 changes: 23 additions & 0 deletions agent/api/task/task_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,29 @@ func TestBuildLinuxResourceSpecCPU(t *testing.T) {
assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec)
}

// TestBuildLinuxResourceSpecIncreasedTaskCPULimit validates the linux resource spec builder
// with increased task CPU limit (>10 vCPUs).
func TestBuildLinuxResourceSpecIncreasedTaskCPULimit(t *testing.T) {
const increasedTaskVCPULimit float64 = 15
task := &Task{
Arn: validTaskArn,
CPU: increasedTaskVCPULimit,
}

linuxResourceSpec, err := task.BuildLinuxResourceSpec(defaultCPUPeriod)

expectedTaskCPUPeriod := uint64(defaultCPUPeriod / time.Microsecond)
expectedTaskCPUQuota := int64(increasedTaskVCPULimit * float64(expectedTaskCPUPeriod))
expectedLinuxResourceSpec := specs.LinuxResources{
CPU: &specs.LinuxCPU{
Quota: &expectedTaskCPUQuota,
Period: &expectedTaskCPUPeriod,
},
}
assert.NoError(t, err)
assert.EqualValues(t, expectedLinuxResourceSpec, linuxResourceSpec)
}

// TestBuildLinuxResourceSpecWithoutTaskCPULimits validates behavior of CPU Shares
func TestBuildLinuxResourceSpecWithoutTaskCPULimits(t *testing.T) {
task := &Task{
Expand Down
13 changes: 13 additions & 0 deletions agent/app/agent_capability.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ const (
appMeshAttributeSuffix = "aws-appmesh"
cniPluginVersionSuffix = "cni-plugin-version"
capabilityTaskCPUMemLimit = "task-cpu-mem-limit"
capabilityIncreasedTaskCPULimit = "increased-task-cpu-limit"
capabilityDockerPluginInfix = "docker-plugin."
attributeSeparator = "."
capabilityPrivateRegistryAuthASM = "private-registry-authentication.secretsmanager"
Expand Down Expand Up @@ -210,6 +211,7 @@ func (agent *ecsAgent) capabilities() ([]*ecs.Attribute, error) {
return nil, err
}

capabilities = agent.appendIncreasedTaskCPULimitCapability(capabilities)
capabilities = agent.appendTaskENICapabilities(capabilities)
capabilities = agent.appendENITrunkingCapabilities(capabilities)
capabilities = agent.appendDockerDependentCapabilities(capabilities, supportedVersions)
Expand Down Expand Up @@ -351,6 +353,17 @@ func (agent *ecsAgent) appendTaskCPUMemLimitCapabilities(capabilities []*ecs.Att
return capabilities, nil
}

func (agent *ecsAgent) appendIncreasedTaskCPULimitCapability(capabilities []*ecs.Attribute) []*ecs.Attribute {
if !agent.cfg.TaskCPUMemLimit.Enabled() {
// don't register the "increased-task-cpu-limit" capability if the "task-cpu-mem-limit" capability is disabled.
// "task-cpu-mem-limit" capability may be explicitly disabled or disabled due to unsupported docker version.
seelog.Warn("Increased Task CPU Limit capability is disabled since the Task CPU + Mem Limit capability is disabled.")
} else {
capabilities = appendNameOnlyAttribute(capabilities, attributePrefix+capabilityIncreasedTaskCPULimit)
}
return capabilities
}

func (agent *ecsAgent) appendTaskENICapabilities(capabilities []*ecs.Attribute) []*ecs.Attribute {
if agent.cfg.TaskENIEnabled.Enabled() {
// The assumption here is that all of the dependencies for supporting the
Expand Down
73 changes: 73 additions & 0 deletions agent/app/agent_capability_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -713,6 +713,79 @@ func TestCapabilitesTaskResourceLimitErrorCase(t *testing.T) {
assert.Error(t, err, "An error should be thrown when TaskCPUMemLimit is explicitly enabled")
}

func TestCapabilitiesIncreasedTaskCPULimit(t *testing.T) {
testCases := []struct {
testName string
taskCPUMemLimitValue config.Conditional
dockerVersion dockerclient.DockerVersion
expectedIncreasedTaskCPULimitEnabled bool
}{
{
testName: "enabled by default",
taskCPUMemLimitValue: config.NotSet,
dockerVersion: dockerclient.Version_1_22,
expectedIncreasedTaskCPULimitEnabled: true,
},
{
testName: "disabled, unsupportedDockerVersion",
taskCPUMemLimitValue: config.NotSet,
dockerVersion: dockerclient.Version_1_19,
expectedIncreasedTaskCPULimitEnabled: false,
},
{
testName: "disabled, taskCPUMemLimit explicitly disabled",
taskCPUMemLimitValue: config.ExplicitlyDisabled,
dockerVersion: dockerclient.Version_1_22,
expectedIncreasedTaskCPULimitEnabled: false,
},
}

for _, tc := range testCases {
t.Run(tc.testName, func(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
conf := &config.Config{
TaskCPUMemLimit: config.BooleanDefaultTrue{Value: tc.taskCPUMemLimitValue},
}

client := mock_dockerapi.NewMockDockerClient(ctrl)
versionList := []dockerclient.DockerVersion{tc.dockerVersion}
mockMobyPlugins := mock_mobypkgwrapper.NewMockPlugins(ctrl)
mockPauseLoader := mock_pause.NewMockLoader(ctrl)
mockPauseLoader.EXPECT().IsLoaded(gomock.Any()).Return(false, nil).AnyTimes()
gomock.InOrder(
client.EXPECT().SupportedVersions().Return(versionList),
client.EXPECT().KnownVersions().Return(versionList),
mockMobyPlugins.EXPECT().Scan().AnyTimes().Return([]string{}, nil),
client.EXPECT().ListPluginsWithFilters(gomock.Any(), gomock.Any(), gomock.Any(),
gomock.Any()).AnyTimes().Return([]string{}, nil),
)
ctx, cancel := context.WithCancel(context.TODO())
// Cancel the context to cancel async routines
defer cancel()
agent := &ecsAgent{
ctx: ctx,
cfg: conf,
dockerClient: client,
pauseLoader: mockPauseLoader,
mobyPlugins: mockMobyPlugins,
}

capability := attributePrefix + capabilityIncreasedTaskCPULimit
capabilities, err := agent.capabilities()
assert.NoError(t, err)

capMap := make(map[string]bool)
for _, capability := range capabilities {
capMap[aws.StringValue(capability.Name)] = true
}

_, ok := capMap[capability]
assert.Equal(t, tc.expectedIncreasedTaskCPULimitEnabled, ok)
})
}
}

func TestCapabilitiesContainerHealth(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down
2 changes: 1 addition & 1 deletion agent/app/agent_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (agent *ecsAgent) cgroupInit() error {
if agent.cfg.TaskCPUMemLimit.Value == config.ExplicitlyEnabled {
return errors.Wrapf(err, "unable to setup '/ecs' cgroup")
}
seelog.Warnf("Disabling TaskCPUMemLimit because agent is unabled to setup '/ecs' cgroup: %v", err)
seelog.Warnf("Disabling TaskCPUMemLimit because agent is unable to setup '/ecs' cgroup: %v", err)
agent.cfg.TaskCPUMemLimit.Value = config.ExplicitlyDisabled
return nil
}
Expand Down