From 498c7bcde990c04c289a37535a0e4dd5bc54591e Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Mon, 28 Jan 2019 15:22:41 -0600 Subject: [PATCH] virtcontainers: reimplement sandbox cgroup All containers run in different cgroups even the sandbox, with this new implementation the sandbox cpu cgroup wil be equal to the sum of all its containers and the hypervisor process will be placed there impacting to the containers running in the sandbox (VM). The default number of vcpus is used when the sandbox has no constraints. For example, if default_vcpus is 2, then quota will be 200000 and period 100000. **c-ray test** http://www.futuretech.blinkenlights.nl/c-ray.html ``` +=============================================+ | | 6 threads 6cpus | 1 thread 1 cpu | +=============================================+ | current | 40 seconds | 122 seconds | +============================================== | new | 37 seconds | 124 seconds | +============================================== ``` current = current cgroups implementation new = new cgroups implementation **workload** ```yaml apiVersion: v1 kind: Pod metadata: name: c-ray annotations: io.kubernetes.cri.untrusted-workload: "true" spec: restartPolicy: Never containers: - name: c-ray-1 image: docker.io/devimc/c-ray:latest imagePullPolicy: IfNotPresent args: ["-t", "6", "-s", "1600x1200", "-r", "8", "-i", "/c-ray-1.1/sphfract", "-o", "/tmp/output.ppm"] resources: limits: cpu: 6 - name: c-ray-2 image: docker.io/devimc/c-ray:latest imagePullPolicy: IfNotPresent args: ["-t", "1", "-s", "1600x1200", "-r", "8", "-i", "/c-ray-1.1/sphfract", "-o", "/tmp/output.ppm"] resources: limits: cpu: 1 ``` Depends-on: github.com/kata-containers/tests#1141 fixes #1153 Signed-off-by: Julio Montes --- virtcontainers/api.go | 5 - virtcontainers/api_test.go | 11 +- virtcontainers/cgroups.go | 352 +++++++++++++++++++++------------ virtcontainers/cgroups_test.go | 320 ++++++++++++++---------------- virtcontainers/sandbox.go | 47 +++-- virtcontainers/sandbox_test.go | 7 +- 6 files changed, 420 insertions(+), 322 deletions(-) diff --git a/virtcontainers/api.go b/virtcontainers/api.go index 52caeb1d2b..58302e76b6 100644 --- a/virtcontainers/api.go +++ b/virtcontainers/api.go @@ -120,11 +120,6 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f return nil, err } - // Setup host cgroups - if err = s.setupCgroups(); err != nil { - return nil, err - } - return s, nil } diff --git a/virtcontainers/api_test.go b/virtcontainers/api_test.go index f7aebf7205..16ea93708c 100644 --- a/virtcontainers/api_test.go +++ b/virtcontainers/api_test.go @@ -18,6 +18,7 @@ import ( "testing" "github.com/containernetworking/plugins/pkg/ns" + "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" "github.com/kata-containers/runtime/virtcontainers/pkg/mock" vcTypes "github.com/kata-containers/runtime/virtcontainers/pkg/types" "github.com/kata-containers/runtime/virtcontainers/types" @@ -30,13 +31,15 @@ const ( ) var sandboxAnnotations = map[string]string{ - "sandbox.foo": "sandbox.bar", - "sandbox.hello": "sandbox.world", + "sandbox.foo": "sandbox.bar", + "sandbox.hello": "sandbox.world", + annotations.ConfigJSONKey: `{"linux":{"resources":{}}}`, } var containerAnnotations = map[string]string{ - "container.foo": "container.bar", - "container.hello": "container.world", + "container.foo": "container.bar", + "container.hello": "container.world", + annotations.ConfigJSONKey: `{"linux":{"resources":{}}}`, } func newBasicTestCmd() types.Cmd { diff --git a/virtcontainers/cgroups.go b/virtcontainers/cgroups.go index 20fa6b07bc..ac242e1863 100644 --- a/virtcontainers/cgroups.go +++ b/virtcontainers/cgroups.go @@ -1,4 +1,5 @@ // Copyright (c) 2018 Huawei Corporation +// Copyright (c) 2019 Intel Corporation // // SPDX-License-Identifier: Apache-2.0 // @@ -6,192 +7,291 @@ package virtcontainers import ( - "encoding/json" + "bufio" "fmt" + "math" + "os" + "path/filepath" + "strings" "github.com/containerd/cgroups" - "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" ) -const ( - vcpuGroupName = "vcpu" - defaultCgroupParent = "/kata" -) - -type sandboxCgroups struct { - commonParent cgroups.Cgroup - sandboxSub cgroups.Cgroup - vcpuSub cgroups.Cgroup +type cgroupPather interface { + cgroups.Subsystem + Path(path string) string } -func (s *Sandbox) newCgroups() error { - // New will still succeed when cgroup exists - // create common parent for all kata-containers - // e.g. /sys/fs/cgroup/cpu/vc - parent, err := cgroups.New(cgroups.V1, - cgroups.StaticPath(defaultCgroupParent), &specs.LinuxResources{}) - if err != nil { - return fmt.Errorf("failed to create cgroup for %q", defaultCgroupParent) - } +// unconstrained cgroups are placed here. +// for example /sys/fs/cgroup/memory/kata/$CGPATH +// where path is defined by the containers manager +const cgroupKataPath = "/kata/" + +var cgroupsLoadFunc = cgroups.Load +var cgroupsNewFunc = cgroups.New - // create sub-cgroup for each sandbox - // e.g. /sys/fs/cgroup/cpu/vc/ - sandboxSub, err := parent.New(s.id, &specs.LinuxResources{}) +// V1Constraints returns the cgroups that are compatible with th VC architecture +// and hypervisor, constraints can be applied to these cgroups. +func V1Constraints() ([]cgroups.Subsystem, error) { + root, err := cgroupV1MountPoint() if err != nil { - return fmt.Errorf("failed to create cgroup for %s/%s", defaultCgroupParent, s.id) + return nil, err } + subsystems := []cgroups.Subsystem{ + cgroups.NewCputset(root), + cgroups.NewCpu(root), + cgroups.NewCpuacct(root), + } + return cgroupsSubsystems(subsystems) +} - // create sub-cgroup for vcpu threads - vcpuSub, err := sandboxSub.New(vcpuGroupName, &specs.LinuxResources{}) +// V1NoConstraints returns the cgroups that are *not* compatible with th VC +// architecture and hypervisor, constraints MUST NOT be applied to these cgroups. +func V1NoConstraints() ([]cgroups.Subsystem, error) { + root, err := cgroupV1MountPoint() if err != nil { - return fmt.Errorf("failed to create cgroup for %s/%s/%s", defaultCgroupParent, s.id, vcpuGroupName) + return nil, err + } + subsystems := []cgroups.Subsystem{ + // Some constainers managers, like k8s, take the control of cgroups. + // k8s: the memory cgroup for the dns containers is small to place + // a hypervisor there. + cgroups.NewMemory(root), } + return cgroupsSubsystems(subsystems) +} - s.cgroup = &sandboxCgroups{ - commonParent: parent, - sandboxSub: sandboxSub, - vcpuSub: vcpuSub, +func cgroupsSubsystems(subsystems []cgroups.Subsystem) ([]cgroups.Subsystem, error) { + var enabled []cgroups.Subsystem + for _, s := range cgroupPathers(subsystems) { + // check and remove the default groups that do not exist + if _, err := os.Lstat(s.Path("/")); err == nil { + enabled = append(enabled, s) + } } - return nil + return enabled, nil } -func (s *Sandbox) destroyCgroups() error { - if s.cgroup == nil { - s.Logger().Warningf("cgroup is not initialized, no need to destroy") - return nil +func cgroupPathers(subystems []cgroups.Subsystem) []cgroupPather { + var out []cgroupPather + for _, s := range subystems { + if p, ok := s.(cgroupPather); ok { + out = append(out, p) + } } + return out +} - // first move all processes in subgroup to parent in case live process blocks - // deletion of cgroup - if err := s.cgroup.sandboxSub.MoveTo(s.cgroup.commonParent); err != nil { - return fmt.Errorf("failed to clear cgroup processes") +// v1MountPoint returns the mount point where the cgroup +// mountpoints are mounted in a single hiearchy +func cgroupV1MountPoint() (string, error) { + f, err := os.Open("/proc/self/mountinfo") + if err != nil { + return "", err } + defer f.Close() + scanner := bufio.NewScanner(f) + for scanner.Scan() { + if err := scanner.Err(); err != nil { + return "", err + } + var ( + text = scanner.Text() + fields = strings.Split(text, " ") + // safe as mountinfo encodes mountpoints with spaces as \040. + index = strings.Index(text, " - ") + postSeparatorFields = strings.Fields(text[index+3:]) + numPostFields = len(postSeparatorFields) + ) + // this is an error as we can't detect if the mount is for "cgroup" + if numPostFields == 0 { + return "", fmt.Errorf("Found no fields post '-' in %q", text) + } + if postSeparatorFields[0] == "cgroup" { + // check that the mount is properly formated. + if numPostFields < 3 { + return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) + } + return filepath.Dir(fields[4]), nil + } + } + return "", cgroups.ErrMountPointNotExist +} - return s.cgroup.sandboxSub.Delete() +func cgroupNoConstraintsPath(path string) string { + return filepath.Join(cgroupKataPath, path) } -func (s *Sandbox) setupCgroups() error { - if s.cgroup == nil { - return fmt.Errorf("failed to setup uninitialized cgroup for sandbox") +func (s *Sandbox) updateCgroups() error { + if s.state.CgroupPath == "" { + s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty") + return nil } - resource, err := s.mergeSpecResource() + cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath)) if err != nil { - return err + return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err) } - if err := s.applyCPUCgroup(resource); err != nil { + if err := s.constrainHypervisor(cgroup); err != nil { return err } - return nil -} -func (s *Sandbox) applyCPUCgroup(rc *specs.LinuxResources) error { - if s.cgroup == nil { - return fmt.Errorf("failed to setup uninitialized cgroup for sandbox") + if len(s.containers) <= 1 { + // nothing to update + return nil } - // apply cpu constraint to vcpu cgroup - if err := s.cgroup.vcpuSub.Update(rc); err != nil { + resources, err := s.resources() + if err != nil { return err } - // when new container joins, new CPU could be hotplugged, so we - // have to query fresh vcpu info from hypervisor for every time. - tids, err := s.hypervisor.getThreadIDs() - if err != nil { - return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) + if err := cgroup.Update(&resources); err != nil { + return fmt.Errorf("Could not update cgroup %v: %v", s.state.CgroupPath, err) } - if tids == nil { - // If there's no tid returned from the hypervisor, this is not - // a bug. It simply means there is nothing to constrain, hence - // let's return without any error from here. + + return nil +} + +func (s *Sandbox) deleteCgroups() error { + path := cgroupNoConstraintsPath(s.state.CgroupPath) + s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup") + noConstraintsCgroup, err := cgroupsLoadFunc(V1NoConstraints, cgroups.StaticPath(path)) + if err == cgroups.ErrCgroupDeleted { + // cgroup already deleted return nil } - // use Add() to add vcpu thread to s.cgroup, it will write thread id to - // `cgroup.procs` which will move all threads in qemu process to this cgroup - // immediately as default behaviour. - if len(tids.vcpus) > 0 { - if err := s.cgroup.sandboxSub.Add(cgroups.Process{ - Pid: tids.vcpus[0], - }); err != nil { - return err - } + if err != nil { + return fmt.Errorf("Could not load cgroup without constraints %v: %v", path, err) } - for _, i := range tids.vcpus { - if i <= 0 { - continue - } + return noConstraintsCgroup.Delete() +} - // In contrast, AddTask will write thread id to `tasks` - // After this, vcpu threads are in "vcpu" sub-cgroup, other threads in - // qemu will be left in parent cgroup untouched. - if err := s.cgroup.vcpuSub.AddTask(cgroups.Process{ - Pid: i, - }); err != nil { - return err - } +func (s *Sandbox) constrainHypervisor(cgroup cgroups.Cgroup) error { + pid := s.hypervisor.pid() + if pid <= 0 { + return fmt.Errorf("Invalid hypervisor PID: %d", pid) } - return nil + resources := &specs.LinuxResources{} + path := cgroupNoConstraintsPath(s.state.CgroupPath) + noConstraintsCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources) + if err != nil { + return fmt.Errorf("Could not create cgroup %v: %v", path, err) + } + + if err := noConstraintsCgroup.Add(cgroups.Process{Pid: pid}); err != nil { + return fmt.Errorf("Could not add hypervisor PID %d to cgroup %v: %v", pid, path, err) + } + + // Add qemu into cgroup + return cgroup.Add(cgroups.Process{Pid: pid}) } -func (s *Sandbox) mergeSpecResource() (*specs.LinuxResources, error) { - if s.config == nil { - return nil, fmt.Errorf("sandbox config is nil") +func (s *Sandbox) resources() (specs.LinuxResources, error) { + resources := specs.LinuxResources{ + CPU: s.cpuResources(), } - resource := &specs.LinuxResources{ - CPU: &specs.LinuxCPU{}, + return resources, nil +} + +func (s *Sandbox) cpuResources() *specs.LinuxCPU { + quota := int64(0) + period := uint64(0) + shares := uint64(0) + realtimePeriod := uint64(0) + realtimeRuntime := int64(0) + + cpu := &specs.LinuxCPU{ + Quota: "a, + Period: &period, + Shares: &shares, + RealtimePeriod: &realtimePeriod, + RealtimeRuntime: &realtimeRuntime, } - for _, c := range s.config.Containers { - config, ok := c.Annotations[annotations.ConfigJSONKey] - if !ok { - s.Logger().WithField("container", c.ID).Warningf("failed to find config from container annotations") + for _, c := range s.containers { + if s.state.Pid == c.process.Pid { + // skip sandbox container continue } - var spec specs.Spec - if err := json.Unmarshal([]byte(config), &spec); err != nil { - return nil, err + if c.state.Resources.CPU == nil { + continue } - // TODO: how to handle empty/unlimited resource? - // maybe we should add a default CPU/Memory delta when no - // resource limit is given. -- @WeiZhang555 - if spec.Linux == nil || spec.Linux.Resources == nil { - continue + if c.state.Resources.CPU.Shares != nil { + shares = uint64(math.Max(float64(*c.state.Resources.CPU.Shares), float64(shares))) + } + + if c.state.Resources.CPU.Quota != nil { + quota += *c.state.Resources.CPU.Quota + } + + if c.state.Resources.CPU.Period != nil { + period = uint64(math.Max(float64(*c.state.Resources.CPU.Period), float64(period))) + } + + if c.state.Resources.CPU.Cpus != "" { + cpu.Cpus += c.state.Resources.CPU.Cpus + "," + } + + if c.state.Resources.CPU.RealtimeRuntime != nil { + realtimeRuntime += *c.state.Resources.CPU.RealtimeRuntime + } + + if c.state.Resources.CPU.RealtimePeriod != nil { + realtimePeriod += *c.state.Resources.CPU.RealtimePeriod } - // calculate cpu quota and period - s.mergeCPUResource(resource, spec.Linux.Resources) + + if c.state.Resources.CPU.Mems != "" { + cpu.Mems += c.state.Resources.CPU.Mems + "," + } + } + + cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,") + + // use a default constraint for sandboxes without cpu constraints + if period == uint64(0) && quota == int64(0) { + // set a quota and period equal to the default number of vcpus + quota = int64(s.config.HypervisorConfig.NumVCPUs) * 100000 + period = 100000 } - return resource, nil + + return validCPUResources(cpu) } -func (s *Sandbox) mergeCPUResource(orig, rc *specs.LinuxResources) { - if orig.CPU == nil { - orig.CPU = &specs.LinuxCPU{} - } - - if rc.CPU != nil && rc.CPU.Quota != nil && rc.CPU.Period != nil && - *rc.CPU.Quota > 0 && *rc.CPU.Period > 0 { - if orig.CPU.Period == nil { - orig.CPU.Period = rc.CPU.Period - orig.CPU.Quota = rc.CPU.Quota - } else { - // this is an example to show how it works: - // container A and `orig` has quota: 5000 and period 10000 - // here comes container B with quota 40 and period 100, - // so use previous period 10000 as a baseline, container B - // has proportional resource of quota 4000 and period 10000, calculated as - // delta := 40 / 100 * 10000 = 4000 - // and final `*orig.CPU.Quota` = 5000 + 4000 = 9000 - delta := float64(*rc.CPU.Quota) / float64(*rc.CPU.Period) * float64(*orig.CPU.Period) - *orig.CPU.Quota += int64(delta) - } +// validCPUResources checks CPU resources coherency +func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU { + if cpuSpec == nil { + return nil + } + + cpu := *cpuSpec + if cpu.Period != nil && *cpu.Period < 1 { + cpu.Period = nil } + + if cpu.Quota != nil && *cpu.Quota < 1 { + cpu.Quota = nil + } + + if cpu.Shares != nil && *cpu.Shares < 1 { + cpu.Shares = nil + } + + if cpu.RealtimePeriod != nil && *cpu.RealtimePeriod < 1 { + cpu.RealtimePeriod = nil + } + + if cpu.RealtimeRuntime != nil && *cpu.RealtimeRuntime < 1 { + cpu.RealtimeRuntime = nil + } + + return &cpu } diff --git a/virtcontainers/cgroups_test.go b/virtcontainers/cgroups_test.go index 1a89690fb4..42eb4f5bef 100644 --- a/virtcontainers/cgroups_test.go +++ b/virtcontainers/cgroups_test.go @@ -6,206 +6,182 @@ package virtcontainers import ( - "bufio" - "encoding/json" "fmt" - "io/ioutil" "os" + "os/exec" "path/filepath" - "reflect" - "strings" "testing" + "github.com/containerd/cgroups" + "github.com/kata-containers/runtime/virtcontainers/types" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/assert" - - "github.com/kata-containers/runtime/virtcontainers/pkg/annotations" ) -func getCgroupDestination(subsystem string) (string, error) { - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", err - } - defer f.Close() - s := bufio.NewScanner(f) - for s.Scan() { - if err := s.Err(); err != nil { - return "", err - } - fields := strings.Fields(s.Text()) - for _, opt := range strings.Split(fields[len(fields)-1], ",") { - if opt == subsystem { - return fields[4], nil - } - } - } - return "", fmt.Errorf("failed to find cgroup mountpoint for %q", subsystem) +type mockCgroup struct { } -func TestMergeSpecResource(t *testing.T) { - s := &Sandbox{ - config: &SandboxConfig{ - Containers: []ContainerConfig{ - { - ID: "containerA", - Annotations: make(map[string]string), - }, - { - ID: "containerA", - Annotations: make(map[string]string), - }, - }, - }, - } +func (m *mockCgroup) New(string, *specs.LinuxResources) (cgroups.Cgroup, error) { + return &mockCgroup{}, nil +} +func (m *mockCgroup) Add(cgroups.Process) error { + return nil +} - contA := s.config.Containers[0] - contB := s.config.Containers[1] +func (m *mockCgroup) AddTask(cgroups.Process) error { + return nil +} - getIntP := func(x int64) *int64 { return &x } - getUintP := func(x uint64) *uint64 { return &x } +func (m *mockCgroup) Delete() error { + return nil +} - type testData struct { - first *specs.LinuxResources - second *specs.LinuxResources - expected *specs.LinuxResources - } +func (m *mockCgroup) MoveTo(cgroups.Cgroup) error { + return nil +} - for _, testdata := range []testData{ - { - nil, - nil, - &specs.LinuxResources{CPU: &specs.LinuxCPU{}}, - }, - { - nil, - &specs.LinuxResources{}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{}}, - }, - { - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(0), Period: getUintP(100000)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}}, - }, - { - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(10000), Period: getUintP(0)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}}, - }, - { - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(1000), Period: getUintP(2000)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(20000), Period: getUintP(100000)}}, - &specs.LinuxResources{CPU: &specs.LinuxCPU{Quota: getIntP(1400), Period: getUintP(2000)}}, - }, - } { - data, err := json.Marshal(&specs.Spec{ - Linux: &specs.Linux{ - Resources: testdata.first, - }, - }) - assert.Nil(t, err) - contA.Annotations[annotations.ConfigJSONKey] = string(data) +func (m *mockCgroup) Stat(...cgroups.ErrorHandler) (*cgroups.Metrics, error) { + return &cgroups.Metrics{}, nil +} - data, err = json.Marshal(&specs.Spec{ - Linux: &specs.Linux{ - Resources: testdata.second, - }, - }) - assert.Nil(t, err) - contB.Annotations[annotations.ConfigJSONKey] = string(data) +func (m *mockCgroup) Update(resources *specs.LinuxResources) error { + return nil +} - rc, err := s.mergeSpecResource() - assert.Nil(t, err) - assert.True(t, reflect.DeepEqual(rc, testdata.expected), "should be equal, got: %#v, expected: %#v", rc, testdata.expected) - } +func (m *mockCgroup) Processes(cgroups.Name, bool) ([]cgroups.Process, error) { + return nil, nil } -func TestSetupCgroups(t *testing.T) { - if os.Geteuid() != 0 { - t.Skip("Test disabled as requires root privileges") - } +func (m *mockCgroup) Freeze() error { + return nil +} + +func (m *mockCgroup) Thaw() error { + return nil +} + +func (m *mockCgroup) OOMEventFD() (uintptr, error) { + return 0, nil +} + +func (m *mockCgroup) State() cgroups.State { + return "" +} + +func (m *mockCgroup) Subsystems() []cgroups.Subsystem { + return nil +} + +func mockCgroupNew(hierarchy cgroups.Hierarchy, path cgroups.Path, resources *specs.LinuxResources) (cgroups.Cgroup, error) { + return &mockCgroup{}, nil +} + +func mockCgroupLoad(hierarchy cgroups.Hierarchy, path cgroups.Path) (cgroups.Cgroup, error) { + return &mockCgroup{}, nil +} + +func init() { + cgroupsNewFunc = mockCgroupNew + cgroupsLoadFunc = mockCgroupLoad +} + +func TestV1Constraints(t *testing.T) { + assert := assert.New(t) + + systems, err := V1Constraints() + assert.NoError(err) + assert.NotEmpty(systems) +} + +func TestV1NoConstraints(t *testing.T) { + assert := assert.New(t) + + systems, err := V1NoConstraints() + assert.NoError(err) + assert.NotEmpty(systems) +} + +func TestCgroupNoConstraintsPath(t *testing.T) { + assert := assert.New(t) + + cgrouPath := "abc" + expectedPath := filepath.Join(cgroupKataPath, cgrouPath) + path := cgroupNoConstraintsPath(cgrouPath) + assert.Equal(expectedPath, path) +} + +func TestUpdateCgroups(t *testing.T) { + assert := assert.New(t) + + oldCgroupsNew := cgroupsNewFunc + oldCgroupsLoad := cgroupsLoadFunc + cgroupsNewFunc = cgroups.New + cgroupsLoadFunc = cgroups.Load + defer func() { + cgroupsNewFunc = oldCgroupsNew + cgroupsLoadFunc = oldCgroupsLoad + }() s := &Sandbox{ - id: "test-sandbox", - hypervisor: &mockHypervisor{}, - config: &SandboxConfig{ - Containers: []ContainerConfig{ - { - ID: "containerA", - Annotations: make(map[string]string), - }, - { - ID: "containerA", - Annotations: make(map[string]string), - }, - }, + state: types.State{ + CgroupPath: "", }, } - contA := s.config.Containers[0] - contB := s.config.Containers[1] + // empty path + err := s.updateCgroups() + assert.NoError(err) - getIntP := func(x int64) *int64 { return &x } - getUintP := func(x uint64) *uint64 { return &x } + // path doesn't exist + s.state.CgroupPath = "/abc/123/rgb" + err = s.updateCgroups() + assert.Error(err) + + if os.Getuid() != 0 { + return + } - data, err := json.Marshal(&specs.Spec{ - Linux: &specs.Linux{ - Resources: &specs.LinuxResources{ - CPU: &specs.LinuxCPU{ - Quota: getIntP(5000), - Period: getUintP(10000), - }, + s.state.CgroupPath = fmt.Sprintf("/kata-tests-%d", os.Getpid()) + testCgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(s.state.CgroupPath), &specs.LinuxResources{}) + assert.NoError(err) + defer testCgroup.Delete() + s.hypervisor = &mockHypervisor{mockPid: 0} + + // bad pid + err = s.updateCgroups() + assert.Error(err) + + // fake workload + cmd := exec.Command("tail", "-f", "/dev/null") + assert.NoError(cmd.Start()) + s.state.Pid = cmd.Process.Pid + s.hypervisor = &mockHypervisor{mockPid: s.state.Pid} + + // no containers + err = s.updateCgroups() + assert.NoError(err) + + s.config = &SandboxConfig{} + s.config.HypervisorConfig.NumVCPUs = 1 + + s.containers = map[string]*Container{ + "abc": { + process: Process{ + Pid: s.state.Pid, }, }, - }) - assert.Nil(t, err) - contA.Annotations[annotations.ConfigJSONKey] = string(data) - - data, err = json.Marshal(&specs.Spec{ - Linux: &specs.Linux{ - Resources: &specs.LinuxResources{ - CPU: &specs.LinuxCPU{ - Quota: getIntP(10000), - Period: getUintP(40000), - }, + "xyz": { + process: Process{ + Pid: s.state.Pid, }, }, - }) - assert.Nil(t, err) - contB.Annotations[annotations.ConfigJSONKey] = string(data) - - err = s.newCgroups() - assert.Nil(t, err, "failed to create cgroups") - - defer s.destroyCgroups() - - // test if function works without error - err = s.setupCgroups() - assert.Nil(t, err, "setup host cgroup failed") - - // test if the quota and period value are written into cgroup files - cpu, err := getCgroupDestination("cpu") - assert.Nil(t, err, "failed to get cpu cgroup path") - assert.NotEqual(t, "", cpu, "cpu cgroup value can't be empty") - - parentDir := filepath.Join(cpu, defaultCgroupParent, "test-sandbox", "vcpu") - quotaFile := filepath.Join(parentDir, "cpu.cfs_quota_us") - periodFile := filepath.Join(parentDir, "cpu.cfs_period_us") - - expectedQuota := "7500\n" - expectedPeriod := "10000\n" - - fquota, err := os.Open(quotaFile) - assert.Nil(t, err, "open file %q failed", quotaFile) - defer fquota.Close() - data, err = ioutil.ReadAll(fquota) - assert.Nil(t, err) - assert.Equal(t, expectedQuota, string(data), "failed to get expected cfs_quota") - - fperiod, err := os.Open(periodFile) - assert.Nil(t, err, "open file %q failed", periodFile) - defer fperiod.Close() - data, err = ioutil.ReadAll(fperiod) - assert.Nil(t, err) - assert.Equal(t, expectedPeriod, string(data), "failed to get expected cfs_period") + } + + err = s.updateCgroups() + assert.NoError(err) + + // cleanup + assert.NoError(cmd.Process.Kill()) + err = s.deleteCgroups() + assert.NoError(err) } diff --git a/virtcontainers/sandbox.go b/virtcontainers/sandbox.go index 1a9e715db7..0aa0376bc3 100644 --- a/virtcontainers/sandbox.go +++ b/virtcontainers/sandbox.go @@ -233,8 +233,6 @@ type Sandbox struct { seccompSupported bool ctx context.Context - - cgroup *sandboxCgroups } // ID returns the sandbox identifier string. @@ -602,11 +600,6 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return nil, err } - // create new cgroup for sandbox - if err := s.newCgroups(); err != nil { - return nil, err - } - return s, nil } @@ -724,10 +717,8 @@ func (s *Sandbox) Delete() error { } } - // destroy sandbox cgroup - if err := s.destroyCgroups(); err != nil { - // continue the removal process even cgroup failed to destroy - s.Logger().WithError(err).Error("failed to destroy cgroup") + if err := s.deleteCgroups(); err != nil { + return err } globalSandboxList.removeSandbox(s.id) @@ -1071,10 +1062,14 @@ func (s *Sandbox) CreateContainer(contConfig ContainerConfig) (VCContainer, erro return nil, err } - // Setup host cgroups for new container - if err := s.setupCgroups(); err != nil { + if err := s.setCgroupPath(c); err != nil { return nil, err } + + if err := s.updateCgroups(); err != nil { + return nil, err + } + return c, nil } @@ -1226,6 +1221,10 @@ func (s *Sandbox) UpdateContainer(containerID string, resources specs.LinuxResou return err } + if err := s.updateCgroups(); err != nil { + return err + } + return c.storeContainer() } @@ -1291,6 +1290,14 @@ func (s *Sandbox) createContainers() error { if err := s.addContainer(c); err != nil { return err } + + if err := s.setCgroupPath(c); err != nil { + return err + } + } + + if err := s.updateCgroups(); err != nil { + return err } return nil @@ -1462,6 +1469,20 @@ func (s *Sandbox) setSandboxPid(pid int) error { return s.storage.storeSandboxResource(s.id, stateFileType, s.state) } +// setCgroupPath sets the cgroup for the sandbox, sandbox's processes including +// the hypervisor are placed there. +func (s *Sandbox) setCgroupPath(c *Container) error { + if s.state.Pid != c.process.Pid { + // container is not the sandbox + return nil + } + + s.state.CgroupPath = c.state.CgroupPath + + // update on-disk state + return s.storage.storeSandboxResource(s.id, stateFileType, s.state) +} + func (s *Sandbox) setContainersState(state types.StateString) error { if state == "" { return errNeedState diff --git a/virtcontainers/sandbox_test.go b/virtcontainers/sandbox_test.go index 39701332d3..d264c45d6a 100644 --- a/virtcontainers/sandbox_test.go +++ b/virtcontainers/sandbox_test.go @@ -52,6 +52,7 @@ func testCreateSandbox(t *testing.T, id string, NetworkConfig: nconfig, Volumes: volumes, Containers: containers, + Annotations: sandboxAnnotations, } sandbox, err := createSandbox(context.Background(), sconfig, nil) @@ -936,7 +937,8 @@ func TestSandboxGetContainer(t *testing.T) { func TestContainerSetStateBlockIndex(t *testing.T) { containers := []ContainerConfig{ { - ID: "100", + ID: "100", + Annotations: containerAnnotations, }, } @@ -1035,7 +1037,8 @@ func TestContainerStateSetFstype(t *testing.T) { containers := []ContainerConfig{ { - ID: "100", + ID: "100", + Annotations: containerAnnotations, }, }