From 53c733e0bacd44d52cbe58a31dbc1ff2ca0d5403 Mon Sep 17 00:00:00 2001 From: Akihiro Suda Date: Mon, 19 Dec 2022 10:50:18 +0900 Subject: [PATCH] cri: fix `memory.memsw.limit_in_bytes: no such file or directory` Skip automatic `if swapLimit == 0 { s.Linux.Resources.Memory.Swap = &limit }` when the swap controller is missing. (default on Ubuntu 20.04) Fix issue 7828 (regression in PR 7783 "cri: make swapping disabled with memory limit") Cherry-Pick was not cleaned, required updated cgroups library from v1.0.3 to v1.0.4 Signed-off-by: Akihiro Suda (cherry picked from commit 41575038819551c255ea2385dfe6c63cb7048dca) Signed-off-by: Derek McGowan --- go.mod | 2 +- go.sum | 3 +- integration/client/go.mod | 2 +- integration/client/go.sum | 4 +- pkg/cri/opts/spec_linux.go | 33 ++++++++++++- .../github.com/containerd/cgroups/README.md | 2 +- .../github.com/containerd/cgroups/Vagrantfile | 8 ++-- vendor/github.com/containerd/cgroups/utils.go | 29 ++++++++---- .../containerd/cgroups/v2/manager.go | 46 +++++++++++++++++-- .../containerd/cgroups/v2/memory.go | 7 +++ .../github.com/containerd/cgroups/v2/utils.go | 2 +- vendor/modules.txt | 4 +- 12 files changed, 113 insertions(+), 29 deletions(-) diff --git a/go.mod b/go.mod index 9b4e65e237fe..e208c17dfb14 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,7 @@ require ( github.com/Microsoft/hcsshim v0.9.6 github.com/containerd/aufs v1.0.0 github.com/containerd/btrfs v1.0.0 - github.com/containerd/cgroups v1.0.3 + github.com/containerd/cgroups v1.0.4 github.com/containerd/console v1.0.3 github.com/containerd/continuity v0.3.0 github.com/containerd/fifo v1.0.0 diff --git a/go.sum b/go.sum index 47832e81dd3f..a9d1902afebd 100644 --- a/go.sum +++ b/go.sum @@ -183,8 +183,9 @@ github.com/containerd/cgroups v0.0.0-20200710171044-318312a37340/go.mod h1:s5q4S github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= github.com/containerd/cgroups v0.0.0-20210114181951-8a68de567b68/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= -github.com/containerd/cgroups v1.0.3 h1:ADZftAkglvCiD44c77s5YmMqaP2pzVCFZvBmAlBdAP4= github.com/containerd/cgroups v1.0.3/go.mod h1:/ofk34relqNjSGyqPrmEULrO4Sc8LJhvJmWbUCUKqj8= +github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA= +github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA= github.com/containerd/console v0.0.0-20180822173158-c12b1e7919c1/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20181022165439-0650fd9eeb50/go.mod h1:Tj/on1eG8kiEhd0+fhSDzsPAFESxzBBvdyEgyryXffw= github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= diff --git a/integration/client/go.mod b/integration/client/go.mod index 7cae31ddfd62..2cd10f34119f 100644 --- a/integration/client/go.mod +++ b/integration/client/go.mod @@ -5,7 +5,7 @@ go 1.15 require ( github.com/Microsoft/hcsshim v0.9.6 github.com/Microsoft/hcsshim/test v0.0.0-20210408205431-da33ecd607e1 - github.com/containerd/cgroups v1.0.3 + github.com/containerd/cgroups v1.0.4 // the actual version of containerd is replaced with the code at the root of this repository github.com/containerd/containerd v1.6.1 github.com/containerd/go-runc v1.0.0 diff --git a/integration/client/go.sum b/integration/client/go.sum index b59a28f7aac6..dbad78fc1385 100644 --- a/integration/client/go.sum +++ b/integration/client/go.sum @@ -121,8 +121,8 @@ github.com/containerd/aufs v1.0.0/go.mod h1:kL5kd6KM5TzQjR79jljyi4olc1Vrx6XBlcyj github.com/containerd/btrfs v1.0.0/go.mod h1:zMcX3qkXTAi9GI50+0HOeuV8LU2ryCE/V2vG/ZBiTss= github.com/containerd/cgroups v0.0.0-20200824123100-0b889c03f102/go.mod h1:s5q4SojHctfxANBDvMeIaIovkq29IP48TKAxnhYRxvo= github.com/containerd/cgroups v1.0.1/go.mod h1:0SJrPIenamHDcZhEcJMNBB85rHcUsw4f25ZfBiPYRkU= -github.com/containerd/cgroups v1.0.3 h1:ADZftAkglvCiD44c77s5YmMqaP2pzVCFZvBmAlBdAP4= -github.com/containerd/cgroups v1.0.3/go.mod h1:/ofk34relqNjSGyqPrmEULrO4Sc8LJhvJmWbUCUKqj8= +github.com/containerd/cgroups v1.0.4 h1:jN/mbWBEaz+T1pi5OFtnkQ+8qnmEbAr1Oo1FRm5B0dA= +github.com/containerd/cgroups v1.0.4/go.mod h1:nLNQtsF7Sl2HxNebu77i1R0oDlhiTG+kO4JTrUzo6IA= github.com/containerd/console v0.0.0-20191206165004-02ecf6a7291e/go.mod h1:8Pf4gM6VEbTNRIT26AyyU7hxdQU3MvAvxVI0sc00XBE= github.com/containerd/console v1.0.1/go.mod h1:XUsP6YE/mKtz6bxc+I8UiKKTP04qjQL4qcS3XoQ5xkw= github.com/containerd/console v1.0.2/go.mod h1:ytZPjGgY2oeTkAONYafi2kSj0aYggsf8acV1PGKCbzQ= diff --git a/pkg/cri/opts/spec_linux.go b/pkg/cri/opts/spec_linux.go index 17147020c056..1baf5f30a414 100644 --- a/pkg/cri/opts/spec_linux.go +++ b/pkg/cri/opts/spec_linux.go @@ -28,6 +28,7 @@ import ( "sync" "syscall" + "github.com/containerd/cgroups" "github.com/containerd/containerd/containers" "github.com/containerd/containerd/log" "github.com/containerd/containerd/mount" @@ -403,6 +404,36 @@ func WithSelinuxLabels(process, mount string) oci.SpecOpts { } } +var ( + swapControllerAvailability bool + swapControllerAvailabilityOnce sync.Once +) + +func swapControllerAvailable() bool { + swapControllerAvailabilityOnce.Do(func() { + const warn = "Failed to detect the availability of the swap controller, assuming not available" + p := "/sys/fs/cgroup/memory/memory.memsw.limit_in_bytes" + if cgroups.Mode() == cgroups.Unified { + // memory.swap.max does not exist in the cgroup root, so we check /sys/fs/cgroup//memory.swap.max + _, unified, err := cgroups.ParseCgroupFileUnified("/proc/self/cgroup") + if err != nil { + err = fmt.Errorf("failed to parse /proc/self/cgroup: %w", err) + logrus.WithError(err).Warn(warn) + return + } + p = filepath.Join("/sys/fs/cgroup", unified, "memory.swap.max") + } + if _, err := os.Stat(p); err != nil { + if !errors.Is(err, os.ErrNotExist) { + logrus.WithError(err).Warn(warn) + } + return + } + swapControllerAvailability = true + }) + return swapControllerAvailability +} + // WithResources sets the provided resource restrictions func WithResources(resources *runtime.LinuxContainerResources, tolerateMissingHugetlbController, disableHugetlbController bool) oci.SpecOpts { return func(ctx context.Context, client oci.Client, c *containers.Container, s *runtimespec.Spec) (err error) { @@ -448,7 +479,7 @@ func WithResources(resources *runtime.LinuxContainerResources, tolerateMissingHu if limit != 0 { s.Linux.Resources.Memory.Limit = &limit // swap/memory limit should be equal to prevent container from swapping by default - if swapLimit == 0 { + if swapLimit == 0 && swapControllerAvailable() { s.Linux.Resources.Memory.Swap = &limit } } diff --git a/vendor/github.com/containerd/cgroups/README.md b/vendor/github.com/containerd/cgroups/README.md index fc2c7a9be4d2..eccb9d984555 100644 --- a/vendor/github.com/containerd/cgroups/README.md +++ b/vendor/github.com/containerd/cgroups/README.md @@ -26,7 +26,7 @@ uses the v1 implementation of cgroups. ```go shares := uint64(100) control, err := cgroups.New(cgroups.V1, cgroups.StaticPath("/test"), &specs.LinuxResources{ - CPU: &specs.CPU{ + CPU: &specs.LinuxCPU{ Shares: &shares, }, }) diff --git a/vendor/github.com/containerd/cgroups/Vagrantfile b/vendor/github.com/containerd/cgroups/Vagrantfile index 4596ad8a7d45..9a4aac8cb344 100644 --- a/vendor/github.com/containerd/cgroups/Vagrantfile +++ b/vendor/github.com/containerd/cgroups/Vagrantfile @@ -3,19 +3,19 @@ Vagrant.configure("2") do |config| # Fedora box is used for testing cgroup v2 support - config.vm.box = "fedora/32-cloud-base" + config.vm.box = "fedora/35-cloud-base" config.vm.provider :virtualbox do |v| - v.memory = 2048 + v.memory = 4096 v.cpus = 2 end config.vm.provider :libvirt do |v| - v.memory = 2048 + v.memory = 4096 v.cpus = 2 end config.vm.provision "shell", inline: <<-SHELL set -eux -o pipefail # configuration - GO_VERSION="1.15" + GO_VERSION="1.17.7" # install gcc and Golang dnf -y install gcc diff --git a/vendor/github.com/containerd/cgroups/utils.go b/vendor/github.com/containerd/cgroups/utils.go index 2297980d93ce..217138975682 100644 --- a/vendor/github.com/containerd/cgroups/utils.go +++ b/vendor/github.com/containerd/cgroups/utils.go @@ -261,21 +261,28 @@ func parseKV(raw string) (string, uint64, error) { // "pids": "/user.slice/user-1000.slice" // etc. // -// Note that for cgroup v2 unified hierarchy, there are no per-controller -// cgroup paths, so the resulting map will have a single element where the key -// is empty string ("") and the value is the cgroup path the is in. +// The resulting map does not have an element for cgroup v2 unified hierarchy. +// Use ParseCgroupFileUnified to get the unified path. func ParseCgroupFile(path string) (map[string]string, error) { + x, _, err := ParseCgroupFileUnified(path) + return x, err +} + +// ParseCgroupFileUnified returns legacy subsystem paths as the first value, +// and returns the unified path as the second value. +func ParseCgroupFileUnified(path string) (map[string]string, string, error) { f, err := os.Open(path) if err != nil { - return nil, err + return nil, "", err } defer f.Close() - return parseCgroupFromReader(f) + return parseCgroupFromReaderUnified(f) } -func parseCgroupFromReader(r io.Reader) (map[string]string, error) { +func parseCgroupFromReaderUnified(r io.Reader) (map[string]string, string, error) { var ( cgroups = make(map[string]string) + unified = "" s = bufio.NewScanner(r) ) for s.Scan() { @@ -284,18 +291,20 @@ func parseCgroupFromReader(r io.Reader) (map[string]string, error) { parts = strings.SplitN(text, ":", 3) ) if len(parts) < 3 { - return nil, fmt.Errorf("invalid cgroup entry: %q", text) + return nil, unified, fmt.Errorf("invalid cgroup entry: %q", text) } for _, subs := range strings.Split(parts[1], ",") { - if subs != "" { + if subs == "" { + unified = parts[2] + } else { cgroups[subs] = parts[2] } } } if err := s.Err(); err != nil { - return nil, err + return nil, unified, err } - return cgroups, nil + return cgroups, unified, nil } func getCgroupDestination(subsystem string) (string, error) { diff --git a/vendor/github.com/containerd/cgroups/v2/manager.go b/vendor/github.com/containerd/cgroups/v2/manager.go index afed14c6ef3b..1f017509f119 100644 --- a/vendor/github.com/containerd/cgroups/v2/manager.go +++ b/vendor/github.com/containerd/cgroups/v2/manager.go @@ -240,6 +240,10 @@ func (c *Manager) Controllers() ([]string, error) { return strings.Fields(string(b)), nil } +func (c *Manager) Update(resources *Resources) error { + return setResources(c.path, resources) +} + type ControllerToggle int const ( @@ -701,12 +705,39 @@ func setDevices(path string, devices []specs.LinuxDeviceCgroup) error { return nil } +// getSystemdFullPath returns the full systemd path when creating a systemd slice group. +// the reason this is necessary is because the "-" character has a special meaning in +// systemd slice. For example, when creating a slice called "my-group-112233.slice", +// systemd will create a hierarchy like this: +// /sys/fs/cgroup/my.slice/my-group.slice/my-group-112233.slice +func getSystemdFullPath(slice, group string) string { + return filepath.Join(defaultCgroup2Path, dashesToPath(slice), dashesToPath(group)) +} + +// dashesToPath converts a slice name with dashes to it's corresponding systemd filesystem path. +func dashesToPath(in string) string { + path := "" + if strings.HasSuffix(in, ".slice") && strings.Contains(in, "-") { + parts := strings.Split(in, "-") + for i := range parts { + s := strings.Join(parts[0:i+1], "-") + if !strings.HasSuffix(s, ".slice") { + s += ".slice" + } + path = filepath.Join(path, s) + } + } else { + path = filepath.Join(path, in) + } + return path +} + func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, error) { if slice == "" { slice = defaultSlice } ctx := context.TODO() - path := filepath.Join(defaultCgroup2Path, slice, group) + path := getSystemdFullPath(slice, group) conn, err := systemdDbus.NewWithContext(ctx) if err != nil { return &Manager{}, err @@ -734,12 +765,17 @@ func NewSystemd(slice, group string, pid int, resources *Resources) (*Manager, e properties = append(properties, newSystemdProperty("PIDs", []uint32{uint32(pid)})) } - if resources.Memory != nil && *resources.Memory.Max != 0 { + if resources.Memory != nil && resources.Memory.Min != nil && *resources.Memory.Min != 0 { + properties = append(properties, + newSystemdProperty("MemoryMin", uint64(*resources.Memory.Min))) + } + + if resources.Memory != nil && resources.Memory.Max != nil && *resources.Memory.Max != 0 { properties = append(properties, newSystemdProperty("MemoryMax", uint64(*resources.Memory.Max))) } - if resources.CPU != nil && *resources.CPU.Weight != 0 { + if resources.CPU != nil && resources.CPU.Weight != nil && *resources.CPU.Weight != 0 { properties = append(properties, newSystemdProperty("CPUWeight", *resources.CPU.Weight)) } @@ -796,9 +832,9 @@ func LoadSystemd(slice, group string) (*Manager, error) { if slice == "" { slice = defaultSlice } - group = filepath.Join(defaultCgroup2Path, slice, group) + path := getSystemdFullPath(slice, group) return &Manager{ - path: group, + path: path, }, nil } diff --git a/vendor/github.com/containerd/cgroups/v2/memory.go b/vendor/github.com/containerd/cgroups/v2/memory.go index 72f94b738b8f..6f4733be60ff 100644 --- a/vendor/github.com/containerd/cgroups/v2/memory.go +++ b/vendor/github.com/containerd/cgroups/v2/memory.go @@ -18,6 +18,7 @@ package v2 type Memory struct { Swap *int64 + Min *int64 Max *int64 Low *int64 High *int64 @@ -30,6 +31,12 @@ func (r *Memory) Values() (o []Value) { value: *r.Swap, }) } + if r.Min != nil { + o = append(o, Value{ + filename: "memory.min", + value: *r.Min, + }) + } if r.Max != nil { o = append(o, Value{ filename: "memory.max", diff --git a/vendor/github.com/containerd/cgroups/v2/utils.go b/vendor/github.com/containerd/cgroups/v2/utils.go index 902466f51be2..240c9267798a 100644 --- a/vendor/github.com/containerd/cgroups/v2/utils.go +++ b/vendor/github.com/containerd/cgroups/v2/utils.go @@ -227,7 +227,7 @@ func ToResources(spec *specs.LinuxResources) *Resources { if i := spec.Rdma; i != nil { resources.RDMA = &RDMA{} for device, value := range spec.Rdma { - if device != "" && (value.HcaHandles != nil || value.HcaObjects != nil) { + if device != "" && (value.HcaHandles != nil && value.HcaObjects != nil) { resources.RDMA.Limit = append(resources.RDMA.Limit, RDMAEntry{ Device: device, HcaHandles: *value.HcaHandles, diff --git a/vendor/modules.txt b/vendor/modules.txt index af74ddbd926e..2eb01b6167cb 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -74,8 +74,8 @@ github.com/containerd/aufs/plugin # github.com/containerd/btrfs v1.0.0 ## explicit; go 1.15 github.com/containerd/btrfs -# github.com/containerd/cgroups v1.0.3 -## explicit; go 1.16 +# github.com/containerd/cgroups v1.0.4 +## explicit; go 1.17 github.com/containerd/cgroups github.com/containerd/cgroups/stats/v1 github.com/containerd/cgroups/v2