From 1c68f50378b3562e7497d42973ba610945c4bc66 Mon Sep 17 00:00:00 2001 From: Jeremi Piotrowski Date: Fri, 3 Dec 2021 10:26:23 +0000 Subject: [PATCH] cgroup2: monitor OOMKill instead of OOM to prevent missing container OOM events With the cgroupv2 configuration employed by Kubernetes, the pod cgroup (slice) and container cgroup (scope) will both have the same memory limit applied. In that situation, the kernel will consider an OOM event to be triggered by the parent cgroup (slice), and increment 'oom' there. The child cgroup (scope) only sees an oom_kill increment. Since we monitor child cgroups for oom events, check the OOMKill field so that we don't miss events. This is not visible when running containers through docker or ctr, because they set the limits differently (only container level). An alternative would be to not configure limits at the pod level - that way the container limit will be hit and the OOM will be correctly generated. An interesting consequence is that when spawning a pod with multiple containers, the oom events also work correctly, because: a) if one of the containers has no limit, the pod has no limit so OOM events in another container report correctly. b) if all of the containers have limits then the pod limit will be a sum of container limits, so a container will be able to hit its limit first. 
Signed-off-by: Jeremi Piotrowski (cherry picked from commit 7275411ec811f1294e1b3466bc24e963ea90f002) Signed-off-by: Akihiro Suda --- pkg/oom/v2/v2.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/oom/v2/v2.go b/pkg/oom/v2/v2.go index a956b1c86676..f45051a2530e 100644 --- a/pkg/oom/v2/v2.go +++ b/pkg/oom/v2/v2.go @@ -70,15 +70,15 @@ func (w *watcher) Run(ctx context.Context) { continue } lastOOM := lastOOMMap[i.id] - if i.ev.OOM > lastOOM { + if i.ev.OOMKill > lastOOM { if err := w.publisher.Publish(ctx, runtime.TaskOOMEventTopic, &eventstypes.TaskOOM{ ContainerID: i.id, }); err != nil { logrus.WithError(err).Error("publish OOM event") } } - if i.ev.OOM > 0 { - lastOOMMap[i.id] = i.ev.OOM + if i.ev.OOMKill > 0 { + lastOOMMap[i.id] = i.ev.OOMKill } } }