Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

libcontainer: include cgroupv2 memory.events in Manager.GetStats #4120

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Memory.PSI = cg.MemoryStats.PSI
s.Memory.EventCount = cg.MemoryStats.EventCount

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// memory event for cgroup v2
if err := eventMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// io (since kernel 4.5)
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
33 changes: 33 additions & 0 deletions libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,36 @@ func rootStatsFromMeminfo(stats *cgroups.Stats) error {

return nil
}

// eventMemory reads the memory.events file located under dirPath and records
// the counters it recognises in stats.MemoryStats.EventCount.
//
// The existing EventCount is reset once the file has been opened, and the
// counters are then written in place as each line is parsed. Lines whose key
// is not one of max, low, high, oom or oom_kill are ignored.
//
// An error from opening the file is returned unchanged. A line that
// fscommon.ParseKeyValue rejects, or a scanner failure, is returned as a
// *parseError carrying dirPath and the file name.
func eventMemory(dirPath string, stats *cgroups.Stats) error {
	const fileName = "memory.events"

	f, err := cgroups.OpenFile(dirPath, fileName, os.O_RDONLY)
	if err != nil {
		return err
	}
	defer f.Close()

	// Work through a pointer so every counter update lands directly in stats.
	events := &stats.MemoryStats.EventCount
	*events = cgroups.MemoryEventCount{}

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		key, count, parseErr := fscommon.ParseKeyValue(sc.Text())
		if parseErr != nil {
			return &parseError{Path: dirPath, File: fileName, Err: parseErr}
		}

		switch key {
		case "low":
			events.ReclaimLow = count
		case "high":
			events.ReclaimHigh = count
		case "max":
			events.Max = count
		case "oom":
			events.OOM = count
		case "oom_kill":
			events.OOMKill = count
		}
	}

	if scanErr := sc.Err(); scanErr != nil {
		return &parseError{Path: dirPath, File: fileName, Err: scanErr}
	}
	return nil
}
18 changes: 16 additions & 2 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,22 @@ type MemoryStats struct {
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
EventCount MemoryEventCount `json:"events_count,omitempty"`
}

// MemoryEventCount holds the per-cgroup memory event counters.
type MemoryEventCount struct {
	// ReclaimLow is the number of memory reclaims that happened while
	// usage was under the low boundary.
	ReclaimLow uint64 `json:"reclaim_low"`
	// ReclaimHigh is the number of memory reclaims triggered because the
	// high memory boundary was exceeded.
	ReclaimHigh uint64 `json:"reclaim_high"`
	// Max is the number of times the cgroup's memory usage was about to go
	// over the max boundary.
	Max uint64 `json:"max"`
	// OOM is the number of times the cgroup's memory usage reached the
	// limit and an allocation was about to fail.
	OOM uint64 `json:"oom"`
	// OOMKill is the number of processes belonging to this cgroup that
	// were killed by the OOM killer.
	OOMKill uint64 `json:"oom_kill"`
}

type PageUsageByNUMA struct {
Expand Down
32 changes: 31 additions & 1 deletion tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,40 @@ function test_events() {
retry 10 1 grep -q test_busybox events.log
# shellcheck disable=SC2016
__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
retry 30 1 grep -q oom events.log
retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
__runc delete -f test_busybox
) &
wait # wait for the above sub shells to finish

grep -q '{"type":"oom","id":"test_busybox"}' events.log
}

@test "events --stats with OOM memory event" {
	requires root cgroups_v2
	init_cgroup_paths

	# We need the container to hit OOM, so cap memory at 32 MiB and disable swap.
	update_config '(.. | select(.resources? != null)) .resources.memory |= {"limit": 33554432, "swap": 33554432}'

	# Run busybox detached.
	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]

	# Spawn two subshells:
	#  - the first is an event logger that writes events to events.log;
	#  - the second execs a memory hog to cause an OOM condition, waits for
	#    the oom event, then captures a one-off stats snapshot into
	#    stats.log before deleting the container.
	(__runc events test_busybox >events.log) &
	(
		retry 10 1 grep -q test_busybox events.log
		# shellcheck disable=SC2016
		__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
		retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
		__runc events --stats test_busybox >stats.log
		__runc delete -f test_busybox
	) &
	wait # wait for the above subshells to finish

	grep -q '{"type":"oom","id":"test_busybox"}' events.log
	# The stats snapshot taken after the OOM must report at least one OOM kill.
	jq -e '.data.memory.event_count.oom_kill >= 1' <stats.log
}
17 changes: 10 additions & 7 deletions types/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type PSIData = cgroups.PSIData

// PSIStats is an alias for cgroups.PSIStats.
type PSIStats = cgroups.PSIStats

// MemoryEventCount is an alias for cgroups.MemoryEventCount.
type MemoryEventCount = cgroups.MemoryEventCount

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand Down Expand Up @@ -102,13 +104,14 @@ type MemoryEntry struct {
}

// Memory is the memory section of the reported container stats.
type Memory struct {
	Cache     uint64            `json:"cache,omitempty"`
	Usage     MemoryEntry       `json:"usage,omitempty"`
	Swap      MemoryEntry       `json:"swap,omitempty"`
	Kernel    MemoryEntry       `json:"kernel,omitempty"`
	KernelTCP MemoryEntry       `json:"kernelTCP,omitempty"`
	Raw       map[string]uint64 `json:"raw,omitempty"`
	PSI       *PSIStats         `json:"psi,omitempty"`
	// EventCount carries the memory event counters. It has no omitempty
	// tag, so it is always present in the JSON output as "event_count".
	EventCount MemoryEventCount `json:"event_count"`
}

type L3CacheInfo struct {
Expand Down