Skip to content

Commit

Permalink
libcontainer: include cgroupv2 memory.events in Manager.GetStats
Browse files Browse the repository at this point in the history
Signed-off-by: Zheao.Li <me@manjusaka.me>
  • Loading branch information
Zheaoli committed Jan 18, 2024
1 parent 10754b3 commit 12aaa2b
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 10 deletions.
1 change: 1 addition & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Memory.PSI = cg.MemoryStats.PSI
s.Memory.EventCount = cg.MemoryStats.EventCount

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// memory event for cgroup v2
if err := eventMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// io (since kernel 4.5)
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
33 changes: 33 additions & 0 deletions libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,36 @@ func rootStatsFromMeminfo(stats *cgroups.Stats) error {

return nil
}

// eventMemory reads the cgroup v2 "memory.events" file under dirPath and
// records the counters it recognizes in stats.MemoryStats.EventCount.
//
// EventCount is reset to its zero value once the file has been opened, so
// keys that are absent from the file stay at 0; keys other than max, low,
// high, oom and oom_kill are ignored. An error from opening the file is
// returned unchanged, while a malformed line or a scanner failure is
// returned wrapped in a *parseError.
func eventMemory(dirPath string, stats *cgroups.Stats) error {
	const fileName = "memory.events"

	f, err := cgroups.OpenFile(dirPath, fileName, os.O_RDONLY)
	if err != nil {
		return err
	}
	defer f.Close()

	// Start from a clean slate and fill the counters in through a short alias.
	stats.MemoryStats.EventCount = cgroups.MemoryEventCount{}
	events := &stats.MemoryStats.EventCount

	sc := bufio.NewScanner(f)
	for sc.Scan() {
		key, count, err := fscommon.ParseKeyValue(sc.Text())
		if err != nil {
			return &parseError{Path: dirPath, File: fileName, Err: err}
		}

		switch key {
		case "low":
			events.ReclaimLow = count
		case "high":
			events.ReclaimHigh = count
		case "max":
			events.Max = count
		case "oom":
			events.OOM = count
		case "oom_kill":
			events.OOMKill = count
		}
	}

	if err := sc.Err(); err != nil {
		return &parseError{Path: dirPath, File: fileName, Err: err}
	}
	return nil
}
18 changes: 16 additions & 2 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,22 @@ type MemoryStats struct {
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
EventCount MemoryEventCount `json:"events_count,omitempty"`
}

// MemoryEventCount holds the event counters that eventMemory (fs2) reads from
// the cgroup v2 memory.events file. Each field corresponds to one key of that
// file.
type MemoryEventCount struct {
// Number of times memory was reclaimed while usage was under the low
// boundary (memory.events key "low").
ReclaimLow uint64 `json:"reclaim_low"`
// Number of times memory was reclaimed because the high memory boundary
// was exceeded (memory.events key "high").
ReclaimHigh uint64 `json:"reclaim_high"`
// Number of times the cgroup's memory usage was about to go over the max
// boundary (memory.events key "max").
Max uint64 `json:"max"`
// Number of times the cgroup's memory usage reached the limit and an
// allocation was about to fail (memory.events key "oom").
OOM uint64 `json:"oom"`
// Number of processes belonging to this container that were OOM-killed
// (memory.events key "oom_kill").
OOMKill uint64 `json:"oom_kill"`
}

type PageUsageByNUMA struct {
Expand Down
32 changes: 31 additions & 1 deletion tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,40 @@ function test_events() {
retry 10 1 grep -q test_busybox events.log
# shellcheck disable=SC2016
__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
retry 30 1 grep -q oom events.log
retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
__runc delete -f test_busybox
) &
wait # wait for the above sub shells to finish

grep -q '{"type":"oom","id":"test_busybox"}' events.log
}

@test "events --stats with OOM memory event" {
	requires root cgroups_v2
	init_cgroup_paths

	# We need the container to hit OOM, so disable swap by setting the swap
	# limit to the same value as the memory limit.
	update_config '(.. | select(.resources? != null)) .resources.memory |= {"limit": 33554432, "swap": 33554432}'

	# Run busybox detached.
	runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
	[ "$status" -eq 0 ]

	# Spawn two subshells:
	# - the first is an event logger that sends stats events to events.log;
	# - the second execs a memory hog process to cause an OOM condition,
	#   waits for the oom event, then captures a one-shot stats snapshot
	#   into stats.log before deleting the container.
	(__runc events test_busybox >events.log) &
	(
		retry 10 1 grep -q test_busybox events.log
		# shellcheck disable=SC2016
		__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
		retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
		__runc events --stats test_busybox >stats.log
		__runc delete -f test_busybox
	) &
	wait # wait for the above subshells to finish

	grep -q '{"type":"oom","id":"test_busybox"}' events.log
	# Let jq read stats.log directly rather than via cat and a here-string;
	# -e makes the test fail unless the expression evaluates to true.
	jq -e '.data.memory.event_count.oom_kill >= 1' stats.log
}
17 changes: 10 additions & 7 deletions types/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type PSIData = cgroups.PSIData

type PSIStats = cgroups.PSIStats

type MemoryEventCount = cgroups.MemoryEventCount

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand Down Expand Up @@ -102,13 +104,14 @@ type MemoryEntry struct {
}

// Memory holds the memory statistics of a container's stats event; its fields
// are assigned by convertLibcontainerStats (s.Memory.Usage, s.Memory.Raw, ...).
//
// NOTE(review): this span is a flattened diff hunk ("10 additions & 7
// deletions"), so the first seven field lines appear to be the removed side
// and are repeated by the added lines that follow - confirm against the real
// file before treating this as compilable code.
type Memory struct {
Cache uint64 `json:"cache,omitempty"`
Usage MemoryEntry `json:"usage,omitempty"`
Swap MemoryEntry `json:"swap,omitempty"`
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
Cache uint64 `json:"cache,omitempty"`
Usage MemoryEntry `json:"usage,omitempty"`
Swap MemoryEntry `json:"swap,omitempty"`
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
// EventCount carries the memory event counters and is serialized under the
// "event_count" key, which is the key the integration test queries with jq.
// NOTE(review): cgroups.MemoryStats tags the same data
// "events_count,omitempty" - confirm the differing key name is intended.
EventCount MemoryEventCount `json:"event_count"`
}

type L3CacheInfo struct {
Expand Down

0 comments on commit 12aaa2b

Please sign in to comment.