Skip to content

Commit

Permalink
libcontainer: include cgroupv2 memory.events in Manager.GetStats
Browse files Browse the repository at this point in the history
Signed-off-by: Zheao.Li <me@manjusaka.me>
  • Loading branch information
Zheaoli committed Dec 12, 2023
1 parent 99f7fa1 commit bffe907
Show file tree
Hide file tree
Showing 8 changed files with 184 additions and 10 deletions.
1 change: 1 addition & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Memory.PSI = cg.MemoryStats.PSI
s.Memory.MemoryEventCount = cg.MemoryStats.EventCount

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand Down
4 changes: 4 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// memory events (cgroup v2 memory.events)
if err := eventMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// io (since kernel 4.5)
if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
16 changes: 16 additions & 0 deletions libcontainer/cgroups/fs2/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -237,3 +237,19 @@ func rootStatsFromMeminfo(stats *cgroups.Stats) error {

return nil
}

// eventMemory fills stats.MemoryStats.EventCount with the counters parsed
// from the memory.events file found in dirPath.
//
// An error returned by the parser is passed back unchanged and stats is left
// untouched. A key that is missing from the parsed map yields a zero counter.
func eventMemory(dirPath string, stats *cgroups.Stats) error {
	events, err := fscommon.ParseKeyValueFile(dirPath, "memory.events")
	if err != nil {
		return err
	}

	stats.MemoryStats.EventCount = cgroups.MemoryEventsCount{
		ReclaimLowCount:  events["low"],
		ReclaimHighCount: events["high"],
		MaxCount:         events["max"],
		OomCount:         events["oom"],
		OomKillCount:     events["oom_kill"],
	}
	return nil
}
23 changes: 23 additions & 0 deletions libcontainer/cgroups/fscommon/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,26 @@ func GetCgroupParamString(path, file string) (string, error) {

return strings.TrimSpace(contents), nil
}

// ParseKeyValueFile reads the given cgroup file from dir and returns its
// "key value" lines as a map from key to value.
//
// A line contributes an entry only if splitting it on a single space yields
// exactly two fields and the second field is accepted by ParseUint. Every
// other line (blank, key-only, non-numeric value, ...) is silently skipped.
// An error is returned only when the file itself cannot be read.
func ParseKeyValueFile(dir, file string) (map[string]uint64, error) {
	data, err := cgroups.ReadFile(dir, file)
	if err != nil {
		return nil, err
	}

	rows := strings.Split(data, "\n")
	result := make(map[string]uint64, len(rows))
	for _, row := range rows {
		fields := strings.Split(row, " ")
		if len(fields) != 2 {
			// Not a "key value" pair; ignore the line.
			continue
		}
		n, perr := ParseUint(fields[1], 10, 64)
		if perr != nil {
			// Value is not numeric; ignore the line.
			continue
		}
		result[fields[0]] = n
	}

	return result, nil
}
83 changes: 83 additions & 0 deletions libcontainer/cgroups/fscommon/utils_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package fscommon

import (
"errors"
"math"
"os"
"path/filepath"
"reflect"
"strconv"
"testing"

Expand Down Expand Up @@ -93,3 +95,84 @@ func TestGetCgroupParamsInt(t *testing.T) {
t.Fatal("Expecting error, got none")
}
}

// TestParseKeyValueFile exercises ParseKeyValueFile with a well-formed
// memory.events file, a missing file, and a file that contains lines which
// must be skipped (blank, key-only, and non-numeric value).
//
// Each case runs as its own subtest, named after the case, in a fresh
// temporary directory.
func TestParseKeyValueFile(t *testing.T) {
	testCases := []struct {
		Name        string
		FileContent []byte
		FileExist   bool
		Filename    string
		HasErr      bool
		ExpectedErr error
		Expected    map[string]uint64
	}{
		{
			Name:        "Standard memory.events",
			FileContent: []byte("low 0\nhigh 0\nmax 12692218\noom 74039\noom_kill 71934\n"),
			Filename:    "memory.events",
			FileExist:   true,
			HasErr:      false,
			Expected: map[string]uint64{
				"low":      0,
				"high":     0,
				"max":      12692218,
				"oom":      74039,
				"oom_kill": 71934,
			},
		},
		{
			Name:        "File not exists",
			FileExist:   false,
			HasErr:      true,
			ExpectedErr: os.ErrNotExist,
		},
		{
			Name:        "Sample cpu.stat with invalid line",
			FileContent: []byte("usage_usec 27458468773731\nuser_usec 20792829128141\nsystem_usec 6665639645590\n\nval_only\nnon_int xyz\n"),
			FileExist:   true,
			HasErr:      false,
			Expected: map[string]uint64{
				"usage_usec":  27458468773731,
				"user_usec":   20792829128141,
				"system_usec": 6665639645590,
			},
		},
	}

	for _, testCase := range testCases {
		t.Run(testCase.Name, func(t *testing.T) {
			// Set up the fixture file, falling back to a generic name when
			// the case does not care about it.
			tempDir := t.TempDir()
			filename := testCase.Filename
			if filename == "" {
				filename = "cgroup.file"
			}

			if testCase.FileExist {
				tempFile := filepath.Join(tempDir, filename)
				// The fixture is a plain data file, so no executable bit.
				if err := os.WriteFile(tempFile, testCase.FileContent, 0o644); err != nil {
					t.Fatal(err)
				}
			}

			got, err := ParseKeyValueFile(tempDir, filename)

			if hasErr := err != nil; testCase.HasErr != hasErr {
				t.Errorf("ParseKeyValueFile returns wrong err: %v for test case: %v", err, testCase.Name)
			}

			if testCase.ExpectedErr != nil && !errors.Is(err, testCase.ExpectedErr) {
				// Report the expected error, not the expected result map.
				t.Errorf("ParseKeyValueFile returns wrong err for test case: %v, expected: %v, got: %v",
					testCase.Name, testCase.ExpectedErr, err)
			}

			if !testCase.HasErr && !reflect.DeepEqual(got, testCase.Expected) {
				t.Errorf("ParseKeyValueFile returns wrong result for test case: %v, got: %v, want: %v",
					testCase.Name, got, testCase.Expected)
			}
		})
	}
}
18 changes: 16 additions & 2 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,22 @@ type MemoryStats struct {
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
EventCount MemoryEventsCount `json:"events_count,omitempty"`
}

// MemoryEventsCount holds the event counters that the cgroup v2 manager
// reads from a cgroup's memory.events file.
type MemoryEventsCount struct {
// ReclaimLowCount is the number of times memory was reclaimed while usage
// was under the low boundary (the "low" key of memory.events).
ReclaimLowCount uint64 `json:"reclaim_low_count"`
// ReclaimHighCount is the number of times memory was reclaimed because the
// high memory boundary was exceeded (the "high" key of memory.events).
ReclaimHighCount uint64 `json:"reclaim_high_count"`
// MaxCount is the number of times the cgroup's memory usage was about to
// go over the max boundary (the "max" key of memory.events).
MaxCount uint64 `json:"max_count"`
// OomCount is the number of times the cgroup's memory usage reached the
// limit and an allocation was about to fail (the "oom" key of memory.events).
OomCount uint64 `json:"oom_count"`
// OomKillCount is the number of processes belonging to this cgroup that
// were OOM-killed (the "oom_kill" key of memory.events).
OomKillCount uint64 `json:"oom_kill_count"`
}

type PageUsageByNUMA struct {
Expand Down
32 changes: 31 additions & 1 deletion tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,40 @@ function test_events() {
retry 10 1 grep -q test_busybox events.log
# shellcheck disable=SC2016
__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
retry 30 1 grep -q oom events.log
retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
__runc delete -f test_busybox
) &
wait # wait for the above sub shells to finish

grep -q '{"type":"oom","id":"test_busybox"}' events.log
}

# Checks that, after the container has logged an OOM event, the output of
# `events --stats` reports .data.memory.memory_event_count.oom_kill_count >= 1.
@test "events --stats with OOM memory event" {
requires root cgroups_v2
init_cgroup_paths

# we need the container to hit OOM, so disable swap
# (swap is set to the same value as the memory limit: 33554432 bytes = 32 MiB)
update_config '(.. | select(.resources? != null)) .resources.memory |= {"limit": 33554432, "swap": 33554432}'

# run busybox detached
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

# spawn two subprocesses (shells):
# - the first is an event logger that writes the container's events to events.log
# - the second waits until the logger has produced output, execs a memory hog
#   to cause an OOM condition, waits for the oom event to show up in events.log,
#   then writes the output of `events --stats` to stats.log and deletes the container
(__runc events test_busybox >events.log) &
(
retry 10 1 grep -q test_busybox events.log
# shellcheck disable=SC2016
__runc exec -d test_busybox sh -c 'test=$(dd if=/dev/urandom ibs=5120k)'
retry 30 1 grep -q '{"type":"oom","id":"test_busybox"}' events.log
__runc events --stats test_busybox >stats.log
__runc delete -f test_busybox
) &
wait # wait for the above sub shells to finish

# the oom event must have been logged ...
grep -q '{"type":"oom","id":"test_busybox"}' events.log
# ... and the captured stats must report at least one OOM kill
jq -e '.data.memory.memory_event_count.oom_kill_count >= 1' <<<"$(cat stats.log)"
}
17 changes: 10 additions & 7 deletions types/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ type PSIData = cgroups.PSIData

type PSIStats = cgroups.PSIStats

type MemoryEventCount = cgroups.MemoryEventsCount

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand Down Expand Up @@ -102,13 +104,14 @@ type MemoryEntry struct {
}

type Memory struct {
Cache uint64 `json:"cache,omitempty"`
Usage MemoryEntry `json:"usage,omitempty"`
Swap MemoryEntry `json:"swap,omitempty"`
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
Cache uint64 `json:"cache,omitempty"`
Usage MemoryEntry `json:"usage,omitempty"`
Swap MemoryEntry `json:"swap,omitempty"`
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
MemoryEventCount MemoryEventCount `json:"memory_event_count"`
}

type L3CacheInfo struct {
Expand Down

0 comments on commit bffe907

Please sign in to comment.