Skip to content

Commit

Permalink
Make one-shot stats faster
Browse files Browse the repository at this point in the history
This commit moves one-shot stats processing out of the publishing
channels, i.e. collect stats directly.

Also changes the method of getSystemCPUUsage() on Linux to return
number of online CPUs also.

Signed-off-by: Xinfeng Liu <XinfengLiu@icloud.com>
  • Loading branch information
xinfengliu committed Sep 27, 2023
1 parent bd7b27b commit ff10cbc
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 135 deletions.
46 changes: 38 additions & 8 deletions daemon/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"runtime"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/backend"
"github.com/docker/docker/api/types/versions"
Expand Down Expand Up @@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c
})
}

// Get container stats directly if OneShot is set
if config.OneShot {
stats, err := daemon.GetContainerStats(ctr)
if err != nil {
return err
}
return json.NewEncoder(config.OutStream).Encode(stats)
}

outStream := config.OutStream
if config.Stream {
wf := ioutils.NewWriteFlusher(outStream)
Expand Down Expand Up @@ -146,17 +156,37 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha

// GetContainerStats collects all the stats published by a container
func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
stats, err := daemon.stats(container)
if err != nil {
return nil, err
}
var stats *types.StatsJSON
var err error

stats, err = daemon.stats(container)

// We already have the network stats on Windows directly from HCS.
if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
if err == nil && !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
stats.Networks, err = daemon.getNetworkStats(container)
}

switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, onlineCPUs, err := getSystemCPUUsage()
if err != nil {
log.G(context.TODO()).WithError(err).WithField("container_id", container.ID).Errorf("collecting system cpu usage")
return nil, err
}
// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs
return stats, nil
case errdefs.ErrConflict, errdefs.ErrNotFound:
// return empty stats containing only name and ID if not running or not found
return &types.StatsJSON{
Name: container.Name,
ID: container.ID,
}, nil
default:
log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.ID, err)
return nil, err
}

return stats, nil
}
44 changes: 4 additions & 40 deletions daemon/stats/collector.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
package stats // import "github.com/docker/docker/daemon/stats"

import (
"bufio"
"context"
"sync"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/moby/pubsub"
)

Expand All @@ -20,7 +16,6 @@ type Collector struct {
supervisor supervisor
interval time.Duration
publishers map[*container.Container]*pubsub.Publisher
bufReader *bufio.Reader
}

// NewCollector creates a stats collector that will poll the supervisor with the specified interval
Expand All @@ -29,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
interval: interval,
supervisor: supervisor,
publishers: make(map[*container.Container]*pubsub.Publisher),
bufReader: bufio.NewReaderSize(nil, 128),
}
s.cond = sync.NewCond(&s.m)
return s
Expand Down Expand Up @@ -108,45 +102,15 @@ func (s *Collector) Run() {

s.cond.L.Unlock()

onlineCPUs, err := s.getNumberOnlineCPUs()
if err != nil {
log.G(context.TODO()).Errorf("collecting system online cpu count: %v", err)
continue
}

for _, pair := range pairs {
stats, err := s.supervisor.GetContainerStats(pair.container)

switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, err := s.getSystemCPUUsage()
if err != nil {
log.G(context.TODO()).WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
continue
}

// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs

pair.publisher.Publish(*stats)

case errdefs.ErrConflict, errdefs.ErrNotFound:
// publish empty stats containing only name and ID if not running or not found
pair.publisher.Publish(types.StatsJSON{
if err != nil {
stats = &types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})

default:
log.G(context.TODO()).Errorf("collecting stats for %s: %v", pair.container.ID, err)
pair.publisher.Publish(types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})
}
}
pair.publisher.Publish(*stats)
}

time.Sleep(s.interval)
Expand Down
75 changes: 0 additions & 75 deletions daemon/stats/collector_unix.go

This file was deleted.

12 changes: 0 additions & 12 deletions daemon/stats/collector_windows.go

This file was deleted.

61 changes: 61 additions & 0 deletions daemon/stats_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
package daemon // import "github.com/docker/docker/daemon"

import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"

statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
Expand Down Expand Up @@ -296,3 +300,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.

return stats, nil
}

const (
// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
// on Linux it's a constant which is safe to be hard coded,
// so we can avoid using cgo here. For details, see:
// https://github.com/containerd/cgroups/pull/12
clockTicksPerSecond = 100
nanoSecondsPerSecond = 1e9
)

// getSystemCPUUsage returns the host system's cpu usage in
// nanoseconds and number of online CPUs. An error is returned
// if the format of the underlying file does not match.
//
// Uses /proc/stat defined by POSIX. Looks for the cpu
// statistics line and then sums up the first seven fields
// provided. See `man 5 proc` for details on specific field
// information.
func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) {
f, err := os.Open("/proc/stat")
if err != nil {
return 0, 0, err
}
defer f.Close()

scanner := bufio.NewScanner(f)
for scanner.Scan() {
line := scanner.Text()
if len(line) < 4 || line[:3] != "cpu" {
break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
}
if line[3] == ' ' {
parts := strings.Fields(line)
if len(parts) < 8 {
return 0, 0, fmt.Errorf("invalid number of cpu fields")
}
var totalClockTicks uint64
for _, i := range parts[1:8] {
v, err := strconv.ParseUint(i, 10, 64)
if err != nil {
return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err)
}
totalClockTicks += v
}
cpuUsage = (totalClockTicks * nanoSecondsPerSecond) /
clockTicksPerSecond
}
if '0' <= line[3] && line[3] <= '9' {
cpuNum++
}
}

if err := scanner.Err(); err != nil {
return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err)
}
return
}
8 changes: 8 additions & 0 deletions daemon/stats_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
return make(map[string]types.NetworkStats), nil
}

// getSystemCPUUsage returns the host system's cpu usage in
// nanoseconds and number of online CPUs. An error is returned
// if the format of the underlying file does not match.
// This is a no-op on Windows.
func getSystemCPUUsage() (uint64, uint32, error) {
return 0, 0, nil
}

0 comments on commit ff10cbc

Please sign in to comment.