Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make one-shot stats faster #46448

Merged
merged 2 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
46 changes: 38 additions & 8 deletions daemon/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"runtime"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/api/types/backend"
"github.com/docker/docker/api/types/versions"
Expand Down Expand Up @@ -43,6 +44,15 @@ func (daemon *Daemon) ContainerStats(ctx context.Context, prefixOrName string, c
})
}

// Get container stats directly if OneShot is set
if config.OneShot {
stats, err := daemon.GetContainerStats(ctr)
xinfengliu marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}
return json.NewEncoder(config.OutStream).Encode(stats)
}

outStream := config.OutStream
if config.Stream {
wf := ioutils.NewWriteFlusher(outStream)
Expand Down Expand Up @@ -146,17 +156,37 @@ func (daemon *Daemon) unsubscribeToContainerStats(c *container.Container, ch cha

// GetContainerStats collects all the stats published by a container
func (daemon *Daemon) GetContainerStats(container *container.Container) (*types.StatsJSON, error) {
stats, err := daemon.stats(container)
if err != nil {
return nil, err
}
var stats *types.StatsJSON
var err error

stats, err = daemon.stats(container)
xinfengliu marked this conversation as resolved.
Show resolved Hide resolved

// We already have the network stats on Windows directly from HCS.
if !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
if stats.Networks, err = daemon.getNetworkStats(container); err != nil {
if err == nil && !container.Config.NetworkDisabled && runtime.GOOS != "windows" {
xinfengliu marked this conversation as resolved.
Show resolved Hide resolved
stats.Networks, err = daemon.getNetworkStats(container)
}

switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, onlineCPUs, err := getSystemCPUUsage()
xinfengliu marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
log.G(context.TODO()).WithError(err).WithField("container_id", container.ID).Errorf("collecting system cpu usage")
return nil, err
}
// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs
return stats, nil
case errdefs.ErrConflict, errdefs.ErrNotFound:
// return empty stats containing only name and ID if not running or not found
return &types.StatsJSON{
Name: container.Name,
ID: container.ID,
}, nil
default:
log.G(context.TODO()).Errorf("collecting stats for container %s: %v", container.ID, err)
return nil, err
}

return stats, nil
}
44 changes: 4 additions & 40 deletions daemon/stats/collector.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
package stats // import "github.com/docker/docker/daemon/stats"

import (
"bufio"
"context"
"sync"
"time"

"github.com/containerd/containerd/log"
"github.com/docker/docker/api/types"
"github.com/docker/docker/container"
"github.com/docker/docker/errdefs"
"github.com/moby/pubsub"
)

Expand All @@ -20,7 +16,6 @@ type Collector struct {
supervisor supervisor
interval time.Duration
publishers map[*container.Container]*pubsub.Publisher
bufReader *bufio.Reader
}

// NewCollector creates a stats collector that will poll the supervisor with the specified interval
Expand All @@ -29,7 +24,6 @@ func NewCollector(supervisor supervisor, interval time.Duration) *Collector {
interval: interval,
supervisor: supervisor,
publishers: make(map[*container.Container]*pubsub.Publisher),
bufReader: bufio.NewReaderSize(nil, 128),
}
s.cond = sync.NewCond(&s.m)
return s
Expand Down Expand Up @@ -108,45 +102,15 @@ func (s *Collector) Run() {

s.cond.L.Unlock()

onlineCPUs, err := s.getNumberOnlineCPUs()
if err != nil {
log.G(context.TODO()).Errorf("collecting system online cpu count: %v", err)
continue
}

for _, pair := range pairs {
stats, err := s.supervisor.GetContainerStats(pair.container)

switch err.(type) {
case nil:
// Sample system CPU usage close to container usage to avoid
// noise in metric calculations.
systemUsage, err := s.getSystemCPUUsage()
if err != nil {
log.G(context.TODO()).WithError(err).WithField("container_id", pair.container.ID).Errorf("collecting system cpu usage")
continue
}

// FIXME: move to containerd on Linux (not Windows)
stats.CPUStats.SystemUsage = systemUsage
stats.CPUStats.OnlineCPUs = onlineCPUs

pair.publisher.Publish(*stats)

case errdefs.ErrConflict, errdefs.ErrNotFound:
// publish empty stats containing only name and ID if not running or not found
pair.publisher.Publish(types.StatsJSON{
if err != nil {
stats = &types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})

default:
log.G(context.TODO()).Errorf("collecting stats for %s: %v", pair.container.ID, err)
pair.publisher.Publish(types.StatsJSON{
Name: pair.container.Name,
ID: pair.container.ID,
})
}
}
pair.publisher.Publish(*stats)
}

time.Sleep(s.interval)
Expand Down
75 changes: 0 additions & 75 deletions daemon/stats/collector_unix.go

This file was deleted.

12 changes: 0 additions & 12 deletions daemon/stats/collector_windows.go

This file was deleted.

61 changes: 61 additions & 0 deletions daemon/stats_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
package daemon // import "github.com/docker/docker/daemon"

import (
"bufio"
"context"
"fmt"
"os"
"strconv"
"strings"

statsV1 "github.com/containerd/cgroups/v3/cgroup1/stats"
Expand Down Expand Up @@ -296,3 +300,60 @@ func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.

return stats, nil
}

const (
	// The value comes from `C.sysconf(C._SC_CLK_TCK)`, and
	// on Linux it's a constant which is safe to be hard coded,
	// so we can avoid using cgo here. For details, see:
	// https://github.com/containerd/cgroups/pull/12
	clockTicksPerSecond  = 100
	nanoSecondsPerSecond = 1e9
)

// getSystemCPUUsage returns the host system's cumulative CPU usage in
// nanoseconds and the number of online CPUs, both read from /proc/stat
// (a Linux procfs file; see `man 5 proc` for the field definitions).
//
// The aggregate "cpu " line supplies the usage: its first seven counters
// are summed and converted from clock ticks to nanoseconds. Every "cpuN"
// line (a digit right after "cpu") counts as one online CPU. Parsing stops
// at the first line that is not a cpu record.
//
// An error is returned when the file cannot be opened or scanned, when the
// aggregate line has fewer than seven counters, or when one of those
// counters is not an unsigned integer. If the file does not contain an
// aggregate "cpu " line among its leading cpu records, cpuUsage is reported
// as 0 with a nil error.
func getSystemCPUUsage() (cpuUsage uint64, cpuNum uint32, err error) {
	// Both constants are exact, so this is precisely 1e7 ns per tick.
	// Scaling by the pre-divided factor gives the same result as
	// (ticks * 1e9) / 100, but the uint64 intermediate no longer wraps once
	// the tick total passes ~1.8e10 (about 1.8e8 CPU-seconds, which a busy
	// many-core host accumulates within weeks of uptime).
	const nanoSecondsPerClockTick = nanoSecondsPerSecond / clockTicksPerSecond

	f, err := os.Open("/proc/stat")
	if err != nil {
		return 0, 0, err
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if len(line) < 4 || line[:3] != "cpu" {
			break // Assume all cpu* records are at the front, like glibc https://github.com/bminor/glibc/blob/5d00c201b9a2da768a79ea8d5311f257871c0b43/sysdeps/unix/sysv/linux/getsysstats.c#L108-L135
		}

		switch {
		case line[3] == ' ':
			// Aggregate line: "cpu  <c1> <c2> ... <c7> ...".
			parts := strings.Fields(line)
			if len(parts) < 8 {
				return 0, 0, fmt.Errorf("invalid number of cpu fields")
			}
			var totalClockTicks uint64
			for _, i := range parts[1:8] {
				v, err := strconv.ParseUint(i, 10, 64)
				if err != nil {
					return 0, 0, fmt.Errorf("Unable to convert value %s to int: %w", i, err)
				}
				totalClockTicks += v
			}
			cpuUsage = totalClockTicks * nanoSecondsPerClockTick
		case '0' <= line[3] && line[3] <= '9':
			// Per-CPU line: "cpuN ...".
			cpuNum++
		}
	}

	if err := scanner.Err(); err != nil {
		return 0, 0, fmt.Errorf("error scanning '/proc/stat' file: %w", err)
	}
	return cpuUsage, cpuNum, nil
}
8 changes: 8 additions & 0 deletions daemon/stats_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,11 @@ func (daemon *Daemon) stats(c *container.Container) (*types.StatsJSON, error) {
// getNetworkStats is the Windows variant of the per-container network stats
// lookup. It does not inspect the container and always hands back a fresh,
// empty (non-nil) map together with a nil error.
func (daemon *Daemon) getNetworkStats(c *container.Container) (map[string]types.NetworkStats, error) {
	empty := map[string]types.NetworkStats{}
	return empty, nil
}

// getSystemCPUUsage is the Windows counterpart of the host CPU sampler.
// It reads nothing and never fails: the reported CPU usage and the number
// of online CPUs are both zero, and the error is always nil.
func getSystemCPUUsage() (uint64, uint32, error) {
	var (
		usage uint64 // left at its zero value
		cpus  uint32 // left at its zero value
	)
	return usage, cpus, nil
}