Update container/pod benchmarking procedures. #894

Merged
merged 9 commits on May 23, 2022
26 changes: 26 additions & 0 deletions docs/benchmark.md
@@ -26,10 +26,32 @@ git clone https://github.com/kubernetes-sigs/cri-tools -b release-1.9 $GOPATH/sr

Before running the tests, you need to _ensure that the CRI server under test is running and listening on a Unix socket_ or a Windows TCP socket. Because the benchmark tests request changes (e.g., create/delete) to containers and verify that the correct status is reported, they expect to be the only users of the CRI server. Please make sure that 1) there are no existing CRI-managed containers running on the node, and 2) no other processes (e.g., the kubelet) will interfere with the tests.
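
For example, a quick pre-flight check with `crictl` can confirm that the node is clean before benchmarking. This is a minimal sketch assuming a containerd endpoint; substitute the socket of the runtime under test:

```sh
# Point crictl at the runtime under test (the endpoint shown here is an assumption).
export CONTAINER_RUNTIME_ENDPOINT=unix:///run/containerd/containerd.sock

# Both of these listings should be empty before starting the benchmarks.
crictl ps -a
crictl pods
```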

### Defining benchmarking parameters

You can optionally specify parameters that control how the benchmarks are run, for example:

```yaml
# The number of container lifecycle benchmarks to run:
containersNumber: 100

# The number of container lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(containersNumber / containersNumberParallel).
containersNumberParallel: 2

# The number of pod lifecycle benchmarks to run:
podsNumber: 1000

# The number of pod lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(podsNumber / podsNumberParallel).
podsNumberParallel: 1
```

### Run

```sh
critest -benchmark
[--benchmarking-params-file /path/to/params.yml]
[--benchmarking-output-dir /path/to/outdir/]
```
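
For instance, a full invocation using the optional flags might look like the sketch below; the endpoint and file paths are placeholders for your own setup:

```sh
# Illustrative run: the endpoint and paths are placeholders.
# The output directory must already exist (see the flag descriptions below).
mkdir -p ./benchmark-results
critest -benchmark \
  -runtime-endpoint unix:///run/containerd/containerd.sock \
  -benchmarking-params-file ./benchmark-params.yml \
  -benchmarking-output-dir ./benchmark-results
```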

This will
@@ -45,5 +67,9 @@ critest connects to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://l
- `-ginkgo.focus`: Only run the tests that match the regular expression.
- `-image-endpoint`: Set the endpoint of the image service. Same as `-runtime-endpoint` if not specified.
- `-runtime-endpoint`: Set the endpoint of the runtime service. Defaults to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://localhost:3735`.
- `-benchmarking-params-file`: Optional path to a YAML file containing parameters describing which
  benchmarks should be run.
- `-benchmarking-output-dir`: Optional path to a pre-existing directory in which to write JSON
  files detailing the results of the benchmarks (see the example after this list).
- `-ginkgo.skip`: Skip the tests that match the regular expression.
- `-h`: Show help and all supported options.
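
If an output directory was provided, the container lifecycle results end up in a JSON file (named `container_benchmark_data.json` in this PR's container benchmark code), which can be inspected with standard tooling; the snippet below is illustrative:

```sh
# Illustrative: pretty-print the container benchmark results written by the run above.
jq . ./benchmark-results/container_benchmark_data.json
```
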
4 changes: 4 additions & 0 deletions pkg/benchmark/benchmark.go
@@ -33,6 +33,10 @@ import (
. "github.com/onsi/gomega"
)

const (
defaultOperationTimes int = 20
)

// TestPerformance checks configuration parameters (specified through flags) and then runs
// benchmark tests using the Ginkgo runner.
// If a "report directory" is specified, one or more JUnit test reports will be
184 changes: 112 additions & 72 deletions pkg/benchmark/container.go
@@ -1,5 +1,5 @@
/*
Copyright 2017 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,12 +17,21 @@ limitations under the License.
package benchmark

import (
"fmt"
"path"
"time"

"github.com/golang/glog"
"github.com/kubernetes-sigs/cri-tools/pkg/framework"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gmeasure"
)

const (
defaultContainerBenchmarkTimeoutSeconds = 60
)

var _ = framework.KubeDescribe("Container", func() {
@@ -37,84 +46,115 @@ var _ = framework.KubeDescribe("Container", func() {
})

Context("benchmark about operations on Container", func() {
var podID string
var podConfig *runtimeapi.PodSandboxConfig

BeforeEach(func() {
podID, podConfig = framework.CreatePodSandboxForContainer(rc)
})

AfterEach(func() {
By("stop PodSandbox")
rc.StopPodSandbox(podID)
By("delete PodSandbox")
rc.RemovePodSandbox(podID)
})

Measure("benchmark about basic operations on Container", func(b Benchmarker) {
var containerID string
var err error
It("benchmark about basic operations on Container", func() {
timeout := defaultContainerBenchmarkTimeoutSeconds
if framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds > 0 {
timeout = framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds
}

operation := b.Time("create Container", func() {
By("benchmark about creating Container")
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-creating-benchmark-")
})
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "create Container shouldn't take too long.")
// Setup sampling config from TestContext:
samplingConfig := gmeasure.SamplingConfig{
N: framework.TestContext.BenchmarkingParams.ContainersNumber,
NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel,
}
if samplingConfig.N <= 0 {
Skip("skipping container lifecycle benchmarks since container number option was not set")
}
if samplingConfig.NumParallel < 1 {
samplingConfig.NumParallel = 1
}

operation = b.Time("start Container", func() {
By("benchmark about starting Container")
// Setup results reporting channel:
resultsSet := LifecycleBenchmarksResultsSet{
OperationsNames: []string{"CreateContainer", "StartContainer", "StatusContainer", "StopContainer", "RemoveContainer"},
NumParallel: samplingConfig.NumParallel,
Datapoints: make([]LifecycleBenchmarkDatapoint, 0),
}
resultsManager := NewLifecycleBenchmarksResultsManager(
resultsSet,
timeout,
)
resultsChannel := resultsManager.StartResultsConsumer()

experiment := gmeasure.NewExperiment("ContainerOps")
experiment.Sample(func(idx int) {
var podID string
var podConfig *runtimeapi.PodSandboxConfig
var containerID string
var lastStartTime, lastEndTime int64
var err error
durations := make([]int64, len(resultsSet.OperationsNames))

podID, podConfig = framework.CreatePodSandboxForContainer(rc)

By(fmt.Sprintf("CreatingContainer %d", idx))
startTime := time.Now().UnixNano()
lastStartTime = startTime
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Benchmark-container-")
lastEndTime = time.Now().UnixNano()
durations[0] = lastEndTime - lastStartTime

By(fmt.Sprintf("StartingContainer %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StartContainer(containerID)
})

framework.ExpectNoError(err, "failed to start Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "start Container shouldn't take too long.")

operation = b.Time("Container status", func() {
By("benchmark about getting Container status")
_, err = rc.ContainerStatus(containerID, false)
})

framework.ExpectNoError(err, "failed to get Container status: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "get container status shouldn't take too long.")

operation = b.Time("stop Container", func() {
By("benchmark about stoping Container")
lastEndTime = time.Now().UnixNano()
durations[1] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to start Container: %v", err)

By(fmt.Sprintf("ContainerStatus %d", idx))
lastStartTime = time.Now().UnixNano()
_, err = rc.ContainerStatus(containerID, true)
lastEndTime = time.Now().UnixNano()
durations[2] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to get Container status: %v", err)

By(fmt.Sprintf("ContainerStop %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
})

framework.ExpectNoError(err, "failed to stop Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "stop Container shouldn't take too long.")
lastEndTime = time.Now().UnixNano()
durations[3] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to stop Container: %v", err)

operation = b.Time("remove Container", func() {
By("benchmark about removing Container")
By(fmt.Sprintf("ContainerRemove %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.RemoveContainer(containerID)
})

framework.ExpectNoError(err, "failed to remove Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "remove Container shouldn't take too long.")

}, defaultOperationTimes)

Measure("benchmark about listing Container", func(b Benchmarker) {
containerList := make([]string, 0, framework.TestContext.Number)
var err error

for i := 0; i < framework.TestContext.Number; i++ {
containerID := framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-listing-benchmark-")
containerList = append(containerList, containerID)
lastEndTime = time.Now().UnixNano()
durations[4] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to remove Container: %v", err)

res := LifecycleBenchmarkDatapoint{
SampleIndex: idx,
StartTime: startTime,
EndTime: lastEndTime,
OperationsDurationsNs: durations,
MetaInfo: map[string]string{"podId": podID, "containerId": containerID},
}
resultsChannel <- &res

By(fmt.Sprintf("stop PodSandbox %d", idx))
rc.StopPodSandbox(podID)
By(fmt.Sprintf("delete PodSandbox %d", idx))
rc.RemovePodSandbox(podID)

}, samplingConfig)

// Send nil and give the manager a minute to process any already-queued results:
resultsChannel <- nil
err := resultsManager.AwaitAllResults(60)
if err != nil {
glog.Errorf("Results manager failed to await all results: %s", err)
}

operation := b.Time("list Container", func() {
_, err = rc.ListContainers(nil)
})

framework.ExpectNoError(err, "failed to list Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "list Container shouldn't take too long.")

for _, containerID := range containerList {
rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
rc.RemoveContainer(containerID)
if framework.TestContext.BenchmarkingOutputDir != "" {
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "container_benchmark_data.json")
err = resultsManager.WriteResultsFile(filepath)
if err != nil {
glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
}
} else {
glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results file.")
glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
}
}, defaultOperationTimes)
})
})
})