Merge pull request #894 from aznashwan/benchmarking
Update container/pod benchmarking procedures.
k8s-ci-robot committed May 23, 2022
2 parents 66de35d + 4ee0c36 commit 4b7a280
Showing 18 changed files with 2,518 additions and 201 deletions.
26 changes: 26 additions & 0 deletions docs/benchmark.md
@@ -26,10 +26,32 @@ git clone https://github.com/kubernetes-sigs/cri-tools -b release-1.9 $GOPATH/sr

Before running the test, you need to _ensure that the CRI server under test is running and listening on a Unix socket_ or a Windows TCP socket. Because the benchmark tests are designed to request changes (e.g., create/delete) to the containers and to verify that the correct status is reported, they expect to be the only user of the CRI server. Please make sure that 1) there are no existing CRI-managed containers running on the node, and 2) no other processes (e.g., the Kubelet) will interfere with the tests.
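
As a quick sanity check before benchmarking, you can confirm that the runtime endpoint responds and that nothing is left running on it. The snippet below is a minimal sketch using `crictl` (also part of cri-tools); the endpoint shown is illustrative and should match your runtime:

```sh
# Confirm the CRI runtime answers on its socket (adjust the endpoint for your runtime).
crictl --runtime-endpoint unix:///var/run/dockershim.sock info

# Verify that no CRI-managed containers or pod sandboxes remain on the node.
crictl --runtime-endpoint unix:///var/run/dockershim.sock ps -a
crictl --runtime-endpoint unix:///var/run/dockershim.sock pods
```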

### Defining benchmarking parameters

You can optionally specify some parameters detailing how the benchmarks should be run.

```yaml
# The number of container lifecycle benchmarks to run:
containersNumber: 100

# The number of container lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(containersNumber / containersNumberParallel)
containersNumberParallel: 2

# The number of pod lifecycle benchmarks to run:
podsNumber: 1000

# The number of pod lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(podsNumber / podsNumberParallel)
podsNumberParallel: 1
```

### Run

```sh
critest -benchmark
[--benchmarking-params-file /path/to/params.yml]
[--benchmarking-output-dir /path/to/outdir/]
```
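
For instance, assuming the parameters above were saved to `./benchmark-params.yml`, a complete invocation might look like the following (both paths are illustrative):

```sh
# The output directory must already exist; critest only writes result files into it.
mkdir -p ./benchmark-results

critest -benchmark \
  --benchmarking-params-file ./benchmark-params.yml \
  --benchmarking-output-dir ./benchmark-results
```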

This will
@@ -45,5 +67,9 @@ critest connects to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://l
- `-ginkgo.focus`: Only run the tests that match the regular expression.
- `-image-endpoint`: Set the endpoint of the image service. Same as `runtime-endpoint` if not specified.
- `-runtime-endpoint`: Set the endpoint of the runtime service. Defaults to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://localhost:3735`.
- `-benchmarking-params-file`: Optional path to a YAML file containing parameters describing which benchmarks should be run.
- `-benchmarking-output-dir`: Optional path to a pre-existing directory in which to write JSON files detailing the benchmark results (see the combined example after this list).
- `-ginkgo.skip`: Skip the tests that match the regular expression.
- `-h`: Show help and all supported options.
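
All of these flags can be combined in a single run. The sketch below is illustrative; the runtime endpoint and focus expression are examples, not defaults:

```sh
critest -benchmark \
  -runtime-endpoint unix:///var/run/containerd/containerd.sock \
  -ginkgo.focus="Container" \
  -benchmarking-params-file ./benchmark-params.yml \
  -benchmarking-output-dir ./benchmark-results
```
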
4 changes: 4 additions & 0 deletions pkg/benchmark/benchmark.go
@@ -33,6 +33,10 @@ import (
. "github.com/onsi/gomega"
)

const (
defaultOperationTimes int = 20
)

// TestPerformance checks configuration parameters (specified through flags) and then runs
// benchmark tests using the Ginkgo runner.
// If a "report directory" is specified, one or more JUnit test reports will be
184 changes: 112 additions & 72 deletions pkg/benchmark/container.go
@@ -1,5 +1,5 @@
/*
Copyright 2017 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,12 +17,21 @@ limitations under the License.
package benchmark

import (
"fmt"
"path"
"time"

"github.com/golang/glog"
"github.com/kubernetes-sigs/cri-tools/pkg/framework"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gmeasure"
)

const (
defaultContainerBenchmarkTimeoutSeconds = 60
)

var _ = framework.KubeDescribe("Container", func() {
@@ -37,84 +46,115 @@ })
})

Context("benchmark about operations on Container", func() {
var podID string
var podConfig *runtimeapi.PodSandboxConfig

BeforeEach(func() {
podID, podConfig = framework.CreatePodSandboxForContainer(rc)
})

AfterEach(func() {
By("stop PodSandbox")
rc.StopPodSandbox(podID)
By("delete PodSandbox")
rc.RemovePodSandbox(podID)
})

Measure("benchmark about basic operations on Container", func(b Benchmarker) {
var containerID string
var err error
It("benchmark about basic operations on Container", func() {
timeout := defaultContainerBenchmarkTimeoutSeconds
if framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds > 0 {
timeout = framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds
}

operation := b.Time("create Container", func() {
By("benchmark about creating Container")
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-creating-benchmark-")
})
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "create Container shouldn't take too long.")
// Setup sampling config from TestContext:
samplingConfig := gmeasure.SamplingConfig{
N: framework.TestContext.BenchmarkingParams.ContainersNumber,
NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel,
}
if samplingConfig.N <= 0 {
Skip("skipping container lifecycle benchmarks since container number option was not set")
}
if samplingConfig.NumParallel < 1 {
samplingConfig.NumParallel = 1
}

operation = b.Time("start Container", func() {
By("benchmark about starting Container")
// Setup results reporting channel:
resultsSet := LifecycleBenchmarksResultsSet{
OperationsNames: []string{"CreateContainer", "StartContainer", "StatusContainer", "StopContainer", "RemoveContainer"},
NumParallel: samplingConfig.NumParallel,
Datapoints: make([]LifecycleBenchmarkDatapoint, 0),
}
resultsManager := NewLifecycleBenchmarksResultsManager(
resultsSet,
timeout,
)
resultsChannel := resultsManager.StartResultsConsumer()

experiment := gmeasure.NewExperiment("ContainerOps")
experiment.Sample(func(idx int) {
var podID string
var podConfig *runtimeapi.PodSandboxConfig
var containerID string
var lastStartTime, lastEndTime int64
var err error
durations := make([]int64, len(resultsSet.OperationsNames))

podID, podConfig = framework.CreatePodSandboxForContainer(rc)

By(fmt.Sprintf("CreatingContainer %d", idx))
startTime := time.Now().UnixNano()
lastStartTime = startTime
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Benchmark-container-")
lastEndTime = time.Now().UnixNano()
durations[0] = lastEndTime - lastStartTime

By(fmt.Sprintf("StartingContainer %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StartContainer(containerID)
})

framework.ExpectNoError(err, "failed to start Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "start Container shouldn't take too long.")

operation = b.Time("Container status", func() {
By("benchmark about getting Container status")
_, err = rc.ContainerStatus(containerID, false)
})

framework.ExpectNoError(err, "failed to get Container status: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "get container status shouldn't take too long.")

operation = b.Time("stop Container", func() {
By("benchmark about stoping Container")
lastEndTime = time.Now().UnixNano()
durations[1] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to start Container: %v", err)

By(fmt.Sprintf("ContainerStatus %d", idx))
lastStartTime = time.Now().UnixNano()
_, err = rc.ContainerStatus(containerID, true)
lastEndTime = time.Now().UnixNano()
durations[2] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to get Container status: %v", err)

By(fmt.Sprintf("ContainerStop %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
})

framework.ExpectNoError(err, "failed to stop Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "stop Container shouldn't take too long.")
lastEndTime = time.Now().UnixNano()
durations[3] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to stop Container: %v", err)

operation = b.Time("remove Container", func() {
By("benchmark about removing Container")
By(fmt.Sprintf("ContainerRemove %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.RemoveContainer(containerID)
})

framework.ExpectNoError(err, "failed to remove Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "remove Container shouldn't take too long.")

}, defaultOperationTimes)

Measure("benchmark about listing Container", func(b Benchmarker) {
containerList := make([]string, 0, framework.TestContext.Number)
var err error

for i := 0; i < framework.TestContext.Number; i++ {
containerID := framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-listing-benchmark-")
containerList = append(containerList, containerID)
lastEndTime = time.Now().UnixNano()
durations[4] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to remove Container: %v", err)

res := LifecycleBenchmarkDatapoint{
SampleIndex: idx,
StartTime: startTime,
EndTime: lastEndTime,
OperationsDurationsNs: durations,
MetaInfo: map[string]string{"podId": podID, "containerId": containerID},
}
resultsChannel <- &res

By(fmt.Sprintf("stop PodSandbox %d", idx))
rc.StopPodSandbox(podID)
By(fmt.Sprintf("delete PodSandbox %d", idx))
rc.RemovePodSandbox(podID)

}, samplingConfig)

// Send nil and give the manager a minute to process any already-queued results:
resultsChannel <- nil
err := resultsManager.AwaitAllResults(60)
if err != nil {
glog.Errorf("Results manager failed to await all results: %s", err)
}

operation := b.Time("list Container", func() {
_, err = rc.ListContainers(nil)
})

framework.ExpectNoError(err, "failed to list Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "list Container shouldn't take too long.")

for _, containerID := range containerList {
rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
rc.RemoveContainer(containerID)
if framework.TestContext.BenchmarkingOutputDir != "" {
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "container_benchmark_data.json")
err = resultsManager.WriteResultsFile(filepath)
if err != nil {
glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
}
} else {
glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results file.")
glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
}
}, defaultOperationTimes)
})
})
})
