Merge pull request #894 from aznashwan/benchmarking
Update container/pod benchmarking procedures.
k8s-ci-robot committed May 23, 2022
2 parents 66de35d + 4ee0c36 commit 4b7a280
Showing 18 changed files with 2,518 additions and 201 deletions.
26 changes: 26 additions & 0 deletions docs/benchmark.md
@@ -26,10 +26,32 @@ git clone https://github.com/kubernetes-sigs/cri-tools -b release-1.9 $GOPATH/sr

Before running the test, you need to _ensure that the CRI server under test is running and listening on a Unix socket_ or a Windows TCP socket. Because the benchmark tests are designed to request changes (e.g., create/delete) to the containers and to verify that the correct status is reported, they expect to be the only user of the CRI server. Please make sure that 1) there are no existing CRI-managed containers running on the node, and 2) no other processes (e.g., the Kubelet) will interfere with the tests.
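
As a quick sanity check before benchmarking, you can confirm that the runtime endpoint responds and that nothing is left running on it. The snippet below is a minimal sketch using `crictl` (also part of cri-tools); the endpoint shown is illustrative and should match your runtime:

```sh
# Confirm the CRI runtime answers on its socket (adjust the endpoint for your runtime).
crictl --runtime-endpoint unix:///var/run/dockershim.sock info

# Verify that no CRI-managed containers or pod sandboxes remain on the node.
crictl --runtime-endpoint unix:///var/run/dockershim.sock ps -a
crictl --runtime-endpoint unix:///var/run/dockershim.sock pods
```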

### Defining benchmarking parameters

You can optionally specify some parameters detailing how the benchmarks should be run.

```yaml
# The number of container lifecycle benchmarks to run:
containersNumber: 100

# The number of container lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(containersNumber / containersNumberParallel)
containersNumberParallel: 2

# The number of pod lifecycle benchmarks to run:
podsNumber: 1000

# The number of pod lifecycle benchmarks to run in parallel.
# The total number of samples will be floor(podsNumber / podsNumberParallel)
podsNumberParallel: 1
```

### Run

```sh
critest -benchmark
[--benchmarking-params-file /path/to/params.yml]
[--benchmarking-output-dir /path/to/outdir/]
```
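
For instance, assuming the parameters above were saved to `./benchmark-params.yml`, a complete invocation might look like the following (both paths are illustrative):

```sh
# The output directory must already exist; critest only writes result files into it.
mkdir -p ./benchmark-results

critest -benchmark \
  --benchmarking-params-file ./benchmark-params.yml \
  --benchmarking-output-dir ./benchmark-results
```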

This will
@@ -45,5 +67,9 @@ critest connects to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://l
- `-ginkgo.focus`: Only run the tests that match the regular expression.
- `-image-endpoint`: Set the endpoint of the image service. Same as `runtime-endpoint` if not specified.
- `-runtime-endpoint`: Set the endpoint of the runtime service. Defaults to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://localhost:3735`.
- `-benchmarking-params-file`: Optional path to a YAML file containing parameters describing which benchmarks should be run.
- `-benchmarking-output-dir`: Optional path to a pre-existing directory in which to write JSON files detailing the benchmark results (see the combined example after this list).
- `-ginkgo.skip`: Skip the tests that match the regular expression.
- `-h`: Show help and all supported options.
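
All of these flags can be combined in a single run. The sketch below is illustrative; the runtime endpoint and focus expression are examples, not defaults:

```sh
critest -benchmark \
  -runtime-endpoint unix:///var/run/containerd/containerd.sock \
  -ginkgo.focus="Container" \
  -benchmarking-params-file ./benchmark-params.yml \
  -benchmarking-output-dir ./benchmark-results
```
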
4 changes: 4 additions & 0 deletions pkg/benchmark/benchmark.go
@@ -33,6 +33,10 @@ import (
. "github.com/onsi/gomega"
)

const (
defaultOperationTimes int = 20
)

// TestPerformance checks configuration parameters (specified through flags) and then runs
// benchmark tests using the Ginkgo runner.
// If a "report directory" is specified, one or more JUnit test reports will be
184 changes: 112 additions & 72 deletions pkg/benchmark/container.go
@@ -1,5 +1,5 @@
/*
Copyright 2017 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -17,12 +17,21 @@ limitations under the License.
package benchmark

import (
"fmt"
"path"
"time"

"github.com/golang/glog"
"github.com/kubernetes-sigs/cri-tools/pkg/framework"
internalapi "k8s.io/cri-api/pkg/apis"
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/onsi/gomega/gmeasure"
)

const (
defaultContainerBenchmarkTimeoutSeconds = 60
)

var _ = framework.KubeDescribe("Container", func() {
@@ -37,84 +46,115 @@ })
})

Context("benchmark about operations on Container", func() {
var podID string
var podConfig *runtimeapi.PodSandboxConfig

BeforeEach(func() {
podID, podConfig = framework.CreatePodSandboxForContainer(rc)
})

AfterEach(func() {
By("stop PodSandbox")
rc.StopPodSandbox(podID)
By("delete PodSandbox")
rc.RemovePodSandbox(podID)
})

Measure("benchmark about basic operations on Container", func(b Benchmarker) {
var containerID string
var err error
It("benchmark about basic operations on Container", func() {
timeout := defaultContainerBenchmarkTimeoutSeconds
if framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds > 0 {
timeout = framework.TestContext.BenchmarkingParams.ContainerBenchmarkTimeoutSeconds
}

operation := b.Time("create Container", func() {
By("benchmark about creating Container")
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-creating-benchmark-")
})
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "create Container shouldn't take too long.")
// Setup sampling config from TestContext:
samplingConfig := gmeasure.SamplingConfig{
N: framework.TestContext.BenchmarkingParams.ContainersNumber,
NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel,
}
if samplingConfig.N <= 0 {
Skip("skipping container lifecycle benchmarks since container number option was not set")
}
if samplingConfig.NumParallel < 1 {
samplingConfig.NumParallel = 1
}

operation = b.Time("start Container", func() {
By("benchmark about starting Container")
// Setup results reporting channel:
resultsSet := LifecycleBenchmarksResultsSet{
OperationsNames: []string{"CreateContainer", "StartContainer", "StatusContainer", "StopContainer", "RemoveContainer"},
NumParallel: samplingConfig.NumParallel,
Datapoints: make([]LifecycleBenchmarkDatapoint, 0),
}
resultsManager := NewLifecycleBenchmarksResultsManager(
resultsSet,
timeout,
)
resultsChannel := resultsManager.StartResultsConsumer()

experiment := gmeasure.NewExperiment("ContainerOps")
experiment.Sample(func(idx int) {
var podID string
var podConfig *runtimeapi.PodSandboxConfig
var containerID string
var lastStartTime, lastEndTime int64
var err error
durations := make([]int64, len(resultsSet.OperationsNames))

podID, podConfig = framework.CreatePodSandboxForContainer(rc)

By(fmt.Sprintf("CreatingContainer %d", idx))
startTime := time.Now().UnixNano()
lastStartTime = startTime
containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Benchmark-container-")
lastEndTime = time.Now().UnixNano()
durations[0] = lastEndTime - lastStartTime

By(fmt.Sprintf("StartingContainer %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StartContainer(containerID)
})

framework.ExpectNoError(err, "failed to start Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "start Container shouldn't take too long.")

operation = b.Time("Container status", func() {
By("benchmark about getting Container status")
_, err = rc.ContainerStatus(containerID, false)
})

framework.ExpectNoError(err, "failed to get Container status: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "get container status shouldn't take too long.")

operation = b.Time("stop Container", func() {
By("benchmark about stoping Container")
lastEndTime = time.Now().UnixNano()
durations[1] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to start Container: %v", err)

By(fmt.Sprintf("ContainerStatus %d", idx))
lastStartTime = time.Now().UnixNano()
_, err = rc.ContainerStatus(containerID, true)
lastEndTime = time.Now().UnixNano()
durations[2] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to get Container status: %v", err)

By(fmt.Sprintf("ContainerStop %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
})

framework.ExpectNoError(err, "failed to stop Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "stop Container shouldn't take too long.")
lastEndTime = time.Now().UnixNano()
durations[3] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to stop Container: %v", err)

operation = b.Time("remove Container", func() {
By("benchmark about removing Container")
By(fmt.Sprintf("ContainerRemove %d", idx))
lastStartTime = time.Now().UnixNano()
err = rc.RemoveContainer(containerID)
})

framework.ExpectNoError(err, "failed to remove Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "remove Container shouldn't take too long.")

}, defaultOperationTimes)

Measure("benchmark about listing Container", func(b Benchmarker) {
containerList := make([]string, 0, framework.TestContext.Number)
var err error

for i := 0; i < framework.TestContext.Number; i++ {
containerID := framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Container-for-listing-benchmark-")
containerList = append(containerList, containerID)
lastEndTime = time.Now().UnixNano()
durations[4] = lastEndTime - lastStartTime
framework.ExpectNoError(err, "failed to remove Container: %v", err)

res := LifecycleBenchmarkDatapoint{
SampleIndex: idx,
StartTime: startTime,
EndTime: lastEndTime,
OperationsDurationsNs: durations,
MetaInfo: map[string]string{"podId": podID, "containerId": containerID},
}
resultsChannel <- &res

By(fmt.Sprintf("stop PodSandbox %d", idx))
rc.StopPodSandbox(podID)
By(fmt.Sprintf("delete PodSandbox %d", idx))
rc.RemovePodSandbox(podID)

}, samplingConfig)

// Send nil and give the manager a minute to process any already-queued results:
resultsChannel <- nil
err := resultsManager.AwaitAllResults(60)
if err != nil {
glog.Errorf("Results manager failed to await all results: %s", err)
}

operation := b.Time("list Container", func() {
_, err = rc.ListContainers(nil)
})

framework.ExpectNoError(err, "failed to list Container: %v", err)
Expect(operation.Seconds()).Should(BeNumerically("<", 2), "list Container shouldn't take too long.")

for _, containerID := range containerList {
rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
rc.RemoveContainer(containerID)
if framework.TestContext.BenchmarkingOutputDir != "" {
filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "container_benchmark_data.json")
err = resultsManager.WriteResultsFile(filepath)
if err != nil {
glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
}
} else {
glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results file.")
glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
}
}, defaultOperationTimes)
})
})
})
