diff --git a/cmd/critest/cri_test.go b/cmd/critest/cri_test.go
index cf4aee19e7..7956618886 100644
--- a/cmd/critest/cri_test.go
+++ b/cmd/critest/cri_test.go
@@ -174,6 +174,7 @@ func TestCRISuite(t *testing.T) {
 		flag.Set("ginkgo.focus", "benchmark")
 		flag.Set("ginkgo.succinct", "true")
 	} else {
+		// Skip benchmark measurements for validation tests.
 		flag.Set("ginkgo.skipMeasurements", "true")
 	}
 	if *parallel > 1 {
diff --git a/docs/benchmark.md b/docs/benchmark.md
index 22d9bc537d..50b8fdba4b 100644
--- a/docs/benchmark.md
+++ b/docs/benchmark.md
@@ -26,10 +26,32 @@ git clone https://github.com/kubernetes-sigs/cri-tools -b release-1.9 $GOPATH/sr
 
 Before running the test, you need to _ensure that the CRI server under test is running and listening on a Unix socket_ or a Windows tcp socket. Because the benchmark tests are designed to request changes (e.g., create/delete) to the containers and verify that correct status is reported, it expects to be the only user of the CRI server. Please make sure that 1) there are no existing CRI-managed containers running on the node, and 2) no other processes (e.g., Kubelet) will interfere with the tests.
 
+### Defining benchmarking parameters
+
+You can optionally specify some parameters detailing how the benchmarks should be run.
+
+```yaml
+# The number of container lifecycle benchmarks to run:
+containersNumber: 100
+
+# The number of container lifecycle benchmarks to run in parallel.
+# The total number of samples will be floor(containersNumber / containersNumberParallel)
+containersNumberParallel: 2
+
+# The number of pod lifecycle benchmarks to run:
+podsNumber: 1000
+
+# The number of pod lifecycle benchmarks to run in parallel.
+# The total number of samples will be floor(podsNumber / podsNumberParallel)
+podsNumberParallel: 1
+```
+
 ### Run
 
 ```sh
 critest -benchmark
+  [--benchmarking-params-file /path/to/params.yml]
+  [--benchmarking-output-dir /path/to/outdir/]
 ```
 
 This will
@@ -45,5 +67,9 @@ critest connects to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://l
 - `-ginkgo.focus`: Only run the tests that match the regular expression.
 - `-image-endpoint`: Set the endpoint of image service. Same with runtime-endpoint if not specified.
 - `-runtime-endpoint`: Set the endpoint of runtime service. Default to Unix: `unix:///var/run/dockershim.sock` or Windows: `tcp://localhost:3735`.
+- `-benchmarking-params-file`: Optional path to a YAML file containing parameters describing which
+  benchmarks should be run.
+- `-benchmarking-output-dir`: Optional path to a pre-existing directory in which to write JSON
+  files detailing the results of the benchmarks.
 - `-ginkgo.skip`: Skip the tests that match the regular expression.
 - `-h`: Should help and all supported options.
diff --git a/pkg/benchmark/benchmark.go b/pkg/benchmark/benchmark.go
index 5e5b2ce1b6..38d5a4ddcb 100644
--- a/pkg/benchmark/benchmark.go
+++ b/pkg/benchmark/benchmark.go
@@ -33,14 +33,9 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-// Transforms a slice of `time.Duration`s into their `int64` nanosecond representations.
-func getNanosecondsForDurations(durations []time.Duration) []int64 {
-	var ns []int64
-	for _, duration := range durations {
-		ns = append(ns, duration.Nanoseconds())
-	}
-	return ns
-}
+const (
+	defaultOperationTimes int = 20
+)
 
 // TestPerformance checks configuration parameters (specified through flags) and then runs
 // benchmark tests using the Ginkgo runner.
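Note on the two new flags: the benchmarks below read their sampling parameters from `framework.TestContext.BenchmarkingParams` and write their JSON output under `framework.TestContext.BenchmarkingOutputDir`. The framework-side wiring is not part of this diff; the following is only a rough sketch (hypothetical type and function names, assumed YAML library) of how the keys from the example params file could map onto the fields referenced by the benchmark code:

```go
// Sketch only: the real definitions live in pkg/framework and are not shown in this diff.
package framework

import (
	"io/ioutil"

	"gopkg.in/yaml.v2"
)

// BenchmarkingParamsType is a hypothetical name; the field names mirror those used by
// pkg/benchmark, and the YAML keys mirror the example params file from docs/benchmark.md.
type BenchmarkingParamsType struct {
	ContainersNumber         int `yaml:"containersNumber"`
	ContainersNumberParallel int `yaml:"containersNumberParallel"`
	PodsNumber               int `yaml:"podsNumber"`
	PodsNumberParallel       int `yaml:"podsNumberParallel"`
}

// LoadBenchmarkingParams is likewise illustrative: read the file passed via
// -benchmarking-params-file and unmarshal it into the struct above.
func LoadBenchmarkingParams(path string) (BenchmarkingParamsType, error) {
	params := BenchmarkingParamsType{}
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return params, err
	}
	err = yaml.Unmarshal(data, &params)
	return params, err
}
```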
diff --git a/pkg/benchmark/container.go b/pkg/benchmark/container.go
index f09944cc76..f988b33066 100644
--- a/pkg/benchmark/container.go
+++ b/pkg/benchmark/container.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2021 The Kubernetes Authors.
+Copyright 2022 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,22 +17,17 @@ limitations under the License.
 package benchmark
 
 import (
-	"encoding/json"
-	"io/ioutil"
 	"path"
+	"time"
 
 	"github.com/golang/glog"
 	"github.com/kubernetes-sigs/cri-tools/pkg/framework"
 	. "github.com/onsi/ginkgo"
 	"github.com/onsi/gomega/gmeasure"
 	internalapi "k8s.io/cri-api/pkg/apis"
-	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1alpha2"
+	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
 )
 
-type ContainerExperimentData struct {
-	CreateContainer, StatusContainer, StopContainer, RemoveContainer, StartContainer []int64
-}
-
 var _ = framework.KubeDescribe("Container", func() {
 	f := framework.NewDefaultCRIFramework()
 
@@ -46,75 +41,109 @@ var _ = framework.KubeDescribe("Container", func() {
 
 	Context("benchmark about operations on Container", func() {
 		It("benchmark about basic operations on Container", func() {
+			// Setup sampling config from TestContext:
+			samplingConfig := gmeasure.SamplingConfig{
+				N:           framework.TestContext.BenchmarkingParams.ContainersNumber,
+				NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel,
+			}
+			if samplingConfig.N < 1 {
+				samplingConfig.N = 1
+			}
+			if samplingConfig.NumParallel < 1 {
+				samplingConfig.NumParallel = 1
+			}
+
+			// Setup results reporting channel:
+			resultsSet := LifecycleBenchmarksResultsSet{
+				OperationsNames: []string{"CreateContainer", "StartContainer", "StatusContainer", "StopContainer", "RemoveContainer"},
+				NumParallel:     samplingConfig.NumParallel,
+				Datapoints:      make([]LifecycleBenchmarkDatapoint, 0),
+			}
+			resultsManager := NewLifecycleBenchmarksResultsManager(
+				resultsSet,
+				60,
+			)
+			resultsChannel := resultsManager.StartResultsConsumer()
+
 			experiment := gmeasure.NewExperiment("ContainerOps")
 			experiment.Sample(func(idx int) {
 				var podID string
 				var podConfig *runtimeapi.PodSandboxConfig
 				var containerID string
+				var lastStartTime, lastEndTime int64
 				var err error
+				durations := make([]int64, len(resultsSet.OperationsNames))
 
 				podID, podConfig = framework.CreatePodSandboxForContainer(rc)
 
 				By("CreatingContainer")
-				stopwatch := experiment.NewStopwatch()
-				stopwatch.Reset()
+				startTime := time.Now().UnixNano()
+				lastStartTime = startTime
 				containerID = framework.CreateDefaultContainer(rc, ic, podID, podConfig, "Benchmark-container-")
-				stopwatch.Record("CreateContainer")
+				lastEndTime = time.Now().UnixNano()
+				durations[0] = lastEndTime - lastStartTime
 
 				By("StartingContainer")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				err = rc.StartContainer(containerID)
-				stopwatch.Record("StartContainer")
+				lastEndTime = time.Now().UnixNano()
+				durations[1] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to start Container: %v", err)
 
 				By("ContainerStatus")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				_, err = rc.ContainerStatus(containerID)
-				stopwatch.Record("StatusContainer")
+				lastEndTime = time.Now().UnixNano()
+				durations[2] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to get Container status: %v", err)
 
 				By("ContainerStop")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				err = rc.StopContainer(containerID, framework.DefaultStopContainerTimeout)
-				stopwatch.Record("StopContainer")
+				lastEndTime = time.Now().UnixNano()
+				durations[3] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to stop Container: %v", err)
 
 				By("ContainerRemove")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				err = rc.RemoveContainer(containerID)
-				stopwatch.Record("RemoveContainer")
+				lastEndTime = time.Now().UnixNano()
+				durations[4] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to remove Container: %v", err)
 
+				res := LifecycleBenchmarkDatapoint{
+					SampleIndex:           idx,
+					StartTime:             startTime,
+					EndTime:               lastEndTime,
+					OperationsDurationsNs: durations,
+					MetaInfo:              map[string]string{"podId": podID, "containerId": containerID},
+				}
+				resultsChannel <- &res
+
 				By("stop PodSandbox")
 				rc.StopPodSandbox(podID)
 				By("delete PodSandbox")
 				rc.RemovePodSandbox(podID)
-			}, gmeasure.SamplingConfig{N: framework.TestContext.BenchmarkingParams.ContainersNumber, NumParallel: framework.TestContext.BenchmarkingParams.ContainersNumberParallel})
+			}, samplingConfig)
 
-			data := ContainerExperimentData{
-				CreateContainer: getNanosecondsForDurations(experiment.Get("CreateContainer").Durations),
-				StartContainer:  getNanosecondsForDurations(experiment.Get("StartContainer").Durations),
-				StatusContainer: getNanosecondsForDurations(experiment.Get("StatusContainer").Durations),
-				StopContainer:   getNanosecondsForDurations(experiment.Get("StopContainer").Durations),
-				RemoveContainer: getNanosecondsForDurations(experiment.Get("RemoveContainer").Durations),
+			// Send nil and give the manager a minute to process any already-queued results:
+			resultsChannel <- nil
+			err := resultsManager.AwaitAllResults(60)
+			if err != nil {
+				glog.Errorf("Results manager failed to await all results: %s", err)
 			}
 
 			if framework.TestContext.BenchmarkingOutputDir != "" {
-				filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "container_benchmark_data.json")
-				data, err := json.MarshalIndent(data, "", " ")
-				if err == nil {
-					err = ioutil.WriteFile(filepath, data, 0644)
-					if err != nil {
-						glog.Errorf("Failed to write container benchmark data: %v", filepath)
-					}
-				} else {
-					glog.Errorf("Failed to serialize benchmark data: %v", err)
+				filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "newf_container_benchmark_data.json")
+				err = resultsManager.WriteResultsFile(filepath)
+				if err != nil {
+					glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
 				}
 			} else {
-				glog.Infof("No benchmarking output dir provided, skipping writing benchmarking resulsts.")
+				glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results file.")
+				glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
 			}
 		})
-
 	})
 })
diff --git a/pkg/benchmark/pod.go b/pkg/benchmark/pod.go
index 5c1461c111..b6e6ceba00 100644
--- a/pkg/benchmark/pod.go
+++ b/pkg/benchmark/pod.go
@@ -1,5 +1,5 @@
 /*
-Copyright 2021 The Kubernetes Authors.
+Copyright 2022 The Kubernetes Authors.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@ package benchmark
 
 import (
 	"path"
+	"time"
 
 	"github.com/golang/glog"
 	"github.com/kubernetes-sigs/cri-tools/pkg/framework"
@@ -26,19 +27,8 @@ import (
 	"github.com/onsi/gomega/gmeasure"
 	internalapi "k8s.io/cri-api/pkg/apis"
 	runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
-
-	"encoding/json"
-	"io/ioutil"
-)
-
-const (
-	defaultOperationTimes int = 20
 )
 
-type ExperimentData struct {
-	CreatePod, StatusPod, StopPod, RemovePod []int64
-}
-
 var _ = framework.KubeDescribe("PodSandbox", func() {
 	f := framework.NewDefaultCRIFramework()
 
@@ -50,11 +40,36 @@ var _ = framework.KubeDescribe("PodSandbox", func() {
 
 	Context("benchmark about operations on PodSandbox", func() {
 		It("benchmark about lifecycle of PodSandbox", func() {
+			// Setup sampling config from TestContext:
+			samplingConfig := gmeasure.SamplingConfig{
+				N:           framework.TestContext.BenchmarkingParams.PodsNumber,
+				NumParallel: framework.TestContext.BenchmarkingParams.PodsNumberParallel,
+			}
+			if samplingConfig.N < 1 {
+				samplingConfig.N = 1
+			}
+			if samplingConfig.NumParallel < 1 {
+				samplingConfig.NumParallel = 1
+			}
+
+			// Setup results reporting channel:
+			resultsSet := LifecycleBenchmarksResultsSet{
+				OperationsNames: []string{"CreatePod", "StatusPod", "StopPod", "RemovePod"},
+				NumParallel:     samplingConfig.NumParallel,
+				Datapoints:      make([]LifecycleBenchmarkDatapoint, 0),
+			}
+			resultsManager := NewLifecycleBenchmarksResultsManager(
+				resultsSet,
+				60,
+			)
+			resultsChannel := resultsManager.StartResultsConsumer()
+
 			experiment := gmeasure.NewExperiment("PodLifecycle")
 			experiment.Sample(func(idx int) {
+				var lastStartTime, lastEndTime int64
 				var podID string
 				var err error
+				durations := make([]int64, len(resultsSet.OperationsNames))
 
 				podSandboxName := "PodSandbox-for-creating-performance-test-" + framework.NewUUID()
 				uid := framework.DefaultUIDPrefix + framework.NewUUID()
@@ -67,54 +82,62 @@ var _ = framework.KubeDescribe("PodSandbox", func() {
 				}
 
 				By("Creating a pod")
-				stopwatch := experiment.NewStopwatch()
+				startTime := time.Now().UnixNano()
+				lastStartTime = startTime
 				podID, err = c.RunPodSandbox(config, framework.TestContext.RuntimeHandler)
-				stopwatch.Record("CreatePod")
+				lastEndTime = time.Now().UnixNano()
+				durations[0] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to create PodSandbox: %v", err)
 
 				By("Get Pod status")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				_, err = c.PodSandboxStatus(podID)
-				stopwatch.Record("StatusPod")
+				lastEndTime = time.Now().UnixNano()
+				durations[1] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to get PodStatus: %v", err)
 
 				By("Stop PodSandbox")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				err = c.StopPodSandbox(podID)
-				stopwatch.Record("StopPod")
+				lastEndTime = time.Now().UnixNano()
+				durations[2] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to stop PodSandbox: %v", err)
 
 				By("Remove PodSandbox")
-				stopwatch.Reset()
+				lastStartTime = time.Now().UnixNano()
 				err = c.RemovePodSandbox(podID)
-				stopwatch.Record("RemovePod")
+				lastEndTime = time.Now().UnixNano()
+				durations[3] = lastEndTime - lastStartTime
 				framework.ExpectNoError(err, "failed to remove PodSandbox: %v", err)
-			}, gmeasure.SamplingConfig{N: framework.TestContext.BenchmarkingParams.PodsNumber, NumParallel: framework.TestContext.BenchmarkingParams.PodsNumberParallel})
+
+				res := LifecycleBenchmarkDatapoint{
+					SampleIndex:           idx,
+					StartTime:             startTime,
+					EndTime:               lastEndTime,
+					OperationsDurationsNs: durations,
+					MetaInfo:              map[string]string{"podId": podID, "podSandboxName": podSandboxName},
+				}
+				resultsChannel <- &res
+
+			}, samplingConfig)
 
-			data := ExperimentData{
-				CreatePod: getNanosecondsForDurations(experiment.Get("CreatePod").Durations),
-				StatusPod: getNanosecondsForDurations(experiment.Get("StatusPod").Durations),
-				StopPod:   getNanosecondsForDurations(experiment.Get("StopPod").Durations),
-				RemovePod: getNanosecondsForDurations(experiment.Get("RemovePod").Durations),
+			// Send nil and give the manager a minute to process any already-queued results:
+			resultsChannel <- nil
+			err := resultsManager.AwaitAllResults(60)
+			if err != nil {
+				glog.Errorf("Results manager failed to await all results: %s", err)
 			}
 
 			if framework.TestContext.BenchmarkingOutputDir != "" {
-				filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "pod_benchmark_data.json")
-				data, err := json.MarshalIndent(data, "", " ")
-				if err == nil {
-					err = ioutil.WriteFile(filepath, data, 0644)
-					if err != nil {
-						glog.Errorf("Failed to write container benchmark data: %v", filepath)
-					}
-				} else {
-					glog.Errorf("Failed to serialize benchmark data: %v", err)
+				filepath := path.Join(framework.TestContext.BenchmarkingOutputDir, "newf_pod_benchmark_data.json")
+				err = resultsManager.WriteResultsFile(filepath)
+				if err != nil {
+					glog.Errorf("Error occurred while writing benchmark results to file %s: %s", filepath, err)
 				}
 			} else {
-				glog.Infof("No benchmarking out dir provided, skipping writing benchmarking resulsts.")
+				glog.Infof("No benchmarking output dir provided, skipping writing benchmarking results.")
+				glog.Infof("Benchmark results were: %+v", resultsManager.resultsSet)
 			}
 		})
 	})
-
 })
diff --git a/pkg/benchmark/util.go b/pkg/benchmark/util.go
new file mode 100644
index 0000000000..8ff01268f6
--- /dev/null
+++ b/pkg/benchmark/util.go
@@ -0,0 +1,177 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package benchmark
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"time"
+
+	"github.com/golang/glog"
+)
+
+// LifecycleBenchmarkDatapoint encodes a single benchmark result for a lifecycle operation.
+// It contains a slice of int64s which represent the durations in nanoseconds of the
+// operations which comprise the lifecycle being benchmarked
+// (e.g. the individual CRUD operations which are cycled through during the benchmark).
+type LifecycleBenchmarkDatapoint struct {
+	// Integer index of the sample.
+	SampleIndex int `json:"sampleIndex"`
+
+	// Int64 nanosecond timestamp of the start of the result.
+	StartTime int64 `json:"startTime"`
+
+	// Int64 nanosecond timestamp of the end of the result.
+	EndTime int64 `json:"endTime"`
+
+	// Slice of int64s representing the durations of the individual operations.
+	// The operation durations should be in the order they were executed in.
+	// Note that the sum of OperationsDurationsNs need not be exactly equal to the duration
+	// determined by subtracting StartTime from EndTime, as there may be additional steps
+	// (e.g. timer setup) performed between the individual operations.
+	OperationsDurationsNs []int64 `json:"operationsDurationsNs"`
+
+	// String mapping for adding arbitrary meta-info for the lifecycle result:
+	MetaInfo map[string]string `json:"metaInfo"`
+}
+
+// LifecycleBenchmarksResultsSet houses results for benchmarks involving resource lifecycles
+// which include multiple benchmarked iterations of the cycle.
+type LifecycleBenchmarksResultsSet struct {
+	// Slice of string operation names which represent one cycle.
+	OperationsNames []string `json:"operationsNames"`
+
+	// The maximum number of lifecycles which were benchmarked in parallel.
+	// Anything <= 1 should be considered sequential.
+	NumParallel int `json:"numParallel"`
+
+	// List of datapoints for each lifecycle benchmark.
+	Datapoints []LifecycleBenchmarkDatapoint `json:"datapoints"`
+}
+
+// LifecycleBenchmarksResultsManager tracks lifecycle benchmark results through channels.
+type LifecycleBenchmarksResultsManager struct {
+	// The LifecycleBenchmarksResultsSet where results are added.
+	resultsSet LifecycleBenchmarksResultsSet
+
+	// Channel for sending results to the manager.
+	resultsChannel chan *LifecycleBenchmarkDatapoint
+
+	// Channel to indicate when the results consumer goroutine has ended.
+	resultsOverChannel chan bool
+
+	// Flag to indicate whether the results consumer goroutine is running.
+	resultsConsumerRunning bool
+
+	// The maximum timeout in seconds to wait between individual results being received.
+	resultsChannelTimeoutSeconds int
+}
+
+// NewLifecycleBenchmarksResultsManager instantiates a new LifecycleBenchmarksResultsManager
+// and its internal channels/structures.
+func NewLifecycleBenchmarksResultsManager(initialResultsSet LifecycleBenchmarksResultsSet, resultsChannelTimeoutSeconds int) *LifecycleBenchmarksResultsManager {
+	lbrm := LifecycleBenchmarksResultsManager{
+		resultsSet:                   initialResultsSet,
+		resultsChannelTimeoutSeconds: resultsChannelTimeoutSeconds,
+		resultsChannel:               make(chan *LifecycleBenchmarkDatapoint),
+		resultsOverChannel:           make(chan bool),
+	}
+	if lbrm.resultsSet.Datapoints == nil {
+		lbrm.resultsSet.Datapoints = make([]LifecycleBenchmarkDatapoint, 0)
+	}
+	return &lbrm
+}
+
+// awaitResult continuously consumes results from the resultsChannel until receiving a nil.
+func (lbrm *LifecycleBenchmarksResultsManager) awaitResult() {
+	numOperations := len(lbrm.resultsSet.OperationsNames)
+	for {
+		var res *LifecycleBenchmarkDatapoint
+		timeout := time.After(time.Duration(lbrm.resultsChannelTimeoutSeconds) * time.Second)
+
+		select {
+		case res = <-lbrm.resultsChannel:
+			// Receiving nil indicates results are over:
+			if res == nil {
+				glog.Info("Results ended.")
+				lbrm.resultsConsumerRunning = false
+				lbrm.resultsOverChannel <- true
+				return
+			}
+
+			// Warn if an improper number of results was received:
+			if len(res.OperationsDurationsNs) != numOperations {
+				glog.Warningf("Received improper number of datapoints for operations %+v: %+v", lbrm.resultsSet.OperationsNames, res.OperationsDurationsNs)
+			}
+
+			// Register the result:
+			lbrm.resultsSet.Datapoints = append(lbrm.resultsSet.Datapoints, *res)
+
+		case <-timeout:
+			err := fmt.Errorf("Timed out after waiting %d seconds for new results.", lbrm.resultsChannelTimeoutSeconds)
+			glog.Error(err)
+			panic(err)
+		}
+	}
+}
+
+// StartResultsConsumer starts the results consumer goroutine and returns the channel to
+// write results to. A nil value must be sent after all other results were sent to indicate
+// the end of the result stream.
+func (lbrm *LifecycleBenchmarksResultsManager) StartResultsConsumer() chan *LifecycleBenchmarkDatapoint {
+	if !lbrm.resultsConsumerRunning {
+		lbrm.resultsConsumerRunning = true
+		go lbrm.awaitResult()
+	}
+	return lbrm.resultsChannel
+}
+
+// AwaitAllResults waits for the results consumer goroutine to finish processing all sent
+// results, returning an error if it times out after the given number of seconds.
+func (lbrm *LifecycleBenchmarksResultsManager) AwaitAllResults(timeoutSeconds int) error {
+	if !lbrm.resultsConsumerRunning {
+		return nil
+	}
+
+	timeout := time.After(time.Duration(timeoutSeconds) * time.Second)
+	select {
+	case <-lbrm.resultsOverChannel:
+		lbrm.resultsConsumerRunning = false
+		return nil
+	case <-timeout:
+		glog.Warningf("Failed to await all results. Results registered so far were: %+v", lbrm.resultsSet)
+		return fmt.Errorf("Benchmark results waiting timed out after %d seconds.", timeoutSeconds)
+	}
+}
+
+// WriteResultsFile saves the results gathered so far as JSON under the given filepath.
+func (lbrm *LifecycleBenchmarksResultsManager) WriteResultsFile(filepath string) error {
+	if lbrm.resultsConsumerRunning {
+		return fmt.Errorf("Results consumer is still running and expecting results.")
+	}
+
+	data, err := json.MarshalIndent(lbrm.resultsSet, "", " ")
+	if err == nil {
+		err = ioutil.WriteFile(filepath, data, 0644)
+		if err != nil {
+			return fmt.Errorf("Failed to write benchmark results to file %s: %v", filepath, err)
+		}
+	} else {
+		return fmt.Errorf("Failed to serialize benchmark data: %v", err)
+	}
+
+	return nil
+}
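For reference, a minimal usage sketch (not part of the patch) of the `LifecycleBenchmarksResultsManager` API added above, following the same flow as the pod and container benchmarks: start the consumer, send one datapoint per sampled lifecycle, terminate the stream with `nil`, then await the consumer and write the JSON results. The operation names, durations, and output path below are placeholder values.

```go
package main

import (
	"time"

	"github.com/kubernetes-sigs/cri-tools/pkg/benchmark"
)

func main() {
	// Describe one benchmarked lifecycle: the ordered operation names and the parallelism used.
	resultsSet := benchmark.LifecycleBenchmarksResultsSet{
		OperationsNames: []string{"CreatePod", "StatusPod", "StopPod", "RemovePod"},
		NumParallel:     1,
	}
	manager := benchmark.NewLifecycleBenchmarksResultsManager(resultsSet, 60)
	resultsChannel := manager.StartResultsConsumer()

	// Each sampled lifecycle sends a single datapoint with its per-operation durations.
	start := time.Now().UnixNano()
	durations := []int64{1000, 2000, 3000, 4000} // placeholder nanosecond durations
	resultsChannel <- &benchmark.LifecycleBenchmarkDatapoint{
		SampleIndex:           0,
		StartTime:             start,
		EndTime:               time.Now().UnixNano(),
		OperationsDurationsNs: durations,
		MetaInfo:              map[string]string{"podId": "example-pod-id"},
	}

	// A nil datapoint ends the stream; then wait for the consumer and write the JSON file.
	resultsChannel <- nil
	if err := manager.AwaitAllResults(60); err != nil {
		panic(err)
	}
	if err := manager.WriteResultsFile("/tmp/example_pod_benchmark_data.json"); err != nil {
		panic(err)
	}
}
```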