-
Notifications
You must be signed in to change notification settings - Fork 107
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #2687 from oasislabs/matevz/feature/bench
benchmarking utilities
- Loading branch information
Showing
66 changed files
with
2,870 additions
and
805 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,147 @@ | ||
# Copied from pipeline.yml. | ||
docker_plugin_default_config: &docker_plugin_default_config | ||
image: "oasislabs/testing:0.3.0" | ||
always_pull: true | ||
workdir: /workdir | ||
volumes: | ||
- /var/lib/buildkite-agent/.coveralls:/root/.coveralls | ||
- /var/lib/buildkite-agent/.codecov:/root/.codecov | ||
# Shared Rust incremental compile caches. | ||
- /var/tmp/cargo_ic/debug:/var/tmp/artifacts/debug/incremental | ||
- /var/tmp/cargo_ic/debug_sgx:/var/tmp/artifacts/x86_64-unknown-linux-sgx/debug/incremental | ||
# Shared Rust package checkouts directory. | ||
- /var/tmp/cargo_pkg/git:/root/.cargo/git | ||
- /var/tmp/cargo_pkg/registry:/root/.cargo/registry | ||
# Shared Rust SGX standard library artifacts cache. | ||
- /var/tmp/xargo_cache:/root/.xargo | ||
# Shared Go package checkouts directory. | ||
- /var/tmp/go_pkg:/root/go/pkg | ||
# Intel SGX Application Enclave Services Manager (AESM) daemon running on | ||
# the Buildkite host. | ||
- /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket | ||
- /var/tmp/benchmarks:/var/tmp/benchmarks | ||
environment: | ||
- "LC_ALL=C.UTF-8" | ||
- "LANG=C.UTF-8" | ||
- "CARGO_TARGET_DIR=/var/tmp/artifacts" | ||
- "CARGO_INSTALL_ROOT=/root/.cargo" | ||
- "GOPROXY=https://proxy.golang.org/" | ||
- "BUILDKITE_PIPELINE_NAME" | ||
- "BUILDKITE_BUILD_NUMBER" | ||
- "BUILDKITE_BUILD_URL" | ||
- "TESTS" | ||
- "NUM_RUNS" | ||
- "SLACK_WEBHOOK_URL" | ||
- "METRICS_PUSH_ADDR" | ||
- "METRICS_QUERY_ADDR" | ||
- "METRICS_SOURCE_GIT_BRANCH" | ||
- "METRICS_TARGET_GIT_BRANCH" | ||
- "METRICS_THRESHOLDS" | ||
propagate-environment: true | ||
unconfined: true | ||
|
||
docker_plugin: &docker_plugin | ||
oasislabs/docker#v3.0.1-oasis1: | ||
<<: *docker_plugin_default_config | ||
|
||
steps: | ||
############################################################### | ||
# The following three steps are copied from code.pipeline.yml # | ||
############################################################### | ||
############ | ||
# Build jobs | ||
############ | ||
- label: Build Go node | ||
command: | ||
- .buildkite/go/build.sh | ||
|
||
# Upload the built artifacts. | ||
- cd /workdir/go/oasis-node | ||
- buildkite-agent artifact upload oasis-node | ||
- buildkite-agent artifact upload oasis-node.test | ||
- cd /workdir/go/oasis-test-runner | ||
- buildkite-agent artifact upload oasis-test-runner | ||
- buildkite-agent artifact upload oasis-test-runner.test | ||
- cd /workdir/go/oasis-net-runner | ||
- buildkite-agent artifact upload oasis-net-runner | ||
- cd /workdir/go/oasis-remote-signer | ||
- buildkite-agent artifact upload oasis-remote-signer | ||
plugins: | ||
<<: *docker_plugin | ||
|
||
- label: Build Rust runtime loader | ||
command: | ||
- .buildkite/rust/build_generic.sh /workdir -p oasis-core-runtime-loader | ||
- .buildkite/rust/build_generic.sh /workdir -p test-long-term-client | ||
- .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-client | ||
- .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-enc-client | ||
- .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-ops-client | ||
|
||
# Upload the built artifacts. | ||
- cd /var/tmp/artifacts/default/debug | ||
- buildkite-agent artifact upload oasis-core-runtime-loader | ||
# Clients for E2E tests. | ||
- buildkite-agent artifact upload test-long-term-client | ||
- buildkite-agent artifact upload simple-keyvalue-client | ||
- buildkite-agent artifact upload simple-keyvalue-enc-client | ||
- buildkite-agent artifact upload simple-keyvalue-ops-client | ||
agents: | ||
buildkite_agent_size: large | ||
plugins: | ||
<<: *docker_plugin | ||
|
||
#################### | ||
# Runtime build jobs | ||
#################### | ||
- label: Build key manager runtime | ||
command: | ||
- .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager | ||
- .buildkite/rust/build_runtime.sh tests/runtimes/simple-keyvalue | ||
|
||
# Upload the built artifacts. | ||
- cd /var/tmp/artifacts/sgx/x86_64-fortanix-unknown-sgx/debug | ||
- buildkite-agent artifact upload simple-keymanager.sgxs | ||
- buildkite-agent artifact upload simple-keyvalue.sgxs | ||
- cd /var/tmp/artifacts/default/debug | ||
- buildkite-agent artifact upload simple-keymanager | ||
- buildkite-agent artifact upload simple-keyvalue | ||
agents: | ||
buildkite_agent_size: large | ||
plugins: | ||
<<: *docker_plugin | ||
|
||
# Wait for all jobs defined before this point | ||
# to finish running in parallel before continuing. | ||
- wait | ||
|
||
######################################### | ||
# E2E test jobs with enabled benchmarking | ||
######################################### | ||
- label: E2E tests | ||
parallelism: 7 | ||
timeout_in_minutes: 30 | ||
command: | ||
- .buildkite/scripts/download_e2e_test_artifacts.sh | ||
- rm -rf /var/tmp/benchmarks/* | ||
- .buildkite/scripts/test_e2e.sh --metrics.address $METRICS_PUSH_ADDR --metrics.interval 5s --metrics.labels instance=\$BUILDKITE_PIPELINE_NAME-\$BUILDKITE_BUILD_NUMBER --num_runs $NUM_RUNS -t $TESTS | ||
env: | ||
TEST_BASE_DIR: /var/tmp/benchmarks | ||
agents: | ||
buildkite_agent_size: large | ||
buildkite_agent_class: stable | ||
plugins: | ||
<<: *docker_plugin | ||
|
||
# Wait for all jobs defined before this point | ||
# to finish running in parallel before continuing. | ||
- wait | ||
|
||
########################### | ||
# Compare benchmark results | ||
########################### | ||
- label: Benchmark analysis | ||
command: | ||
- .buildkite/scripts/download_e2e_test_artifacts.sh | ||
- .buildkite/scripts/daily_benchmark_analysis.sh | ||
plugins: | ||
<<: *docker_plugin |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/bin/bash

# This script compares all metrics of the last benchmark batch from the feature
# branch to the last batch of the master branch. If any thresholds are
# exceeded, the issue is reported to the Slack channel and a non-zero exit code
# is returned.
#
# Script should be invoked from .buildkite/benchmarks.pipeline.yml. Required
# env variables:
#   BUILDKITE_BUILD_URL - URL for seeing detailed testing and comparison log (e.g. https://buildkite.com/oasislabs/oasis-core-daily-benchmarks/builds/xx)
#   BUILDKITE_PIPELINE_NAME - name of the pipeline, used in the Slack report
#   METRICS_QUERY_ADDR - address of Prometheus server (e.g. http://localhost:9090)
#   METRICS_SOURCE_GIT_BRANCH - name of feature branch on git (e.g. jsmith/feature/abc)
#   METRICS_TARGET_GIT_BRANCH - name of master branch on git (e.g. master)
#   METRICS_THRESHOLDS - max or min thresholds flags (e.g. --max_threshold.cpu.avg_ratio 1.05)
#   TESTS - names of test(s) to compare (e.g. e2e/runtime/runtime)
#   SLACK_WEBHOOK_URL - slack webhook for reporting (e.g. https://hooks.slack.com/services/xxxxxx)

# No `-e` on purpose: the exit code of the comparison is inspected manually
# below, and `grep` finding no matches must not abort the script.
set -ux

# Reads text from stdin and writes it escaped for use inside a JSON string
# literal: carriage returns are dropped, tabs become spaces, backslashes and
# double quotes are backslash-escaped, and lines are joined with a literal \n.
json_escape() {
  tr -d '\r' | tr '\t' ' ' |
    sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' |
    awk 'NR > 1 { printf "\\n" } { printf "%s", $0 }'
}

# METRICS_THRESHOLDS is intentionally left unquoted: it holds several
# whitespace-separated flags that must be split into separate arguments.
./go/oasis-test-runner/oasis-test-runner cmp \
  --metrics.address "$METRICS_QUERY_ADDR" \
  --metrics.source.git_branch "$METRICS_SOURCE_GIT_BRANCH" \
  --metrics.target.git_branch "$METRICS_TARGET_GIT_BRANCH" \
  -t "$TESTS" \
  --log.level INFO \
  --log.format JSON \
  $METRICS_THRESHOLDS \
  >out.txt 2>&1
CMP_RETURN_CODE=$?

# Show stdout and stderr in logs for debugging.
cat out.txt

if [ "$CMP_RETURN_CODE" -ne 0 ]; then
  # Collect the lines mentioning errors, escaped so that they can be embedded
  # in the JSON payload (the log itself is JSON and contains quotes and
  # backslashes).
  CMP_ERROR_LINES=$(grep error out.txt | json_escape)

  SUMMARY=$(printf '%s' "$BUILDKITE_PIPELINE_NAME for branch \`$METRICS_SOURCE_GIT_BRANCH\` failed. Visit $BUILDKITE_BUILD_URL for details." | json_escape)

  PAYLOAD="{\"text\": \"$SUMMARY\", \"attachments\":[{\"title\":\"Relevant error lines\",\"text\":\"$CMP_ERROR_LINES\"}]}"

  # Disable command tracing so that the secret webhook URL is not printed
  # to the build log.
  { set +x; } 2>/dev/null

  # Post error to slack channel.
  curl -H "Content-Type: application/json" \
    -X POST \
    --data "$PAYLOAD" \
    "$SLACK_WEBHOOK_URL"

  # Exit with non-zero exit code, so that the buildkite build will be
  # marked as failed.
  exit 1
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
`oasis-node`: Refactor `metrics` parameters | ||
|
||
- `--metrics.push.job_name` renamed to `--metrics.job_name`. | ||
- `--metrics.push.interval` renamed to `--metrics.interval`. | ||
- `--metrics.push.instance_label` replaced with more general | ||
`--metrics.labels` map parameter where `instance` is a required key, if | ||
metrics are enabled. For example `--metrics.push.instance_label abc` now | ||
becomes `--metrics.labels instance=abc`. Users can also set other | ||
arbitrary Prometheus labels, for example | ||
`--metrics.labels instance=abc,cpu=intel_i7-8750`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
`oasis-node`: Add benchmarking utilities | ||
|
||
- New Prometheus metrics for: | ||
- datadir space usage, | ||
- I/O (read/written bytes), | ||
- memory usage (VMSize, RssAnon, RssFile, RssShmem), | ||
- CPU (utime and stime), | ||
- network interfaces (rx/tx bytes/packets), | ||
- Bumps `prometheus/client_golang` to the latest version, which fixes sending | ||
label values containing non-URL characters. | ||
- Bumps `spf13/viper` which fixes `IsSet()` behavior. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
`oasis-test-runner`: Add benchmarking utilities | ||
|
||
- `oasis-test-runner` now accepts `--metrics.address` and `--metrics.interval` | ||
parameters which are forwarded to `oasis-node` workers. | ||
- `oasis-test-runner` now signals `oasis_up` metric to Prometheus when a test | ||
starts and when it finishes. | ||
- `--num_runs` parameter added which specifies how many times each test should | ||
be run. | ||
- `basic` E2E test was renamed to `runtime`. | ||
- Scenario names now use the corresponding namespace, e.g. `halt-restore` is | ||
now `e2e/runtime/halt-restore`. | ||
- Scenario parameters are now exposed and settable via CLI by reimplementing | ||
`scenario.Parameters()` and setting it with `--<test_name>.<param>=<val>`. | ||
- Scenario parameters can also be generally set, for example | ||
`--e2e.node.binary` will set `node.binary` parameter for all E2E tests and | ||
`--e2e/runtime.node.binary` will set it for tests which inherit `runtime`. | ||
- Multiple parameter values can be provided in form | ||
`--<test_name>.<param>=<val1>,<val2>,...`. In this case, `oasis-test-runner` | ||
combines them with other parameters and generates unique parameter sets for | ||
each test. | ||
- Each scenario is run in a unique datadir per parameter set of form | ||
`oasis-test-runnerXXXXXX/<test_name>/<run_id>`. | ||
- Due to the very long datadir paths of some E2E tests, custom internal gRPC | ||
socket names are provided to `oasis-node`. | ||
- If metrics are enabled, new labels are passed to oasis-nodes and pushed to | ||
Prometheus for each test: | ||
- `instance`, | ||
- `run`, | ||
- `test`, | ||
- `software_version`, | ||
- `git_branch`, | ||
- whole test-specific parameter set. | ||
- New `version.GitBranch` variable determined and set during compilation. | ||
- Current parameter set, run number, and test name dumped to `test_info.json` | ||
in corresponding datadir. This is useful when packing whole datadir for | ||
external debugging. | ||
- New `cmp` command for analyzing benchmark results has been added which | ||
fetches the last two batches of benchmark results from Prometheus and | ||
compares them. For more information, see `README.md` in | ||
`go/oasis-test-runner` folder. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
`oasis-node`: Add custom internal socket path flag (for E2E tests only!) | ||
|
||
`--debug.grpc.internal.socket_name` flag was added which forces `oasis-node` | ||
to use the given path for the internal gRPC socket. This was necessary, | ||
because some E2E test names became very lengthy and original datadir exceeded | ||
the maximum unix socket path length. `oasis-test-runner` now generates | ||
shorter socket names in `/tmp/oasis-test-runnerXXXXXX` directory and provides | ||
them to `oasis-node`. **Due to security risks never ever use this flag in | ||
production-like environments. Internal gRPC sockets should always reside in | ||
node datadir!** |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
ci: New benchmarks pipeline has been added | ||
|
||
`benchmarks.pipeline.yml` runs all E2E tests and compares their benchmark | ||
results with those of the previous batch using the new `oasis-test-runner cmp` command. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.