Skip to content

Commit

Permalink
Merge pull request #2687 from oasislabs/matevz/feature/bench
Browse files Browse the repository at this point in the history
benchmarking utilities
  • Loading branch information
matevz committed Apr 30, 2020
2 parents 031766e + 903eef1 commit 40119d9
Show file tree
Hide file tree
Showing 66 changed files with 2,870 additions and 805 deletions.
147 changes: 147 additions & 0 deletions .buildkite/benchmarks.pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Default settings for the Docker plugin, shared by every step of this
# pipeline through the YAML anchor below. Copied from pipeline.yml.
docker_plugin_default_config: &docker_plugin_default_config
  image: "oasislabs/testing:0.3.0"
  always_pull: true
  workdir: /workdir
  volumes:
    - /var/lib/buildkite-agent/.coveralls:/root/.coveralls
    - /var/lib/buildkite-agent/.codecov:/root/.codecov
    # Rust incremental compilation caches, shared between builds.
    - /var/tmp/cargo_ic/debug:/var/tmp/artifacts/debug/incremental
    - /var/tmp/cargo_ic/debug_sgx:/var/tmp/artifacts/x86_64-unknown-linux-sgx/debug/incremental
    # Rust package checkouts, shared between builds.
    - /var/tmp/cargo_pkg/git:/root/.cargo/git
    - /var/tmp/cargo_pkg/registry:/root/.cargo/registry
    # Cache of Rust SGX standard library artifacts, shared between builds.
    - /var/tmp/xargo_cache:/root/.xargo
    # Go package checkouts, shared between builds.
    - /var/tmp/go_pkg:/root/go/pkg
    # Socket of the Intel SGX Application Enclave Services Manager (AESM)
    # daemon running on the Buildkite host.
    - /var/run/aesmd/aesm.socket:/var/run/aesmd/aesm.socket
    # Host directory mounted at the same path inside the container; the E2E
    # step uses it as its test base directory.
    - /var/tmp/benchmarks:/var/tmp/benchmarks
  environment:
    - "LC_ALL=C.UTF-8"
    - "LANG=C.UTF-8"
    - "CARGO_TARGET_DIR=/var/tmp/artifacts"
    - "CARGO_INSTALL_ROOT=/root/.cargo"
    - "GOPROXY=https://proxy.golang.org/"
    # NOTE(review): the entries below carry no value; presumably the plugin
    # passes them through from the agent's environment -- confirm against the
    # oasislabs/docker plugin documentation.
    - "BUILDKITE_PIPELINE_NAME"
    - "BUILDKITE_BUILD_NUMBER"
    - "BUILDKITE_BUILD_URL"
    - "TESTS"
    - "NUM_RUNS"
    - "SLACK_WEBHOOK_URL"
    - "METRICS_PUSH_ADDR"
    - "METRICS_QUERY_ADDR"
    - "METRICS_SOURCE_GIT_BRANCH"
    - "METRICS_TARGET_GIT_BRANCH"
    - "METRICS_THRESHOLDS"
  propagate-environment: true
  unconfined: true

# Plugin mapping that each step merges into its `plugins` key. It pins the
# plugin version and pulls in the shared default configuration.
docker_plugin: &docker_plugin
  oasislabs/docker#v3.0.1-oasis1:
    <<: *docker_plugin_default_config

steps:
###############################################################
# The following three steps are copied from code.pipeline.yml #
###############################################################
############
# Build jobs
############
# Builds the Go binaries and publishes them as Buildkite artifacts.
- label: Build Go node
  command:
    - .buildkite/go/build.sh

    # Publish the freshly built binaries, one source directory at a time.
    - cd /workdir/go/oasis-node
    - buildkite-agent artifact upload oasis-node
    - buildkite-agent artifact upload oasis-node.test
    - cd /workdir/go/oasis-test-runner
    - buildkite-agent artifact upload oasis-test-runner
    - buildkite-agent artifact upload oasis-test-runner.test
    - cd /workdir/go/oasis-net-runner
    - buildkite-agent artifact upload oasis-net-runner
    - cd /workdir/go/oasis-remote-signer
    - buildkite-agent artifact upload oasis-remote-signer
  plugins:
    <<: *docker_plugin

# Builds the Rust runtime loader and the test client packages, then publishes
# the resulting binaries as Buildkite artifacts.
- label: Build Rust runtime loader
  command:
    - .buildkite/rust/build_generic.sh /workdir -p oasis-core-runtime-loader
    - .buildkite/rust/build_generic.sh /workdir -p test-long-term-client
    - .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-client
    - .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-enc-client
    - .buildkite/rust/build_generic.sh /workdir -p simple-keyvalue-ops-client

    # Publish the binaries from the debug output directory.
    - cd /var/tmp/artifacts/default/debug
    - buildkite-agent artifact upload oasis-core-runtime-loader
    # Client binaries needed by the E2E tests.
    - buildkite-agent artifact upload test-long-term-client
    - buildkite-agent artifact upload simple-keyvalue-client
    - buildkite-agent artifact upload simple-keyvalue-enc-client
    - buildkite-agent artifact upload simple-keyvalue-ops-client
  agents:
    buildkite_agent_size: large
  plugins:
    <<: *docker_plugin

####################
# Runtime build jobs
####################
# Despite its label, this step builds two test runtimes: simple-keymanager
# and simple-keyvalue.
- label: Build key manager runtime
  command:
    - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager
    - .buildkite/rust/build_runtime.sh tests/runtimes/simple-keyvalue

    # Publish the SGX enclave images (.sgxs) first, then the binaries from the
    # default debug output directory.
    - cd /var/tmp/artifacts/sgx/x86_64-fortanix-unknown-sgx/debug
    - buildkite-agent artifact upload simple-keymanager.sgxs
    - buildkite-agent artifact upload simple-keyvalue.sgxs
    - cd /var/tmp/artifacts/default/debug
    - buildkite-agent artifact upload simple-keymanager
    - buildkite-agent artifact upload simple-keyvalue
  agents:
    buildkite_agent_size: large
  plugins:
    <<: *docker_plugin

# Wait for all jobs defined before this point
# to finish running in parallel before continuing.
- wait

#########################################
# E2E test jobs with enabled benchmarking
#########################################
# Runs the E2E tests with metrics reporting enabled, split across 7 parallel
# jobs.
- label: E2E tests
  parallelism: 7
  timeout_in_minutes: 30
  command:
    - .buildkite/scripts/download_e2e_test_artifacts.sh
    # Clear out whatever is currently in the benchmarks directory.
    - rm -rf /var/tmp/benchmarks/*
    # NOTE(review): the `\$`-escaped variables are presumably left for the
    # shell to expand at run time, whereas the unescaped ones are interpolated
    # when the pipeline is uploaded -- confirm against the Buildkite docs.
    - .buildkite/scripts/test_e2e.sh --metrics.address $METRICS_PUSH_ADDR --metrics.interval 5s --metrics.labels instance=\$BUILDKITE_PIPELINE_NAME-\$BUILDKITE_BUILD_NUMBER --num_runs $NUM_RUNS -t $TESTS
  env:
    TEST_BASE_DIR: /var/tmp/benchmarks
  agents:
    buildkite_agent_size: large
    buildkite_agent_class: stable
  plugins:
    <<: *docker_plugin

# Wait for all jobs defined before this point
# to finish running in parallel before continuing.
- wait

###########################
# Compare benchmark results
###########################
# Fetches the build artifacts and runs the benchmark comparison script.
- label: Benchmark analysis
  command:
    - .buildkite/scripts/download_e2e_test_artifacts.sh
    - .buildkite/scripts/daily_benchmark_analysis.sh
  plugins:
    <<: *docker_plugin
6 changes: 0 additions & 6 deletions .buildkite/code.pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,6 @@ steps:
############
# Build jobs
############
# This label needs to be synced with runtime-ethereum's
# .buildkite/scripts/download_utils.sh.
- label: Build Go node
command:
- .buildkite/go/build.sh
Expand All @@ -110,8 +108,6 @@ steps:
plugins:
<<: *docker_plugin

# This label needs to be synced with runtime-ethereum's
# .buildkite/scripts/download_utils.sh.
- label: Build Rust runtime loader
command:
- .buildkite/rust/build_generic.sh /workdir -p oasis-core-runtime-loader
Expand All @@ -136,8 +132,6 @@ steps:
####################
# Runtime build jobs
####################
# This label needs to be synced with runtime-ethereum's
# .buildkite/rust/test_runtime_and_gateway.sh and .buildkite/scripts/download_utils.sh.
- label: Build key manager runtime
command:
- .buildkite/rust/build_runtime.sh tests/runtimes/simple-keymanager
Expand Down
47 changes: 47 additions & 0 deletions .buildkite/scripts/daily_benchmark_analysis.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash

# Compare all metrics of the last benchmark batch from the feature branch with
# the last batch of the master branch. If any threshold is exceeded, the
# failure is reported to the Slack channel and a non-zero exit code is
# returned.
#
# The script should be invoked from .buildkite/benchmarks.pipeline.yml.
# Required environment variables:
# BUILDKITE_BUILD_URL - URL for seeing detailed testing and comparison log (e.g. https://buildkite.com/oasislabs/oasis-core-daily-benchmarks/builds/xx)
# BUILDKITE_PIPELINE_NAME - name of the pipeline, used in the Slack message
# METRICS_QUERY_ADDR - address of Prometheus server (e.g. http://localhost:9090)
# METRICS_SOURCE_GIT_BRANCH - name of feature branch on git (e.g. jsmith/feature/abc)
# METRICS_TARGET_GIT_BRANCH - name of master branch on git (e.g. master)
# METRICS_THRESHOLDS - max or min thresholds flags (e.g. --max_threshold.cpu.avg_ratio 1.05)
# TESTS - names of test(s) to compare (e.g. e2e/runtime/runtime)
# SLACK_WEBHOOK_URL - slack webhook for reporting (e.g. https://hooks.slack.com/services/xxxxxx)

# -u: abort on unset variables; -x: trace commands in the build log.
# -e is intentionally NOT set: the exit code of `cmp` is inspected below so
# that the failure can be reported before the script exits.
set -ux

# METRICS_THRESHOLDS holds several flags and must be word-split, so it stays
# unquoted. TESTS is left unquoted as well to keep the original splitting
# behaviour for callers that pass more than one word.
# shellcheck disable=SC2086
./go/oasis-test-runner/oasis-test-runner cmp \
  --metrics.address "$METRICS_QUERY_ADDR" \
  --metrics.source.git_branch "$METRICS_SOURCE_GIT_BRANCH" \
  --metrics.target.git_branch "$METRICS_TARGET_GIT_BRANCH" \
  -t $TESTS \
  --log.level INFO \
  --log.format JSON \
  $METRICS_THRESHOLDS \
  >out.txt 2>&1
CMP_RETURN_CODE=$?

# Show stdout and stderr in logs for debugging.
cat out.txt

if [ "$CMP_RETURN_CODE" -ne 0 ]; then
  # Collect the error lines and turn them into the contents of a JSON string:
  #  1. drop control characters JSON does not allow and flatten tabs,
  #  2. escape backslashes BEFORE double quotes (the log lines are JSON and may
  #     already contain \" sequences),
  #  3. join the lines with a literal "\n" so no raw newline ends up inside
  #     the JSON string.
  CMP_ERROR_LINES=$(
    grep error out.txt \
      | tr -d '\000-\010\013-\037' \
      | tr '\t' ' ' \
      | sed -e 's/\\/\\\\/g' -e 's/"/\\"/g' \
      | awk 'BEGIN { ORS = "\\n" } { print }'
  )
  # Strip the trailing literal "\n" left by the join above.
  CMP_ERROR_LINES=${CMP_ERROR_LINES%\\n}

  # printf does not interpret backslashes in %s arguments, so the escaping
  # done above is preserved verbatim.
  SLACK_PAYLOAD=$(printf '{"text": "%s for branch `%s` failed. Visit %s for details.", "attachments":[{"title":"Relevant error lines","text":"%s"}]}' \
    "$BUILDKITE_PIPELINE_NAME" \
    "$METRICS_SOURCE_GIT_BRANCH" \
    "$BUILDKITE_BUILD_URL" \
    "$CMP_ERROR_LINES")

  # Stop tracing before the webhook URL is expanded: it is a secret and must
  # not be echoed into the build log.
  set +x

  # Post error to slack channel.
  curl -H "Content-Type: application/json" \
    -X POST \
    --data "$SLACK_PAYLOAD" \
    "$SLACK_WEBHOOK_URL"

  # Exit with non-zero exit code, so that the buildkite build will be
  # marked as failed.
  exit 1
fi
10 changes: 5 additions & 5 deletions .buildkite/scripts/test_e2e.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ ${test_runner_binary} \
${BUILDKITE:+--basedir ${TEST_BASE_DIR:-$PWD}/e2e} \
--basedir.no_cleanup \
--e2e.node.binary ${node_binary} \
--e2e.client.binary_dir ${WORKDIR}/target/default/debug \
--e2e.runtime.binary_dir ${WORKDIR}/target/${runtime_target}/debug \
--e2e.runtime.loader ${WORKDIR}/target/default/debug/oasis-core-runtime-loader \
--e2e.tee_hardware ${OASIS_TEE_HARDWARE:-""} \
--remote_signer.binary ${WORKDIR}/go/oasis-remote-signer/oasis-remote-signer \
--e2e/runtime.client.binary_dir ${WORKDIR}/target/default/debug \
--e2e/runtime.runtime.binary_dir ${WORKDIR}/target/${runtime_target}/debug \
--e2e/runtime.runtime.loader ${WORKDIR}/target/default/debug/oasis-core-runtime-loader \
--e2e/runtime.tee_hardware ${OASIS_TEE_HARDWARE:-""} \
--remote-signer.binary ${WORKDIR}/go/oasis-remote-signer/oasis-remote-signer \
--log.level info \
${BUILDKITE_PARALLEL_JOB_COUNT:+--parallel.job_count ${BUILDKITE_PARALLEL_JOB_COUNT}} \
${BUILDKITE_PARALLEL_JOB:+--parallel.job_index ${BUILDKITE_PARALLEL_JOB}} \
Expand Down
10 changes: 10 additions & 0 deletions .changelog/2687.breaking.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
`oasis-node`: Refactor `metrics` parameters

- `--metrics.push.job_name` renamed to `--metrics.job_name`.
- `--metrics.push.interval` renamed to `--metrics.interval`.
- `--metrics.push.instance_label` replaced with more general
`--metrics.labels` map parameter where `instance` is a required key, if
metrics are enabled. For example `--metrics.push.instance_label abc` now
becomes `--metrics.labels instance=abc`. User can also set other
arbitrary Prometheus labels, for example
`--metrics.labels instance=abc,cpu=intel_i7-8750`.
11 changes: 11 additions & 0 deletions .changelog/2687.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
`oasis-node`: Add benchmarking utilities

- New Prometheus metrics for:
  - datadir space usage,
  - I/O (read/written bytes),
  - memory usage (VMSize, RssAnon, RssFile, RssShmem),
  - CPU (utime and stime),
  - network interfaces (rx/tx bytes/packets).
- Bumps `prometheus/client_golang` to the latest version, which fixes sending
  label values containing non-URL characters.
- Bumps `spf13/viper`, which fixes `IsSet()` behavior.
40 changes: 40 additions & 0 deletions .changelog/2687.internal.1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
`oasis-test-runner`: Add benchmarking utilities

- `oasis-test-runner` now accepts `--metrics.address` and `--metrics.interval`
parameters which are forwarded to `oasis-node` workers.
- `oasis-test-runner` now signals `oasis_up` metric to Prometheus when a test
starts and when it finishes.
- `--num_runs` parameter added which specifies how many times each test should
be run.
- `basic` E2E test was renamed to `runtime`.
- Scenario names now use the corresponding namespace, e.g. `halt-restore` is
  now `e2e/runtime/halt-restore`.
- Scenario parameters are now exposed and settable via CLI by reimplementing
`scenario.Parameters()` and setting it with `--<test_name>.<param>=<val>`.
- Scenario parameters can also be generally set, for example
`--e2e.node.binary` will set `node.binary` parameter for all E2E tests and
`--e2e/runtime.node.binary` will set it for tests which inherit `runtime`.
- Multiple parameter values can be provided in form
`--<test_name>.<param>=<val1>,<val2>,...`. In this case, `oasis-test-runner`
combines them with other parameters and generates unique parameter sets for
each test.
- Each scenario is run in a unique datadir per parameter set of form
`oasis-test-runnerXXXXXX/<test_name>/<run_id>`.
- Due to very long datadir for some e2e tests, custom internal gRPC socket
names are provided to `oasis-node`.
- If metrics are enabled, new labels are passed to oasis-nodes and pushed to
Prometheus for each test:
- `instance`,
- `run`,
- `test`,
- `software_version`,
- `git_branch`,
- whole test-specific parameter set.
- New `version.GitBranch` variable determined and set during compilation.
- Current parameter set, run number, and test name dumped to `test_info.json`
in corresponding datadir. This is useful when packing whole datadir for
external debugging.
- New `cmp` command for analyzing benchmark results has been added which
fetches the last two batches of benchmark results from Prometheus and
compares them. For more information, see `README.md` in
`go/oasis-test-runner` folder.
10 changes: 10 additions & 0 deletions .changelog/2687.internal.2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
`oasis-node`: Add custom internal socket path flag (for E2E tests only!)

`--debug.grpc.internal.socket_name` flag was added which forces `oasis-node`
to use the given path for the internal gRPC socket. This was necessary,
because some E2E test names became very lengthy and original datadir exceeded
the maximum unix socket path length. `oasis-test-runner` now generates
shorter socket names in `/tmp/oasis-test-runnerXXXXXX` directory and provides
them to `oasis-node`. **Due to security risks, never ever use this flag in
production-like environments. Internal gRPC sockets should always reside in
the node datadir!**
4 changes: 4 additions & 0 deletions .changelog/2687.internal.3.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ci: New benchmarks pipeline has been added

`benchmarks.pipeline.yml` runs all E2E tests and compares the benchmark
results with those from the previous batch using the new
`oasis-test-runner cmp` command.
4 changes: 3 additions & 1 deletion common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ ifeq ($(and $(LATEST_TAG),$(IS_TAG)),NO)
endif
export VERSION

GIT_BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)

# Try to compute the next version based on the latest tag of the origin remote
# using the Punch tool.
# First, all tags from the origin remote are fetched. Next, the latest tag on
Expand Down Expand Up @@ -90,7 +92,7 @@ GOFLAGS ?= -trimpath -v

# Add Oasis Core's version as a linker string value definition.
ifneq ($(VERSION),)
export GOLDFLAGS ?= "-X github.com/oasislabs/oasis-core/go/common/version.SoftwareVersion=$(VERSION)"
export GOLDFLAGS ?= "-X github.com/oasislabs/oasis-core/go/common/version.SoftwareVersion=$(VERSION) -X github.com/oasislabs/oasis-core/go/common/version.GitBranch=$(GIT_BRANCH)"
endif

# Go build command to use by default.
Expand Down
2 changes: 2 additions & 0 deletions go/common/grpc/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ func (s *Server) Start() error {
)
return err
}
s.Logger.Info("gRPC server started", "network", cfg.network, "address", cfg.address)

s.startedListeners = append(s.startedListeners, ln)

go func() {
Expand Down
5 changes: 5 additions & 0 deletions go/common/version/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ var (
// by the linker.
SoftwareVersion = "0.0-unset"

// GitBranch is the name of the git branch of Oasis Core.
//
// This is mostly used for reporting and metrics.
GitBranch = ""

// RuntimeProtocol versions the protocol between the Oasis node(s) and
// the runtime.
//
Expand Down

0 comments on commit 40119d9

Please sign in to comment.