Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds new rekor metrics for latency and QPS. #1059

Merged
merged 2 commits into from Sep 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
16 changes: 16 additions & 0 deletions pkg/api/metrics.go
Expand Up @@ -16,6 +16,8 @@
package api

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
Expand All @@ -35,4 +37,18 @@ var (
Name: "rekor_api_latency_summary",
Help: "Api Latency on calls",
}, []string{"path", "code"})

MetricRequestLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
Name: "rekor_latency_by_api",
Help: "Api Latency (in ns) by path and method",
Buckets: prometheus.ExponentialBucketsRange(
float64(time.Millisecond),
float64(4*time.Second),
10),
}, []string{"path", "method"})

MetricRequestCount = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "rekor_qps_by_api",
Help: "Api QPS by path, method, and response code",
}, []string{"path", "method", "code"})
)
76 changes: 70 additions & 6 deletions pkg/generated/restapi/configure_rekor_server.go
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
package restapi

import (
"context"
"crypto/tls"
"net/http"
"strconv"
Expand Down Expand Up @@ -47,6 +48,18 @@ import (

//go:generate swagger generate server --target ../../generated --name RekorServer --spec ../../../openapi.yaml --principal interface{} --exclude-main

type contextKey string

var (
ctxKeyAPIToRecord = contextKey("apiToRecord")
)

// Context payload for recording metrics.
type apiToRecord struct {
method *string // Method to record in metrics, if any.
path *string // Path to record in metrics, if any.
}

func configureFlags(api *operations.RekorServerAPI) {
// api.CommandLineOptionsGroups = []swag.CommandLineOptionsGroup{ ... }
}
Expand Down Expand Up @@ -104,6 +117,16 @@ func configureAPI(api *operations.RekorServerAPI) http.Handler {
api.AddMiddlewareFor("GET", "/api/v1/log/publicKey", cacheForever)
api.AddMiddlewareFor("GET", "/api/v1/log/timestamp/certchain", cacheForever)

// add metrics for explicitly handled endpoints
recordMetricsForAPI(api, "POST", "/api/v1/index/retrieve")
recordMetricsForAPI(api, "GET", "/api/v1/log")
recordMetricsForAPI(api, "GET", "/api/v1/publicKey")
recordMetricsForAPI(api, "GET", "/api/v1/log/proof")
recordMetricsForAPI(api, "GET", "/api/v1/log/entries")
recordMetricsForAPI(api, "POST", "/api/v1/log/entries")
recordMetricsForAPI(api, "GET", "/api/v1/log/entries/{entryUUID}")
recordMetricsForAPI(api, "GET", "/api/v1/log/entries/retrieve")

return setupGlobalMiddleware(api.Serve(setupMiddlewares))
}

Expand Down Expand Up @@ -159,18 +182,59 @@ func setupGlobalMiddleware(handler http.Handler) http.Handler {
}))
}

// Populates the the apiToRecord for this method/path so metrics are emitted.
func recordMetricsForAPI(api *operations.RekorServerAPI, method string, path string) {
metricsHandler := func(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
if apiInfo, ok := ctx.Value(ctxKeyAPIToRecord).(*apiToRecord); ok {
apiInfo.method = &method
apiInfo.path = &path
} else {
log.ContextLogger(ctx).Warn("Could not attach api info - endpoint may not be monitored.")
}
handler.ServeHTTP(w, r)
})
}

api.AddMiddlewareFor(method, path, metricsHandler)
}

func wrapMetrics(handler http.Handler) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
apiInfo := apiToRecord{}
ctx = context.WithValue(ctx, ctxKeyAPIToRecord, &apiInfo)
r = r.WithContext(ctx)

start := time.Now()
ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor)
defer func() {
labels := map[string]string{
"path": r.URL.Path,
"code": strconv.Itoa(ww.Status()),
// Only record metrics for APIs that need instrumentation.
if apiInfo.path != nil && apiInfo.method != nil {
code := strconv.Itoa(ww.Status())
labels := map[string]string{
"path": *apiInfo.path,
"code": code,
}
// This logs latency broken down by URL path and response code
// TODO(var-sdk): delete these metrics once the new metrics are safely rolled out.
pkgapi.MetricLatency.With(labels).Observe(float64(time.Since(start)))
pkgapi.MetricLatencySummary.With(labels).Observe(float64(time.Since(start)))

pkgapi.MetricRequestLatency.With(
map[string]string{
"path": *apiInfo.path,
"method": *apiInfo.method,
}).Observe(float64(time.Since(start)))

pkgapi.MetricRequestCount.With(
map[string]string{
"path": *apiInfo.path,
"method": *apiInfo.method,
"code": code,
}).Inc()
}
// This logs latency broken down by URL path and response code
pkgapi.MetricLatency.With(labels).Observe(float64(time.Since(start)))
pkgapi.MetricLatencySummary.With(labels).Observe(float64(time.Since(start)))
}()

handler.ServeHTTP(ww, r)
Expand Down
74 changes: 74 additions & 0 deletions tests/e2e_test.go
Expand Up @@ -19,6 +19,7 @@
package e2e

import (
"bufio"
"bytes"
"context"
"crypto"
Expand All @@ -37,6 +38,7 @@ import (
"os/exec"
"path/filepath"
"reflect"
"regexp"
"strconv"
"strings"
"testing"
Expand Down Expand Up @@ -1347,3 +1349,75 @@ func TestSearchValidateTreeID(t *testing.T) {
t.Fatalf("expected 404 status code but got %d", resp.StatusCode)
}
}

func getRekorMetricCount(metricLine string, t *testing.T) (int, error) {
re, err := regexp.Compile(fmt.Sprintf("^%s.*([0-9]+)$", regexp.QuoteMeta(metricLine)))
if err != nil {
return 0, err
}

resp, err := http.Get("http://localhost:2112/metrics")
if err != nil {
return 0, err
}
defer resp.Body.Close()

scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
match := re.FindStringSubmatch(scanner.Text())
if len(match) != 2 {
continue
}

result, err := strconv.Atoi(match[1])
if err != nil {
return 0, nil
}
t.Log("Matched metric line: " + scanner.Text())
return result, nil
}
return 0, nil
}

// Smoke test to ensure we're publishing and recording metrics when an API is
// called.
// TODO: use a more robust test approach here e.g. prometheus client-based?
// TODO: cover all endpoints to make sure none are dropped.
func TestMetricsCounts(t *testing.T) {
latencyMetric := "rekor_latency_by_api_count{method=\"GET\",path=\"/api/v1/log\"}"
qpsMetric := "rekor_qps_by_api{code=\"200\",method=\"GET\",path=\"/api/v1/log\"}"

latencyCount, err := getRekorMetricCount(latencyMetric, t)
if err != nil {
t.Fatal(err)
}

qpsCount, err := getRekorMetricCount(qpsMetric, t)
if err != nil {
t.Fatal(err)
}

resp, err := http.Get("http://localhost:3000/api/v1/log")
if err != nil {
t.Fatal(err)
}
resp.Body.Close()

latencyCount2, err := getRekorMetricCount(latencyMetric, t)
if err != nil {
t.Fatal(err)
}

qpsCount2, err := getRekorMetricCount(qpsMetric, t)
if err != nil {
t.Fatal(err)
}

if latencyCount2-latencyCount != 1 {
t.Error("rekor_latency_by_api_count did not increment")
}

if qpsCount2-qpsCount != 1 {
t.Error("rekor_qps_by_api did not increment")
}
}