From f969a7ae9498ab4221fd93a2e3134c2ccbea60d7 Mon Sep 17 00:00:00 2001
From: Simon Kent
Date: Mon, 19 Sep 2022 17:05:08 -0700
Subject: [PATCH] Add new rekor server metrics.

Signed-off-by: Simon Kent
---
 .../restapi/configure_rekor_server.go | 66 ++++++++++++++++-
 tests/e2e_test.go                     | 74 +++++++++++++++++++
 2 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/pkg/generated/restapi/configure_rekor_server.go b/pkg/generated/restapi/configure_rekor_server.go
index e19527ca8..39e5ed035 100644
--- a/pkg/generated/restapi/configure_rekor_server.go
+++ b/pkg/generated/restapi/configure_rekor_server.go
@@ -18,6 +18,7 @@ limitations under the License.
 package restapi
 
 import (
+	"context"
 	"crypto/tls"
 	"net/http"
 	"strconv"
@@ -47,6 +48,18 @@ import (
 
 //go:generate swagger generate server --target ../../generated --name RekorServer --spec ../../../openapi.yaml --principal interface{} --exclude-main
 
+type contextKey string
+
+var (
+	ctxKeyAPIToRecord = contextKey("apiToRecord")
+)
+
+// Context payload for recording metrics.
+type apiToRecord struct {
+	method *string // Method to record in metrics, if any.
+	path   *string // Path to record in metrics, if any.
+}
+
 func configureFlags(api *operations.RekorServerAPI) {
 	// api.CommandLineOptionsGroups = []swag.CommandLineOptionsGroup{ ... }
 }
@@ -104,6 +117,16 @@ func configureAPI(api *operations.RekorServerAPI) http.Handler {
 	api.AddMiddlewareFor("GET", "/api/v1/log/publicKey", cacheForever)
 	api.AddMiddlewareFor("GET", "/api/v1/log/timestamp/certchain", cacheForever)
 
+	// add metrics for explicitly handled endpoints
+	recordMetricsForAPI(api, "POST", "/api/v1/index/retrieve")
+	recordMetricsForAPI(api, "GET", "/api/v1/log")
+	recordMetricsForAPI(api, "GET", "/api/v1/publicKey")
+	recordMetricsForAPI(api, "GET", "/api/v1/log/proof")
+	recordMetricsForAPI(api, "GET", "/api/v1/log/entries")
+	recordMetricsForAPI(api, "POST", "/api/v1/log/entries")
+	recordMetricsForAPI(api, "GET", "/api/v1/log/entries/{entryUUID}")
+	recordMetricsForAPI(api, "GET", "/api/v1/log/entries/retrieve")
+
 	return setupGlobalMiddleware(api.Serve(setupMiddlewares))
 }
 
@@ -159,18 +182,59 @@ func setupGlobalMiddleware(handler http.Handler) http.Handler {
 	}))
 }
 
+// Populates the apiToRecord for this method/path so metrics are emitted.
+func recordMetricsForAPI(api *operations.RekorServerAPI, method string, path string) {
+	metricsHandler := func(handler http.Handler) http.Handler {
+		return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			ctx := r.Context()
+			if apiInfo, ok := ctx.Value(ctxKeyAPIToRecord).(*apiToRecord); ok {
+				apiInfo.method = &method
+				apiInfo.path = &path
+			} else {
+				log.ContextLogger(ctx).Warn("Could not attach api info - endpoint may not be monitored.")
+			}
+			handler.ServeHTTP(w, r)
+		})
+	}
+
+	api.AddMiddlewareFor(method, path, metricsHandler)
+}
+
 func wrapMetrics(handler http.Handler) http.Handler {
 	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		ctx := r.Context()
+		apiInfo := apiToRecord{}
+		ctx = context.WithValue(ctx, ctxKeyAPIToRecord, &apiInfo)
+		r = r.WithContext(ctx)
+
 		start := time.Now()
 		ww := middleware.NewWrapResponseWriter(w, r.ProtoMajor)
 		defer func() {
+			code := strconv.Itoa(ww.Status())
 			labels := map[string]string{
 				"path": r.URL.Path,
-				"code": strconv.Itoa(ww.Status()),
+				"code": code,
 			}
 			// This logs latency broken down by URL path and response code
+			// TODO(var-sdk): delete these metrics once the new metrics are safely rolled out.
 			pkgapi.MetricLatency.With(labels).Observe(float64(time.Since(start)))
 			pkgapi.MetricLatencySummary.With(labels).Observe(float64(time.Since(start)))
+
+			// Only record metrics for APIs that need instrumentation.
+			if apiInfo.path != nil && apiInfo.method != nil {
+				pkgapi.MetricRequestLatency.With(
+					map[string]string{
+						"path":   *apiInfo.path,
+						"method": *apiInfo.method,
+					}).Observe(float64(time.Since(start)))
+
+				pkgapi.MetricRequestCount.With(
+					map[string]string{
+						"path":   *apiInfo.path,
+						"method": *apiInfo.method,
+						"code":   code,
+					}).Inc()
+			}
 		}()
 
 		handler.ServeHTTP(ww, r)
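Note: the wrapMetrics changes above reference pkgapi.MetricRequestLatency and pkgapi.MetricRequestCount, whose definitions are not part of this diff. Judging from the label sets used above and the metric names the e2e test below asserts (rekor_latency_by_api and rekor_qps_by_api), the definitions in pkg/api would look roughly like this sketch; the use of promauto, the histogram type, and the Help strings are assumptions, not taken from the patch:

package api

import (
	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
	// Latency observations labeled by path and method. The middleware
	// observes float64(time.Since(start)), i.e. nanoseconds, and the
	// histogram's _count series is what the e2e test reads.
	MetricRequestLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
		Name: "rekor_latency_by_api",
		Help: "API request latency by path and method.",
	}, []string{"path", "method"})

	// Request counter labeled by path, method, and response code.
	MetricRequestCount = promauto.NewCounterVec(prometheus.CounterOpts{
		Name: "rekor_qps_by_api",
		Help: "API request count by path, method, and response code.",
	}, []string{"path", "method", "code"})
)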
diff --git a/tests/e2e_test.go b/tests/e2e_test.go
index d03b80e81..135965467 100644
--- a/tests/e2e_test.go
+++ b/tests/e2e_test.go
@@ -19,6 +19,7 @@ package e2e
 
 import (
+	"bufio"
 	"bytes"
 	"context"
 	"crypto"
@@ -37,6 +38,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"reflect"
+	"regexp"
 	"strconv"
 	"strings"
 	"testing"
@@ -1347,3 +1349,75 @@ func TestSearchValidateTreeID(t *testing.T) {
 		t.Fatalf("expected 404 status code but got %d", resp.StatusCode)
 	}
 }
+
+func getRekorMetricCount(metricLine string, t *testing.T) (int, error) {
+	re, err := regexp.Compile(fmt.Sprintf("^%s.*?([0-9]+)$", regexp.QuoteMeta(metricLine)))
+	if err != nil {
+		return 0, err
+	}
+
+	resp, err := http.Get("http://localhost:2112/metrics")
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+
+	scanner := bufio.NewScanner(resp.Body)
+	for scanner.Scan() {
+		match := re.FindStringSubmatch(scanner.Text())
+		if len(match) != 2 {
+			continue
+		}
+
+		result, err := strconv.Atoi(match[1])
+		if err != nil {
+			return 0, err
+		}
+		t.Log("Matched metric line: " + scanner.Text())
+		return result, nil
+	}
+	return 0, nil
+}
+
+// Smoke test to ensure we're publishing and recording metrics when an API is
+// called.
+// TODO: use a more robust test approach here e.g. prometheus client-based?
+// TODO: cover all endpoints to make sure none are dropped.
+func TestMetricsCounts(t *testing.T) {
+	latencyMetric := "rekor_latency_by_api_count{method=\"GET\",path=\"/api/v1/log\"}"
+	qpsMetric := "rekor_qps_by_api{code=\"200\",method=\"GET\",path=\"/api/v1/log\"}"
+
+	latencyCount, err := getRekorMetricCount(latencyMetric, t)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	qpsCount, err := getRekorMetricCount(qpsMetric, t)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	resp, err := http.Get("http://localhost:3000/api/v1/log")
+	if err != nil {
+		t.Fatal(err)
+	}
+	resp.Body.Close()
+
+	latencyCount2, err := getRekorMetricCount(latencyMetric, t)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	qpsCount2, err := getRekorMetricCount(qpsMetric, t)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if latencyCount2-latencyCount != 1 {
+		t.Error("rekor_latency_by_api_count did not increment")
+	}
+
+	if qpsCount2-qpsCount != 1 {
+		t.Error("rekor_qps_by_api did not increment")
+	}
+}
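The first TODO above already suggests a prometheus client-based check instead of scraping exposition lines with a regexp. A minimal sketch of that approach, assuming github.com/prometheus/common/expfmt is added to the test dependencies; the helper name getMetricValue is hypothetical, and per-label filtering is omitted for brevity:

package e2e

import (
	"fmt"
	"net/http"

	"github.com/prometheus/common/expfmt"
)

// getMetricValue parses the Prometheus exposition format and returns the
// value of the first sample in the named metric family, scraped from the
// same endpoint the test above uses.
func getMetricValue(name string) (float64, error) {
	resp, err := http.Get("http://localhost:2112/metrics")
	if err != nil {
		return 0, err
	}
	defer resp.Body.Close()

	var parser expfmt.TextParser
	families, err := parser.TextToMetricFamilies(resp.Body)
	if err != nil {
		return 0, err
	}
	mf, ok := families[name]
	if !ok || len(mf.Metric) == 0 {
		return 0, fmt.Errorf("metric %q not found", name)
	}
	m := mf.Metric[0]
	switch {
	case m.Counter != nil: // e.g. rekor_qps_by_api
		return m.Counter.GetValue(), nil
	case m.Histogram != nil: // e.g. rekor_latency_by_api's _count series
		return float64(m.Histogram.GetSampleCount()), nil
	case m.Summary != nil: // in case the latency metric is a summary
		return float64(m.Summary.GetSampleCount()), nil
	}
	return 0, fmt.Errorf("unsupported metric type for %q", name)
}

This sidesteps the regexp entirely and would also make it straightforward to match specific method/path/code series via mf.Metric[i].Label before reading a value.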