From 06006c31f2e44fdcab78e8d33de076b2f1d31e0f Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Thu, 11 Aug 2022 16:18:57 -0700 Subject: [PATCH 1/2] Add namespace to metrics --- .../grafana-dashboards/resource-manager.json | 50 +++++++++---------- p2p/host/resource-manager/obs/stats.go | 27 +++++----- 2 files changed, 39 insertions(+), 38 deletions(-) diff --git a/p2p/host/resource-manager/obs/grafana-dashboards/resource-manager.json b/p2p/host/resource-manager/obs/grafana-dashboards/resource-manager.json index b4e8ec2cd1..0b29c6cd17 100644 --- a/p2p/host/resource-manager/obs/grafana-dashboards/resource-manager.json +++ b/p2p/host/resource-manager/obs/grafana-dashboards/resource-manager.json @@ -165,7 +165,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rate(rcmgr_trace_metrics_blocked_resources[$__rate_interval])", + "expr": "rate(rcmgr_blocked_resources[$__rate_interval])", "interval": "", "legendFormat": "", "refId": "A" @@ -267,7 +267,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"system\"}", + "expr": "rcmgr_streams{scope=\"system\"}", "interval": "", "legendFormat": "{{dir}} {{instance}}", "refId": "A" @@ -356,7 +356,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"transient\"}", + "expr": "rcmgr_streams{scope=\"transient\"}", "interval": "", "legendFormat": "{{dir}} {{instance}}", "refId": "A" @@ -446,7 +446,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"service\"}", + "expr": "rcmgr_streams{scope=\"service\"}", "interval": "", "legendFormat": "{{dir}} {{service}} {{instance}}", "refId": "A" @@ -536,7 +536,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_streams{scope=\"protocol\"}", + "expr": "rcmgr_streams{scope=\"protocol\"}", "interval": "", "legendFormat": "{{dir}} {{protocol}} {{instance}}", "refId": "A" @@ -626,7 +626,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.50, (rcmgr_peer_streams_bucket - rcmgr_peer_streams_negative_bucket)) - 0.1", "interval": "", "legendFormat": "p50 {{dir}} streams per peer – {{instance}}", "refId": "A" @@ -637,7 +637,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.90, (rcmgr_peer_streams_bucket - rcmgr_peer_streams_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "p90 {{dir}} streams per peer – {{instance}}", @@ -649,7 +649,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_streams_bucket - rcmgr_trace_metrics_peer_streams_negative_bucket)) - 0.1", + "expr": "histogram_quantile(1, (rcmgr_peer_streams_bucket - rcmgr_peer_streams_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "max {{dir}} streams per peer – {{instance}}", @@ -713,7 +713,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": false, - "expr": "sum without (instance) (rcmgr_trace_metrics_peer_streams_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"inbound\"})", + "expr": "sum without (instance) (rcmgr_peer_streams_bucket{dir=\"inbound\"}-rcmgr_peer_streams_negative_bucket{dir=\"inbound\"})", "format": "heatmap", "hide": false, "interval": "", @@ -778,7 +778,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": false, - "expr": "sum without (instance) (rcmgr_trace_metrics_peer_streams_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_streams_negative_bucket{dir=\"outbound\"})", + "expr": "sum without (instance) (rcmgr_peer_streams_bucket{dir=\"outbound\"}-rcmgr_peer_streams_negative_bucket{dir=\"outbound\"})", "format": "heatmap", "hide": false, "interval": "", @@ -901,7 +901,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_connections{scope=\"system\"}", + "expr": "rcmgr_connections{scope=\"system\"}", "interval": "", "legendFormat": "{{dir}} {{instance}}", "refId": "A" @@ -989,7 +989,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_connections{scope=\"transient\"}", + "expr": "rcmgr_connections{scope=\"transient\"}", "interval": "", "legendFormat": "{{dir}} {{instance}}", "refId": "A" @@ -1098,7 +1098,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.50, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.50, (rcmgr_peer_connections_bucket - rcmgr_peer_connections_negative_bucket)) - 0.1", "interval": "", "legendFormat": "p50 {{dir}} connections per peer – {{instance}}", "refId": "A" @@ -1109,7 +1109,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.90, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.90, (rcmgr_peer_connections_bucket - rcmgr_peer_connections_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "p90 {{dir}} connections per peer – {{instance}}", @@ -1121,7 +1121,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(1, (rcmgr_trace_metrics_peer_connections_bucket - rcmgr_trace_metrics_peer_connections_negative_bucket)) - 0.1", + "expr": "histogram_quantile(1, (rcmgr_peer_connections_bucket - rcmgr_peer_connections_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "max {{dir}} connections per peer – {{instance}}", @@ -1185,7 +1185,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": false, - "expr": "sum without (instance) (rcmgr_trace_metrics_peer_connections_bucket{dir=\"inbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"inbound\"})", + "expr": "sum without (instance) (rcmgr_peer_connections_bucket{dir=\"inbound\"}-rcmgr_peer_connections_negative_bucket{dir=\"inbound\"})", "format": "heatmap", "hide": false, "interval": "", @@ -1250,7 +1250,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": false, - "expr": "sum without (instance) (rcmgr_trace_metrics_peer_connections_bucket{dir=\"outbound\"}-rcmgr_trace_metrics_peer_connections_negative_bucket{dir=\"outbound\"})", + "expr": "sum without (instance) (rcmgr_peer_connections_bucket{dir=\"outbound\"}-rcmgr_peer_connections_negative_bucket{dir=\"outbound\"})", "format": "heatmap", "hide": false, "interval": "", @@ -1355,7 +1355,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"system\"}", + "expr": "rcmgr_memory{scope=\"system\"}", "interval": "", "legendFormat": "Bytes Reserved", "refId": "A" @@ -1445,7 +1445,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"protocol\"}", + "expr": "rcmgr_memory{scope=\"protocol\"}", "interval": "", "legendFormat": "{{protocol}} {{instance}}", "refId": "A" @@ -1535,7 +1535,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_memory{scope=\"service\"}", + "expr": "rcmgr_memory{scope=\"service\"}", "interval": "", "legendFormat": "{{service}} {{instance}}", "refId": "A" @@ -1646,7 +1646,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.50, sum by (le) (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.50, sum by (le) (rcmgr_peer_memory_bucket - rcmgr_peer_memory_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "p50 memory usage per peer", @@ -1658,7 +1658,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(0.90, sum by (le) (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "expr": "histogram_quantile(0.90, sum by (le) (rcmgr_peer_memory_bucket - rcmgr_peer_memory_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "p90 memory usage per peer", @@ -1670,7 +1670,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "histogram_quantile(1, sum by (le) (rcmgr_trace_metrics_peer_memory_bucket - rcmgr_trace_metrics_peer_memory_negative_bucket)) - 0.1", + "expr": "histogram_quantile(1, sum by (le) (rcmgr_peer_memory_bucket - rcmgr_peer_memory_negative_bucket)) - 0.1", "hide": false, "interval": "", "legendFormat": "max memory usage per peer", @@ -1683,7 +1683,7 @@ }, "editorMode": "code", "exemplar": true, - "expr": "sum(rcmgr_trace_metrics_peer_memory_count-rcmgr_trace_metrics_peer_memory_negative_count)", + "expr": "sum(rcmgr_peer_memory_count-rcmgr_peer_memory_negative_count)", "hide": false, "instant": false, "interval": "", @@ -1788,7 +1788,7 @@ "uid": "${DS_PROMETHEUS}" }, "exemplar": true, - "expr": "rcmgr_trace_metrics_fds", + "expr": "rcmgr_fds", "interval": "", "legendFormat": "{{scope}} {{instance}}", "refId": "A" diff --git a/p2p/host/resource-manager/obs/stats.go b/p2p/host/resource-manager/obs/stats.go index 6fd4885835..b3f2877b8a 100644 --- a/p2p/host/resource-manager/obs/stats.go +++ b/p2p/host/resource-manager/obs/stats.go @@ -11,26 +11,27 @@ import ( ) var ( - conns = stats.Int64("connections", "Number of Connections", stats.UnitDimensionless) + metricNamespace = "rcmgr/" + conns = stats.Int64(metricNamespace+"connections", "Number of Connections", stats.UnitDimensionless) - peerConns = stats.Int64("peer/connections", "Number of connections this peer has", stats.UnitDimensionless) - peerConnsNegative = stats.Int64("peer/connections_negative", "Number of connections this peer had. This is used to get the current connection number per peer histogram by subtracting this from the peer/connections histogram", stats.UnitDimensionless) + peerConns = stats.Int64(metricNamespace+"peer/connections", "Number of connections this peer has", stats.UnitDimensionless) + peerConnsNegative = stats.Int64(metricNamespace+"peer/connections_negative", "Number of connections this peer had. This is used to get the current connection number per peer histogram by subtracting this from the peer/connections histogram", stats.UnitDimensionless) - streams = stats.Int64("streams", "Number of Streams", stats.UnitDimensionless) + streams = stats.Int64(metricNamespace+"streams", "Number of Streams", stats.UnitDimensionless) - peerStreams = stats.Int64("peer/streams", "Number of streams this peer has", stats.UnitDimensionless) - peerStreamsNegative = stats.Int64("peer/streams_negative", "Number of streams this peer had. This is used to get the current streams number per peer histogram by subtracting this from the peer/streams histogram", stats.UnitDimensionless) + peerStreams = stats.Int64(metricNamespace+"peer/streams", "Number of streams this peer has", stats.UnitDimensionless) + peerStreamsNegative = stats.Int64(metricNamespace+"peer/streams_negative", "Number of streams this peer had. This is used to get the current streams number per peer histogram by subtracting this from the peer/streams histogram", stats.UnitDimensionless) - memory = stats.Int64("memory", "Amount of memory reserved as reported to the Resource Manager", stats.UnitDimensionless) - peerMemory = stats.Int64("peer/memory", "Amount of memory currently reseved for peer", stats.UnitDimensionless) - peerMemoryNegative = stats.Int64("peer/memory_negative", "Amount of memory previously reseved for peer. This is used to get the current memory per peer histogram by subtracting this from the peer/memory histogram", stats.UnitDimensionless) + memory = stats.Int64(metricNamespace+"memory", "Amount of memory reserved as reported to the Resource Manager", stats.UnitDimensionless) + peerMemory = stats.Int64(metricNamespace+"peer/memory", "Amount of memory currently reseved for peer", stats.UnitDimensionless) + peerMemoryNegative = stats.Int64(metricNamespace+"peer/memory_negative", "Amount of memory previously reseved for peer. This is used to get the current memory per peer histogram by subtracting this from the peer/memory histogram", stats.UnitDimensionless) - connMemory = stats.Int64("conn/memory", "Amount of memory currently reseved for the connection", stats.UnitDimensionless) - connMemoryNegative = stats.Int64("conn/memory_negative", "Amount of memory previously reseved for the connection. This is used to get the current memory per connection histogram by subtracting this from the conn/memory histogram", stats.UnitDimensionless) + connMemory = stats.Int64(metricNamespace+"conn/memory", "Amount of memory currently reseved for the connection", stats.UnitDimensionless) + connMemoryNegative = stats.Int64(metricNamespace+"conn/memory_negative", "Amount of memory previously reseved for the connection. This is used to get the current memory per connection histogram by subtracting this from the conn/memory histogram", stats.UnitDimensionless) - fds = stats.Int64("fds", "Number of fds as reported to the Resource Manager", stats.UnitDimensionless) + fds = stats.Int64(metricNamespace+"fds", "Number of fds as reported to the Resource Manager", stats.UnitDimensionless) - blockedResources = stats.Int64("blocked_resources", "Number of resource requests blocked", stats.UnitDimensionless) + blockedResources = stats.Int64(metricNamespace+"blocked_resources", "Number of resource requests blocked", stats.UnitDimensionless) ) var ( From c277824bef1d7c4e7d0535f58aa8740bd076d1b1 Mon Sep 17 00:00:00 2001 From: Marco Munizaga Date: Thu, 11 Aug 2022 16:56:26 -0700 Subject: [PATCH 2/2] Update readme --- p2p/host/resource-manager/obs/grafana-dashboards/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/p2p/host/resource-manager/obs/grafana-dashboards/README.md b/p2p/host/resource-manager/obs/grafana-dashboards/README.md index 132ac2f0cb..dfa7265044 100644 --- a/p2p/host/resource-manager/obs/grafana-dashboards/README.md +++ b/p2p/host/resource-manager/obs/grafana-dashboards/README.md @@ -21,11 +21,11 @@ import ( ) func SetupResourceManager() (network.ResourceManager, error) { - // Hook up the trace reporter metrics + // Hook up the trace reporter metrics. This will expose all opencensus + // stats via the default prometheus registry. See https://opencensus.io/exporters/supported-exporters/go/prometheus/ for other options. view.Register(rcmgrObs.DefaultViews...) ocprom.NewExporter(ocprom.Options{ Registry: prometheus.DefaultRegisterer.(*prometheus.Registry), - Namespace: "rcmgr_trace_metrics", }) str, err := rcmgrObs.NewStatsTraceReporter()