Skip to content

Commit

Permalink
Sanitize metric names in the prometheus exporter (#3212)
Browse files Browse the repository at this point in the history
* sanitize metric names in the prometheus exporter

* add changelog entry

* fix now invalid comment

* replace sanitizeName algorithm with one based on strings.Map

* check that digits as first characters are replaced

* Update exporters/prometheus/exporter.go

Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com>

* add missing utf8 import

* add explicit tests for sanitizeName

* fix testdata with digit prepend

Co-authored-by: Tyler Yahn <MrAlias@users.noreply.github.com>
Co-authored-by: Chester Cheung <cheung.zhy.csu@gmail.com>
  • Loading branch information
3 people committed Sep 22, 2022
1 parent 2ae5922 commit bfaff3a
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -20,6 +20,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- The metric SDK in `go.opentelemetry.io/otel/sdk/metric` is completely refactored to comply with the OpenTelemetry specification.
Please see the package documentation for how the new SDK is initialized and configured. (#3175)
- Update the minimum supported go version to go1.18. Removes support for go1.17 (#3179)
- Instead of dropping metric, the Prometheus exporter will replace any invalid character in metric names with `_`. (#3212)

### Removed

Expand Down
60 changes: 57 additions & 3 deletions exporters/prometheus/exporter.go
Expand Up @@ -19,6 +19,7 @@ import (
"sort"
"strings"
"unicode"
"unicode/utf8"

"github.com/prometheus/client_golang/prometheus"

Expand Down Expand Up @@ -142,7 +143,7 @@ func getHistogramMetricData(histogram metricdata.Histogram, m metricdata.Metrics
dataPoints := make([]*metricData, 0, len(histogram.DataPoints))
for _, dp := range histogram.DataPoints {
keys, values := getAttrs(dp.Attributes)
desc := prometheus.NewDesc(m.Name, m.Description, keys, nil)
desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil)
buckets := make(map[float64]uint64, len(dp.Bounds))
for i, bound := range dp.Bounds {
buckets[bound] = dp.BucketCounts[i]
Expand All @@ -165,7 +166,7 @@ func getSumMetricData[N int64 | float64](sum metricdata.Sum[N], m metricdata.Met
dataPoints := make([]*metricData, 0, len(sum.DataPoints))
for _, dp := range sum.DataPoints {
keys, values := getAttrs(dp.Attributes)
desc := prometheus.NewDesc(m.Name, m.Description, keys, nil)
desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil)
md := &metricData{
name: m.Name,
description: desc,
Expand All @@ -182,7 +183,7 @@ func getGaugeMetricData[N int64 | float64](gauge metricdata.Gauge[N], m metricda
dataPoints := make([]*metricData, 0, len(gauge.DataPoints))
for _, dp := range gauge.DataPoints {
keys, values := getAttrs(dp.Attributes)
desc := prometheus.NewDesc(m.Name, m.Description, keys, nil)
desc := prometheus.NewDesc(sanitizeName(m.Name), m.Description, keys, nil)
md := &metricData{
name: m.Name,
description: desc,
Expand Down Expand Up @@ -230,3 +231,56 @@ func sanitizeRune(r rune) rune {
}
return '_'
}

func sanitizeName(n string) string {
// This algorithm is based on strings.Map from Go 1.19.
const replacement = '_'

valid := func(i int, r rune) bool {
// Taken from
// https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric.go#L92-L102
if (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_' || r == ':' || (r >= '0' && r <= '9' && i > 0) {
return true
}
return false
}

// This output buffer b is initialized on demand, the first time a
// character needs to be replaced.
var b strings.Builder
for i, c := range n {
if valid(i, c) {
continue
}

if i == 0 && c >= '0' && c <= '9' {
// Prefix leading number with replacement character.
b.Grow(len(n) + 1)
b.WriteByte(byte(replacement))
break
}
b.Grow(len(n))
b.WriteString(n[:i])
b.WriteByte(byte(replacement))
width := utf8.RuneLen(c)
n = n[i+width:]
break
}

// Fast path for unchanged input.
if b.Cap() == 0 { // b.Grow was not called above.
return n
}

for _, c := range n {
// Due to inlining, it is more performant to invoke WriteByte rather then
// WriteRune.
if valid(1, c) { // We are guaranteed to not be at the start.
b.WriteByte(byte(c))
} else {
b.WriteByte(byte(replacement))
}
}

return b.String()
}
42 changes: 36 additions & 6 deletions exporters/prometheus/exporter_test.go
Expand Up @@ -103,8 +103,8 @@ func TestPrometheusExporter(t *testing.T) {
},
},
{
name: "invalid instruments are dropped",
expectedFile: "testdata/gauge.txt",
name: "invalid instruments are renamed",
expectedFile: "testdata/sanitized_names.txt",
recordMetrics: func(ctx context.Context, meter otelmetric.Meter) {
attrs := []attribute.KeyValue{
attribute.Key("A").String("B"),
Expand All @@ -116,16 +116,16 @@ func TestPrometheusExporter(t *testing.T) {
gauge.Add(ctx, 100, attrs...)
gauge.Add(ctx, -25, attrs...)

// Invalid, should be dropped.
gauge, err = meter.SyncFloat64().UpDownCounter("invalid.gauge.name")
// Invalid, will be renamed.
gauge, err = meter.SyncFloat64().UpDownCounter("invalid.gauge.name", instrument.WithDescription("a gauge with an invalid name"))
require.NoError(t, err)
gauge.Add(ctx, 100, attrs...)

counter, err := meter.SyncFloat64().Counter("invalid.counter.name")
counter, err := meter.SyncFloat64().Counter("0invalid.counter.name", instrument.WithDescription("a counter with an invalid name"))
require.NoError(t, err)
counter.Add(ctx, 100, attrs...)

histogram, err := meter.SyncFloat64().Histogram("invalid.hist.name")
histogram, err := meter.SyncFloat64().Histogram("invalid.hist.name", instrument.WithDescription("a histogram with an invalid name"))
require.NoError(t, err)
histogram.Record(ctx, 23, attrs...)
},
Expand Down Expand Up @@ -155,3 +155,33 @@ func TestPrometheusExporter(t *testing.T) {
})
}
}

func TestSantitizeName(t *testing.T) {
tests := []struct {
input string
want string
}{
{"nam€_with_3_width_rune", "nam__with_3_width_rune"},
{"`", "_"},
{
`! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~`,
`________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____`,
},

// Test cases taken from
// https://github.com/prometheus/common/blob/dfbc25bd00225c70aca0d94c3c4bb7744f28ace0/model/metric_test.go#L85-L136
{"Avalid_23name", "Avalid_23name"},
{"_Avalid_23name", "_Avalid_23name"},
{"1valid_23name", "_1valid_23name"},
{"avalid_23name", "avalid_23name"},
{"Ava:lid_23name", "Ava:lid_23name"},
{"a lid_23name", "a_lid_23name"},
{":leading_colon", ":leading_colon"},
{"colon:in:the:middle", "colon:in:the:middle"},
{"", ""},
}

for _, test := range tests {
require.Equalf(t, test.want, sanitizeName(test.input), "input: %q", test.input)
}
}
24 changes: 24 additions & 0 deletions exporters/prometheus/testdata/sanitized_names.txt
@@ -0,0 +1,24 @@
# HELP bar a fun little gauge
# TYPE bar counter
bar{A="B",C="D"} 75
# HELP _0invalid_counter_name a counter with an invalid name
# TYPE _0invalid_counter_name counter
_0invalid_counter_name{A="B",C="D"} 100
# HELP invalid_gauge_name a gauge with an invalid name
# TYPE invalid_gauge_name counter
invalid_gauge_name{A="B",C="D"} 100
# HELP invalid_hist_name a histogram with an invalid name
# TYPE invalid_hist_name histogram
invalid_hist_name_bucket{A="B",C="D",le="0"} 0
invalid_hist_name_bucket{A="B",C="D",le="5"} 0
invalid_hist_name_bucket{A="B",C="D",le="10"} 0
invalid_hist_name_bucket{A="B",C="D",le="25"} 1
invalid_hist_name_bucket{A="B",C="D",le="50"} 0
invalid_hist_name_bucket{A="B",C="D",le="75"} 0
invalid_hist_name_bucket{A="B",C="D",le="100"} 0
invalid_hist_name_bucket{A="B",C="D",le="250"} 0
invalid_hist_name_bucket{A="B",C="D",le="500"} 0
invalid_hist_name_bucket{A="B",C="D",le="1000"} 0
invalid_hist_name_bucket{A="B",C="D",le="+Inf"} 1
invalid_hist_name_sum{A="B",C="D"} 23
invalid_hist_name_count{A="B",C="D"} 1

0 comments on commit bfaff3a

Please sign in to comment.