Skip to content

Commit

Permalink
Reduce granularity of histogram buckets for Go 1.17 collector
Browse files Browse the repository at this point in the history
The Go runtime/metrics package currently exports extremely granular
histograms. Exponentially bucket any histogram with unit "seconds"
or "bytes" instead to dramatically reduce the number of buckets, and
thus the number of metrics.

This change also adds a test to check for expected cardinality to
prevent cardinality surprises in the future.

Signed-off-by: Michael Anthony Knyszek <mknyszek@google.com>
  • Loading branch information
mknyszek committed Jan 24, 2022
1 parent f63e219 commit 030b81a
Show file tree
Hide file tree
Showing 5 changed files with 176 additions and 26 deletions.
50 changes: 50 additions & 0 deletions prometheus/gen_go_collector_metrics_set.go
Expand Up @@ -21,7 +21,9 @@ import (
"fmt"
"go/format"
"log"
"math"
"os"
"runtime"
"runtime/metrics"
"strconv"
"strings"
Expand All @@ -35,6 +37,10 @@ func main() {
if len(os.Args) != 2 {
log.Fatal("requires Go version (e.g. go1.17) as an argument")
}
toolVersion := runtime.Version()
if majorVersion := toolVersion[:strings.LastIndexByte(toolVersion, '.')]; majorVersion != os.Args[1] {
log.Fatalf("using Go version %q but expected Go version %q", majorVersion, os.Args[1])
}
version, err := parseVersion(os.Args[1])
if err != nil {
log.Fatalf("parsing Go version: %v", err)
Expand All @@ -45,9 +51,11 @@ func main() {
err = testFile.Execute(&buf, struct {
Descriptions []metrics.Description
GoVersion goVersion
Cardinality int
}{
Descriptions: metrics.All(),
GoVersion: version,
Cardinality: rmCardinality(),
})
if err != nil {
log.Fatalf("executing template: %v", err)
Expand Down Expand Up @@ -85,6 +93,46 @@ func parseVersion(s string) (goVersion, error) {
return goVersion(i), err
}

func rmCardinality() int {
cardinality := 0

// Collect all histogram samples so that we can get their buckets.
// The API guarantees that the buckets are always fixed for the lifetime
// of the process.
var histograms []metrics.Sample
for _, d := range metrics.All() {
if d.Kind == metrics.KindFloat64Histogram {
histograms = append(histograms, metrics.Sample{Name: d.Name})
} else {
cardinality++
}
}

// Handle histograms.
metrics.Read(histograms)
for i := range histograms {
name := histograms[i].Name
buckets := internal.RuntimeMetricsBucketsForUnit(
histograms[i].Value.Float64Histogram().Buckets,
name[strings.IndexRune(name, ':')+1:],
)
cardinality += len(buckets) + 3 // Plus total count, sum, and the implicit infinity bucket.
// runtime/metrics bucket boundaries are lower-bound-inclusive, but
// always represents each actual *boundary* so Buckets is always
// 1 longer than Counts, while in Prometheus the mapping is one-to-one,
// as the bottom bucket extends to -Inf, and the top infinity bucket is
// implicit. Therefore, we should have one fewer bucket than is listed
// above.
cardinality--
if buckets[len(buckets)-1] == math.Inf(1) {
// We already counted the infinity bucket separately.
cardinality--
}
}

return cardinality
}

var testFile = template.Must(template.New("testFile").Funcs(map[string]interface{}{
"rm2prom": func(d metrics.Description) string {
ns, ss, n, ok := internal.RuntimeMetricsToProm(&d)
Expand Down Expand Up @@ -112,4 +160,6 @@ var expectedRuntimeMetrics = map[string]string{
{{- end -}}
{{end}}
}
const expectedRuntimeMetricsCardinality = {{.Cardinality}}
`))
55 changes: 35 additions & 20 deletions prometheus/go_collector_go117.go
Expand Up @@ -20,6 +20,7 @@ import (
"math"
"runtime"
"runtime/metrics"
"strings"
"sync"

//nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
Expand Down Expand Up @@ -53,9 +54,20 @@ type goCollector struct {
// Deprecated: Use collectors.NewGoCollector instead.
func NewGoCollector() Collector {
descriptions := metrics.All()
descMap := make(map[string]*metrics.Description)
for i := range descriptions {
descMap[descriptions[i].Name] = &descriptions[i]

// Collect all histogram samples so that we can get their buckets.
// The API guarantees that the buckets are always fixed for the lifetime
// of the process.
var histograms []metrics.Sample
for _, d := range descriptions {
if d.Kind == metrics.KindFloat64Histogram {
histograms = append(histograms, metrics.Sample{Name: d.Name})
}
}
metrics.Read(histograms)
bucketsMap := make(map[string][]float64)
for i := range histograms {
bucketsMap[histograms[i].Name] = histograms[i].Value.Float64Histogram().Buckets
}

// Generate a Desc and ValueType for each runtime/metrics metric.
Expand All @@ -80,13 +92,15 @@ func NewGoCollector() Collector {
var m collectorMetric
if d.Kind == metrics.KindFloat64Histogram {
_, hasSum := rmExactSumMap[d.Name]
unit := d.Name[strings.IndexRune(d.Name, ':')+1:]
m = newBatchHistogram(
NewDesc(
BuildFQName(namespace, subsystem, name),
d.Description,
nil,
nil,
),
internal.RuntimeMetricsBucketsForUnit(bucketsMap[d.Name], unit),
hasSum,
)
} else if d.Cumulative {
Expand Down Expand Up @@ -303,37 +317,38 @@ type batchHistogram struct {
sum float64 // Used if hasSum is true.
}

func newBatchHistogram(desc *Desc, hasSum bool) *batchHistogram {
h := &batchHistogram{desc: desc, hasSum: hasSum}
func newBatchHistogram(desc *Desc, buckets []float64, hasSum bool) *batchHistogram {
h := &batchHistogram{
desc: desc,
buckets: buckets,
counts: make([]uint64, len(buckets)-1),
hasSum: hasSum,
}
h.init(h)
return h
}

// update updates the batchHistogram from a runtime/metrics histogram.
//
// sum must be provided if the batchHistogram was created to have an exact sum.
// h.buckets must be a strict subset of his.Buckets.
func (h *batchHistogram) update(his *metrics.Float64Histogram, sum float64) {
counts, buckets := his.Counts, his.Buckets
// Skip a -Inf bucket altogether. It's not clear how to represent that.
if math.IsInf(buckets[0], -1) {
buckets = buckets[1:]
counts = counts[1:]
}

h.mu.Lock()
defer h.mu.Unlock()

// Check if we're initialized.
if h.buckets == nil {
// Make copies of counts and buckets. It's really important
// that we don't retain his.Counts or his.Buckets anywhere since
// it's going to get reused.
h.buckets = make([]float64, len(buckets))
copy(h.buckets, buckets)

h.counts = make([]uint64, len(counts))
// Reduce buckets.
var j int
var total uint64
for i, count := range counts {
total += count
if buckets[i+1] == h.buckets[j+1] {
h.counts[j] = total
total = 0
j++
}
}
copy(h.counts, counts)
if h.hasSum {
h.sum = sum
}
Expand Down
48 changes: 42 additions & 6 deletions prometheus/go_collector_go117_test.go
Expand Up @@ -140,9 +140,13 @@ func TestBatchHistogram(t *testing.T) {
}
metrics.Read(s)
rmHist := s[0].Value.Float64Histogram()
// runtime/metrics histograms always have -Inf and +Inf buckets.
// We never handle -Inf and +Inf is implicit.
wantBuckets := len(rmHist.Buckets) - 2
wantBuckets := internal.RuntimeMetricsBucketsForUnit(rmHist.Buckets, "bytes")
// runtime/metrics histograms always have a +Inf bucket and are lower
// bound inclusive. In contrast, we have an implicit +Inf bucket and
// are upper bound inclusive, so we can chop off the first bucket
// (since the conversion to upper bound inclusive will shift all buckets
// down one index) and the +Inf for the last bucket.
wantBuckets = wantBuckets[1 : len(wantBuckets)-1]

// Check to make sure the output proto makes sense.
pb := &dto.Metric{}
Expand All @@ -151,14 +155,14 @@ func TestBatchHistogram(t *testing.T) {
if math.IsInf(pb.Histogram.Bucket[len(pb.Histogram.Bucket)-1].GetUpperBound(), +1) {
t.Errorf("found +Inf bucket")
}
if got := len(pb.Histogram.Bucket); got != wantBuckets {
t.Errorf("got %d buckets in protobuf, want %d", got, wantBuckets)
if got := len(pb.Histogram.Bucket); got != len(wantBuckets) {
t.Errorf("got %d buckets in protobuf, want %d", got, len(wantBuckets))
}
for i, bucket := range pb.Histogram.Bucket {
// runtime/metrics histograms are lower-bound inclusive, but we're
// upper-bound inclusive. So just make sure the new inclusive upper
// bound is somewhere close by (in some cases it's equal).
wantBound := rmHist.Buckets[i+1]
wantBound := wantBuckets[i]
if gotBound := *bucket.UpperBound; (wantBound-gotBound)/wantBound > 0.001 {
t.Errorf("got bound %f, want within 0.1%% of %f", gotBound, wantBound)
}
Expand Down Expand Up @@ -244,6 +248,7 @@ func TestExpectedRuntimeMetrics(t *testing.T) {

descs := metrics.All()
rmSet := make(map[string]struct{})
// Iterate over runtime-reported descriptions to find new metrics.
for i := range descs {
rmName := descs[i].Name
rmSet[rmName] = struct{}{}
Expand All @@ -263,6 +268,8 @@ func TestExpectedRuntimeMetrics(t *testing.T) {
continue
}
}
// Now iterate over the expected metrics and look for removals.
cardinality := 0
for rmName, fqName := range expectedRuntimeMetrics {
if _, ok := rmSet[rmName]; !ok {
t.Errorf("runtime/metrics metric %s removed", rmName)
Expand All @@ -272,13 +279,42 @@ func TestExpectedRuntimeMetrics(t *testing.T) {
t.Errorf("runtime/metrics metric %s not appearing under expected name %s", rmName, fqName)
continue
}

// While we're at it, check to make sure expected cardinality lines
// up, but at the point of the protobuf write to get as close to the
// real deal as possible.
//
// Note that we filter out non-runtime/metrics metrics here, because
// those are manually managed.
var m dto.Metric
if err := goMetricSet[fqName].Write(&m); err != nil {
t.Errorf("writing metric %s: %v", fqName, err)
continue
}
// N.B. These are the only fields populated by runtime/metrics metrics specifically.
// Other fields are populated by e.g. GCStats metrics.
switch {
case m.Counter != nil:
fallthrough
case m.Gauge != nil:
cardinality++
case m.Histogram != nil:
cardinality += len(m.Histogram.Bucket) + 3 // + sum, count, and +inf
default:
t.Errorf("unexpected protobuf structure for metric %s", fqName)
}
}

if t.Failed() {
t.Log("a new Go version may have been detected, please run")
t.Log("\tgo run gen_go_collector_metrics_set.go go1.X")
t.Log("where X is the Go version you are currently using")
}

expectCardinality := expectedRuntimeMetricsCardinality
if cardinality != expectCardinality {
t.Errorf("unexpected cardinality for runtime/metrics metrics: got %d, want %d", cardinality, expectCardinality)
}
}

func TestGoCollectorConcurrency(t *testing.T) {
Expand Down
2 changes: 2 additions & 0 deletions prometheus/go_collector_metrics_go117_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

47 changes: 47 additions & 0 deletions prometheus/internal/go_runtime_metrics.go
Expand Up @@ -17,6 +17,7 @@
package internal

import (
"math"
"path"
"runtime/metrics"
"strings"
Expand Down Expand Up @@ -75,3 +76,49 @@ func RuntimeMetricsToProm(d *metrics.Description) (string, string, string, bool)
}
return namespace, subsystem, name, valid
}

// RuntimeMetricsBucketsForUnit takes a set of buckets obtained for a runtime/metrics histogram
// type (so, lower-bound inclusive) and a unit from a runtime/metrics name, and produces
// a reduced set of buckets. This function always removes any -Inf bucket as it's represented
// as the bottom-most upper-bound inclusive bucket in Prometheus.
func RuntimeMetricsBucketsForUnit(buckets []float64, unit string) []float64 {
switch unit {
case "bytes":
// Rebucket as powers of 2.
return rebucketExp(buckets, 2)
case "seconds":
// Rebucket as powers of 10 and then truncate all buckets
// above 1 second and merge them into a single bucket.
b := rebucketExp(buckets, 10)
for i := range b {
if b[i] > 1 {
b[i] = math.Inf(1)
b = b[:i+1]
break
}
}
return b
}
return buckets
}

// rebucketExp takes a list of bucket boundaries (lower bound inclusive) and
// downsamples the buckets to those a multiple of base apart. The end result
// is a roughly exponential (in many cases, perfectly exponential) bucketing
// scheme.
func rebucketExp(buckets []float64, base float64) []float64 {
bucket := buckets[0]
if bucket == math.Inf(1) {
return buckets
}
var newBuckets []float64
for i := 1; i < len(buckets); i++ {
if (bucket >= 0 && buckets[i] >= bucket*base) ||
(bucket < 0 && buckets[i] >= bucket/base) ||
(bucket == math.Inf(-1)) {
newBuckets = append(newBuckets, bucket)
bucket = buckets[i]
}
}
return append(newBuckets, bucket)
}

0 comments on commit 030b81a

Please sign in to comment.