From ef972b918885cde6d8c38131b9ba8a28f677365d Mon Sep 17 00:00:00 2001 From: beorn7 Date: Tue, 14 Jan 2020 19:22:19 +0100 Subject: [PATCH] Add exemplars to counter and histogram Signed-off-by: beorn7 --- examples/random/main.go | 15 ++++- go.mod | 4 +- go.sum | 10 ++-- prometheus/counter.go | 36 +++++++++++- prometheus/gauge.go | 2 +- prometheus/histogram.go | 113 ++++++++++++++++++++++++++---------- prometheus/promhttp/http.go | 63 +++++++++++++++----- prometheus/value.go | 46 ++++++++++++++- 8 files changed, 229 insertions(+), 60 deletions(-) diff --git a/examples/random/main.go b/examples/random/main.go index cb78b07ba..1da9bd1e5 100644 --- a/examples/random/main.go +++ b/examples/random/main.go @@ -18,6 +18,7 @@ package main import ( "flag" + "fmt" "log" "math" "math/rand" @@ -89,7 +90,11 @@ func main() { for { v := (rand.NormFloat64() * *normDomain) + *normMean rpcDurations.WithLabelValues("normal").Observe(v) - rpcDurationsHistogram.Observe(v) + rpcDurationsHistogram.ObserveWithExemplar( + // Demonstrate exemplar support with a dummy ID. This would be + // something like a trace ID in a real application. + v, prometheus.Labels{"dummyID": fmt.Sprint(rand.Intn(100000))}, + ) time.Sleep(time.Duration(75*oscillationFactor()) * time.Millisecond) } }() @@ -103,6 +108,12 @@ func main() { }() // Expose the registered metrics via HTTP. - http.Handle("/metrics", promhttp.Handler()) + http.Handle("/metrics", promhttp.HandlerFor( + prometheus.DefaultGatherer, + promhttp.HandlerOpts{ + // Opt into OpenMetrics to support exemplars. + EnableOpenMetrics: true, + }, + )) log.Fatal(http.ListenAndServe(*addr, nil)) } diff --git a/go.mod b/go.mod index 1540dba22..976e337be 100644 --- a/go.mod +++ b/go.mod @@ -5,8 +5,8 @@ require ( github.com/cespare/xxhash/v2 v2.1.1 github.com/golang/protobuf v1.3.2 github.com/json-iterator/go v1.1.8 - github.com/prometheus/client_model v0.1.0 - github.com/prometheus/common v0.7.0 + github.com/prometheus/client_model v0.2.0 + github.com/prometheus/common v0.9.0 github.com/prometheus/procfs v0.0.8 golang.org/x/sys v0.0.0-20191220142924-d4481acd189f ) diff --git a/go.sum b/go.sum index ff3d14668..059c79832 100644 --- a/go.sum +++ b/go.sum @@ -58,12 +58,12 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 h1:idejC8f github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/prometheus/client_model v0.1.0 h1:ElTg5tNp4DqfV7UQjDqv2+RJlNzsDtvNAWccbItceIE= -github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.4.1 h1:K0MGApIoQvMw27RTdJkPbr3JZ7DNbtxQNyi5STVM6Kw= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= -github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLyCY= -github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= +github.com/prometheus/common v0.9.0 h1:yg//x/8DqN+PxXTBFMwVCopGqDn3wSxmbF/3PCuu1bk= +github.com/prometheus/common v0.9.0/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= @@ -97,4 +97,4 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/prometheus/counter.go b/prometheus/counter.go index d463e36d3..553f9b005 100644 --- a/prometheus/counter.go +++ b/prometheus/counter.go @@ -17,6 +17,7 @@ import ( "errors" "math" "sync/atomic" + "time" dto "github.com/prometheus/client_model/go" ) @@ -40,6 +41,16 @@ type Counter interface { // Add adds the given value to the counter. It panics if the value is < // 0. Add(float64) + // AddWithExemplar works like Add but also replaces the currently saved + // exemplar (possibly none) with a new one, created from the provided + // value, the current time as timestamp, and the provided labels. Empty + // Labels will lead to a valid (label-less) exemplar. But if Labels is + // nil, the current exemplar is left in place. This method panics if the + // value is < 0, if any of the provided labels are invalid, or if the + // provided labels contain more than 64 runes in total. AddWithExemplar + // should not be called in a hot path as it is significantly more costly + // than Add. + AddWithExemplar(value float64, exemplar Labels) } // CounterOpts is an alias for Opts. See there for doc comments. @@ -78,6 +89,7 @@ type counter struct { desc *Desc labelPairs []*dto.LabelPair + exemplar atomic.Value // *dto.Exemplar } func (c *counter) Desc() *Desc { @@ -88,6 +100,7 @@ func (c *counter) Add(v float64) { if v < 0 { panic(errors.New("counter cannot decrease in value")) } + ival := uint64(v) if float64(ival) == v { atomic.AddUint64(&c.valInt, ival) @@ -103,6 +116,11 @@ func (c *counter) Add(v float64) { } } +func (c *counter) AddWithExemplar(v float64, e Labels) { + c.Add(v) + c.updateExemplar(v, e) +} + func (c *counter) Inc() { atomic.AddUint64(&c.valInt, 1) } @@ -112,7 +130,23 @@ func (c *counter) Write(out *dto.Metric) error { ival := atomic.LoadUint64(&c.valInt) val := fval + float64(ival) - return populateMetric(CounterValue, val, c.labelPairs, out) + var exemplar *dto.Exemplar + if e := c.exemplar.Load(); e != nil { + exemplar = e.(*dto.Exemplar) + } + + return populateMetric(CounterValue, val, c.labelPairs, exemplar, out) +} + +func (c *counter) updateExemplar(v float64, l Labels) { + if l == nil { + return + } + e, err := newExemplar(v, time.Now(), l) + if err != nil { + panic(err) + } + c.exemplar.Store(e) } // CounterVec is a Collector that bundles a set of Counters that all share the diff --git a/prometheus/gauge.go b/prometheus/gauge.go index 56d8cc209..d67573f76 100644 --- a/prometheus/gauge.go +++ b/prometheus/gauge.go @@ -123,7 +123,7 @@ func (g *gauge) Sub(val float64) { func (g *gauge) Write(out *dto.Metric) error { val := math.Float64frombits(atomic.LoadUint64(&g.valBits)) - return populateMetric(GaugeValue, val, g.labelPairs, out) + return populateMetric(GaugeValue, val, g.labelPairs, nil, out) } // GaugeVec is a Collector that bundles a set of Gauges that all share the same diff --git a/prometheus/histogram.go b/prometheus/histogram.go index ac2614d52..e30de3bc8 100644 --- a/prometheus/histogram.go +++ b/prometheus/histogram.go @@ -20,6 +20,7 @@ import ( "sort" "sync" "sync/atomic" + "time" "github.com/golang/protobuf/proto" @@ -47,6 +48,16 @@ type Histogram interface { // Observe adds a single observation to the histogram. Observe(float64) + // ObserveWithExemplar works like Observe but also replaces the + // currently saved exemplar for the relevant bucket (possibly none) with + // a new one, created from the provided value, the current time as + // timestamp, and the provided Labels. Empty Labels will lead to a valid + // (label-less) exemplar. But if Labels is nil, the current exemplar in + // the relevant bucket is left in place. This method panics if any of + // the provided labels are invalid or if the provided labels contain + // more than 64 runes in total. ObserveWithExemplar should not be called + // in a hot path as it is significantly more costly than Observe. + ObserveWithExemplar(value float64, exemplar Labels) } // bucketLabel is used for the label that defines the upper bound of a @@ -205,9 +216,10 @@ func newHistogram(desc *Desc, opts HistogramOpts, labelValues ...string) Histogr } } // Finally we know the final length of h.upperBounds and can make buckets - // for both counts: + // for both counts as well as exemplars: h.counts[0].buckets = make([]uint64, len(h.upperBounds)) h.counts[1].buckets = make([]uint64, len(h.upperBounds)) + h.exemplars = make([]atomic.Value, len(h.upperBounds)+1) h.init(h) // Init self-collection. return h @@ -254,6 +266,7 @@ type histogram struct { upperBounds []float64 labelPairs []*dto.LabelPair + exemplars []atomic.Value // One more than buckets (to include +Inf), each a *dto.Exemplar. } func (h *histogram) Desc() *Desc { @@ -261,36 +274,13 @@ func (h *histogram) Desc() *Desc { } func (h *histogram) Observe(v float64) { - // TODO(beorn7): For small numbers of buckets (<30), a linear search is - // slightly faster than the binary search. If we really care, we could - // switch from one search strategy to the other depending on the number - // of buckets. - // - // Microbenchmarks (BenchmarkHistogramNoLabels): - // 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op - // 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op - // 300 buckets: 154 ns/op linear - binary 61.6 ns/op - i := sort.SearchFloat64s(h.upperBounds, v) - - // We increment h.countAndHotIdx so that the counter in the lower - // 63 bits gets incremented. At the same time, we get the new value - // back, which we can use to find the currently-hot counts. - n := atomic.AddUint64(&h.countAndHotIdx, 1) - hotCounts := h.counts[n>>63] + h.observe(v, h.findBucket(v)) +} - if i < len(h.upperBounds) { - atomic.AddUint64(&hotCounts.buckets[i], 1) - } - for { - oldBits := atomic.LoadUint64(&hotCounts.sumBits) - newBits := math.Float64bits(math.Float64frombits(oldBits) + v) - if atomic.CompareAndSwapUint64(&hotCounts.sumBits, oldBits, newBits) { - break - } - } - // Increment count last as we take it as a signal that the observation - // is complete. - atomic.AddUint64(&hotCounts.count, 1) +func (h *histogram) ObserveWithExemplar(v float64, e Labels) { + i := h.findBucket(v) + h.observe(v, i) + h.updateExemplar(v, i, e) } func (h *histogram) Write(out *dto.Metric) error { @@ -329,6 +319,18 @@ func (h *histogram) Write(out *dto.Metric) error { CumulativeCount: proto.Uint64(cumCount), UpperBound: proto.Float64(upperBound), } + if e := h.exemplars[i].Load(); e != nil { + his.Bucket[i].Exemplar = e.(*dto.Exemplar) + } + } + // If there is an exemplar for the +Inf bucket, we have to add that bucket explicitly. + if e := h.exemplars[len(h.upperBounds)].Load(); e != nil { + b := &dto.Bucket{ + CumulativeCount: proto.Uint64(count), + UpperBound: proto.Float64(math.Inf(1)), + Exemplar: e.(*dto.Exemplar), + } + his.Bucket = append(his.Bucket, b) } out.Histogram = his @@ -352,6 +354,57 @@ func (h *histogram) Write(out *dto.Metric) error { return nil } +// findBucket returns the index of the bucket for the provided value, or +// len(h.upperBounds) for the +Inf bucket. +func (h *histogram) findBucket(v float64) int { + // TODO(beorn7): For small numbers of buckets (<30), a linear search is + // slightly faster than the binary search. If we really care, we could + // switch from one search strategy to the other depending on the number + // of buckets. + // + // Microbenchmarks (BenchmarkHistogramNoLabels): + // 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op + // 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op + // 300 buckets: 154 ns/op linear - binary 61.6 ns/op + return sort.SearchFloat64s(h.upperBounds, v) +} + +// observe is the implementation for Observe without the findBucket part. +func (h *histogram) observe(v float64, bucket int) { + // We increment h.countAndHotIdx so that the counter in the lower + // 63 bits gets incremented. At the same time, we get the new value + // back, which we can use to find the currently-hot counts. + n := atomic.AddUint64(&h.countAndHotIdx, 1) + hotCounts := h.counts[n>>63] + + if bucket < len(h.upperBounds) { + atomic.AddUint64(&hotCounts.buckets[bucket], 1) + } + for { + oldBits := atomic.LoadUint64(&hotCounts.sumBits) + newBits := math.Float64bits(math.Float64frombits(oldBits) + v) + if atomic.CompareAndSwapUint64(&hotCounts.sumBits, oldBits, newBits) { + break + } + } + // Increment count last as we take it as a signal that the observation + // is complete. + atomic.AddUint64(&hotCounts.count, 1) +} + +// updateExemplar replaces the exemplar for the provided bucket. With empty +// labels, it's a no-op. It panics if any of the labels is invalid. +func (h *histogram) updateExemplar(v float64, bucket int, l Labels) { + if l == nil { + return + } + e, err := newExemplar(v, time.Now(), l) + if err != nil { + panic(err) + } + h.exemplars[bucket].Store(e) +} + // HistogramVec is a Collector that bundles a set of Histograms that all share the // same Desc, but have different values for their variable labels. This is used // if you want to count the same thing partitioned by various dimensions diff --git a/prometheus/promhttp/http.go b/prometheus/promhttp/http.go index cea5a90fd..93d626e50 100644 --- a/prometheus/promhttp/http.go +++ b/prometheus/promhttp/http.go @@ -144,7 +144,12 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler { } } - contentType := expfmt.Negotiate(req.Header) + var contentType expfmt.Format + if opts.EnableOpenMetrics { + contentType = expfmt.NegotiateIncludingOpenMetrics(req.Header) + } else { + contentType = expfmt.Negotiate(req.Header) + } header := rsp.Header() header.Set(contentTypeHeader, string(contentType)) @@ -163,22 +168,38 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler { enc := expfmt.NewEncoder(w, contentType) var lastErr error + + // handleError returns true if we have to abort after handling the error. + handleError := func(err error) bool { + if err == nil { + return false + } + lastErr = err + if opts.ErrorLog != nil { + opts.ErrorLog.Println("error encoding and sending metric family:", err) + } + errCnt.WithLabelValues("encoding").Inc() + switch opts.ErrorHandling { + case PanicOnError: + panic(err) + case ContinueOnError: + // Handled later. + case HTTPErrorOnError: + httpError(rsp, err) + return true + } + return false + } + for _, mf := range mfs { - if err := enc.Encode(mf); err != nil { - lastErr = err - if opts.ErrorLog != nil { - opts.ErrorLog.Println("error encoding and sending metric family:", err) - } - errCnt.WithLabelValues("encoding").Inc() - switch opts.ErrorHandling { - case PanicOnError: - panic(err) - case ContinueOnError: - // Handled later. - case HTTPErrorOnError: - httpError(rsp, err) - return - } + if handleError(enc.Encode(mf)) { + return + } + } + if closer, ok := enc.(expfmt.Closer); ok { + // This in particular takes care of the final "# EOF\n" line for OpenMetrics. + if handleError(closer.Close()) { + return } } @@ -318,6 +339,16 @@ type HandlerOpts struct { // away). Until the implementation is improved, it is recommended to // implement a separate timeout in potentially slow Collectors. Timeout time.Duration + // If true, the experimental OpenMetrics encoding is added to the + // possible options during content negotiation. Note that Prometheus + // 2.5.0+ will negotiate OpenMetrics as first priority. OpenMetrics is + // the only way to transmit exemplars. However, the move to OpenMetrics + // is not completely transparent. Most notably, the values of "quantile" + // labels of Summaries and "le" labels of Histograms are formatted with + // a trailing ".0" if they would otherwise look like integer numbers + // (which changes the identity of the resulting series on the Prometheus + // server). + EnableOpenMetrics bool } // gzipAccepted returns whether the client will accept gzip-encoded content. diff --git a/prometheus/value.go b/prometheus/value.go index eb248f108..f285f7c5b 100644 --- a/prometheus/value.go +++ b/prometheus/value.go @@ -16,8 +16,11 @@ package prometheus import ( "fmt" "sort" + "time" + "unicode/utf8" "github.com/golang/protobuf/proto" + "github.com/golang/protobuf/ptypes" dto "github.com/prometheus/client_model/go" ) @@ -69,7 +72,7 @@ func (v *valueFunc) Desc() *Desc { } func (v *valueFunc) Write(out *dto.Metric) error { - return populateMetric(v.valType, v.function(), v.labelPairs, out) + return populateMetric(v.valType, v.function(), v.labelPairs, nil, out) } // NewConstMetric returns a metric with one fixed value that cannot be @@ -116,19 +119,20 @@ func (m *constMetric) Desc() *Desc { } func (m *constMetric) Write(out *dto.Metric) error { - return populateMetric(m.valType, m.val, m.labelPairs, out) + return populateMetric(m.valType, m.val, m.labelPairs, nil, out) } func populateMetric( t ValueType, v float64, labelPairs []*dto.LabelPair, + e *dto.Exemplar, m *dto.Metric, ) error { m.Label = labelPairs switch t { case CounterValue: - m.Counter = &dto.Counter{Value: proto.Float64(v)} + m.Counter = &dto.Counter{Value: proto.Float64(v), Exemplar: e} case GaugeValue: m.Gauge = &dto.Gauge{Value: proto.Float64(v)} case UntypedValue: @@ -160,3 +164,39 @@ func makeLabelPairs(desc *Desc, labelValues []string) []*dto.LabelPair { sort.Sort(labelPairSorter(labelPairs)) return labelPairs } + +// ExemplarMaxRunes is the max total number of runes allowed in exemplar labels. +const ExemplarMaxRunes = 64 + +// newExemplar creates a new dto.Exemplar from the provided values. An error is +// returned if any of the label names or values are invalid. +func newExemplar(value float64, ts time.Time, l Labels) (*dto.Exemplar, error) { + e := &dto.Exemplar{} + e.Value = proto.Float64(value) + tsProto, err := ptypes.TimestampProto(ts) + if err != nil { + return nil, err + } + e.Timestamp = tsProto + labelPairs := make([]*dto.LabelPair, 0, len(l)) + var runes int + for name, value := range l { + if !checkLabelName(name) { + return nil, fmt.Errorf("exemplar label name %q is invalid", name) + } + runes += utf8.RuneCountInString(name) + if !utf8.ValidString(value) { + return nil, fmt.Errorf("exemplar label value %q is not valid UTF-8", value) + } + runes += utf8.RuneCountInString(value) + labelPairs = append(labelPairs, &dto.LabelPair{ + Name: proto.String(name), + Value: proto.String(value), + }) + } + if runes > ExemplarMaxRunes { + return nil, fmt.Errorf("exemplar labels have %d runes, exceeding the limit of %d", runes, ExemplarMaxRunes) + } + e.Label = labelPairs + return e, nil +}