From ef972b918885cde6d8c38131b9ba8a28f677365d Mon Sep 17 00:00:00 2001
From: beorn7 <beorn@grafana.com>
Date: Tue, 14 Jan 2020 19:22:19 +0100
Subject: [PATCH] Add exemplars to counter and histogram

Signed-off-by: beorn7 <beorn@grafana.com>
---
 examples/random/main.go     |  15 ++++-
 go.mod                      |   4 +-
 go.sum                      |  10 ++--
 prometheus/counter.go       |  36 +++++++++++-
 prometheus/gauge.go         |   2 +-
 prometheus/histogram.go     | 113 ++++++++++++++++++++++++++----------
 prometheus/promhttp/http.go |  63 +++++++++++++++-----
 prometheus/value.go         |  46 ++++++++++++++-
 8 files changed, 229 insertions(+), 60 deletions(-)

diff --git a/examples/random/main.go b/examples/random/main.go
index cb78b07ba..1da9bd1e5 100644
--- a/examples/random/main.go
+++ b/examples/random/main.go
@@ -18,6 +18,7 @@ package main
 
 import (
 	"flag"
+	"fmt"
 	"log"
 	"math"
 	"math/rand"
@@ -89,7 +90,11 @@ func main() {
 		for {
 			v := (rand.NormFloat64() * *normDomain) + *normMean
 			rpcDurations.WithLabelValues("normal").Observe(v)
-			rpcDurationsHistogram.Observe(v)
+			rpcDurationsHistogram.ObserveWithExemplar(
+				// Demonstrate exemplar support with a dummy ID. This would be
+				// something like a trace ID in a real application.
+				v, prometheus.Labels{"dummyID": fmt.Sprint(rand.Intn(100000))},
+			)
 			time.Sleep(time.Duration(75*oscillationFactor()) * time.Millisecond)
 		}
 	}()
@@ -103,6 +108,12 @@ func main() {
 	}()
 
 	// Expose the registered metrics via HTTP.
-	http.Handle("/metrics", promhttp.Handler())
+	http.Handle("/metrics", promhttp.HandlerFor(
+		prometheus.DefaultGatherer,
+		promhttp.HandlerOpts{
+			// Opt into OpenMetrics to support exemplars.
+			EnableOpenMetrics: true,
+		},
+	))
 	log.Fatal(http.ListenAndServe(*addr, nil))
 }
diff --git a/go.mod b/go.mod
index 1540dba22..976e337be 100644
--- a/go.mod
+++ b/go.mod
@@ -5,8 +5,8 @@ require (
 	github.com/cespare/xxhash/v2 v2.1.1
 	github.com/golang/protobuf v1.3.2
 	github.com/json-iterator/go v1.1.8
-	github.com/prometheus/client_model v0.1.0
-	github.com/prometheus/common v0.7.0
+	github.com/prometheus/client_model v0.2.0
+	github.com/prometheus/common v0.9.0
 	github.com/prometheus/procfs v0.0.8
 	golang.org/x/sys v0.0.0-20191220142924-d4481acd189f
 )
diff --git a/go.sum b/go.sum
index ff3d14668..059c79832 100644
--- a/go.sum
+++ b/go.sum
@@ -58,12 +58,12 @@ github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 h1:idejC8f
 github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 h1:S/YWwWx/RA8rT8tKFRuGUZhuA90OyIBpPCXkcbwU8DE=
 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
-github.com/prometheus/client_model v0.1.0 h1:ElTg5tNp4DqfV7UQjDqv2+RJlNzsDtvNAWccbItceIE=
-github.com/prometheus/client_model v0.1.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
+github.com/prometheus/client_model v0.2.0 h1:uq5h0d+GuxiXLJLNABMgp2qUWDPiLvgCzz2dUR+/W/M=
+github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
 github.com/prometheus/common v0.4.1 h1:K0MGApIoQvMw27RTdJkPbr3JZ7DNbtxQNyi5STVM6Kw=
 github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
-github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLyCY=
-github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA=
+github.com/prometheus/common v0.9.0 h1:yg//x/8DqN+PxXTBFMwVCopGqDn3wSxmbF/3PCuu1bk=
+github.com/prometheus/common v0.9.0/go.mod h1:yhUN8i9wzaXS3w1O07YhxHEBxD+W35wd8bs7vj7HSQ4=
 github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
 github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs=
 github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
@@ -97,4 +97,4 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
-gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
diff --git a/prometheus/counter.go b/prometheus/counter.go
index d463e36d3..553f9b005 100644
--- a/prometheus/counter.go
+++ b/prometheus/counter.go
@@ -17,6 +17,7 @@ import (
 	"errors"
 	"math"
 	"sync/atomic"
+	"time"
 
 	dto "github.com/prometheus/client_model/go"
 )
@@ -40,6 +41,16 @@ type Counter interface {
 	// Add adds the given value to the counter. It panics if the value is <
 	// 0.
 	Add(float64)
+	// AddWithExemplar works like Add but also replaces the currently saved
+	// exemplar (possibly none) with a new one, created from the provided
+	// value, the current time as timestamp, and the provided labels. Empty
+	// Labels will lead to a valid (label-less) exemplar. But if Labels is
+	// nil, the current exemplar is left in place. This method panics if the
+	// value is < 0, if any of the provided labels are invalid, or if the
+	// provided labels contain more than 64 runes in total. AddWithExemplar
+	// should not be called in a hot path as it is significantly more costly
+	// than Add.
+	AddWithExemplar(value float64, exemplar Labels)
 }
 
 // CounterOpts is an alias for Opts. See there for doc comments.
@@ -78,6 +89,7 @@ type counter struct {
 	desc *Desc
 
 	labelPairs []*dto.LabelPair
+	exemplar   atomic.Value // *dto.Exemplar
 }
 
 func (c *counter) Desc() *Desc {
@@ -88,6 +100,7 @@ func (c *counter) Add(v float64) {
 	if v < 0 {
 		panic(errors.New("counter cannot decrease in value"))
 	}
+
 	ival := uint64(v)
 	if float64(ival) == v {
 		atomic.AddUint64(&c.valInt, ival)
@@ -103,6 +116,11 @@ func (c *counter) Add(v float64) {
 	}
 }
 
+func (c *counter) AddWithExemplar(v float64, e Labels) {
+	c.Add(v)
+	c.updateExemplar(v, e)
+}
+
 func (c *counter) Inc() {
 	atomic.AddUint64(&c.valInt, 1)
 }
@@ -112,7 +130,23 @@ func (c *counter) Write(out *dto.Metric) error {
 	ival := atomic.LoadUint64(&c.valInt)
 	val := fval + float64(ival)
 
-	return populateMetric(CounterValue, val, c.labelPairs, out)
+	var exemplar *dto.Exemplar
+	if e := c.exemplar.Load(); e != nil {
+		exemplar = e.(*dto.Exemplar)
+	}
+
+	return populateMetric(CounterValue, val, c.labelPairs, exemplar, out)
+}
+
+func (c *counter) updateExemplar(v float64, l Labels) {
+	if l == nil {
+		return
+	}
+	e, err := newExemplar(v, time.Now(), l)
+	if err != nil {
+		panic(err)
+	}
+	c.exemplar.Store(e)
 }
 
 // CounterVec is a Collector that bundles a set of Counters that all share the
diff --git a/prometheus/gauge.go b/prometheus/gauge.go
index 56d8cc209..d67573f76 100644
--- a/prometheus/gauge.go
+++ b/prometheus/gauge.go
@@ -123,7 +123,7 @@ func (g *gauge) Sub(val float64) {
 
 func (g *gauge) Write(out *dto.Metric) error {
 	val := math.Float64frombits(atomic.LoadUint64(&g.valBits))
-	return populateMetric(GaugeValue, val, g.labelPairs, out)
+	return populateMetric(GaugeValue, val, g.labelPairs, nil, out)
 }
 
 // GaugeVec is a Collector that bundles a set of Gauges that all share the same
diff --git a/prometheus/histogram.go b/prometheus/histogram.go
index ac2614d52..e30de3bc8 100644
--- a/prometheus/histogram.go
+++ b/prometheus/histogram.go
@@ -20,6 +20,7 @@ import (
 	"sort"
 	"sync"
 	"sync/atomic"
+	"time"
 
 	"github.com/golang/protobuf/proto"
 
@@ -47,6 +48,16 @@ type Histogram interface {
 
 	// Observe adds a single observation to the histogram.
 	Observe(float64)
+	// ObserveWithExemplar works like Observe but also replaces the
+	// currently saved exemplar for the relevant bucket (possibly none) with
+	// a new one, created from the provided value, the current time as
+	// timestamp, and the provided Labels. Empty Labels will lead to a valid
+	// (label-less) exemplar. But if Labels is nil, the current exemplar in
+	// the relevant bucket is left in place. This method panics if any of
+	// the provided labels are invalid or if the provided labels contain
+	// more than 64 runes in total. ObserveWithExemplar should not be called
+	// in a hot path as it is significantly more costly than Observe.
+	ObserveWithExemplar(value float64, exemplar Labels)
 }
 
 // bucketLabel is used for the label that defines the upper bound of a
@@ -205,9 +216,10 @@ func newHistogram(desc *Desc, opts HistogramOpts, labelValues ...string) Histogr
 		}
 	}
 	// Finally we know the final length of h.upperBounds and can make buckets
-	// for both counts:
+	// for both counts as well as exemplars:
 	h.counts[0].buckets = make([]uint64, len(h.upperBounds))
 	h.counts[1].buckets = make([]uint64, len(h.upperBounds))
+	h.exemplars = make([]atomic.Value, len(h.upperBounds)+1)
 
 	h.init(h) // Init self-collection.
 	return h
@@ -254,6 +266,7 @@ type histogram struct {
 
 	upperBounds []float64
 	labelPairs  []*dto.LabelPair
+	exemplars   []atomic.Value // One more than buckets (to include +Inf), each a *dto.Exemplar.
 }
 
 func (h *histogram) Desc() *Desc {
@@ -261,36 +274,13 @@ func (h *histogram) Desc() *Desc {
 }
 
 func (h *histogram) Observe(v float64) {
-	// TODO(beorn7): For small numbers of buckets (<30), a linear search is
-	// slightly faster than the binary search. If we really care, we could
-	// switch from one search strategy to the other depending on the number
-	// of buckets.
-	//
-	// Microbenchmarks (BenchmarkHistogramNoLabels):
-	// 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op
-	// 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op
-	// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
-	i := sort.SearchFloat64s(h.upperBounds, v)
-
-	// We increment h.countAndHotIdx so that the counter in the lower
-	// 63 bits gets incremented. At the same time, we get the new value
-	// back, which we can use to find the currently-hot counts.
-	n := atomic.AddUint64(&h.countAndHotIdx, 1)
-	hotCounts := h.counts[n>>63]
+	h.observe(v, h.findBucket(v))
+}
 
-	if i < len(h.upperBounds) {
-		atomic.AddUint64(&hotCounts.buckets[i], 1)
-	}
-	for {
-		oldBits := atomic.LoadUint64(&hotCounts.sumBits)
-		newBits := math.Float64bits(math.Float64frombits(oldBits) + v)
-		if atomic.CompareAndSwapUint64(&hotCounts.sumBits, oldBits, newBits) {
-			break
-		}
-	}
-	// Increment count last as we take it as a signal that the observation
-	// is complete.
-	atomic.AddUint64(&hotCounts.count, 1)
+func (h *histogram) ObserveWithExemplar(v float64, e Labels) {
+	i := h.findBucket(v)
+	h.observe(v, i)
+	h.updateExemplar(v, i, e)
 }
 
 func (h *histogram) Write(out *dto.Metric) error {
@@ -329,6 +319,18 @@ func (h *histogram) Write(out *dto.Metric) error {
 			CumulativeCount: proto.Uint64(cumCount),
 			UpperBound:      proto.Float64(upperBound),
 		}
+		if e := h.exemplars[i].Load(); e != nil {
+			his.Bucket[i].Exemplar = e.(*dto.Exemplar)
+		}
+	}
+	// If there is an exemplar for the +Inf bucket, we have to add that bucket explicitly.
+	if e := h.exemplars[len(h.upperBounds)].Load(); e != nil {
+		b := &dto.Bucket{
+			CumulativeCount: proto.Uint64(count),
+			UpperBound:      proto.Float64(math.Inf(1)),
+			Exemplar:        e.(*dto.Exemplar),
+		}
+		his.Bucket = append(his.Bucket, b)
 	}
 
 	out.Histogram = his
@@ -352,6 +354,57 @@ func (h *histogram) Write(out *dto.Metric) error {
 	return nil
 }
 
+// findBucket returns the index of the bucket for the provided value, or
+// len(h.upperBounds) for the +Inf bucket.
+func (h *histogram) findBucket(v float64) int {
+	// TODO(beorn7): For small numbers of buckets (<30), a linear search is
+	// slightly faster than the binary search. If we really care, we could
+	// switch from one search strategy to the other depending on the number
+	// of buckets.
+	//
+	// Microbenchmarks (BenchmarkHistogramNoLabels):
+	// 11 buckets: 38.3 ns/op linear - binary 48.7 ns/op
+	// 100 buckets: 78.1 ns/op linear - binary 54.9 ns/op
+	// 300 buckets: 154 ns/op linear - binary 61.6 ns/op
+	return sort.SearchFloat64s(h.upperBounds, v)
+}
+
+// observe is the implementation for Observe without the findBucket part.
+func (h *histogram) observe(v float64, bucket int) {
+	// We increment h.countAndHotIdx so that the counter in the lower
+	// 63 bits gets incremented. At the same time, we get the new value
+	// back, which we can use to find the currently-hot counts.
+	n := atomic.AddUint64(&h.countAndHotIdx, 1)
+	hotCounts := h.counts[n>>63]
+
+	if bucket < len(h.upperBounds) {
+		atomic.AddUint64(&hotCounts.buckets[bucket], 1)
+	}
+	for {
+		oldBits := atomic.LoadUint64(&hotCounts.sumBits)
+		newBits := math.Float64bits(math.Float64frombits(oldBits) + v)
+		if atomic.CompareAndSwapUint64(&hotCounts.sumBits, oldBits, newBits) {
+			break
+		}
+	}
+	// Increment count last as we take it as a signal that the observation
+	// is complete.
+	atomic.AddUint64(&hotCounts.count, 1)
+}
+
+// updateExemplar replaces the exemplar for the provided bucket. With empty
+// labels, it's a no-op. It panics if any of the labels is invalid.
+func (h *histogram) updateExemplar(v float64, bucket int, l Labels) {
+	if l == nil {
+		return
+	}
+	e, err := newExemplar(v, time.Now(), l)
+	if err != nil {
+		panic(err)
+	}
+	h.exemplars[bucket].Store(e)
+}
+
 // HistogramVec is a Collector that bundles a set of Histograms that all share the
 // same Desc, but have different values for their variable labels. This is used
 // if you want to count the same thing partitioned by various dimensions
diff --git a/prometheus/promhttp/http.go b/prometheus/promhttp/http.go
index cea5a90fd..93d626e50 100644
--- a/prometheus/promhttp/http.go
+++ b/prometheus/promhttp/http.go
@@ -144,7 +144,12 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
 			}
 		}
 
-		contentType := expfmt.Negotiate(req.Header)
+		var contentType expfmt.Format
+		if opts.EnableOpenMetrics {
+			contentType = expfmt.NegotiateIncludingOpenMetrics(req.Header)
+		} else {
+			contentType = expfmt.Negotiate(req.Header)
+		}
 		header := rsp.Header()
 		header.Set(contentTypeHeader, string(contentType))
 
@@ -163,22 +168,38 @@ func HandlerFor(reg prometheus.Gatherer, opts HandlerOpts) http.Handler {
 		enc := expfmt.NewEncoder(w, contentType)
 
 		var lastErr error
+
+		// handleError returns true if we have to abort after handling the error.
+		handleError := func(err error) bool {
+			if err == nil {
+				return false
+			}
+			lastErr = err
+			if opts.ErrorLog != nil {
+				opts.ErrorLog.Println("error encoding and sending metric family:", err)
+			}
+			errCnt.WithLabelValues("encoding").Inc()
+			switch opts.ErrorHandling {
+			case PanicOnError:
+				panic(err)
+			case ContinueOnError:
+				// Handled later.
+			case HTTPErrorOnError:
+				httpError(rsp, err)
+				return true
+			}
+			return false
+		}
+
 		for _, mf := range mfs {
-			if err := enc.Encode(mf); err != nil {
-				lastErr = err
-				if opts.ErrorLog != nil {
-					opts.ErrorLog.Println("error encoding and sending metric family:", err)
-				}
-				errCnt.WithLabelValues("encoding").Inc()
-				switch opts.ErrorHandling {
-				case PanicOnError:
-					panic(err)
-				case ContinueOnError:
-					// Handled later.
-				case HTTPErrorOnError:
-					httpError(rsp, err)
-					return
-				}
+			if handleError(enc.Encode(mf)) {
+				return
+			}
+		}
+		if closer, ok := enc.(expfmt.Closer); ok {
+			// This in particular takes care of the final "# EOF\n" line for OpenMetrics.
+			if handleError(closer.Close()) {
+				return
 			}
 		}
 
@@ -318,6 +339,16 @@ type HandlerOpts struct {
 	// away). Until the implementation is improved, it is recommended to
 	// implement a separate timeout in potentially slow Collectors.
 	Timeout time.Duration
+	// If true, the experimental OpenMetrics encoding is added to the
+	// possible options during content negotiation. Note that Prometheus
+	// 2.5.0+ will negotiate OpenMetrics as first priority. OpenMetrics is
+	// the only way to transmit exemplars. However, the move to OpenMetrics
+	// is not completely transparent. Most notably, the values of "quantile"
+	// labels of Summaries and "le" labels of Histograms are formatted with
+	// a trailing ".0" if they would otherwise look like integer numbers
+	// (which changes the identity of the resulting series on the Prometheus
+	// server).
+	EnableOpenMetrics bool
 }
 
 // gzipAccepted returns whether the client will accept gzip-encoded content.
diff --git a/prometheus/value.go b/prometheus/value.go
index eb248f108..f285f7c5b 100644
--- a/prometheus/value.go
+++ b/prometheus/value.go
@@ -16,8 +16,11 @@ package prometheus
 import (
 	"fmt"
 	"sort"
+	"time"
+	"unicode/utf8"
 
 	"github.com/golang/protobuf/proto"
+	"github.com/golang/protobuf/ptypes"
 
 	dto "github.com/prometheus/client_model/go"
 )
@@ -69,7 +72,7 @@ func (v *valueFunc) Desc() *Desc {
 }
 
 func (v *valueFunc) Write(out *dto.Metric) error {
-	return populateMetric(v.valType, v.function(), v.labelPairs, out)
+	return populateMetric(v.valType, v.function(), v.labelPairs, nil, out)
 }
 
 // NewConstMetric returns a metric with one fixed value that cannot be
@@ -116,19 +119,20 @@ func (m *constMetric) Desc() *Desc {
 }
 
 func (m *constMetric) Write(out *dto.Metric) error {
-	return populateMetric(m.valType, m.val, m.labelPairs, out)
+	return populateMetric(m.valType, m.val, m.labelPairs, nil, out)
 }
 
 func populateMetric(
 	t ValueType,
 	v float64,
 	labelPairs []*dto.LabelPair,
+	e *dto.Exemplar,
 	m *dto.Metric,
 ) error {
 	m.Label = labelPairs
 	switch t {
 	case CounterValue:
-		m.Counter = &dto.Counter{Value: proto.Float64(v)}
+		m.Counter = &dto.Counter{Value: proto.Float64(v), Exemplar: e}
 	case GaugeValue:
 		m.Gauge = &dto.Gauge{Value: proto.Float64(v)}
 	case UntypedValue:
@@ -160,3 +164,39 @@ func makeLabelPairs(desc *Desc, labelValues []string) []*dto.LabelPair {
 	sort.Sort(labelPairSorter(labelPairs))
 	return labelPairs
 }
+
+// ExemplarMaxRunes is the max total number of runes allowed in exemplar labels.
+const ExemplarMaxRunes = 64
+
+// newExemplar creates a new dto.Exemplar from the provided values. An error is
+// returned if any of the label names or values are invalid.
+func newExemplar(value float64, ts time.Time, l Labels) (*dto.Exemplar, error) {
+	e := &dto.Exemplar{}
+	e.Value = proto.Float64(value)
+	tsProto, err := ptypes.TimestampProto(ts)
+	if err != nil {
+		return nil, err
+	}
+	e.Timestamp = tsProto
+	labelPairs := make([]*dto.LabelPair, 0, len(l))
+	var runes int
+	for name, value := range l {
+		if !checkLabelName(name) {
+			return nil, fmt.Errorf("exemplar label name %q is invalid", name)
+		}
+		runes += utf8.RuneCountInString(name)
+		if !utf8.ValidString(value) {
+			return nil, fmt.Errorf("exemplar label value %q is not valid UTF-8", value)
+		}
+		runes += utf8.RuneCountInString(value)
+		labelPairs = append(labelPairs, &dto.LabelPair{
+			Name:  proto.String(name),
+			Value: proto.String(value),
+		})
+	}
+	if runes > ExemplarMaxRunes {
+		return nil, fmt.Errorf("exemplar labels have %d runes, exceeding the limit of %d", runes, ExemplarMaxRunes)
+	}
+	e.Label = labelPairs
+	return e, nil
+}