From 7a609518cc089ef51899103cdbbb119aa903c630 Mon Sep 17 00:00:00 2001 From: Michael Anthony Knyszek Date: Mon, 24 Jan 2022 05:49:56 +0000 Subject: [PATCH] Use simpler locking in the Go 1.17 collector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A previous PR made it so that the Go 1.17 collector locked only around uses of rmSampleBuf, but really that means that Metric values may be sent over the channel containing some values from future metrics.Read calls. While generally-speaking this isn't a problem, we lose any consistency guarantees provided by the runtime/metrics package. Also, that optimization to not just lock around all of Collect was premature. Truthfully, Collect is called relatively infrequently, and its critical path is fairly fast (10s of µs). To prove it, this test also adds a benchmark. Signed-off-by: Michael Anthony Knyszek --- prometheus/go_collector_go117.go | 33 ++++++++++++++------------- prometheus/go_collector_go117_test.go | 2 +- prometheus/go_collector_test.go | 17 ++++++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/prometheus/go_collector_go117.go b/prometheus/go_collector_go117.go index 504684039..a9b1fbaea 100644 --- a/prometheus/go_collector_go117.go +++ b/prometheus/go_collector_go117.go @@ -31,8 +31,11 @@ import ( type goCollector struct { base baseGoCollector + // mu protects updates to all fields ensuring a consistent + // snapshot is always produced by Collect. + mu sync.Mutex + // rm... fields all pertain to the runtime/metrics package. - rmSampleMu sync.Mutex rmSampleBuf []metrics.Sample rmSampleMap map[string]*metrics.Sample rmMetrics []collectorMetric @@ -135,10 +138,16 @@ func (c *goCollector) Collect(ch chan<- Metric) { // rmSampleBuf. Just read into rmSampleBuf but write all the data // we get into our Metrics or MemStats. // - // Note that we cannot simply read and then clone rmSampleBuf - // because we'd need to perform a deep clone of it, which is likely - // not worth it. - c.rmSampleMu.Lock() + // This lock also ensures that the Metrics we send out are all from + // the same updates, ensuring their mutual consistency insofar as + // is guaranteed by the runtime/metrics package. + // + // N.B. This locking is heavy-handed, but Collect is expected to be called + // relatively infrequently. Also the core operation here, metrics.Read, + // is fast (O(tens of microseconds)) so contention should certainly be + // low, though channel operations and any allocations may add to that. + c.mu.Lock() + defer c.mu.Unlock() // Populate runtime/metrics sample buffer. metrics.Read(c.rmSampleBuf) @@ -157,10 +166,13 @@ func (c *goCollector) Collect(ch chan<- Metric) { if v1 > v0 { m.Add(unwrapScalarRMValue(sample.Value) - m.get()) } + m.Collect(ch) case *gauge: m.Set(unwrapScalarRMValue(sample.Value)) + m.Collect(ch) case *batchHistogram: m.update(sample.Value.Float64Histogram(), c.exactSumFor(sample.Name)) + m.Collect(ch) default: panic("unexpected metric type") } @@ -169,17 +181,6 @@ func (c *goCollector) Collect(ch chan<- Metric) { // populate the old metrics from it. var ms runtime.MemStats memStatsFromRM(&ms, c.rmSampleMap) - - c.rmSampleMu.Unlock() - - // Export all the metrics to ch. - // At this point we must not access rmSampleBuf or rmSampleMap, because - // a concurrent caller could use it. It's safe to Collect all our Metrics, - // however, because they're updated in a thread-safe way while MemStats - // is local to this call of Collect. - for _, m := range c.rmMetrics { - m.Collect(ch) - } for _, i := range c.msMetrics { ch <- MustNewConstMetric(i.desc, i.valType, i.eval(&ms)) } diff --git a/prometheus/go_collector_go117_test.go b/prometheus/go_collector_go117_test.go index e780bce4e..fe715fc88 100644 --- a/prometheus/go_collector_go117_test.go +++ b/prometheus/go_collector_go117_test.go @@ -292,7 +292,7 @@ func TestGoCollectorConcurrency(t *testing.T) { go func() { ch := make(chan Metric) go func() { - // Drain all metrics recieved until the + // Drain all metrics received until the // channel is closed. for range ch { } diff --git a/prometheus/go_collector_test.go b/prometheus/go_collector_test.go index 9cc1b2e7f..47b944db5 100644 --- a/prometheus/go_collector_test.go +++ b/prometheus/go_collector_test.go @@ -154,3 +154,20 @@ func TestGoCollectorGC(t *testing.T) { break } } + +func BenchmarkGoCollector(b *testing.B) { + c := NewGoCollector().(*goCollector) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + ch := make(chan Metric, 8) + go func() { + // Drain all metrics received until the + // channel is closed. + for range ch { + } + }() + c.Collect(ch) + close(ch) + } +}