Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Preserve reset hint during histogram deduplication #7092

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 65 additions & 1 deletion pkg/query/iter.go
Expand Up @@ -168,7 +168,7 @@ func getFirstIterator(cs ...*storepb.Chunk) chunkenc.Iterator {
if err != nil {
return errSeriesIterator{err}
}
return chk.Iterator(nil)
return newHistogramResetDetector(chk.Iterator(nil))
}
return errSeriesIterator{errors.New("no valid chunk found")}
}
Expand Down Expand Up @@ -308,3 +308,67 @@ func (c *lazySeriesSet) Warnings() annotations.Annotations {
}
return nil
}

// histogramResetDetector sets the CounterResetHint to UnknownCounterReset for the first histogram read from the Iterator.
//
// The reset hint is always present in the first sample of the chunk, and all
// consecutive samples will have a NotCounterReset hint. During deduplication the
// first sample could be skipped, which means the reset will not be properly
// detected. This iterator will make sure that in that case the hint for the
// first read sample is set to UnknownCounterReset so that the PromQL engine will
// do the reset detection manually.
type histogramResetDetector struct {
chunkenc.Iterator

lastT int64
lastValType chunkenc.ValueType

i int16
histogramRead bool
}

func newHistogramResetDetector(iterator chunkenc.Iterator) *histogramResetDetector {
return &histogramResetDetector{
Iterator: iterator,
i: -1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see the only purpose of this is to check whether Next() has been called? Maybe we could convert it into a simple boolean? Otherwise this might overflow.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable tracks whether we are at the first sample. We could use a boolean pointer because we need to represent 3 states, one being Next not being called at all. AFAIK a chunk has 120 samples so we should not have a risk of overflow, and even if it does the side-effect will be that we recalculate a reset one more time in the engine.

}
}

func (it *histogramResetDetector) Seek(t int64) chunkenc.ValueType {
for {
if it.lastT >= t {
return it.lastValType
}
if it.lastValType = it.Next(); it.lastValType == chunkenc.ValNone {
return chunkenc.ValNone
}
}
}

func (it *histogramResetDetector) Next() chunkenc.ValueType {
it.i++
it.lastValType = it.Iterator.Next()
if it.lastValType == chunkenc.ValNone {
return chunkenc.ValNone
}
it.lastT = it.Iterator.AtT()
return it.lastValType
}

func (it *histogramResetDetector) AtHistogram(h *histogram.Histogram) (int64, *histogram.Histogram) {
t, h := it.Iterator.AtHistogram(h)
if !it.histogramRead && it.i != 0 {
h.CounterResetHint = histogram.UnknownCounterReset
}
it.histogramRead = true
return t, h
}

func (it *histogramResetDetector) AtFloatHistogram(fh *histogram.FloatHistogram) (int64, *histogram.FloatHistogram) {
t, fh := it.Iterator.AtFloatHistogram(fh)
if !it.histogramRead && it.i != 0 {
fh.CounterResetHint = histogram.UnknownCounterReset
}
it.histogramRead = true
return t, fh
}
74 changes: 74 additions & 0 deletions pkg/query/iter_test.go
@@ -0,0 +1,74 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package query

import (
"testing"

"github.com/efficientgo/core/testutil"
"github.com/prometheus/prometheus/model/histogram"
"github.com/prometheus/prometheus/tsdb/tsdbutil"
)

func TestNativeHistogramDedup(t *testing.T) {
cases := []struct {
samples []sample
tcase string
}{
{
tcase: "unknown counter reset chunk",
samples: []sample{
{t: 10000, h: makeHistWithHint(1, histogram.UnknownCounterReset)},
{t: 20000, h: makeHistWithHint(2, histogram.UnknownCounterReset)},
},
},
{
tcase: "not counter reset chunk",
samples: []sample{
{t: 10000, h: makeHistWithHint(1, histogram.NotCounterReset)},
{t: 20000, h: makeHistWithHint(2, histogram.NotCounterReset)},
},
},
{
tcase: "counter reset chunk",
samples: []sample{
{t: 10000, h: makeHistWithHint(1, histogram.CounterReset)},
{t: 20000, h: makeHistWithHint(2, histogram.NotCounterReset)},
},
},
}

for _, c := range cases {
t.Run(c.tcase, func(t *testing.T) {
// When the first sample is read, the counter reset hint for the second sample
// does not need to be reset.
t.Run("read_first_sample", func(t *testing.T) {
it := newHistogramResetDetector(newMockedSeriesIterator(c.samples))
it.Next()
_, h := it.AtHistogram(nil)
testutil.Equals(t, c.samples[0].h.CounterResetHint, h.CounterResetHint)

it.Next()
_, h = it.AtHistogram(nil)
testutil.Equals(t, c.samples[1].h.CounterResetHint, h.CounterResetHint)
})

// When the first sample is not read, the counter reset hint for the second
// sample should be reset.
t.Run("skip_first_sample", func(t *testing.T) {
it := newHistogramResetDetector(newMockedSeriesIterator(c.samples))
it.Next()
it.Next()
_, h := it.AtHistogram(nil)
testutil.Equals(t, h.CounterResetHint, histogram.UnknownCounterReset)
})
})
}
}

func makeHistWithHint(i int, hint histogram.CounterResetHint) *histogram.Histogram {
h := tsdbutil.GenerateTestHistogram(i)
h.CounterResetHint = hint
return h
}