query-frontend: Don't cache request with dedup=false (#5300)

* query-frontend: Added repro for dedup affecting precision of querying. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> * QFE does not cache request with dedup=false. Signed-off-by: Bartlomiej Plotka <bwplotka@gmail.com> Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> * Move info about queries that skip cache logic to docs Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> * Update CHANGELOG Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> * Run docs formatter Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> * Fix e2e tests where caching logic is desired Signed-off-by: Douglas Camata <159076+douglascamata@users.noreply.github.com> Co-authored-by: Bartlomiej Plotka <bwplotka@gmail.com>
thanos-io · Apr 26, 2022 · 0a6287f · 0a6287f
1 parent 7cf9309
commit 0a6287f
Show file tree

Hide file tree

Showing 8 changed files with 108 additions and 42 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
 
 ### Fixed
 - [#5281](https://github.com/thanos-io/thanos/pull/5281) Blocks: Use correct separators for filesystem paths and object storage paths respectively.
+- [#5300](https://github.com/thanos-io/thanos/pull/5300) Query: Ignore cache on queries with deduplication off.
 
 ### Added
 

diff --git a/docs/components/query-frontend.md b/docs/components/query-frontend.md
@@ -36,6 +36,12 @@ Query Frontend supports a retry mechanism to retry query when HTTP requests are
 
 Query Frontend supports caching query results and reuses them on subsequent queries. If the cached results are incomplete, Query Frontend calculates the required subqueries and executes them in parallel on downstream queriers. Query Frontend can optionally align queries with their step parameter to improve the cacheability of the query results. Currently, in-memory cache (fifo cache) and memcached are supported.
 
+### Excluded from caching
+
+* Requests that support deduplication and having it disabled with `dedup=false`. Read more about deduplication in [Dedup documentation](query.md#deduplication-enabled).
+* Requests that specify store matchers.
+* Requests were the caching is explicitely disabled.
+
 #### In-memory
 
 ```yaml mdox-exec="go run scripts/cfggen/main.go --name=queryfrontend.InMemoryResponseCacheConfig"

diff --git a/pkg/queryfrontend/labels_codec_test.go b/pkg/queryfrontend/labels_codec_test.go
@@ -28,7 +28,7 @@ func TestLabelsCodec_DecodeRequest(t *testing.T) {
 		url             string
 		partialResponse bool
 		expectedError   error
-		expectedRequest ThanosRequest
+		expectedRequest ThanosRequestStoreMatcherGetter
 	}{
 		{
 			name:            "label_names cannot parse start",

diff --git a/pkg/queryfrontend/queryrange_codec.go b/pkg/queryfrontend/queryrange_codec.go
@@ -192,7 +192,7 @@ func parseDurationMillis(s string) (int64, error) {
 }
 
 func parseEnableDedupParam(s string) (bool, error) {
-	enableDeduplication := true
+	enableDeduplication := true // Deduplication is enabled by default.
 	if s != "" {
 		var err error
 		enableDeduplication, err = strconv.ParseBool(s)

diff --git a/pkg/queryfrontend/request.go b/pkg/queryfrontend/request.go
@@ -13,9 +13,9 @@ import (
 	"github.com/prometheus/prometheus/model/timestamp"
 )
 
-// TODO(yeya24): add partial result when needed.
-// ThanosRequest is a common interface defined for specific thanos requests.
-type ThanosRequest interface {
+// ThanosRequestStoreMatcherGetter is a an interface for store matching that all request share.
+// TODO(yeya24): Add partial result when needed.
+type ThanosRequestStoreMatcherGetter interface {
 	GetStoreMatchers() [][]*labels.Matcher
 }
 
@@ -24,6 +24,11 @@ type RequestHeader struct {
 	Values []string
 }
 
+// ThanosRequestDedup is a an interface for all requests that share setting deduplication.
+type ThanosRequestDedup interface {
+	IsDedupEnabled() bool
+}
+
 type ThanosQueryRangeRequest struct {
 	Path                string
 	Start               int64
@@ -41,6 +46,12 @@ type ThanosQueryRangeRequest struct {
 	Headers             []*RequestHeader
 }
 
+// IsDedupEnabled returns true if deduplication is enabled.
+func (r *ThanosQueryRangeRequest) IsDedupEnabled() bool { return r.Dedup }
+
+// GetStoreMatchers returns store matches.
+func (r *ThanosQueryRangeRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
+
 // GetStart returns the start timestamp of the request in milliseconds.
 func (r *ThanosQueryRangeRequest) GetStart() int64 { return r.Start }
 
@@ -102,8 +113,6 @@ func (r *ThanosQueryRangeRequest) String() string { return "" }
 // which is not used in thanos.
 func (r *ThanosQueryRangeRequest) ProtoMessage() {}
 
-func (r *ThanosQueryRangeRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
-
 type ThanosLabelsRequest struct {
 	Start           int64
 	End             int64
@@ -116,6 +125,9 @@ type ThanosLabelsRequest struct {
 	Headers         []*RequestHeader
 }
 
+// GetStoreMatchers returns store matches.
+func (r *ThanosLabelsRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
+
 // GetStart returns the start timestamp of the request in milliseconds.
 func (r *ThanosLabelsRequest) GetStart() int64 { return r.Start }
 
@@ -173,8 +185,6 @@ func (r *ThanosLabelsRequest) String() string { return "" }
 // which is not used in thanos.
 func (r *ThanosLabelsRequest) ProtoMessage() {}
 
-func (r *ThanosLabelsRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
-
 type ThanosSeriesRequest struct {
 	Path            string
 	Start           int64
@@ -188,6 +198,12 @@ type ThanosSeriesRequest struct {
 	Headers         []*RequestHeader
 }
 
+// IsDedupEnabled returns true if deduplication is enabled.
+func (r *ThanosSeriesRequest) IsDedupEnabled() bool { return r.Dedup }
+
+// GetStoreMatchers returns store matches.
+func (r *ThanosSeriesRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
+
 // GetStart returns the start timestamp of the request in milliseconds.
 func (r *ThanosSeriesRequest) GetStart() int64 { return r.Start }
 
@@ -243,5 +259,3 @@ func (r *ThanosSeriesRequest) String() string { return "" }
 // ProtoMessage implements proto.Message interface required by queryrange.Request,
 // which is not used in thanos.
 func (r *ThanosSeriesRequest) ProtoMessage() {}
-
-func (r *ThanosSeriesRequest) GetStoreMatchers() [][]*labels.Matcher { return r.StoreMatchers }
diff --git a/pkg/queryfrontend/roundtrip.go b/pkg/queryfrontend/roundtrip.go
@@ -274,10 +274,18 @@ func newLabelsTripperware(
 	}, nil
 }
 
-// Don't go to response cache if StoreMatchers are set.
+// shouldCache controls what kind of Thanos request should be cached.
+// For more information about requests that skip caching logic, please visit
+// the query-frontend documentation.
 func shouldCache(r queryrange.Request) bool {
-	if thanosReq, ok := r.(ThanosRequest); ok {
-		if len(thanosReq.GetStoreMatchers()) > 0 {
+	if thanosReqStoreMatcherGettable, ok := r.(ThanosRequestStoreMatcherGetter); ok {
+		if len(thanosReqStoreMatcherGettable.GetStoreMatchers()) > 0 {
+			return false
+		}
+	}
+
+	if thanosReqDedup, ok := r.(ThanosRequestDedup); ok {
+		if !thanosReqDedup.IsDedupEnabled() {
 			return false
 		}
 	}

diff --git a/pkg/queryfrontend/roundtrip_test.go b/pkg/queryfrontend/roundtrip_test.go
@@ -381,6 +381,16 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 		End:                 2 * hour,
 		Step:                10 * seconds,
 		MaxSourceResolution: 1 * seconds,
+		Dedup:               true, // Deduplication is enabled by default.
+	}
+
+	testRequestWithoutDedup := &ThanosQueryRangeRequest{
+		Path:                "/api/v1/query_range",
+		Start:               0,
+		End:                 2 * hour,
+		Step:                10 * seconds,
+		MaxSourceResolution: 1 * seconds,
+		Dedup:               false,
 	}
 
 	// Non query range request, won't be cached.
@@ -389,6 +399,7 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 		Start: 0,
 		End:   2 * hour,
 		Step:  10 * seconds,
+		Dedup: true,
 	}
 
 	// Same query params as testRequest, different maxSourceResolution
@@ -399,6 +410,7 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 		End:                 2 * hour,
 		Step:                10 * seconds,
 		MaxSourceResolution: 10 * seconds,
+		Dedup:               true,
 	}
 
 	// Same query params as testRequest, different maxSourceResolution
@@ -409,6 +421,7 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 		End:                 2 * hour,
 		Step:                10 * seconds,
 		MaxSourceResolution: 1 * hour,
+		Dedup:               true,
 	}
 
 	// Same query params as testRequest, but with storeMatchers
@@ -419,6 +432,7 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 		Step:                10 * seconds,
 		MaxSourceResolution: 1 * seconds,
 		StoreMatchers:       [][]*labels.Matcher{{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")}},
+		Dedup:               true,
 	}
 
 	cacheConf := &queryrange.ResultsCacheConfig{
@@ -457,20 +471,22 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 	}{
 		{name: "first request", req: testRequest, expected: 1},
 		{name: "same request as the first one, directly use cache", req: testRequest, expected: 1},
-		{name: "non query range request won't be cached", req: testRequestInstant, expected: 2},
-		{name: "do it again", req: testRequestInstant, expected: 3},
-		{name: "different max source resolution but still same level", req: testRequestSameLevelDownsampling, expected: 3},
-		{name: "different max source resolution and different level", req: testRequestHigherLevelDownsampling, expected: 4},
-		{name: "storeMatchers requests won't go to cache", req: testRequestWithStoreMatchers, expected: 5},
+		{name: "same request as the first one but with dedup disabled, should not use cache", req: testRequestWithoutDedup, expected: 2},
+		{name: "non query range request won't be cached", req: testRequestInstant, expected: 3},
+		{name: "do it again", req: testRequestInstant, expected: 4},
+		{name: "different max source resolution but still same level", req: testRequestSameLevelDownsampling, expected: 4},
+		{name: "different max source resolution and different level", req: testRequestHigherLevelDownsampling, expected: 5},
+		{name: "storeMatchers requests won't go to cache", req: testRequestWithStoreMatchers, expected: 6},
 		{
 			name: "request but will be partitioned",
 			req: &ThanosQueryRangeRequest{
 				Path:  "/api/v1/query_range",
 				Start: 0,
 				End:   25 * hour,
 				Step:  10 * seconds,
+				Dedup: true,
 			},
-			expected: 7,
+			expected: 8,
 		},
 		{
 			name: "same query as the previous one",
@@ -479,12 +495,12 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 				Start: 0,
 				End:   25 * hour,
 				Step:  10 * seconds,
+				Dedup: true,
 			},
-			expected: 7,
+			expected: 8,
 		},
 	} {
-
-		t.Run(tc.name, func(t *testing.T) {
+		if !t.Run(tc.name, func(t *testing.T) {
 
 			ctx := user.InjectOrgID(context.Background(), "1")
 			httpReq, err := NewThanosQueryRangeCodec(true).EncodeRequest(ctx, tc.req)
@@ -494,8 +510,9 @@ func TestRoundTripQueryRangeCacheMiddleware(t *testing.T) {
 			testutil.Ok(t, err)
 
 			testutil.Equals(t, tc.expected, *res)
-		})
-
+		}) {
+			break
+		}
 	}
 }
 
@@ -608,9 +625,7 @@ func TestRoundTripLabelsCacheMiddleware(t *testing.T) {
 			expected: 8,
 		},
 	} {
-
-		t.Run(tc.name, func(t *testing.T) {
-
+		if !t.Run(tc.name, func(t *testing.T) {
 			ctx := user.InjectOrgID(context.Background(), "1")
 			httpReq, err := NewThanosLabelsCodec(true, 24*time.Hour).EncodeRequest(ctx, tc.req)
 			testutil.Ok(t, err)
@@ -619,8 +634,9 @@ func TestRoundTripLabelsCacheMiddleware(t *testing.T) {
 			testutil.Ok(t, err)
 
 			testutil.Equals(t, tc.expected, *res)
-		})
-
+		}) {
+			break
+		}
 	}
 }
 
@@ -631,6 +647,15 @@ func TestRoundTripSeriesCacheMiddleware(t *testing.T) {
 		Start:    0,
 		End:      2 * hour,
 		Matchers: [][]*labels.Matcher{{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")}},
+		Dedup:    true,
+	}
+
+	testRequestWithoutDedup := &ThanosSeriesRequest{
+		Path:     "/api/v1/series",
+		Start:    0,
+		End:      2 * hour,
+		Matchers: [][]*labels.Matcher{{labels.MustNewMatcher(labels.MatchEqual, "foo", "bar")}},
+		Dedup:    false,
 	}
 
 	// Different matchers set with the first request.
@@ -639,10 +664,11 @@ func TestRoundTripSeriesCacheMiddleware(t *testing.T) {
 		Start:    0,
 		End:      2 * hour,
 		Matchers: [][]*labels.Matcher{{labels.MustNewMatcher(labels.MatchEqual, "foo", "baz")}},
+		Dedup:    true,
 	}
 
 	// Same query params as testRequest, but with storeMatchers
-	testRequestWithStoreMatchers := &ThanosLabelsRequest{
+	testRequestWithStoreMatchers := &ThanosSeriesRequest{
 		Path:          "/api/v1/series",
 		Start:         0,
 		End:           2 * hour,
@@ -685,12 +711,12 @@ func TestRoundTripSeriesCacheMiddleware(t *testing.T) {
 	}{
 		{name: "first request", req: testRequest, expected: 1},
 		{name: "same request as the first one, directly use cache", req: testRequest, expected: 1},
-		{name: "different series request, not use cache", req: testRequest2, expected: 2},
-		{name: "storeMatchers requests won't go to cache", req: testRequestWithStoreMatchers, expected: 3},
+		{name: "same request as the first one but with dedup disabled, should not use cache", req: testRequestWithoutDedup, expected: 2},
+		{name: "different series request, not use cache", req: testRequest2, expected: 3},
+		{name: "storeMatchers requests won't go to cache", req: testRequestWithStoreMatchers, expected: 4},
 	} {
 
-		t.Run(tc.name, func(t *testing.T) {
-
+		if !t.Run(tc.name, func(t *testing.T) {
 			ctx := user.InjectOrgID(context.Background(), "1")
 			httpReq, err := NewThanosLabelsCodec(true, 24*time.Hour).EncodeRequest(ctx, tc.req)
 			testutil.Ok(t, err)
@@ -699,8 +725,9 @@ func TestRoundTripSeriesCacheMiddleware(t *testing.T) {
 			testutil.Ok(t, err)
 
 			testutil.Equals(t, tc.expected, *res)
-		})
-
+		}) {
+			break
+		}
 	}
 }
 

diff --git a/test/e2e/query_frontend_test.go b/test/e2e/query_frontend_test.go
@@ -109,7 +109,9 @@ func TestQueryFrontend(t *testing.T) {
 			timestamp.FromTime(now.Add(-time.Hour)),
 			timestamp.FromTime(now.Add(time.Hour)),
 			14,
-			promclient.QueryOptions{},
+			promclient.QueryOptions{
+				Deduplicate: true,
+			},
 			func(res model.Matrix) error {
 				if len(res) == 0 {
 					return errors.Errorf("expected some results, got nothing")
@@ -151,7 +153,9 @@ func TestQueryFrontend(t *testing.T) {
 			timestamp.FromTime(now.Add(-time.Hour)),
 			timestamp.FromTime(now.Add(time.Hour)),
 			14,
-			promclient.QueryOptions{},
+			promclient.QueryOptions{
+				Deduplicate: true,
+			},
 			func(res model.Matrix) error {
 				if len(res) == 0 {
 					return errors.Errorf("expected some results, got nothing")
@@ -196,7 +200,9 @@ func TestQueryFrontend(t *testing.T) {
 			timestamp.FromTime(now.Add(-time.Hour)),
 			timestamp.FromTime(now.Add(24*time.Hour)),
 			14,
-			promclient.QueryOptions{},
+			promclient.QueryOptions{
+				Deduplicate: true,
+			},
 			func(res model.Matrix) error {
 				if len(res) == 0 {
 					return errors.Errorf("expected some results, got nothing")
@@ -459,7 +465,9 @@ func TestQueryFrontendMemcachedCache(t *testing.T) {
 		timestamp.FromTime(now.Add(-time.Hour)),
 		timestamp.FromTime(now.Add(time.Hour)),
 		14,
-		promclient.QueryOptions{},
+		promclient.QueryOptions{
+			Deduplicate: true,
+		},
 		func(res model.Matrix) error {
 			if len(res) == 0 {
 				return errors.Errorf("expected some results, got nothing")
@@ -489,7 +497,9 @@ func TestQueryFrontendMemcachedCache(t *testing.T) {
 		timestamp.FromTime(now.Add(-time.Hour)),
 		timestamp.FromTime(now.Add(time.Hour)),
 		14,
-		promclient.QueryOptions{},
+		promclient.QueryOptions{
+			Deduplicate: true,
+		},
 		func(res model.Matrix) error {
 			if len(res) == 0 {
 				return errors.Errorf("expected some results, got nothing")