Fix ClassCastException in Significant Terms (#108429)

Prior to this PR, if a SignificantTerms aggregation targeted a field existing on two indices (that were included in the aggregation) but mapped to different field types, the query would fail at reduce time with a somewhat obscure ClassCastException. This change brings the behavior in line with the Terms aggregation, which returns a 400 class IllegalArgumentException with a useful message in this situation. Resolves #108427
elastic · May 9, 2024 · 9f438ed · 9f438ed
1 parent 6308bbf
commit 9f438ed
Show file tree

Hide file tree

Showing 2 changed files with 25 additions and 0 deletions.
diff --git a/docs/changelog/108429.yaml b/docs/changelog/108429.yaml
@@ -0,0 +1,6 @@
+pr: 108429
+summary: Fix `ClassCastException` in Significant Terms
+area: Aggregations
+type: bug
+issues:
+ - 108427
diff --git a/...ain/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/...ain/java/org/elasticsearch/search/aggregations/bucket/terms/InternalSignificantTerms.java
@@ -12,6 +12,7 @@
 import org.elasticsearch.common.util.ObjectObjectPagedHashMap;
 import org.elasticsearch.core.Releasables;
 import org.elasticsearch.search.DocValueFormat;
+import org.elasticsearch.search.aggregations.AggregationErrors;
 import org.elasticsearch.search.aggregations.AggregationReduceContext;
 import org.elasticsearch.search.aggregations.Aggregator;
 import org.elasticsearch.search.aggregations.AggregatorReducer;
@@ -29,6 +30,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
+import java.util.Optional;
 
 /**
  * Result of the significant terms aggregation.
@@ -208,10 +210,27 @@ protected AggregatorReducer getLeaderReducer(AggregationReduceContext reduceCont
                 reduceContext.bigArrays()
             );
 
+            private InternalAggregation referenceAgg = null;
+
             @Override
             public void accept(InternalAggregation aggregation) {
+                /*
+                canLeadReduction here is essentially checking if this shard returned data.  Unmapped shards (that didn't
+                specify a missing value) will be false. Since they didn't return data, we can safely skip them, and
+                doing so prevents us from accidentally taking one as the reference agg for type checking, which would cause
+                shards that actually returned data to fail.
+                 */
+                if (aggregation.canLeadReduction() == false) {
+                    return;
+                }
                 @SuppressWarnings("unchecked")
                 final InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
+                if (referenceAgg == null) {
+                    referenceAgg = terms;
+                } else if (referenceAgg.getClass().equals(terms.getClass()) == false) {
+                    // We got here because shards had different mappings for the same field (presumably different indices)
+                    throw AggregationErrors.reduceTypeMismatch(referenceAgg.getName(), Optional.empty());
+                }
                 // Compute the overall result set size and the corpus size using the
                 // top-level Aggregations from each shard
                 globalSubsetSize += terms.getSubsetSize();