Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix max bucket limit in histogram #1703

Merged
merged 2 commits into from Dec 12, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
63 changes: 58 additions & 5 deletions src/aggregation/bucket/histogram/histogram.rs
Expand Up @@ -206,6 +206,7 @@ pub struct SegmentHistogramCollector {
field_type: Type,
interval: f64,
offset: f64,
min_doc_count: u64,
first_bucket_num: i64,
bounds: HistogramBounds,
}
Expand All @@ -215,6 +216,30 @@ impl SegmentHistogramCollector {
self,
agg_with_accessor: &BucketAggregationWithAccessor,
) -> crate::Result<IntermediateBucketResult> {
// Compute the number of buckets to validate against max num buckets
// Note: We use min_doc_count here, but it's only an lowerbound here, since were are on the
// intermediate level and after merging the number of documents of a bucket could exceed
// `min_doc_count`.
{
let cut_off_buckets_front = self
.buckets
.iter()
.take_while(|bucket| bucket.doc_count <= self.min_doc_count)
.count();
let cut_off_buckets_back = self.buckets[cut_off_buckets_front..]
.iter()
.rev()
.take_while(|bucket| bucket.doc_count <= self.min_doc_count)
.count();
let estimate_num_buckets =
self.buckets.len() - cut_off_buckets_front - cut_off_buckets_back;

agg_with_accessor
.bucket_count
.add_count(estimate_num_buckets as u32);
agg_with_accessor.bucket_count.validate_bucket_count()?;
}

let mut buckets = Vec::with_capacity(
self.buckets
.iter()
Expand Down Expand Up @@ -251,11 +276,6 @@ impl SegmentHistogramCollector {
);
};

agg_with_accessor
.bucket_count
.add_count(buckets.len() as u32);
agg_with_accessor.bucket_count.validate_bucket_count()?;

Ok(IntermediateBucketResult::Histogram { buckets })
}

Expand Down Expand Up @@ -308,6 +328,7 @@ impl SegmentHistogramCollector {
first_bucket_num,
bounds,
sub_aggregations,
min_doc_count: req.min_doc_count(),
})
}

Expand Down Expand Up @@ -1521,4 +1542,36 @@ mod tests {

Ok(())
}

#[test]
fn histogram_test_max_buckets_segments() -> crate::Result<()> {
let values = vec![0.0, 70000.0];

let index = get_test_index_from_values(true, &values)?;

let agg_req: Aggregations = vec![(
"my_interval".to_string(),
Aggregation::Bucket(BucketAggregation {
bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
field: "score_f64".to_string(),
interval: 1.0,
..Default::default()
}),
sub_aggregation: Default::default(),
}),
)]
.into_iter()
.collect();

let res = exec_request(agg_req, &index);

assert_eq!(
res.unwrap_err().to_string(),
"An invalid argument was passed: 'Aborting aggregation because too many buckets were \
created'"
.to_string()
);

Ok(())
}
}