From 3c77cfda4cd28f23f044504136a1d153582fa263 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 23 Nov 2022 18:09:45 +0800 Subject: [PATCH 1/4] add aggregation support for date type fixes #1332 --- src/aggregation/agg_req_with_accessor.rs | 10 +---- src/aggregation/agg_result.rs | 3 ++ src/aggregation/bucket/histogram/histogram.rs | 45 +++++++++++++++++++ src/aggregation/intermediate_agg_result.rs | 2 + src/aggregation/mod.rs | 14 +++++- src/fastfield/mod.rs | 2 +- 6 files changed, 66 insertions(+), 10 deletions(-) diff --git a/src/aggregation/agg_req_with_accessor.rs b/src/aggregation/agg_req_with_accessor.rs index 6e09749aaf..23aba24095 100644 --- a/src/aggregation/agg_req_with_accessor.rs +++ b/src/aggregation/agg_req_with_accessor.rs @@ -11,7 +11,7 @@ use super::bucket::{HistogramAggregation, RangeAggregation, TermsAggregation}; use super::metric::{AverageAggregation, StatsAggregation}; use super::segment_agg_result::BucketCount; use super::VecWithNames; -use crate::fastfield::{type_and_cardinality, FastType, MultiValuedFastFieldReader}; +use crate::fastfield::{type_and_cardinality, MultiValuedFastFieldReader}; use crate::schema::{Cardinality, Type}; use crate::{InvertedIndexReader, SegmentReader, TantivyError}; @@ -194,13 +194,7 @@ fn get_ff_reader_and_validate( .ok_or_else(|| TantivyError::FieldNotFound(field_name.to_string()))?; let field_type = reader.schema().get_field_entry(field).field_type(); - if let Some((ff_type, field_cardinality)) = type_and_cardinality(field_type) { - if ff_type == FastType::Date { - return Err(TantivyError::InvalidArgument( - "Unsupported field type date in aggregation".to_string(), - )); - } - + if let Some((_ff_type, field_cardinality)) = type_and_cardinality(field_type) { if cardinality != field_cardinality { return Err(TantivyError::InvalidArgument(format!( "Invalid field cardinality on field {} expected {:?}, but got {:?}", diff --git a/src/aggregation/agg_result.rs b/src/aggregation/agg_result.rs index 3b80c93dc6..30a884be37 100644 --- a/src/aggregation/agg_result.rs +++ b/src/aggregation/agg_result.rs @@ -174,6 +174,9 @@ pub enum BucketEntries<T> { /// ``` #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub struct BucketEntry { + #[serde(skip_serializing_if = "Option::is_none")] + /// The string representation of the bucket. + pub key_as_string: Option<String>, /// The identifier of the bucket. pub key: Key, /// Number of documents in the bucket.
diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index c2d0c1277c..329bea6d4b 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -1372,6 +1372,51 @@ mod tests { Ok(()) } + #[test] + fn histogram_date_test_single_segment() -> crate::Result<()> { + histogram_date_test_with_opt(true) + } + + #[test] + fn histogram_date_test_multi_segment() -> crate::Result<()> { + histogram_date_test_with_opt(false) + } + + fn histogram_date_test_with_opt(merge_segments: bool) -> crate::Result<()> { + let index = get_test_index_2_segments(merge_segments)?; + + let agg_req: Aggregations = vec![( + "histogram".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Histogram(HistogramAggregation { + field: "date".to_string(), + interval: 86400000000.0, // one day in microseconds + ..Default::default() + }), + sub_aggregation: Default::default(), + }), + )] + .into_iter() + .collect(); + + let agg_res = exec_request(agg_req, &index)?; + + let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?; + + assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000.0); + assert_eq!(res["histogram"]["buckets"][0]["doc_count"], 1); + + assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000.0); + assert_eq!(res["histogram"]["buckets"][1]["doc_count"], 5); + + assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0); + assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0); + + assert_eq!(res["histogram"]["buckets"][3], Value::Null); + + Ok(()) + } + #[test] fn histogram_invalid_request() -> crate::Result<()> { let index = get_test_index_2_segments(true)?; diff --git a/src/aggregation/intermediate_agg_result.rs b/src/aggregation/intermediate_agg_result.rs index 6f0dea8cff..5a9613b3c5 100644 --- a/src/aggregation/intermediate_agg_result.rs +++ b/src/aggregation/intermediate_agg_result.rs @@ -419,6 +419,7 @@ impl IntermediateTermBucketResult { .filter(|bucket| bucket.1.doc_count >= req.min_doc_count) .map(|(key, entry)| { Ok(BucketEntry { + key_as_string: None, key: Key::Str(key), doc_count: entry.doc_count, sub_aggregation: entry @@ -530,6 +531,7 @@ impl IntermediateHistogramBucketEntry { req: &AggregationsInternal, ) -> crate::Result<BucketEntry> { Ok(BucketEntry { + key_as_string: None, key: Key::F64(self.key), doc_count: self.doc_count, sub_aggregation: self diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 876522f7d3..4469bc6610 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -288,6 +288,7 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 { match field_type { Type::U64 => val as f64, Type::I64 => i64::from_u64(val) as f64, + Type::Date => i64::from_u64(val) as f64, Type::F64 => f64::from_u64(val), _ => { panic!("unexpected type {:?}. 
This should not happen", field_type) } @@ -317,6 +318,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> { #[cfg(test)] mod tests { use serde_json::Value; + use time::OffsetDateTime; use super::agg_req::{Aggregation, Aggregations, BucketAggregation}; use super::bucket::RangeAggregation; @@ -332,7 +334,7 @@ mod tests { use crate::aggregation::DistributedAggregationCollector; use crate::query::{AllQuery, TermQuery}; use crate::schema::{Cardinality, IndexRecordOption, Schema, TextFieldIndexing, FAST, STRING}; - use crate::{Index, Term}; + use crate::{DateTime, Index, Term}; fn get_avg_req(field_name: &str) -> Aggregation { Aggregation::Metric(MetricAggregation::Average( @@ -648,6 +650,7 @@ mod tests { .set_fast() .set_stored(); let text_field = schema_builder.add_text_field("text", text_fieldtype); + let date_field = schema_builder.add_date_field("date", FAST); schema_builder.add_text_field("dummy_text", STRING); let score_fieldtype = crate::schema::NumericOptions::default().set_fast(Cardinality::SingleValue); @@ -665,6 +668,7 @@ mod tests { // writing the segment index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800).unwrap()), score_field => 1u64, score_field_f64 => 1f64, score_field_i64 => 1i64, @@ -673,6 +677,7 @@ ))?; index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()), score_field => 3u64, score_field_f64 => 3f64, score_field_i64 => 3i64, @@ -681,18 +686,21 @@ ))?; index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()), score_field => 5u64, score_field_f64 => 5f64, score_field_i64 => 5i64, ))?; index_writer.add_document(doc!( text_field => "nohit", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()), score_field => 6u64, score_field_f64 => 6f64, score_field_i64 => 6i64, ))?; index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()), score_field => 7u64, score_field_f64 => 7f64, score_field_i64 => 7i64, @@ -700,12 +708,14 @@ mod tests { index_writer.commit()?; index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400).unwrap()), score_field => 11u64, score_field_f64 => 11f64, score_field_i64 => 11i64, ))?; index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()), score_field => 14u64, score_field_f64 => 14f64, score_field_i64 => 14i64, @@ -713,6 +723,7 @@ index_writer.add_document(doc!( text_field => "cool", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()), score_field => 44u64, score_field_f64 => 44.5f64, score_field_i64 => 44i64, @@ -723,6 +734,7 @@ // no hits segment index_writer.add_document(doc!( text_field => "nohit", + date_field => DateTime::from_utc(OffsetDateTime::from_unix_timestamp(1_546_300_800 + 86400 + 86400).unwrap()), score_field => 44u64, score_field_f64 => 44.5f64, score_field_i64 => 44i64, diff --git a/src/fastfield/mod.rs b/src/fastfield/mod.rs index a29ee238ce..0c689b0b97 100644 --- 
a/src/fastfield/mod.rs +++ b/src/fastfield/mod.rs @@ -30,8 +30,8 @@ pub use self::multivalued::{ MultiValueIndex, MultiValueU128FastFieldWriter, MultiValuedFastFieldReader, MultiValuedFastFieldWriter, MultiValuedU128FastFieldReader, }; +pub(crate) use self::readers::type_and_cardinality; pub use self::readers::FastFieldReaders; -pub(crate) use self::readers::{type_and_cardinality, FastType}; pub use self::serializer::{Column, CompositeFastFieldSerializer}; use self::writer::unexpected_value; pub use self::writer::{FastFieldsWriter, IntFastFieldWriter}; From 82ae997b2e401d837cb12cda56bde77b8b592cd0 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 24 Nov 2022 00:36:10 +0800 Subject: [PATCH 2/4] serialize key_as_string as rfc3339 in date histogram --- examples/aggregation.rs | 2 +- src/aggregation/agg_result.rs | 8 ++- src/aggregation/bucket/histogram/histogram.rs | 67 +++++++++++++++++-- src/aggregation/collector.rs | 7 +- src/aggregation/intermediate_agg_result.rs | 35 +++++++--- src/aggregation/metric/stats.rs | 4 +- src/aggregation/mod.rs | 44 ++++++------ src/schema/field_type.rs | 5 ++ 8 files changed, 127 insertions(+), 45 deletions(-) diff --git a/examples/aggregation.rs b/examples/aggregation.rs index fb0d131c17..fbe412e8e1 100644 --- a/examples/aggregation.rs +++ b/examples/aggregation.rs @@ -118,7 +118,7 @@ fn main() -> tantivy::Result<()> { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap(); diff --git a/src/aggregation/agg_result.rs b/src/aggregation/agg_result.rs index 30a884be37..4f71e9d7a5 100644 --- a/src/aggregation/agg_result.rs +++ b/src/aggregation/agg_result.rs @@ -12,6 +12,7 @@ use super::bucket::GetDocCount; use super::intermediate_agg_result::{IntermediateBucketResult, IntermediateMetricResult}; use super::metric::{SingleMetricResult, Stats}; use super::Key; +use crate::schema::Schema; use crate::TantivyError; #[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize)] @@ -129,9 +130,12 @@ pub enum BucketResult { } impl BucketResult { - pub(crate) fn empty_from_req(req: &BucketAggregationInternal) -> crate::Result<Self> { + pub(crate) fn empty_from_req( + req: &BucketAggregationInternal, + schema: &Schema, + ) -> crate::Result<Self> { let empty_bucket = IntermediateBucketResult::empty_from_req(&req.bucket_agg); empty_bucket.into_final_bucket_result(req, schema) } } diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index 329bea6d4b..c37cc65162 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -4,6 +4,8 @@ use std::fmt::Display; use fastfield_codecs::Column; use itertools::Itertools; use serde::{Deserialize, Serialize}; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; use crate::aggregation::agg_req::AggregationsInternal; use crate::aggregation::agg_req_with_accessor::{ AggregationsWithAccessor, BucketAggregationWithAccessor, }; use crate::aggregation::agg_result::BucketEntry; @@ -15,7 +17,7 @@ use crate::aggregation::intermediate_agg_result::{ IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry, }; use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector; -use crate::schema::Type; +use crate::schema::{Schema, Type}; use crate::{DocId, TantivyError}; /// Histogram is a bucket 
aggregation, where buckets are created dynamically for given `interval`. @@ -451,6 +453,7 @@ fn intermediate_buckets_to_final_buckets_fill_gaps( buckets: Vec<IntermediateHistogramBucketEntry>, histogram_req: &HistogramAggregation, sub_aggregation: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<Vec<BucketEntry>> { // Generate the full list of buckets without gaps. // @@ -491,7 +494,9 @@ fn intermediate_buckets_to_final_buckets_fill_gaps( sub_aggregation: empty_sub_aggregation.clone(), }, }) - .map(|intermediate_bucket| intermediate_bucket.into_final_bucket_entry(sub_aggregation)) + .map(|intermediate_bucket| { + intermediate_bucket.into_final_bucket_entry(sub_aggregation, schema) + }) .collect::<crate::Result<Vec<_>>>() } @@ -500,20 +505,56 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets( buckets: Vec<IntermediateHistogramBucketEntry>, histogram_req: &HistogramAggregation, sub_aggregation: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<Vec<BucketEntry>> { - if histogram_req.min_doc_count() == 0 { + let mut buckets = if histogram_req.min_doc_count() == 0 { // With min_doc_count != 0, we may need to add buckets, so that there are no // gaps, since intermediate result does not contain empty buckets (filtered to // reduce serialization size). - intermediate_buckets_to_final_buckets_fill_gaps(buckets, histogram_req, sub_aggregation) + intermediate_buckets_to_final_buckets_fill_gaps( + buckets, + histogram_req, + sub_aggregation, + schema, + )? } else { buckets .into_iter() .filter(|histogram_bucket| histogram_bucket.doc_count >= histogram_req.min_doc_count()) - .map(|histogram_bucket| histogram_bucket.into_final_bucket_entry(sub_aggregation)) - .collect::<crate::Result<Vec<_>>>() + .map(|histogram_bucket| { + histogram_bucket.into_final_bucket_entry(sub_aggregation, schema) + }) + .collect::<crate::Result<Vec<_>>>()? + }; + + // If we have a date type on the histogram buckets, we add the `key_as_string` field as rfc3339 + let field = schema + .get_field(&histogram_req.field) + .ok_or_else(|| TantivyError::FieldNotFound(histogram_req.field.to_string()))?; + if schema.get_field_entry(field).field_type().is_date() { + for bucket in buckets.iter_mut() { + match bucket.key { + crate::aggregation::Key::F64(val) => { + let datetime = OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)) + .map_err(|err| { + TantivyError::InvalidArgument(format!( + "Could not convert {:?} to OffsetDateTime, err {:?}", + val, err + )) + })?; + let key_as_string = datetime.format(&Rfc3339).map_err(|_err| { + TantivyError::InvalidArgument("Could not serialize date".to_string()) + })?; + + bucket.key_as_string = Some(key_as_string); + } + _ => {} + } + } } + + Ok(buckets) } /// Applies req extended_bounds/hard_bounds on the min_max value @@ -1404,13 +1445,25 @@ mod tests { let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?; assert_eq!(res["histogram"]["buckets"][0]["key"], 1546300800000000.0); + assert_eq!( + res["histogram"]["buckets"][0]["key_as_string"], + "2019-01-01T00:00:00Z" + ); assert_eq!(res["histogram"]["buckets"][0]["doc_count"], 1); assert_eq!(res["histogram"]["buckets"][1]["key"], 1546387200000000.0); + assert_eq!( + res["histogram"]["buckets"][1]["key_as_string"], + "2019-01-02T00:00:00Z" + ); + assert_eq!(res["histogram"]["buckets"][1]["doc_count"], 5); assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0); - assert_eq!(res["histogram"]["buckets"][2]["key"], 1546473600000000.0); + assert_eq!( + res["histogram"]["buckets"][2]["key_as_string"], + "2019-01-03T00:00:00Z" + ); assert_eq!(res["histogram"]["buckets"][3], Value::Null); diff --git a/src/aggregation/collector.rs 
b/src/aggregation/collector.rs index cd91ec20c8..a53ac268ae 100644 --- a/src/aggregation/collector.rs +++ b/src/aggregation/collector.rs @@ -7,6 +7,7 @@ use super::intermediate_agg_result::IntermediateAggregationResults; use super::segment_agg_result::SegmentAggregationResultsCollector; use crate::aggregation::agg_req_with_accessor::get_aggs_with_accessor_and_validate; use crate::collector::{Collector, SegmentCollector}; +use crate::schema::Schema; use crate::{SegmentReader, TantivyError}; /// The default max bucket count, before the aggregation fails. @@ -16,6 +17,7 @@ pub const MAX_BUCKET_COUNT: u32 = 65000; /// /// The collector collects all aggregations by the underlying aggregation request. pub struct AggregationCollector { + schema: Schema, agg: Aggregations, max_bucket_count: u32, } @@ -25,8 +27,9 @@ impl AggregationCollector { /// /// Aggregation fails when the total bucket count is higher than max_bucket_count. /// max_bucket_count will default to `MAX_BUCKET_COUNT` (65000) when unset - pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>) -> Self { + pub fn from_aggs(agg: Aggregations, max_bucket_count: Option<u32>, schema: Schema) -> Self { Self { + schema, agg, max_bucket_count: max_bucket_count.unwrap_or(MAX_BUCKET_COUNT), } } @@ -113,7 +116,7 @@ impl Collector for AggregationCollector { segment_fruits: Vec<<Self::Child as SegmentCollector>::Fruit>, ) -> crate::Result<Self::Fruit> { let res = merge_fruits(segment_fruits)?; - res.into_final_bucket_result(self.agg.clone()) + res.into_final_bucket_result(self.agg.clone(), &self.schema) } } diff --git a/src/aggregation/intermediate_agg_result.rs b/src/aggregation/intermediate_agg_result.rs index 5a9613b3c5..e0117228ac 100644 --- a/src/aggregation/intermediate_agg_result.rs +++ b/src/aggregation/intermediate_agg_result.rs @@ -22,6 +22,7 @@ use super::segment_agg_result::SegmentMetricResultCollector; use super::{Key, SerializedKey, VecWithNames}; use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry}; use crate::aggregation::bucket::TermsAggregationInternal; +use crate::schema::Schema; /// Contains the intermediate aggregation result, which is optimized to be merged with other /// intermediate results. @@ -35,8 +36,12 @@ impl IntermediateAggregationResults { /// Convert intermediate result and its aggregation request to the final result. - pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> { + pub fn into_final_bucket_result( + self, + req: Aggregations, + schema: &Schema, + ) -> crate::Result<AggregationResults> { self.into_final_bucket_result_internal(&(req.into()), schema) } /// Convert intermediate result and its aggregation request to the final result. @@ -46,6 +51,7 @@ impl IntermediateAggregationResults { pub(crate) fn into_final_bucket_result_internal( self, req: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<AggregationResults> { // Important assumption: // When the tree contains buckets/metric, we expect it to have all buckets/metrics from the let mut results: FxHashMap<String, AggregationResult> = FxHashMap::default(); if let Some(buckets) = self.buckets { - convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets)? + convert_and_add_final_buckets_to_result(&mut results, buckets, &req.buckets, schema)? } else { // When there are no buckets, we create empty buckets, so that the serialized json // format is constant - add_empty_final_buckets_to_result(&mut results, &req.buckets)? 
+ add_empty_final_buckets_to_result(&mut results, &req.buckets, schema)? }; if let Some(metrics) = self.metrics { @@ -158,10 +164,12 @@ fn add_empty_final_metrics_to_result( fn add_empty_final_buckets_to_result( results: &mut FxHashMap<String, AggregationResult>, req_buckets: &VecWithNames<BucketAggregationInternal>, + schema: &Schema, ) -> crate::Result<()> { let requested_buckets = req_buckets.iter(); for (key, req) in requested_buckets { - let empty_bucket = AggregationResult::BucketResult(BucketResult::empty_from_req(req)?); + let empty_bucket = + AggregationResult::BucketResult(BucketResult::empty_from_req(req, schema)?); results.insert(key.to_string(), empty_bucket); } Ok(()) } @@ -171,12 +179,13 @@ fn convert_and_add_final_buckets_to_result( results: &mut FxHashMap<String, AggregationResult>, buckets: VecWithNames<IntermediateBucketResult>, req_buckets: &VecWithNames<BucketAggregationInternal>, + schema: &Schema, ) -> crate::Result<()> { assert_eq!(buckets.len(), req_buckets.len()); let buckets_with_request = buckets.into_iter().zip(req_buckets.values()); for ((key, bucket), req) in buckets_with_request { - let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req)?); + let result = AggregationResult::BucketResult(bucket.into_final_bucket_result(req, schema)?); results.insert(key, result); } Ok(()) } @@ -266,13 +275,14 @@ impl IntermediateBucketResult { pub(crate) fn into_final_bucket_result( self, req: &BucketAggregationInternal, + schema: &Schema, ) -> crate::Result<BucketResult> { match self { IntermediateBucketResult::Range(range_res) => { let mut buckets: Vec<RangeBucketEntry> = range_res .buckets .into_iter() - .map(|(_, bucket)| bucket.into_final_bucket_entry(&req.sub_aggregation)) + .map(|(_, bucket)| bucket.into_final_bucket_entry(&req.sub_aggregation, schema)) .collect::<crate::Result<Vec<_>>>()?; buckets.sort_by(|left, right| { @@ -303,6 +313,7 @@ impl IntermediateBucketResult { req.as_histogram() .expect("unexpected aggregation, expected histogram aggregation"), &req.sub_aggregation, + schema, )?; let buckets = if req.as_histogram().unwrap().keyed { @@ -321,6 +332,7 @@ impl IntermediateBucketResult { req.as_term() .expect("unexpected aggregation, expected term aggregation"), &req.sub_aggregation, + schema, ), } } @@ -411,6 +423,7 @@ impl IntermediateTermBucketResult { self, req: &TermsAggregation, sub_aggregation_req: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<BucketResult> { let req = TermsAggregationInternal::from_req(req); let mut buckets: Vec<BucketEntry> = self @@ -424,7 +437,7 @@ doc_count: entry.doc_count, sub_aggregation: entry .sub_aggregation - .into_final_bucket_result_internal(sub_aggregation_req)?, + .into_final_bucket_result_internal(sub_aggregation_req, schema)?, }) }) .collect::<crate::Result<_>>()?; @@ -529,6 +542,7 @@ impl IntermediateHistogramBucketEntry { pub(crate) fn into_final_bucket_entry( self, req: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<BucketEntry> { Ok(BucketEntry { key_as_string: None, @@ -536,7 +550,7 @@ doc_count: self.doc_count, sub_aggregation: self .sub_aggregation - .into_final_bucket_result_internal(req)?, + .into_final_bucket_result_internal(req, schema)?, }) } } @@ -573,13 +587,14 @@ impl IntermediateRangeBucketEntry { pub(crate) fn into_final_bucket_entry( self, req: &AggregationsInternal, + schema: &Schema, ) -> crate::Result<RangeBucketEntry> { Ok(RangeBucketEntry { key: self.key, doc_count: self.doc_count, sub_aggregation: self .sub_aggregation - .into_final_bucket_result_internal(req)?, + .into_final_bucket_result_internal(req, schema)?, to: self.to, from: self.from, }) diff --git a/src/aggregation/metric/stats.rs b/src/aggregation/metric/stats.rs index 
f84944c261..dec50bdf0d 100644 --- a/src/aggregation/metric/stats.rs +++ b/src/aggregation/metric/stats.rs @@ -222,7 +222,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let reader = index.reader()?; let searcher = reader.searcher(); @@ -300,7 +300,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap(); diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index 4469bc6610..a9dc69c775 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -53,9 +53,10 @@ //! use tantivy::query::AllQuery; //! use tantivy::aggregation::agg_result::AggregationResults; //! use tantivy::IndexReader; +//! use tantivy::schema::Schema; //! //! # #[allow(dead_code)] -//! fn aggregate_on_index(reader: &IndexReader) { +//! fn aggregate_on_index(reader: &IndexReader, schema: Schema) { //! let agg_req: Aggregations = vec![ //! ( //! "average".to_string(), //! Aggregation::Metric(MetricAggregation::Average( @@ -67,7 +68,7 @@ //! .into_iter() //! .collect(); //! -//! let collector = AggregationCollector::from_aggs(agg_req, None); +//! let collector = AggregationCollector::from_aggs(agg_req, None, schema); //! //! let searcher = reader.searcher(); //! let agg_res: AggregationResults = searcher.search(&AllQuery, &collector).unwrap(); @@ -360,7 +361,7 @@ mod tests { index: &Index, query: Option<(&str, &str)>, ) -> crate::Result<AggregationResults> { - let collector = AggregationCollector::from_aggs(agg_req, None); + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); let reader = index.reader()?; let searcher = reader.searcher(); @@ -554,10 +555,10 @@ let searcher = reader.searcher(); let intermediate_agg_result = searcher.search(&AllQuery, &collector).unwrap(); intermediate_agg_result - .into_final_bucket_result(agg_req) + .into_final_bucket_result(agg_req, &index.schema()) .unwrap() } else { - let collector = AggregationCollector::from_aggs(agg_req, None); + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); let searcher = reader.searcher(); searcher.search(&AllQuery, &collector).unwrap() @@ -807,7 +808,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap(); @@ -1007,9 +1008,10 @@ // Test de/serialization roundtrip on intermediate_agg_result let res: IntermediateAggregationResults = serde_json::from_str(&serde_json::to_string(&res).unwrap()).unwrap(); - res.into_final_bucket_result(agg_req.clone()).unwrap() + res.into_final_bucket_result(agg_req.clone(), &index.schema()) + .unwrap() } else { - let collector = AggregationCollector::from_aggs(agg_req.clone(), None); + let collector = AggregationCollector::from_aggs(agg_req.clone(), None, index.schema()); let searcher = reader.searcher(); searcher.search(&term_query, &collector).unwrap() @@ -1067,7 +1069,7 @@ ); // Test empty result set - let collector = AggregationCollector::from_aggs(agg_req, None); + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); let 
searcher = reader.searcher(); searcher.search(&query_with_no_hits, &collector).unwrap(); @@ -1132,7 +1134,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); @@ -1245,7 +1247,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1276,7 +1278,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1307,7 +1309,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1346,7 +1348,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1375,7 +1377,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req, None); + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1404,7 +1406,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req, None); + let collector = AggregationCollector::from_aggs(agg_req, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1441,7 +1443,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1476,7 +1478,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1515,7 +1517,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1545,7 +1547,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = @@ -1602,7 +1604,7 @@ mod tests { .into_iter() .collect(); - let collector = AggregationCollector::from_aggs(agg_req_1, None); + let collector = AggregationCollector::from_aggs(agg_req_1, None, index.schema()); let searcher = reader.searcher(); let agg_res: AggregationResults = diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index c2cae2f1e1..b92f4448d7 100644 --- a/src/schema/field_type.rs +++ 
b/src/schema/field_type.rs @@ -181,6 +181,11 @@ impl FieldType { matches!(self, FieldType::IpAddr(_)) } + /// returns true if this is a date field + pub fn is_date(&self) -> bool { + matches!(self, FieldType::Date(_)) + } + /// returns true if the field is indexed. pub fn is_indexed(&self) -> bool { match *self { From 788ac05d4554a15d85a32299fe938b575b0f2a0f Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 24 Nov 2022 14:25:01 +0800 Subject: [PATCH 3/4] update docs --- src/aggregation/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index a9dc69c775..d095be976e 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -12,7 +12,7 @@ //! //! ## Prerequisite //! Currently aggregations work only on [fast fields](`crate::fastfield`). Single value fast fields -//! of type `u64`, `f64`, `i64` and fast fields on text fields. +//! of type `u64`, `f64`, `i64`, `date` and fast fields on text fields. //! //! ## Usage //! To use aggregations, build an aggregation request by constructing From ee8fd25811060651f93e2962818677f0159b8b9a Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 24 Nov 2022 22:15:28 +0800 Subject: [PATCH 4/4] enable date for range aggregation --- src/aggregation/agg_result.rs | 6 + src/aggregation/bucket/histogram/histogram.rs | 23 +--- src/aggregation/bucket/range.rs | 108 ++++++++++++++++-- src/aggregation/date.rs | 18 +++ src/aggregation/intermediate_agg_result.rs | 39 ++++++- src/aggregation/mod.rs | 14 +-- 6 files changed, 166 insertions(+), 42 deletions(-) create mode 100644 src/aggregation/date.rs diff --git a/src/aggregation/agg_result.rs b/src/aggregation/agg_result.rs index 4f71e9d7a5..be5bfd0835 100644 --- a/src/aggregation/agg_result.rs +++ b/src/aggregation/agg_result.rs @@ -245,4 +245,10 @@ pub struct RangeBucketEntry { /// The to range of the bucket. Equals `f64::MAX` when `None`. #[serde(skip_serializing_if = "Option::is_none")] pub to: Option<f64>, + /// The optional string representation for the `from` range. + #[serde(skip_serializing_if = "Option::is_none")] + pub from_as_string: Option<String>, + /// The optional string representation for the `to` range. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub to_as_string: Option<String>, } diff --git a/src/aggregation/bucket/histogram/histogram.rs b/src/aggregation/bucket/histogram/histogram.rs index c37cc65162..fb59693b9c 100644 --- a/src/aggregation/bucket/histogram/histogram.rs +++ b/src/aggregation/bucket/histogram/histogram.rs @@ -4,19 +4,17 @@ use std::fmt::Display; use fastfield_codecs::Column; use itertools::Itertools; use serde::{Deserialize, Serialize}; -use time::format_description::well_known::Rfc3339; -use time::OffsetDateTime; use crate::aggregation::agg_req::AggregationsInternal; use crate::aggregation::agg_req_with_accessor::{ AggregationsWithAccessor, BucketAggregationWithAccessor, }; use crate::aggregation::agg_result::BucketEntry; -use crate::aggregation::f64_from_fastfield_u64; use crate::aggregation::intermediate_agg_result::{ IntermediateAggregationResults, IntermediateBucketResult, IntermediateHistogramBucketEntry, }; use crate::aggregation::segment_agg_result::SegmentAggregationResultsCollector; +use crate::aggregation::{f64_from_fastfield_u64, format_date}; use crate::schema::{Schema, Type}; use crate::{DocId, TantivyError}; @@ -534,22 +532,9 @@ pub(crate) fn intermediate_histogram_buckets_to_final_buckets( .ok_or_else(|| TantivyError::FieldNotFound(histogram_req.field.to_string()))?; if schema.get_field_entry(field).field_type().is_date() { for bucket in buckets.iter_mut() { - match bucket.key { - crate::aggregation::Key::F64(val) => { - let datetime = OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)) - .map_err(|err| { - TantivyError::InvalidArgument(format!( - "Could not convert {:?} to OffsetDateTime, err {:?}", - val, err - )) - })?; - let key_as_string = datetime.format(&Rfc3339).map_err(|_err| { - TantivyError::InvalidArgument("Could not serialize date".to_string()) - })?; - - bucket.key_as_string = Some(key_as_string); - } - _ => {} + if let crate::aggregation::Key::F64(val) = bucket.key { + let key_as_string = format_date(val as i64)?; + bucket.key_as_string = Some(key_as_string); } } } diff --git a/src/aggregation/bucket/range.rs b/src/aggregation/bucket/range.rs index 333727536e..16296812c5 100644 --- a/src/aggregation/bucket/range.rs +++ b/src/aggregation/bucket/range.rs @@ -1,6 +1,7 @@ use std::fmt::Debug; use std::ops::Range; +use fastfield_codecs::MonotonicallyMappableToU64; use rustc_hash::FxHashMap; use serde::{Deserialize, Serialize}; @@ -11,7 +12,9 @@ use crate::aggregation::intermediate_agg_result::{ IntermediateBucketResult, IntermediateRangeBucketEntry, IntermediateRangeBucketResult, }; use crate::aggregation::segment_agg_result::{BucketCount, SegmentAggregationResultsCollector}; -use crate::aggregation::{f64_from_fastfield_u64, f64_to_fastfield_u64, Key, SerializedKey}; +use crate::aggregation::{ + f64_from_fastfield_u64, f64_to_fastfield_u64, format_date, Key, SerializedKey, +}; use crate::schema::Type; use crate::{DocId, TantivyError}; @@ -181,7 +184,7 @@ impl SegmentRangeCollector { .into_iter() .map(move |range_bucket| { Ok(( - range_to_string(&range_bucket.range, &field_type), + range_to_string(&range_bucket.range, &field_type)?, range_bucket .bucket .into_intermediate_bucket_entry(&agg_with_accessor.sub_aggregation)?, @@ -209,8 +212,8 @@ impl SegmentRangeCollector { let key = range .key .clone() - .map(Key::Str) - .unwrap_or_else(|| range_to_key(&range.range, &field_type)); + .map(|key| Ok(Key::Str(key))) + .unwrap_or_else(|| range_to_key(&range.range, &field_type))?; let to = if range.range.end == u64::MAX { None } else { @@ 
-228,6 +231,7 @@ impl SegmentRangeCollector { sub_aggregation, )?) }; + Ok(SegmentRangeAndBucketEntry { range: range.range.clone(), bucket: SegmentRangeBucketEntry { @@ -402,34 +406,45 @@ fn extend_validate_ranges( Ok(converted_buckets) } -pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> String { +pub(crate) fn range_to_string(range: &Range<u64>, field_type: &Type) -> crate::Result<String> { // is_start is there for malformed requests, e.g. if the user passes the range u64::MIN..0.0, // it should be rendered as "*-0" and not "*-*" let to_str = |val: u64, is_start: bool| { if (is_start && val == u64::MIN) || (!is_start && val == u64::MAX) { - "*".to_string() + Ok("*".to_string()) + } else if *field_type == Type::Date { + let val = i64::from_u64(val); + format_date(val) } else { - f64_from_fastfield_u64(val, field_type).to_string() + Ok(f64_from_fastfield_u64(val, field_type).to_string()) } }; - format!("{}-{}", to_str(range.start, true), to_str(range.end, false)) + Ok(format!( + "{}-{}", + to_str(range.start, true)?, + to_str(range.end, false)? + )) } -pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> Key { - Key::Str(range_to_string(range, field_type)) +pub(crate) fn range_to_key(range: &Range<u64>, field_type: &Type) -> crate::Result<Key> { + Ok(Key::Str(range_to_string(range, field_type)?)) } #[cfg(test)] mod tests { use fastfield_codecs::MonotonicallyMappableToU64; + use serde_json::Value; use super::*; use crate::aggregation::agg_req::{ Aggregation, Aggregations, BucketAggregation, BucketAggregationType, }; - use crate::aggregation::tests::{exec_request_with_query, get_test_index_with_num_docs}; + use crate::aggregation::tests::{ + exec_request, exec_request_with_query, get_test_index_2_segments, + get_test_index_with_num_docs, + }; pub fn get_collector_from_ranges( ranges: Vec<RangeAggregationRange>, @@ -567,6 +582,77 @@ mod tests { Ok(()) } + #[test] + fn range_date_test_single_segment() -> crate::Result<()> { + range_date_test_with_opt(true) + } + + #[test] + fn range_date_test_multi_segment() -> crate::Result<()> { + range_date_test_with_opt(false) + } + + fn range_date_test_with_opt(merge_segments: bool) -> crate::Result<()> { + let index = get_test_index_2_segments(merge_segments)?; + + let agg_req: Aggregations = vec![( + "date_ranges".to_string(), + Aggregation::Bucket(BucketAggregation { + bucket_agg: BucketAggregationType::Range(RangeAggregation { + field: "date".to_string(), + ranges: vec![ + RangeAggregationRange { + key: None, + from: None, + to: Some(1546300800000000.0f64), + }, + RangeAggregationRange { + key: None, + from: Some(1546300800000000.0f64), + to: Some(1546387200000000.0f64), + }, + ], + keyed: false, + }), + sub_aggregation: Default::default(), + }), + )] + .into_iter() + .collect(); + + let agg_res = exec_request(agg_req, &index)?; + + let res: Value = serde_json::from_str(&serde_json::to_string(&agg_res)?)?; + + assert_eq!( + res["date_ranges"]["buckets"][0]["from_as_string"], + Value::Null + ); + assert_eq!( + res["date_ranges"]["buckets"][0]["key"], + "*-2019-01-01T00:00:00Z" + ); + assert_eq!( + res["date_ranges"]["buckets"][1]["from_as_string"], + "2019-01-01T00:00:00Z" + ); + assert_eq!( + res["date_ranges"]["buckets"][1]["to_as_string"], + "2019-01-02T00:00:00Z" + ); + + assert_eq!( + res["date_ranges"]["buckets"][2]["from_as_string"], + "2019-01-02T00:00:00Z" + ); + assert_eq!( + res["date_ranges"]["buckets"][2]["to_as_string"], + Value::Null + ); + + Ok(()) + } + #[test] fn range_custom_key_keyed_buckets_test() -> crate::Result<()> { let index = 
get_test_index_with_num_docs(false, 100)?; diff --git a/src/aggregation/date.rs b/src/aggregation/date.rs new file mode 100644 index 0000000000..b80daa9c61 --- /dev/null +++ b/src/aggregation/date.rs @@ -0,0 +1,18 @@ +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; + +use crate::TantivyError; + +pub(crate) fn format_date(val: i64) -> crate::Result<String> { + let datetime = + OffsetDateTime::from_unix_timestamp_nanos(1_000 * (val as i128)).map_err(|err| { + TantivyError::InvalidArgument(format!( + "Could not convert {:?} to OffsetDateTime, err {:?}", + val, err + )) + })?; + let key_as_string = datetime + .format(&Rfc3339) + .map_err(|_err| TantivyError::InvalidArgument("Could not serialize date".to_string()))?; + Ok(key_as_string) +} diff --git a/src/aggregation/intermediate_agg_result.rs b/src/aggregation/intermediate_agg_result.rs index e0117228ac..cdeaa92e3d 100644 --- a/src/aggregation/intermediate_agg_result.rs +++ b/src/aggregation/intermediate_agg_result.rs @@ -10,7 +10,7 @@ use serde::{Deserialize, Serialize}; use super::agg_req::{ Aggregations, AggregationsInternal, BucketAggregationInternal, BucketAggregationType, - MetricAggregation, + MetricAggregation, RangeAggregation, }; use super::agg_result::{AggregationResult, BucketResult, RangeBucketEntry}; use super::bucket::{ @@ -19,10 +19,11 @@ use super::bucket::{ }; use super::metric::{IntermediateAverage, IntermediateStats}; use super::segment_agg_result::SegmentMetricResultCollector; -use super::{Key, SerializedKey, VecWithNames}; +use super::{format_date, Key, SerializedKey, VecWithNames}; use crate::aggregation::agg_result::{AggregationResults, BucketEntries, BucketEntry}; use crate::aggregation::bucket::TermsAggregationInternal; use crate::schema::Schema; +use crate::TantivyError; /// Contains the intermediate aggregation result, which is optimized to be merged with other /// intermediate results. 
@@ -282,7 +283,14 @@ impl IntermediateBucketResult { let mut buckets: Vec<RangeBucketEntry> = range_res .buckets .into_iter() - .map(|(_, bucket)| bucket.into_final_bucket_entry(&req.sub_aggregation, schema)) + .map(|(_, bucket)| { + bucket.into_final_bucket_entry( + &req.sub_aggregation, + schema, + req.as_range() + .expect("unexpected aggregation, expected range aggregation"), + ) + }) .collect::<crate::Result<Vec<_>>>()?; buckets.sort_by(|left, right| { @@ -588,8 +596,9 @@ impl IntermediateRangeBucketEntry { self, req: &AggregationsInternal, schema: &Schema, + range_req: &RangeAggregation, ) -> crate::Result<RangeBucketEntry> { - Ok(RangeBucketEntry { + let mut range_bucket_entry = RangeBucketEntry { key: self.key, doc_count: self.doc_count, sub_aggregation: self @@ -597,7 +606,27 @@ impl IntermediateRangeBucketEntry { .into_final_bucket_result_internal(req, schema)?, to: self.to, from: self.from, - }) + to_as_string: None, + from_as_string: None, + }; + + // If we have a date type on the range buckets, we add the `from_as_string`/`to_as_string` + // fields as rfc3339 + let field = schema + .get_field(&range_req.field) + .ok_or_else(|| TantivyError::FieldNotFound(range_req.field.to_string()))?; + if schema.get_field_entry(field).field_type().is_date() { + if let Some(val) = range_bucket_entry.to { + let key_as_string = format_date(val as i64)?; + range_bucket_entry.to_as_string = Some(key_as_string); + } + if let Some(val) = range_bucket_entry.from { + let key_as_string = format_date(val as i64)?; + range_bucket_entry.from_as_string = Some(key_as_string); + } + } + + Ok(range_bucket_entry) } } diff --git a/src/aggregation/mod.rs b/src/aggregation/mod.rs index d095be976e..a6bd4478c4 100644 --- a/src/aggregation/mod.rs +++ b/src/aggregation/mod.rs @@ -158,6 +158,7 @@ mod agg_req_with_accessor; pub mod agg_result; pub mod bucket; mod collector; +mod date; pub mod intermediate_agg_result; pub mod metric; mod segment_agg_result; @@ -168,6 +169,7 @@ pub use collector::{ AggregationCollector, AggregationSegmentCollector, DistributedAggregationCollector, MAX_BUCKET_COUNT, }; +pub(crate) use date::format_date; use fastfield_codecs::MonotonicallyMappableToU64; use itertools::Itertools; use serde::{Deserialize, Serialize}; @@ -284,12 +286,11 @@ impl Display for Key { /// Inverse of `to_fastfield_u64`. Used to convert to `f64` for metrics. /// /// # Panics -/// Only `u64`, `f64`, and `i64` are supported. +/// Only `u64`, `f64`, `date`, and `i64` are supported. pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 { match field_type { Type::U64 => val as f64, - Type::I64 => i64::from_u64(val) as f64, - Type::Date => i64::from_u64(val) as f64, + Type::I64 | Type::Date => i64::from_u64(val) as f64, Type::F64 => f64::from_u64(val), _ => { panic!("unexpected type {:?}. This should not happen", field_type) @@ -297,10 +298,9 @@ pub(crate) fn f64_from_fastfield_u64(val: u64, field_type: &Type) -> f64 { } } -/// Converts the `f64` value to fast field value space. +/// Converts the `f64` value to fast field value space, which is always u64. /// -/// If the fast field has `u64`, values are stored as `u64` in the fast field. -/// A `f64` value of e.g. `2.0` therefore needs to be converted to `1u64`. +/// If the fast field has `u64`, values are stored unchanged as `u64` in the fast field. /// /// If the fast field has `f64` values are converted and stored to `u64` using a /// monotonic mapping. 
@@ -310,7 +310,7 @@ pub(crate) fn f64_to_fastfield_u64(val: f64, field_type: &Type) -> Option<u64> { match field_type { Type::U64 => Some(val as u64), - Type::I64 => Some((val as i64).to_u64()), + Type::I64 | Type::Date => Some((val as i64).to_u64()), Type::F64 => Some(val.to_u64()), _ => None, }
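Applied in order, the series makes date fields usable in histogram and range aggregations end to end: date fast-field values (microseconds since the Unix epoch) flow through the shared `i64` mapping into u64 fast-field space, and the final-result conversion consults the `Schema` to attach RFC 3339 strings (`key_as_string` on histogram buckets, `from_as_string`/`to_as_string` on range buckets). The sketch below exercises the feature the same way the tests in the patches do. It is illustrative only: the index setup and the `by_day` aggregation name are assumptions made here, not part of the patches, and it presumes `serde_json` and `time` are available alongside tantivy at the state of this series.

```rust
// Minimal sketch of a date histogram after this series is applied.
// Index setup and the "by_day" name are illustrative assumptions.
use tantivy::aggregation::agg_req::{
    Aggregation, Aggregations, BucketAggregation, BucketAggregationType,
};
use tantivy::aggregation::agg_result::AggregationResults;
use tantivy::aggregation::bucket::HistogramAggregation;
use tantivy::aggregation::AggregationCollector;
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, FAST};
use tantivy::{doc, DateTime, Index};
use time::OffsetDateTime;

fn main() -> tantivy::Result<()> {
    // Aggregations run on fast fields only, so the date field is FAST.
    let mut schema_builder = Schema::builder();
    let date_field = schema_builder.add_date_field("date", FAST);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    // 1_546_300_800 is 2019-01-01T00:00:00Z.
    index_writer.add_document(doc!(date_field => DateTime::from_utc(
        OffsetDateTime::from_unix_timestamp(1_546_300_800).unwrap(),
    )))?;
    index_writer.commit()?;

    // Histogram over the date field; the interval is one day expressed in
    // microseconds, the unit of date fast-field values in the tests above.
    let agg_req: Aggregations = vec![(
        "by_day".to_string(),
        Aggregation::Bucket(BucketAggregation {
            bucket_agg: BucketAggregationType::Histogram(HistogramAggregation {
                field: "date".to_string(),
                interval: 86_400_000_000.0,
                ..Default::default()
            }),
            sub_aggregation: Default::default(),
        }),
    )]
    .into_iter()
    .collect();

    // Post-series signature: the collector also takes the schema, so the
    // final-result conversion can detect date fields and add RFC 3339 strings.
    let collector = AggregationCollector::from_aggs(agg_req, None, index.schema());
    let searcher = index.reader()?.searcher();
    let agg_res: AggregationResults = searcher.search(&AllQuery, &collector)?;

    let json = serde_json::to_value(&agg_res)?;
    // Each bucket carries the numeric key (microseconds) and `key_as_string`.
    assert_eq!(json["by_day"]["buckets"][0]["key"], 1_546_300_800_000_000.0);
    assert_eq!(
        json["by_day"]["buckets"][0]["key_as_string"],
        "2019-01-01T00:00:00Z"
    );
    Ok(())
}
```

Threading the `Schema` through `into_final_bucket_result` rather than storing date information in the intermediate results keeps the mergeable, serialized intermediate representation unchanged; the string formatting happens exactly once, when intermediate results are converted into the final response.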