Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added datetime support #1396

Merged
merged 1 commit into from Jul 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,10 @@
Tantivy 0.19
================================
- Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
The `DateTime` type has been updated to hold timestamps with microseconds precision.
`DateOptions` and `DatePrecision` have been added to configure Date fields. The precision is used as a hint for fast value compression; everywhere else (i.e., terms and indexing), seconds precision is used.


Tantivy 0.18
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -49,7 +49,7 @@ thiserror = "1.0.30"
htmlescape = "0.3.1"
fail = "0.5.0"
murmurhash32 = "0.2.0"
time = { version = "0.3.9", features = ["serde-well-known"] }
time = { version = "0.3.10", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.7.5"
Expand Down
69 changes: 69 additions & 0 deletions examples/date_time_field.rs
@@ -0,0 +1,69 @@
// # DateTime field example
//
// This example shows how the DateTime field can be used

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Cardinality, DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::Index;

// Builds an in-RAM index with one date field and one text field, indexes two
// JSON documents, then runs a term query and a date range query and asserts
// on the results of each.
fn main() -> tantivy::Result<()> {
// # Defining the schema
evanxg852000 marked this conversation as resolved.
Show resolved Hide resolved
let mut schema_builder = Schema::builder();
// The date field is indexed, stored, a single-valued fast field, and uses
// seconds precision.
let opts = DateOptions::from(INDEXED)
.set_stored()
.set_fast(Cardinality::SingleValue)
.set_precision(tantivy::DatePrecision::Seconds);
let occurred_at = schema_builder.add_date_field("occurred_at", opts);
let event_type = schema_builder.add_text_field("event", STRING | STORED);
let schema = schema_builder.build();

// # Indexing documents
let index = Index::create_in_ram(schema.clone());

// NOTE(review): the writer argument is presumably a memory budget in bytes — confirm.
let mut index_writer = index.writer(50_000_000)?;
// Documents are built from JSON; dates are given as RFC 3339 strings with a
// sub-second part.
let doc = schema.parse_document(
r#"{
"occurred_at": "2022-06-22T12:53:50.53Z",
"event": "pull-request"
}"#,
)?;
index_writer.add_document(doc)?;
let doc = schema.parse_document(
r#"{
"occurred_at": "2022-06-22T13:00:00.22Z",
"event": "comment"
}"#,
)?;
index_writer.add_document(doc)?;
index_writer.commit()?;

let reader = index.reader()?;
let searcher = reader.searcher();

// # Searching
// The query parser is created with `event_type` as its default field.
let query_parser = QueryParser::for_index(&index, vec![event_type]);
{
// Term query on the text field: only the "comment" document is expected.
let query = query_parser.parse_query("event:comment")?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
assert_eq!(count_docs.len(), 1);
}
{
// Range query on the date field: only the document dated 13:00:00.22Z is
// expected to fall inside the range.
let query = query_parser
.parse_query(r#"occurred_at:[2022-06-22T12:58:00Z TO 2022-06-23T00:00:00Z}"#)?;
let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
assert_eq!(count_docs.len(), 1);
for (_score, doc_address) in count_docs {
let retrieved_doc = searcher.doc(doc_address)?;
// The stored value comes back as a `Value::Date`.
assert!(matches!(
retrieved_doc.get_first(occurred_at),
Some(Value::Date(_))
));
// The expected JSON keeps the sub-second part of the stored date even
// though the field precision is set to seconds.
assert_eq!(
schema.to_json(&retrieved_doc),
r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
);
}
}
Ok(())
}
2 changes: 1 addition & 1 deletion fastfield_codecs/src/bitpacked.rs
Expand Up @@ -14,7 +14,7 @@ pub struct BitpackedFastFieldReader {
pub max_value_u64: u64,
}

impl<'data> FastFieldCodecReader for BitpackedFastFieldReader {
fulmicoton marked this conversation as resolved.
Show resolved Hide resolved
impl FastFieldCodecReader for BitpackedFastFieldReader {
/// Opens a fast field given a file.
fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
let (_data, mut footer) = bytes.split_at(bytes.len() - 16);
Expand Down
2 changes: 1 addition & 1 deletion query-grammar/src/query_grammar.rs
Expand Up @@ -575,7 +575,7 @@ mod test {
for special_char in SPECIAL_CHARS.iter() {
let query = &format!("\\{special_char}my\\{special_char}field:a");
assert_eq!(
super::field_name().parse(&query),
super::field_name().parse(query),
Ok((format!("{special_char}my{special_char}field"), "a"))
);
}
Expand Down
5 changes: 4 additions & 1 deletion src/aggregation/intermediate_agg_result.rs
Expand Up @@ -36,7 +36,10 @@ pub struct IntermediateAggregationResults {

impl IntermediateAggregationResults {
/// Convert intermediate result and its aggregation request to the final result.
pub fn into_final_bucket_result(self, req: Aggregations) -> crate::Result<AggregationResults> {
pub(crate) fn into_final_bucket_result(
self,
req: Aggregations,
) -> crate::Result<AggregationResults> {
self.into_final_bucket_result_internal(&(req.into()))
}

Expand Down
5 changes: 2 additions & 3 deletions src/collector/histogram_collector.rs
Expand Up @@ -72,8 +72,7 @@ impl HistogramComputer {
return;
}
let delta = value - self.min_value;
let delta_u64 = delta.to_u64();
let bucket_id: usize = self.divider.divide(delta_u64) as usize;
let bucket_id: usize = self.divider.divide(delta) as usize;
if bucket_id < self.counts.len() {
self.counts[bucket_id] += 1;
}
Expand Down Expand Up @@ -287,7 +286,7 @@ mod tests {
DateTime::from_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
3600 * 24 * 365, // it is just for a unit test... sorry leap years.
3_600_000_000 * 24 * 365, // it is just for a unit test... sorry leap years.
10,
);
let week_histogram = searcher.search(&all_query, &week_histogram_collector)?;
Expand Down
50 changes: 30 additions & 20 deletions src/fastfield/mod.rs
Expand Up @@ -52,11 +52,13 @@ pub trait MultiValueLength {
fn get_total_len(&self) -> u64;
}

/// Trait for types that are allowed for fast fields: (u64, i64 and f64).
/// Trait for types that are allowed for fast fields:
/// (u64, i64, f64, bool and DateTime).
pub trait FastValue: Clone + Copy + Send + Sync + PartialOrd + 'static {
/// Converts a value from u64
///
/// Internally all fast field values are encoded as u64.
/// **Note: To be used for converting encoded Term, Posting values.**
fn from_u64(val: u64) -> Self;

/// Converts a value to u64.
Expand Down Expand Up @@ -189,24 +191,27 @@ impl FastValue for bool {
}

impl FastValue for DateTime {
fn from_u64(timestamp_u64: u64) -> Self {
let unix_timestamp = i64::from_u64(timestamp_u64);
Self::from_unix_timestamp(unix_timestamp)
/// Converts a timestamp in microseconds into a `DateTime`.
///
/// **Note: the timestamp is expected to be in microseconds.**
fn from_u64(timestamp_micros_u64: u64) -> Self {
let timestamp_micros = i64::from_u64(timestamp_micros_u64);
Self::from_timestamp_micros(timestamp_micros)
}

fn to_u64(&self) -> u64 {
self.into_unix_timestamp().to_u64()
common::i64_to_u64(self.into_timestamp_micros())
}

fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
match *field_type {
FieldType::Date(ref integer_options) => integer_options.get_fastfield_cardinality(),
FieldType::Date(ref options) => options.get_fastfield_cardinality(),
_ => None,
}
}

fn as_u64(&self) -> u64 {
self.into_unix_timestamp().as_u64()
self.into_timestamp_micros().as_u64()
}

fn to_type() -> Type {
Expand Down Expand Up @@ -261,9 +266,9 @@ mod tests {
use super::*;
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::{Document, Field, NumericOptions, Schema, FAST, STRING, TEXT};
use crate::schema::{Document, Field, Schema, FAST, STRING, TEXT};
use crate::time::OffsetDateTime;
use crate::{Index, SegmentId, SegmentReader};
use crate::{DateOptions, DatePrecision, Index, SegmentId, SegmentReader};

pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
let mut schema_builder = Schema::builder();
Expand Down Expand Up @@ -559,8 +564,8 @@ mod tests {
}

#[test]
fn test_default_datetime() {
assert_eq!(0, DateTime::make_zero().into_unix_timestamp());
fn test_default_date() {
assert_eq!(0, DateTime::make_zero().into_timestamp_secs());
}

fn get_vals_for_docs(ff: &MultiValuedFastFieldReader<u64>, docs: Range<u32>) -> Vec<u64> {
Expand Down Expand Up @@ -766,10 +771,15 @@ mod tests {
fn test_datefastfield() -> crate::Result<()> {
use crate::fastfield::FastValue;
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field("date", FAST);
let date_field = schema_builder.add_date_field(
"date",
DateOptions::from(FAST).set_precision(DatePrecision::Microseconds),
);
let multi_date_field = schema_builder.add_date_field(
"multi_date",
NumericOptions::default().set_fast(Cardinality::MultiValues),
DateOptions::default()
.set_precision(DatePrecision::Microseconds)
.set_fast(Cardinality::MultiValues),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down Expand Up @@ -797,23 +807,23 @@ mod tests {
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
let mut dates = vec![];
{
assert_eq!(date_fast_field.get(0u32).into_unix_timestamp(), 1i64);
assert_eq!(date_fast_field.get(0u32).into_timestamp_micros(), 1i64);
dates_fast_field.get_vals(0u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_unix_timestamp(), 2i64);
assert_eq!(dates[1].into_unix_timestamp(), 3i64);
assert_eq!(dates[0].into_timestamp_micros(), 2i64);
assert_eq!(dates[1].into_timestamp_micros(), 3i64);
}
{
assert_eq!(date_fast_field.get(1u32).into_unix_timestamp(), 4i64);
assert_eq!(date_fast_field.get(1u32).into_timestamp_micros(), 4i64);
dates_fast_field.get_vals(1u32, &mut dates);
assert!(dates.is_empty());
}
{
assert_eq!(date_fast_field.get(2u32).into_unix_timestamp(), 0i64);
assert_eq!(date_fast_field.get(2u32).into_timestamp_micros(), 0i64);
dates_fast_field.get_vals(2u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].into_unix_timestamp(), 5i64);
assert_eq!(dates[1].into_unix_timestamp(), 6i64);
assert_eq!(dates[0].into_timestamp_micros(), 5i64);
assert_eq!(dates[1].into_timestamp_micros(), 6i64);
}
Ok(())
}
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/mod.rs
Expand Up @@ -13,7 +13,7 @@ mod tests {
use crate::collector::TopDocs;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::{Cardinality, Facet, FacetOptions, NumericOptions, Schema};
use crate::schema::{Cardinality, DateOptions, Facet, FacetOptions, NumericOptions, Schema};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{Duration, OffsetDateTime};
use crate::{DateTime, Document, Index, Term};
Expand Down Expand Up @@ -58,7 +58,7 @@ mod tests {
let mut schema_builder = Schema::builder();
let date_field = schema_builder.add_date_field(
"multi_date_field",
NumericOptions::default()
DateOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed()
.set_fieldnorm()
Expand Down
23 changes: 18 additions & 5 deletions src/fastfield/multivalued/writer.rs
Expand Up @@ -4,12 +4,12 @@ use fnv::FnvHashMap;
use tantivy_bitpacker::minmax;

use crate::fastfield::serializer::BitpackedFastFieldSerializerLegacy;
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType};
use crate::fastfield::{value_to_u64, CompositeFastFieldSerializer, FastFieldType, FastValue};
use crate::indexer::doc_id_mapping::DocIdMapping;
use crate::postings::UnorderedTermId;
use crate::schema::{Document, Field};
use crate::schema::{Document, Field, Value};
use crate::termdict::TermOrdinal;
use crate::DocId;
use crate::{DatePrecision, DocId};

/// Writer for multi-valued (as in, more than one value per document)
/// int fast field.
Expand All @@ -36,16 +36,22 @@ use crate::DocId;
/// term ids when the segment is getting serialized.
pub struct MultiValuedFastFieldWriter {
/// The field this writer collects values for; values of other fields are skipped.
field: Field,
/// Optional date precision. When set, `Value::Date` values are truncated to
/// this precision before being converted to `u64`.
precision_opt: Option<DatePrecision>,
/// NOTE(review): presumably the flat list of all values added so far — confirm.
vals: Vec<UnorderedTermId>,
/// NOTE(review): presumably per-document offsets into `vals` — confirm.
doc_index: Vec<u64>,
/// The kind of fast field being written, as passed to `new`.
fast_field_type: FastFieldType,
}

impl MultiValuedFastFieldWriter {
/// Creates a new `MultiValuedFastFieldWriter`
pub(crate) fn new(field: Field, fast_field_type: FastFieldType) -> Self {
pub(crate) fn new(
field: Field,
fast_field_type: FastFieldType,
precision_opt: Option<DatePrecision>,
) -> Self {
MultiValuedFastFieldWriter {
field,
precision_opt,
vals: Vec::new(),
doc_index: Vec::new(),
fast_field_type,
Expand Down Expand Up @@ -83,7 +89,14 @@ impl MultiValuedFastFieldWriter {
}
for field_value in doc.field_values() {
if field_value.field == self.field {
self.add_val(value_to_u64(field_value.value()));
let value = field_value.value();
let value_u64 = match (self.precision_opt, value) {
(Some(precision), Value::Date(date_val)) => {
date_val.truncate(precision).to_u64()
}
_ => value_to_u64(value),
};
self.add_val(value_u64);
}
}
}
Expand Down