Skip to content

Commit

Permalink
Replace chrono with time (#1307)
Browse files Browse the repository at this point in the history
For date values `chrono` has been replaced with `time` 
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
If this is not desired do the time zone conversion yourself and use `time::PrimitiveDateTime`
directly instead.

Closes #1304
  • Loading branch information
uklotzde committed Mar 21, 2022
1 parent 46d5de9 commit 125707d
Show file tree
Hide file tree
Showing 20 changed files with 323 additions and 190 deletions.
11 changes: 11 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,14 @@
Unreleased
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
- The `time` crate is re-exported as `tantivy::time` instead of `tantivy::chrono`.
- The type alias `tantivy::DateTime` has been removed.
- `Value::Date` wraps `time::PrimitiveDateTime` without time zone information.
- Internally date/time values are stored as seconds since UNIX epoch in UTC.
- Converting a `time::OffsetDateTime` to `Value::Date` implicitly converts the value into UTC.
If this is not desired do the time zone conversion yourself and use `time::PrimitiveDateTime`
directly instead.

Tantivy 0.17
================================
- LogMergePolicy now triggers merges if the ratio of deleted documents reaches a threshold (@shikhar @fulmicoton) [#115](https://github.com/quickwit-oss/tantivy/issues/115)
Expand Down
2 changes: 1 addition & 1 deletion Cargo.toml
Expand Up @@ -48,7 +48,7 @@ thiserror = "1.0.24"
htmlescape = "0.3.1"
fail = "0.5"
murmurhash32 = "0.2"
chrono = "0.4.19"
time = { version = "0.3.7", features = ["serde-well-known"] }
smallvec = "1.6.1"
rayon = "1.5"
lru = "0.7.0"
Expand Down
2 changes: 1 addition & 1 deletion query-grammar/src/query_grammar.rs
Expand Up @@ -67,7 +67,7 @@ fn word<'a>() -> impl Parser<&'a str, Output = String> {
///
/// NOTE: also accepts 999999-99-99T99:99:99.266051969+99:99
/// We delegate rejecting such invalid dates to the logical AST compuation code
/// which invokes chrono::DateTime::parse_from_rfc3339 on the value to actually parse
/// which invokes time::OffsetDateTime::parse(..., &Rfc3339) on the value to actually parse
/// it (instead of merely extracting the datetime value as string as done here).
fn date_time<'a>() -> impl Parser<&'a str, Output = String> {
let two_digits = || recognize::<String, _, _>((digit(), digit()));
Expand Down
16 changes: 10 additions & 6 deletions src/collector/histogram_collector.rs
Expand Up @@ -152,9 +152,9 @@ mod tests {
use query::AllQuery;

use super::{add_vecs, HistogramCollector, HistogramComputer};
use crate::chrono::{TimeZone, Utc};
use crate::schema::{Schema, FAST};
use crate::{doc, query, Index};
use crate::time::{Date, Month};
use crate::{doc, query, DateTime, Index};

#[test]
fn test_add_histograms_simple() {
Expand Down Expand Up @@ -273,16 +273,20 @@ mod tests {
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut writer = index.writer_with_num_threads(1, 4_000_000)?;
writer.add_document(doc!(date_field=>Utc.ymd(1982, 9, 17).and_hms(0, 0,0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1986, 3, 9).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>Utc.ymd(1983, 9, 27).and_hms(0, 0, 0)))?;
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1982, Month::September, 17)?.with_hms(0, 0, 0)?)))?;
writer.add_document(
doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1986, Month::March, 9)?.with_hms(0, 0, 0)?)),
)?;
writer.add_document(doc!(date_field=>DateTime::new_primitive(Date::from_calendar_date(1983, Month::September, 27)?.with_hms(0, 0, 0)?)))?;
writer.commit()?;
let reader = index.reader()?;
let searcher = reader.searcher();
let all_query = AllQuery;
let week_histogram_collector = HistogramCollector::new(
date_field,
Utc.ymd(1980, 1, 1).and_hms(0, 0, 0),
DateTime::new_primitive(
Date::from_calendar_date(1980, Month::January, 1)?.with_hms(0, 0, 0)?,
),
3600 * 24 * 365, // it is just for a unit test... sorry leap years.
10,
);
Expand Down
18 changes: 10 additions & 8 deletions src/collector/tests.rs
@@ -1,11 +1,11 @@
use std::str::FromStr;

use super::*;
use crate::collector::{Count, FilterCollector, TopDocs};
use crate::core::SegmentReader;
use crate::fastfield::{BytesFastFieldReader, DynamicFastFieldReader, FastFieldReader};
use crate::query::{AllQuery, QueryParser};
use crate::schema::{Field, Schema, FAST, TEXT};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{doc, DateTime, DocAddress, DocId, Document, Index, Score, Searcher, SegmentOrdinal};

pub const TEST_COLLECTOR_WITH_SCORE: TestCollector = TestCollector {
Expand All @@ -26,11 +26,11 @@ pub fn test_filter_collector() -> crate::Result<()> {
let index = Index::create_in_ram(schema);

let mut index_writer = index.writer_with_num_threads(1, 10_000_000)?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::from_str("1898-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::from_str("2020-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::from_str("2019-04-20T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::from_str("2018-04-09T00:00:00+00:00").unwrap()))?;
index_writer.add_document(doc!(title => "The Name of the Wind", price => 30_200u64, date => DateTime::new_utc(OffsetDateTime::parse("1898-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of Muadib", price => 29_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2020-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of Anne Frank", price => 18_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-20T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "A Dairy Cow", price => 21_240u64, date => DateTime::new_utc(OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.add_document(doc!(title => "The Diary of a Young Girl", price => 20_120u64, date => DateTime::new_utc(OffsetDateTime::parse("2018-04-09T00:00:00+00:00", &Rfc3339).unwrap())))?;
index_writer.commit()?;

let reader = index.reader()?;
Expand All @@ -55,7 +55,9 @@ pub fn test_filter_collector() -> crate::Result<()> {
assert_eq!(filtered_top_docs.len(), 0);

fn date_filter(value: DateTime) -> bool {
(value - DateTime::from_str("2019-04-09T00:00:00+00:00").unwrap()).num_weeks() > 0
(value.to_utc() - OffsetDateTime::parse("2019-04-09T00:00:00+00:00", &Rfc3339).unwrap())
.whole_weeks()
> 0
}

let filter_dates_collector = FilterCollector::new(date, &date_filter, TopDocs::with_limit(5));
Expand Down
22 changes: 14 additions & 8 deletions src/collector/top_score_collector.rs
Expand Up @@ -714,7 +714,9 @@ mod tests {
use crate::collector::Collector;
use crate::query::{AllQuery, Query, QueryParser};
use crate::schema::{Field, Schema, FAST, STORED, TEXT};
use crate::{DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};
use crate::time::format_description::well_known::Rfc3339;
use crate::time::OffsetDateTime;
use crate::{DateTime, DocAddress, DocId, Index, IndexWriter, Score, SegmentReader};

fn make_index() -> crate::Result<Index> {
let mut schema_builder = Schema::builder();
Expand Down Expand Up @@ -890,28 +892,32 @@ mod tests {

#[test]
fn test_top_field_collector_datetime() -> crate::Result<()> {
use std::str::FromStr;
let mut schema_builder = Schema::builder();
let name = schema_builder.add_text_field("name", TEXT);
let birthday = schema_builder.add_date_field("birthday", FAST);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
let pr_birthday = crate::DateTime::from_str("1898-04-09T00:00:00+00:00")?;
let pr_birthday = DateTime::new_utc(OffsetDateTime::parse(
"1898-04-09T00:00:00+00:00",
&Rfc3339,
)?);
index_writer.add_document(doc!(
name => "Paul Robeson",
birthday => pr_birthday
birthday => pr_birthday,
))?;
let mr_birthday = crate::DateTime::from_str("1947-11-08T00:00:00+00:00")?;
let mr_birthday = DateTime::new_utc(OffsetDateTime::parse(
"1947-11-08T00:00:00+00:00",
&Rfc3339,
)?);
index_writer.add_document(doc!(
name => "Minnie Riperton",
birthday => mr_birthday
birthday => mr_birthday,
))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let top_collector = TopDocs::with_limit(3).order_by_fast_field(birthday);
let top_docs: Vec<(crate::DateTime, DocAddress)> =
searcher.search(&AllQuery, &top_collector)?;
let top_docs: Vec<(DateTime, DocAddress)> = searcher.search(&AllQuery, &top_collector)?;
assert_eq!(
&top_docs[..],
&[
Expand Down
18 changes: 15 additions & 3 deletions src/error.rs
Expand Up @@ -149,9 +149,21 @@ impl<Guard> From<PoisonError<Guard>> for TantivyError {
}
}

impl From<chrono::ParseError> for TantivyError {
fn from(err: chrono::ParseError) -> TantivyError {
TantivyError::InvalidArgument(err.to_string())
impl From<time::error::Format> for TantivyError {
fn from(err: time::error::Format) -> TantivyError {
TantivyError::InvalidArgument(format!("Date formatting error: {err}"))
}
}

impl From<time::error::Parse> for TantivyError {
fn from(err: time::error::Parse) -> TantivyError {
TantivyError::InvalidArgument(format!("Date parsing error: {err}"))
}
}

impl From<time::error::ComponentRange> for TantivyError {
fn from(err: time::error::ComponentRange) -> TantivyError {
TantivyError::InvalidArgument(format!("Date range error: {err}"))
}
}

Expand Down
59 changes: 30 additions & 29 deletions src/fastfield/mod.rs
Expand Up @@ -30,9 +30,8 @@ pub use self::readers::FastFieldReaders;
pub(crate) use self::readers::{type_and_cardinality, FastType};
pub use self::serializer::{CompositeFastFieldSerializer, FastFieldDataAccess, FastFieldStats};
pub use self::writer::{FastFieldsWriter, IntFastFieldWriter};
use crate::chrono::{NaiveDateTime, Utc};
use crate::schema::{Cardinality, FieldType, Type, Value};
use crate::DocId;
use crate::{DateTime, DocId};

mod alive_bitset;
mod bytes;
Expand Down Expand Up @@ -161,14 +160,14 @@ impl FastValue for f64 {
}
}

impl FastValue for crate::DateTime {
impl FastValue for DateTime {
fn from_u64(timestamp_u64: u64) -> Self {
let timestamp_i64 = i64::from_u64(timestamp_u64);
crate::DateTime::from_utc(NaiveDateTime::from_timestamp(timestamp_i64, 0), Utc)
let unix_timestamp = i64::from_u64(timestamp_u64);
Self::from_unix_timestamp(unix_timestamp)
}

fn to_u64(&self) -> u64 {
self.timestamp().to_u64()
self.to_unix_timestamp().to_u64()
}

fn fast_field_cardinality(field_type: &FieldType) -> Option<Cardinality> {
Expand All @@ -179,7 +178,7 @@ impl FastValue for crate::DateTime {
}

fn as_u64(&self) -> u64 {
self.timestamp().as_u64()
self.to_unix_timestamp().as_u64()
}

fn to_type() -> Type {
Expand All @@ -188,12 +187,12 @@ impl FastValue for crate::DateTime {
}

fn value_to_u64(value: &Value) -> u64 {
match *value {
Value::U64(ref val) => *val,
Value::I64(ref val) => common::i64_to_u64(*val),
Value::F64(ref val) => common::f64_to_u64(*val),
Value::Date(ref datetime) => common::i64_to_u64(datetime.timestamp()),
_ => panic!("Expected a u64/i64/f64 field, got {:?} ", value),
match value {
Value::U64(val) => val.to_u64(),
Value::I64(val) => val.to_u64(),
Value::F64(val) => val.to_u64(),
Value::Date(val) => val.to_u64(),
_ => panic!("Expected a u64/i64/f64/date field, got {:?} ", value),
}
}

Expand All @@ -213,6 +212,7 @@ mod tests {
use crate::directory::{CompositeFile, Directory, RamDirectory, WritePtr};
use crate::merge_policy::NoMergePolicy;
use crate::schema::{Document, Field, NumericOptions, Schema, FAST};
use crate::time::OffsetDateTime;
use crate::{Index, SegmentId, SegmentReader};

pub static SCHEMA: Lazy<Schema> = Lazy::new(|| {
Expand All @@ -233,7 +233,7 @@ mod tests {

#[test]
pub fn test_fastfield_i64_u64() {
let datetime = crate::DateTime::from_utc(NaiveDateTime::from_timestamp(0i64, 0), Utc);
let datetime = DateTime::new_utc(OffsetDateTime::UNIX_EPOCH);
assert_eq!(i64::from_u64(datetime.to_u64()), 0i64);
}

Expand Down Expand Up @@ -489,7 +489,8 @@ mod tests {
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(date_field =>crate::chrono::prelude::Utc::now()))?;
index_writer
.add_document(doc!(date_field =>DateTime::new_utc(OffsetDateTime::now_utc())))?;
index_writer.commit()?;
index_writer.add_document(doc!())?;
index_writer.commit()?;
Expand All @@ -509,7 +510,7 @@ mod tests {

#[test]
fn test_default_datetime() {
assert_eq!(crate::DateTime::make_zero().timestamp(), 0i64);
assert_eq!(0, DateTime::make_zero().to_unix_timestamp());
}

#[test]
Expand All @@ -526,16 +527,16 @@ mod tests {
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(1i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(2i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(3i64.to_u64())
date_field => DateTime::from_u64(1i64.to_u64()),
multi_date_field => DateTime::from_u64(2i64.to_u64()),
multi_date_field => DateTime::from_u64(3i64.to_u64())
))?;
index_writer.add_document(doc!(
date_field => crate::DateTime::from_u64(4i64.to_u64())
date_field => DateTime::from_u64(4i64.to_u64())
))?;
index_writer.add_document(doc!(
multi_date_field => crate::DateTime::from_u64(5i64.to_u64()),
multi_date_field => crate::DateTime::from_u64(6i64.to_u64())
multi_date_field => DateTime::from_u64(5i64.to_u64()),
multi_date_field => DateTime::from_u64(6i64.to_u64())
))?;
index_writer.commit()?;
let reader = index.reader()?;
Expand All @@ -547,23 +548,23 @@ mod tests {
let dates_fast_field = fast_fields.dates(multi_date_field).unwrap();
let mut dates = vec![];
{
assert_eq!(date_fast_field.get(0u32).timestamp(), 1i64);
assert_eq!(date_fast_field.get(0u32).to_unix_timestamp(), 1i64);
dates_fast_field.get_vals(0u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].timestamp(), 2i64);
assert_eq!(dates[1].timestamp(), 3i64);
assert_eq!(dates[0].to_unix_timestamp(), 2i64);
assert_eq!(dates[1].to_unix_timestamp(), 3i64);
}
{
assert_eq!(date_fast_field.get(1u32).timestamp(), 4i64);
assert_eq!(date_fast_field.get(1u32).to_unix_timestamp(), 4i64);
dates_fast_field.get_vals(1u32, &mut dates);
assert!(dates.is_empty());
}
{
assert_eq!(date_fast_field.get(2u32).timestamp(), 0i64);
assert_eq!(date_fast_field.get(2u32).to_unix_timestamp(), 0i64);
dates_fast_field.get_vals(2u32, &mut dates);
assert_eq!(dates.len(), 2);
assert_eq!(dates[0].timestamp(), 5i64);
assert_eq!(dates[1].timestamp(), 6i64);
assert_eq!(dates[0].to_unix_timestamp(), 5i64);
assert_eq!(dates[1].to_unix_timestamp(), 6i64);
}
Ok(())
}
Expand Down

0 comments on commit 125707d

Please sign in to comment.