Commit b4a80b0

Updated DateTime to hold timestamp in microseconds, while making date field precision configurable

evanxg852000 committed Jul 11, 2022
1 parent 11e4225 commit b4a80b0
Showing 42 changed files with 1,269 additions and 483 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,10 @@
+Tantivy 0.19
+================================
+- Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396)
+The `DateTime` type has been updated to hold timestamps with microseconds precision.
+`DateOptions` and `DatePrecision` have been added to configure date fields. The precision is used as a hint for fast-field value compression; everywhere else (i.e. terms, indexing), seconds precision is used.
+
+
Tantivy 0.18
================================
- For date values `chrono` has been replaced with `time` (@uklotzde) #1304 :
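For illustration, a small sketch of what the 0.19 entry implies for `DateTime` itself. The accessor names below are assumptions based on the changelog wording, not taken from this diff:

```rust
use tantivy::DateTime;

fn main() {
    // Assumed API: DateTime now holds microseconds internally, so a
    // microsecond round-trip should be lossless, while the seconds-level
    // accessor truncates the sub-second part.
    let dt = DateTime::from_timestamp_micros(1_655_902_430_530_000);
    assert_eq!(dt.into_timestamp_micros(), 1_655_902_430_530_000);
    assert_eq!(dt.into_timestamp_secs(), 1_655_902_430);
}
```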
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -49,7 +49,7 @@ thiserror = "1.0.30"
htmlescape = "0.3.1"
fail = "0.5.0"
murmurhash32 = "0.2.0"
time = { version = "0.3.9", features = ["serde-well-known"] }
time = { version = "0.3.10", features = ["serde-well-known"] }
smallvec = "1.8.0"
rayon = "1.5.2"
lru = "0.7.5"
11 changes: 10 additions & 1 deletion README.md
@@ -152,4 +152,13 @@ You can also find other bindings on [GitHub](https://github.com/search?q=tantivy
- and [more](https://github.com/search?q=tantivy)!

### On average, how much faster is Tantivy compared to Lucene?
-- According to our [search latency benchmark](https://tantivy-search.github.io/bench/), Tantivy is approximately 2x faster than Lucene.
+- According to our [search latency benchmark](https://tantivy-search.github.io/bench/), Tantivy is approximately 2x faster than Lucene.
+
+### Does tantivy support incremental indexing?
+- Yes.
+
+### How can I edit documents?
+- Data in tantivy is immutable. To edit a document, delete it and reindex it.
+
+### When will my documents be searchable during indexing?
+- Documents become searchable after `commit` is called on an `IndexWriter`. Existing `IndexReader`s also need to be reloaded to reflect the changes. Finally, changes are only visible to newly acquired `Searcher`s.
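A minimal sketch of that lifecycle (the schema and the writer heap size are arbitrary, not from the repository):

```rust
use tantivy::collector::Count;
use tantivy::query::AllQuery;
use tantivy::schema::{Schema, TEXT};
use tantivy::{doc, Index};

fn main() -> tantivy::Result<()> {
    let mut schema_builder = Schema::builder();
    let body = schema_builder.add_text_field("body", TEXT);
    let index = Index::create_in_ram(schema_builder.build());

    let mut index_writer = index.writer(50_000_000)?;
    let reader = index.reader()?;

    index_writer.add_document(doc!(body => "hello"))?;
    // Nothing committed yet: the document is not searchable.
    assert_eq!(reader.searcher().search(&AllQuery, &Count)?, 0);

    index_writer.commit()?;
    // Reload the reader so the committed segments become visible.
    reader.reload()?;
    // A newly acquired searcher reflects the committed changes.
    assert_eq!(reader.searcher().search(&AllQuery, &Count)?, 1);
    Ok(())
}
```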
2 changes: 1 addition & 1 deletion common/src/writer.rs
@@ -62,7 +62,7 @@ impl<W: TerminatingWrite> TerminatingWrite for CountingWriter<W> {
pub struct AntiCallToken(());

/// Trait used to indicate that no more writes need to be done on a writer.
-pub trait TerminatingWrite: Write {
+pub trait TerminatingWrite: Write + Send {
    /// Indicate that the writer will no longer be used. Internally calls `terminate_ref`.
    fn terminate(mut self) -> io::Result<()>
    where Self: Sized {
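The only change in this hunk is the new `Send` supertrait. A self-contained sketch of what it enables; the trait is a simplified mirror of the one above, and `VecWriter`/`flush_in_background` are illustrative, not part of tantivy:

```rust
use std::io::{self, Write};
use std::thread;

// Simplified mirror of the trait above; `Send` is the new supertrait.
pub trait TerminatingWrite: Write + Send {
    /// Consume the writer, flushing and finalizing it.
    fn terminate(self) -> io::Result<()>
    where
        Self: Sized;
}

struct VecWriter(Vec<u8>);

impl Write for VecWriter {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.0.extend_from_slice(buf);
        Ok(buf.len())
    }
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

impl TerminatingWrite for VecWriter {
    fn terminate(mut self) -> io::Result<()> {
        self.flush()
    }
}

// Thanks to the `Send` supertrait, any `TerminatingWrite` can be moved to
// another thread, e.g. to finish writing a file off the indexing thread.
fn flush_in_background<W: TerminatingWrite + 'static>(
    mut writer: W,
) -> thread::JoinHandle<io::Result<()>> {
    thread::spawn(move || {
        writer.write_all(b"segment data")?;
        writer.terminate()
    })
}

fn main() -> io::Result<()> {
    flush_in_background(VecWriter(Vec::new())).join().unwrap()
}
```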
2 changes: 1 addition & 1 deletion examples/aggregation.rs
@@ -117,7 +117,7 @@ fn main() -> tantivy::Result<()> {
    .into_iter()
    .collect();

-    let collector = AggregationCollector::from_aggs(agg_req_1);
+    let collector = AggregationCollector::from_aggs(agg_req_1, None);

    let searcher = reader.searcher();
    let agg_res: AggregationResults = searcher.search(&term_query, &collector).unwrap();
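(The added `None` argument appears to correspond to the optional bucket-count limit threaded through `src/aggregation` later in this commit; passing `None` presumably keeps the default limit.)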
69 changes: 69 additions & 0 deletions examples/date_time_field.rs
@@ -0,0 +1,69 @@
// # DateTime field example
//
// This example shows how the DateTime field can be used

use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Cardinality, DateOptions, Schema, Value, INDEXED, STORED, STRING};
use tantivy::Index;

fn main() -> tantivy::Result<()> {
    // # Defining the schema
    let mut schema_builder = Schema::builder();
    let opts = DateOptions::from(INDEXED)
        .set_stored()
        .set_fast(Cardinality::SingleValue)
        .set_precision(tantivy::DatePrecision::Seconds);
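    // Assumption from the changelog above: this precision is a hint for
    // fast-field value compression; indexed terms always use seconds.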
    let occurred_at = schema_builder.add_date_field("occurred_at", opts);
    let event_type = schema_builder.add_text_field("event", STRING | STORED);
    let schema = schema_builder.build();

    // # Indexing documents
    let index = Index::create_in_ram(schema.clone());

    let mut index_writer = index.writer(50_000_000)?;
    let doc = schema.parse_document(
        r#"{
            "occurred_at": "2022-06-22T12:53:50.53Z",
            "event": "pull-request"
        }"#,
    )?;
    index_writer.add_document(doc)?;
    let doc = schema.parse_document(
        r#"{
            "occurred_at": "2022-06-22T13:00:00.22Z",
            "event": "comment"
        }"#,
    )?;
    index_writer.add_document(doc)?;
    index_writer.commit()?;

    let reader = index.reader()?;
    let searcher = reader.searcher();

    // # Default fields: event_type
    let query_parser = QueryParser::for_index(&index, vec![event_type]);
    {
        let query = query_parser.parse_query("event:comment")?;
        let count_docs = searcher.search(&*query, &TopDocs::with_limit(5))?;
        assert_eq!(count_docs.len(), 1);
    }
    {
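        // Range query syntax: '[' makes the lower bound inclusive,
        // '}' makes the upper bound exclusive.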
        let query = query_parser
            .parse_query(r#"occurred_at:[2022-06-22T12:58:00Z TO 2022-06-23T00:00:00Z}"#)?;
        let count_docs = searcher.search(&*query, &TopDocs::with_limit(4))?;
        assert_eq!(count_docs.len(), 1);
        for (_score, doc_address) in count_docs {
            let retrieved_doc = searcher.doc(doc_address)?;
            assert!(matches!(
                retrieved_doc.get_first(occurred_at),
                Some(Value::Date(_))
            ));
            assert_eq!(
                schema.to_json(&retrieved_doc),
                r#"{"event":["comment"],"occurred_at":["2022-06-22T13:00:00.22Z"]}"#
            );
        }
    }
    Ok(())
}
2 changes: 1 addition & 1 deletion fastfield_codecs/src/bitpacked.rs
@@ -14,7 +14,7 @@ pub struct BitpackedFastFieldReader {
    pub max_value_u64: u64,
}

-impl<'data> FastFieldCodecReader for BitpackedFastFieldReader {
+impl FastFieldCodecReader for BitpackedFastFieldReader {
    /// Opens a fast field given a file.
    fn open_from_bytes(bytes: &[u8]) -> io::Result<Self> {
        let (_data, mut footer) = bytes.split_at(bytes.len() - 16);
21 changes: 20 additions & 1 deletion src/aggregation/agg_req_with_accessor.rs
@@ -1,10 +1,13 @@
//! This will enhance the request tree with access to the fastfield and metadata.

+use std::rc::Rc;
+use std::sync::atomic::AtomicU32;
use std::sync::Arc;

use super::agg_req::{Aggregation, Aggregations, BucketAggregationType, MetricAggregation};
use super::bucket::{HistogramAggregation, RangeAggregation, TermsAggregation};
use super::metric::{AverageAggregation, StatsAggregation};
+use super::segment_agg_result::BucketCount;
use super::VecWithNames;
use crate::fastfield::{
    type_and_cardinality, DynamicFastFieldReader, FastType, MultiValuedFastFieldReader,
Expand Down Expand Up @@ -60,13 +63,16 @@ pub struct BucketAggregationWithAccessor {
    pub(crate) field_type: Type,
    pub(crate) bucket_agg: BucketAggregationType,
    pub(crate) sub_aggregation: AggregationsWithAccessor,
+    pub(crate) bucket_count: BucketCount,
}

impl BucketAggregationWithAccessor {
    fn try_from_bucket(
        bucket: &BucketAggregationType,
        sub_aggregation: &Aggregations,
        reader: &SegmentReader,
+        bucket_count: Rc<AtomicU32>,
+        max_bucket_count: u32,
    ) -> crate::Result<BucketAggregationWithAccessor> {
        let mut inverted_index = None;
        let (accessor, field_type) = match &bucket {
@@ -92,9 +98,18 @@ impl BucketAggregationWithAccessor {
        Ok(BucketAggregationWithAccessor {
            accessor,
            field_type,
-            sub_aggregation: get_aggs_with_accessor_and_validate(&sub_aggregation, reader)?,
+            sub_aggregation: get_aggs_with_accessor_and_validate(
+                &sub_aggregation,
+                reader,
+                bucket_count.clone(),
+                max_bucket_count,
+            )?,
            bucket_agg: bucket.clone(),
            inverted_index,
+            bucket_count: BucketCount {
+                bucket_count,
+                max_bucket_count,
+            },
        })
    }
}
@@ -134,6 +149,8 @@ impl MetricAggregationWithAccessor {
pub(crate) fn get_aggs_with_accessor_and_validate(
    aggs: &Aggregations,
    reader: &SegmentReader,
+    bucket_count: Rc<AtomicU32>,
+    max_bucket_count: u32,
) -> crate::Result<AggregationsWithAccessor> {
    let mut metrics = vec![];
    let mut buckets = vec![];
@@ -145,6 +162,8 @@ pub(crate) fn get_aggs_with_accessor_and_validate(
                &bucket.bucket_agg,
                &bucket.sub_aggregation,
                reader,
+                Rc::clone(&bucket_count),
+                max_bucket_count,
            )?,
        )),
        Aggregation::Metric(metric) => metrics.push((
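The new parameters above thread a shared counter plus a limit through every level of the aggregation tree. A self-contained sketch of that guard pattern; the struct fields mirror the diff, but the method names and the 65_000 limit are illustrative assumptions:

```rust
use std::rc::Rc;
use std::sync::atomic::{AtomicU32, Ordering};

// Every (sub-)aggregation shares one counter; exceeding the configured
// maximum fails the request instead of exhausting memory.
struct BucketCount {
    bucket_count: Rc<AtomicU32>,
    max_bucket_count: u32,
}

impl BucketCount {
    fn add_count(&self, count: u32) {
        self.bucket_count.fetch_add(count, Ordering::Relaxed);
    }

    fn validate(&self) -> Result<(), String> {
        if self.bucket_count.load(Ordering::Relaxed) > self.max_bucket_count {
            return Err("aggregation created too many buckets".to_string());
        }
        Ok(())
    }
}

fn main() {
    let root = BucketCount {
        bucket_count: Rc::new(AtomicU32::new(0)),
        max_bucket_count: 65_000,
    };
    // A sub-aggregation clones the Rc, so all levels bump the same counter.
    let sub = BucketCount {
        bucket_count: Rc::clone(&root.bucket_count),
        max_bucket_count: root.max_bucket_count,
    };
    sub.add_count(40_000);
    root.add_count(30_000);
    assert!(root.validate().is_err()); // 70_000 > 65_000
}
```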
