Skip to content

Commit

Permalink
Removes the indexed option for facets.
Browse files Browse the repository at this point in the history
Facets are now always indexed.

Closes #1195
  • Loading branch information
fulmicoton committed Dec 1, 2021
1 parent bd0f921 commit c65d3d9
Show file tree
Hide file tree
Showing 16 changed files with 57 additions and 155 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,6 +1,7 @@
Tantivy 0.17
================================
- Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
- Facets are now always indexed. Existing indexes with indexed facets should work out of the box. Indexes with facets that are marked with `indexed: false` will be broken (but they were already broken in a sense). (@fulmicoton) #1195

Tantivy 0.16.2
================================
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search.rs
Expand Up @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {

let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification", INDEXED);
let classification = schema_builder.add_facet_field("classification", FacetOptions::default());

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search_with_tweaked_score.rs
Expand Up @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();

let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);
let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default());

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
16 changes: 8 additions & 8 deletions src/collector/facet_collector.rs
Expand Up @@ -83,7 +83,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// ```rust
/// use tantivy::collector::FacetCollector;
/// use tantivy::query::AllQuery;
/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT};
/// use tantivy::schema::{Facet, Schema, FacetOptions, TEXT};
/// use tantivy::{doc, Index};
///
/// fn example() -> tantivy::Result<()> {
Expand All @@ -92,7 +92,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize {
/// // Facets have their own specific type.
/// // It is not bad practice to put all of your
/// // facet information in the same field.
/// let facet = schema_builder.add_facet_field("facet", INDEXED);
/// let facet = schema_builder.add_facet_field("facet", FacetOptions::default());
/// let title = schema_builder.add_text_field("title", TEXT);
/// let schema = schema_builder.build();
/// let index = Index::create_in_ram(schema);
Expand Down Expand Up @@ -462,7 +462,7 @@ mod tests {
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::schema::{Document, Facet, FacetOptions, Field, IndexRecordOption, Schema};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
Expand All @@ -472,7 +472,7 @@ mod tests {
#[test]
fn test_facet_collector_drilldown() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -533,7 +533,7 @@ mod tests {
#[test]
fn test_doc_unsorted_multifacet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -558,7 +558,7 @@ mod tests {
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down Expand Up @@ -615,7 +615,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -664,7 +664,7 @@ mod tests {
#[test]
fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down
62 changes: 8 additions & 54 deletions src/fastfield/facet_reader.rs
Expand Up @@ -84,14 +84,14 @@ impl FacetReader {
mod tests {
use crate::Index;
use crate::{
schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED},
schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED},
DocAddress, Document,
};

#[test]
fn test_facet_only_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -106,38 +106,15 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
let value = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, None);
Ok(())
}

// Checks a facet field declared with only the `STORED` option: after indexing
// a single document, the facet reader yields no ordinals for it, while the
// value read back from the stored document is still "/a/b".
#[test]
fn test_facet_only_stored() -> crate::Result<()> {
// Schema with a single facet field that uses the `STORED` option only.
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
// Add one document carrying the facet "/a/b" and commit it.
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
// Obtaining the facet reader for segment 0 is expected to succeed (hence the unwrap).
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
// Collect the facet ordinals of doc 0; the test expects none for this field.
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
// The stored document must still return the facet path.
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED);
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -152,38 +129,15 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_neither_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
let value: Option<&Facet> = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
Ok(())
}

#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -206,7 +160,7 @@ mod tests {
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/mod.rs
Expand Up @@ -12,9 +12,9 @@ mod tests {
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
Expand Down Expand Up @@ -334,7 +334,7 @@ mod tests {
#[ignore]
fn test_many_facets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let field = schema_builder.add_facet_field("facetfield", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/reader.rs
Expand Up @@ -91,12 +91,12 @@ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
mod tests {

use crate::core::Index;
use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};
use crate::schema::{Cardinality, Facet, FacetOptions, IntOptions, Schema};

#[test]
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
3 changes: 2 additions & 1 deletion src/indexer/index_writer.rs
Expand Up @@ -796,6 +796,7 @@ mod tests {
use crate::query::TermQuery;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::TextFieldIndexing;
use crate::schema::TextOptions;
Expand Down Expand Up @@ -1417,7 +1418,7 @@ mod tests {
.set_fast(Cardinality::MultiValues)
.set_stored(),
);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let settings = if sort_index {
IndexSettings {
Expand Down
4 changes: 2 additions & 2 deletions src/indexer/merger.rs
Expand Up @@ -1118,13 +1118,13 @@ mod tests {
use crate::query::BooleanQuery;
use crate::query::Scorer;
use crate::query::TermQuery;
use crate::schema::Document;
use crate::schema::Facet;
use crate::schema::IndexRecordOption;
use crate::schema::IntOptions;
use crate::schema::Term;
use crate::schema::TextFieldIndexing;
use crate::schema::{Cardinality, TEXT};
use crate::schema::{Document, FacetOptions};
use crate::DocAddress;
use crate::IndexSettings;
use crate::IndexSortByField;
Expand Down Expand Up @@ -1650,7 +1650,7 @@ mod tests {
// ranges between segments so that merge algorithm can't apply certain optimizations
fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let int_options = IntOptions::default()
.set_fast(Cardinality::SingleValue)
.set_indexed();
Expand Down
29 changes: 12 additions & 17 deletions src/indexer/merger_sorted_index_test.rs
@@ -1,22 +1,17 @@
#[cfg(test)]
mod tests {
use crate::collector::TopDocs;
use crate::core::Index;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::fastfield::{AliveBitSet, FastFieldReader};
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
schema::{Cardinality, TextFieldIndexing},
};
use crate::{core::Index, fastfield::MultiValuedFastFieldReader};
use crate::{
query::QueryParser,
schema::{IntOptions, TextOptions},
};
use crate::{schema::Facet, IndexSortByField};
use crate::{schema::INDEXED, Order};
use crate::{
schema::{self, BytesOptions},
DocAddress,
use crate::query::QueryParser;
use crate::schema::{
self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, TextFieldIndexing,
};
use crate::schema::{IntOptions, TextOptions};
use crate::DocAddress;
use crate::IndexSortByField;
use crate::Order;
use crate::{DocSet, IndexSettings, Postings, Term};
use futures::executor::block_on;

Expand All @@ -27,7 +22,7 @@ mod tests {
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);

let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());

let schema = schema_builder.build();

Expand Down Expand Up @@ -79,7 +74,7 @@ mod tests {

let bytes_options = BytesOptions::default().set_fast().set_indexed();
let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());

let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
Expand Down
13 changes: 3 additions & 10 deletions src/indexer/segment_writer.rs
Expand Up @@ -175,16 +175,9 @@ impl SegmentWriter {
match *field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
term_buffer.set_field(field);
let facets =
field_values
.iter()
.flat_map(|field_value| match *field_value.value() {
Value::Facet(ref facet) => Some(facet.encoded_str()),
_ => {
panic!("Expected hierarchical facet");
}
});
for facet_str in facets {
for field_value in field_values {
let facet = field_value.value().facet().ok_or_else(make_schema_error)?;
let facet_str = facet.encoded_str();
let mut unordered_term_id_opt = None;
FacetTokenizer
.token_stream(facet_str)
Expand Down
1 change: 0 additions & 1 deletion src/postings/mod.rs
Expand Up @@ -47,7 +47,6 @@ pub mod tests {
use crate::fieldnorm::FieldNormReader;
use crate::indexer::operation::AddOperation;
use crate::indexer::SegmentWriter;
use crate::merge_policy::NoMergePolicy;
use crate::query::Scorer;
use crate::schema::{Field, TextOptions};
use crate::schema::{IndexRecordOption, TextFieldIndexing};
Expand Down

0 comments on commit c65d3d9

Please sign in to comment.