Skip to content

Commit

Permalink
Removes the indexed option for facets.
Browse files Browse the repository at this point in the history
Facets are now always indexed.

Closes #1195
  • Loading branch information
fulmicoton committed Dec 1, 2021
1 parent bd0f921 commit 31d38be
Show file tree
Hide file tree
Showing 16 changed files with 53 additions and 151 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -1,6 +1,7 @@
Tantivy 0.17
================================
- Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211
- Facets are necessarily indexed. Existing index with indexed facets should work out of the box. Index without facets that are marked with index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195 .

Tantivy 0.16.2
================================
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search.rs
Expand Up @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> {

let name = schema_builder.add_text_field("felin_name", TEXT | STORED);
// this is our faceted field: its scientific classification
let classification = schema_builder.add_facet_field("classification", INDEXED);
let classification = schema_builder.add_facet_field("classification", FacetOptions::default());

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
2 changes: 1 addition & 1 deletion examples/faceted_search_with_tweaked_score.rs
Expand Up @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> {
let mut schema_builder = Schema::builder();

let title = schema_builder.add_text_field("title", STORED);
let ingredient = schema_builder.add_facet_field("ingredient", INDEXED);
let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default());

let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
Expand Down
12 changes: 6 additions & 6 deletions src/collector/facet_collector.rs
Expand Up @@ -462,7 +462,7 @@ mod tests {
use crate::collector::Count;
use crate::core::Index;
use crate::query::{AllQuery, QueryParser, TermQuery};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED};
use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, FacetOptions};
use crate::Term;
use rand::distributions::Uniform;
use rand::prelude::SliceRandom;
Expand All @@ -472,7 +472,7 @@ mod tests {
#[test]
fn test_facet_collector_drilldown() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -533,7 +533,7 @@ mod tests {
#[test]
fn test_doc_unsorted_multifacet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -558,7 +558,7 @@ mod tests {
#[test]
fn test_doc_search_by_facet() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down Expand Up @@ -615,7 +615,7 @@ mod tests {
#[test]
fn test_facet_collector_topk() {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down Expand Up @@ -664,7 +664,7 @@ mod tests {
#[test]
fn test_facet_collector_topk_tie_break() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);

Expand Down
62 changes: 8 additions & 54 deletions src/fastfield/facet_reader.rs
Expand Up @@ -84,14 +84,14 @@ impl FacetReader {
mod tests {
use crate::Index;
use crate::{
schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED},
schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED},
DocAddress, Document,
};

#[test]
fn test_facet_only_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -106,38 +106,15 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
let value = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, None);
Ok(())
}

#[test]
fn test_facet_only_stored() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED);
let facet_field = schema_builder.add_facet_field("facet", STORED);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -152,38 +129,15 @@ mod tests {
facet_reader.facet_ords(0u32, &mut facet_ords);
assert_eq!(&facet_ords, &[2u64]);
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, Some("/a/b".to_string()));
Ok(())
}

#[test]
fn test_facet_neither_stored_and_indexed() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?;
index_writer.commit()?;
let searcher = index.reader()?.searcher();
let facet_reader = searcher
.segment_reader(0u32)
.facet_reader(facet_field)
.unwrap();
let mut facet_ords = Vec::new();
facet_reader.facet_ords(0u32, &mut facet_ords);
assert!(facet_ords.is_empty());
let doc = searcher.doc(DocAddress::new(0u32, 0u32))?;
let value = doc.get_first(facet_field).and_then(Value::path);
assert_eq!(value, None);
let value: Option<&Facet> = doc.get_first(facet_field).and_then(Value::facet);
assert_eq!(value, Facet::from_text("/a/b").ok().as_ref());
Ok(())
}

#[test]
fn test_facet_not_populated_for_all_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand All @@ -206,7 +160,7 @@ mod tests {
#[test]
fn test_facet_not_populated_for_any_docs() -> crate::Result<()> {
let mut schema_builder = SchemaBuilder::default();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/mod.rs
Expand Up @@ -12,9 +12,9 @@ mod tests {
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
Expand Down Expand Up @@ -334,7 +334,7 @@ mod tests {
#[ignore]
fn test_many_facets() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_facet_field("facetfield", INDEXED);
let field = schema_builder.add_facet_field("facetfield", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
4 changes: 2 additions & 2 deletions src/fastfield/multivalued/reader.rs
Expand Up @@ -91,12 +91,12 @@ impl<Item: FastValue> MultiValueLength for MultiValuedFastFieldReader<Item> {
mod tests {

use crate::core::Index;
use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED};
use crate::schema::{Cardinality, Facet, IntOptions, Schema, FacetOptions};

#[test]
fn test_multifastfield_reader() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let facet_field = schema_builder.add_facet_field("facets", INDEXED);
let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default());
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests()?;
Expand Down
3 changes: 2 additions & 1 deletion src/indexer/index_writer.rs
Expand Up @@ -796,6 +796,7 @@ mod tests {
use crate::query::TermQuery;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::FacetOptions;
use crate::schema::IntOptions;
use crate::schema::TextFieldIndexing;
use crate::schema::TextOptions;
Expand Down Expand Up @@ -1417,7 +1418,7 @@ mod tests {
.set_fast(Cardinality::MultiValues)
.set_stored(),
);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let schema = schema_builder.build();
let settings = if sort_index {
IndexSettings {
Expand Down
4 changes: 2 additions & 2 deletions src/indexer/merger.rs
Expand Up @@ -1118,7 +1118,7 @@ mod tests {
use crate::query::BooleanQuery;
use crate::query::Scorer;
use crate::query::TermQuery;
use crate::schema::Document;
use crate::schema::{Document, FacetOptions};
use crate::schema::Facet;
use crate::schema::IndexRecordOption;
use crate::schema::IntOptions;
Expand Down Expand Up @@ -1650,7 +1650,7 @@ mod tests {
// ranges between segments so that merge algorithm can't apply certain optimizations
fn test_merge_facets(index_settings: Option<IndexSettings>, force_segment_value_overlap: bool) {
let mut schema_builder = schema::Schema::builder();
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());
let int_options = IntOptions::default()
.set_fast(Cardinality::SingleValue)
.set_indexed();
Expand Down
29 changes: 11 additions & 18 deletions src/indexer/merger_sorted_index_test.rs
@@ -1,22 +1,15 @@
#[cfg(test)]
mod tests {
use crate::fastfield::{AliveBitSet, FastFieldReader};
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
schema::{Cardinality, TextFieldIndexing},
};
use crate::{core::Index, fastfield::MultiValuedFastFieldReader};
use crate::{
query::QueryParser,
schema::{IntOptions, TextOptions},
};
use crate::{schema::Facet, IndexSortByField};
use crate::{schema::INDEXED, Order};
use crate::{
schema::{self, BytesOptions},
DocAddress,
};
use crate::schema::{self, IndexRecordOption, FacetOptions, Cardinality, TextFieldIndexing, Facet, BytesOptions};
use crate::collector::TopDocs;
use crate::core::Index;
use crate::fastfield::MultiValuedFastFieldReader;
use crate::schema::{IntOptions, TextOptions};
use crate::query::QueryParser;
use crate::IndexSortByField;
use crate::Order;
use crate::DocAddress;
use crate::{DocSet, IndexSettings, Postings, Term};
use futures::executor::block_on;

Expand All @@ -27,7 +20,7 @@ mod tests {
.set_indexed();
let int_field = schema_builder.add_u64_field("intval", int_options);

let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());

let schema = schema_builder.build();

Expand Down Expand Up @@ -79,7 +72,7 @@ mod tests {

let bytes_options = BytesOptions::default().set_fast().set_indexed();
let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options);
let facet_field = schema_builder.add_facet_field("facet", INDEXED);
let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default());

let multi_numbers = schema_builder.add_u64_field(
"multi_numbers",
Expand Down
13 changes: 3 additions & 10 deletions src/indexer/segment_writer.rs
Expand Up @@ -175,16 +175,9 @@ impl SegmentWriter {
match *field_entry.field_type() {
FieldType::HierarchicalFacet(_) => {
term_buffer.set_field(field);
let facets =
field_values
.iter()
.flat_map(|field_value| match *field_value.value() {
Value::Facet(ref facet) => Some(facet.encoded_str()),
_ => {
panic!("Expected hierarchical facet");
}
});
for facet_str in facets {
for field_value in field_values {
let facet = field_value.value().facet().ok_or_else(make_schema_error)?;
let facet_str = facet.encoded_str();
let mut unordered_term_id_opt = None;
FacetTokenizer
.token_stream(facet_str)
Expand Down
1 change: 0 additions & 1 deletion src/postings/mod.rs
Expand Up @@ -47,7 +47,6 @@ pub mod tests {
use crate::fieldnorm::FieldNormReader;
use crate::indexer::operation::AddOperation;
use crate::indexer::SegmentWriter;
use crate::merge_policy::NoMergePolicy;
use crate::query::Scorer;
use crate::schema::{Field, TextOptions};
use crate::schema::{IndexRecordOption, TextFieldIndexing};
Expand Down
15 changes: 4 additions & 11 deletions src/query/query_parser/query_parser.rs
Expand Up @@ -587,6 +587,7 @@ mod test {
use super::QueryParser;
use super::QueryParserError;
use crate::query::Query;
use crate::schema::FacetOptions;
use crate::schema::Field;
use crate::schema::{IndexRecordOption, TextFieldIndexing, TextOptions};
use crate::schema::{Schema, Term, INDEXED, STORED, STRING, TEXT};
Expand Down Expand Up @@ -615,8 +616,7 @@ mod test {
schema_builder.add_text_field("with_stop_words", text_options);
schema_builder.add_date_field("date", INDEXED);
schema_builder.add_f64_field("float", INDEXED);
schema_builder.add_facet_field("facet", INDEXED);
schema_builder.add_facet_field("facet_not_indexed", STORED);
schema_builder.add_facet_field("facet", FacetOptions::default());
schema_builder.add_bytes_field("bytes", INDEXED);
schema_builder.add_bytes_field("bytes_not_indexed", STORED);
schema_builder.build()
Expand Down Expand Up @@ -669,13 +669,6 @@ mod test {
);
}

#[test]
fn test_parse_query_facet_not_indexed() {
let error =
parse_query_to_logical_ast("facet_not_indexed:/root/branch/leaf", false).unwrap_err();
assert!(matches!(error, QueryParserError::FieldNotIndexed(_)));
}

#[test]
pub fn test_parse_query_with_boost() {
let mut query_parser = make_query_parser();
Expand Down Expand Up @@ -817,7 +810,7 @@ mod test {
fn test_parse_bytes() {
test_parse_query_to_logical_ast_helper(
"bytes:YnVidQ==",
"Term(field=13,bytes=[98, 117, 98, 117])",
"Term(field=12,bytes=[98, 117, 98, 117])",
false,
);
}
Expand All @@ -832,7 +825,7 @@ mod test {
fn test_parse_bytes_phrase() {
test_parse_query_to_logical_ast_helper(
"bytes:\"YnVidQ==\"",
"Term(field=13,bytes=[98, 117, 98, 117])",
"Term(field=12,bytes=[98, 117, 98, 117])",
false,
);
}
Expand Down

0 comments on commit 31d38be

Please sign in to comment.