diff --git a/CHANGELOG.md b/CHANGELOG.md index b08955083d..69fdc7336f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.17 ================================ - Change to non-strict schema. Ignore fields in data which are not defined in schema. Previously this returned an error. #1211 +- Facets are necessarily indexed. Existing index with indexed facets should work out of the box. Index without facets that are marked with index: false should be broken (but they were already broken in a sense). (@fulmicoton) #1195 . Tantivy 0.16.2 ================================ diff --git a/examples/faceted_search.rs b/examples/faceted_search.rs index 1026a42ed9..4b0edee14f 100644 --- a/examples/faceted_search.rs +++ b/examples/faceted_search.rs @@ -23,7 +23,7 @@ fn main() -> tantivy::Result<()> { let name = schema_builder.add_text_field("felin_name", TEXT | STORED); // this is our faceted field: its scientific classification - let classification = schema_builder.add_facet_field("classification", INDEXED); + let classification = schema_builder.add_facet_field("classification", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/examples/faceted_search_with_tweaked_score.rs b/examples/faceted_search_with_tweaked_score.rs index 638672976b..de2e2e3236 100644 --- a/examples/faceted_search_with_tweaked_score.rs +++ b/examples/faceted_search_with_tweaked_score.rs @@ -9,7 +9,7 @@ fn main() -> tantivy::Result<()> { let mut schema_builder = Schema::builder(); let title = schema_builder.add_text_field("title", STORED); - let ingredient = schema_builder.add_facet_field("ingredient", INDEXED); + let ingredient = schema_builder.add_facet_field("ingredient", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/src/collector/facet_collector.rs b/src/collector/facet_collector.rs index 99a4f74711..01701b995e 100644 --- a/src/collector/facet_collector.rs +++ b/src/collector/facet_collector.rs @@ -83,7 +83,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// ```rust /// use tantivy::collector::FacetCollector; /// use tantivy::query::AllQuery; -/// use tantivy::schema::{Facet, Schema, INDEXED, TEXT}; +/// use tantivy::schema::{Facet, Schema, FacetOptions, TEXT}; /// use tantivy::{doc, Index}; /// /// fn example() -> tantivy::Result<()> { @@ -92,7 +92,7 @@ fn facet_depth(facet_bytes: &[u8]) -> usize { /// // Facet have their own specific type. /// // It is not a bad practise to put all of your /// // facet information in the same field. -/// let facet = schema_builder.add_facet_field("facet", INDEXED); +/// let facet = schema_builder.add_facet_field("facet", FacetOptions::default()); /// let title = schema_builder.add_text_field("title", TEXT); /// let schema = schema_builder.build(); /// let index = Index::create_in_ram(schema); @@ -462,7 +462,7 @@ mod tests { use crate::collector::Count; use crate::core::Index; use crate::query::{AllQuery, QueryParser, TermQuery}; - use crate::schema::{Document, Facet, Field, IndexRecordOption, Schema, INDEXED}; + use crate::schema::{Document, Facet, FacetOptions, Field, IndexRecordOption, Schema}; use crate::Term; use rand::distributions::Uniform; use rand::prelude::SliceRandom; @@ -472,7 +472,7 @@ mod tests { #[test] fn test_facet_collector_drilldown() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -533,7 +533,7 @@ mod tests { #[test] fn test_doc_unsorted_multifacet() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facets", INDEXED); + let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -558,7 +558,7 @@ mod tests { #[test] fn test_doc_search_by_facet() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -615,7 +615,7 @@ mod tests { #[test] fn test_facet_collector_topk() { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); @@ -664,7 +664,7 @@ mod tests { #[test] fn test_facet_collector_topk_tie_break() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); diff --git a/src/fastfield/facet_reader.rs b/src/fastfield/facet_reader.rs index 54b62b70f4..ef354af614 100644 --- a/src/fastfield/facet_reader.rs +++ b/src/fastfield/facet_reader.rs @@ -84,14 +84,14 @@ impl FacetReader { mod tests { use crate::Index; use crate::{ - schema::{Facet, FacetOptions, SchemaBuilder, Value, INDEXED, STORED}, + schema::{Facet, FacetOptions, SchemaBuilder, Value, STORED}, DocAddress, Document, }; #[test] fn test_facet_only_indexed() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -106,38 +106,15 @@ mod tests { facet_reader.facet_ords(0u32, &mut facet_ords); assert_eq!(&facet_ords, &[2u64]); let doc = searcher.doc(DocAddress::new(0u32, 0u32))?; - let value = doc.get_first(facet_field).and_then(Value::path); + let value = doc.get_first(facet_field).and_then(Value::facet); assert_eq!(value, None); Ok(()) } - #[test] - fn test_facet_only_stored() -> crate::Result<()> { - let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", STORED); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_for_tests()?; - index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?; - index_writer.commit()?; - let searcher = index.reader()?.searcher(); - let facet_reader = searcher - .segment_reader(0u32) - .facet_reader(facet_field) - .unwrap(); - let mut facet_ords = Vec::new(); - facet_reader.facet_ords(0u32, &mut facet_ords); - assert!(facet_ords.is_empty()); - let doc = searcher.doc(DocAddress::new(0u32, 0u32))?; - let value = doc.get_first(facet_field).and_then(Value::path); - assert_eq!(value, Some("/a/b".to_string())); - Ok(()) - } - #[test] fn test_facet_stored_and_indexed() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", STORED | INDEXED); + let facet_field = schema_builder.add_facet_field("facet", STORED); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -152,38 +129,15 @@ mod tests { facet_reader.facet_ords(0u32, &mut facet_ords); assert_eq!(&facet_ords, &[2u64]); let doc = searcher.doc(DocAddress::new(0u32, 0u32))?; - let value = doc.get_first(facet_field).and_then(Value::path); - assert_eq!(value, Some("/a/b".to_string())); - Ok(()) - } - - #[test] - fn test_facet_neither_stored_and_indexed() -> crate::Result<()> { - let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); - let schema = schema_builder.build(); - let index = Index::create_in_ram(schema); - let mut index_writer = index.writer_for_tests()?; - index_writer.add_document(doc!(facet_field=>Facet::from_text("/a/b").unwrap()))?; - index_writer.commit()?; - let searcher = index.reader()?.searcher(); - let facet_reader = searcher - .segment_reader(0u32) - .facet_reader(facet_field) - .unwrap(); - let mut facet_ords = Vec::new(); - facet_reader.facet_ords(0u32, &mut facet_ords); - assert!(facet_ords.is_empty()); - let doc = searcher.doc(DocAddress::new(0u32, 0u32))?; - let value = doc.get_first(facet_field).and_then(Value::path); - assert_eq!(value, None); + let value: Option<&Facet> = doc.get_first(facet_field).and_then(Value::facet); + assert_eq!(value, Facet::from_text("/a/b").ok().as_ref()); Ok(()) } #[test] fn test_facet_not_populated_for_all_docs() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; @@ -206,7 +160,7 @@ mod tests { #[test] fn test_facet_not_populated_for_any_docs() -> crate::Result<()> { let mut schema_builder = SchemaBuilder::default(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; diff --git a/src/fastfield/multivalued/mod.rs b/src/fastfield/multivalued/mod.rs index 5533b5cf3e..82d3145acf 100644 --- a/src/fastfield/multivalued/mod.rs +++ b/src/fastfield/multivalued/mod.rs @@ -12,9 +12,9 @@ mod tests { use crate::query::QueryParser; use crate::schema::Cardinality; use crate::schema::Facet; + use crate::schema::FacetOptions; use crate::schema::IntOptions; use crate::schema::Schema; - use crate::schema::INDEXED; use crate::Document; use crate::Index; use crate::Term; @@ -334,7 +334,7 @@ mod tests { #[ignore] fn test_many_facets() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let field = schema_builder.add_facet_field("facetfield", INDEXED); + let field = schema_builder.add_facet_field("facetfield", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; diff --git a/src/fastfield/multivalued/reader.rs b/src/fastfield/multivalued/reader.rs index dd339f1511..3278d94c46 100644 --- a/src/fastfield/multivalued/reader.rs +++ b/src/fastfield/multivalued/reader.rs @@ -91,12 +91,12 @@ impl MultiValueLength for MultiValuedFastFieldReader { mod tests { use crate::core::Index; - use crate::schema::{Cardinality, Facet, IntOptions, Schema, INDEXED}; + use crate::schema::{Cardinality, Facet, FacetOptions, IntOptions, Schema}; #[test] fn test_multifastfield_reader() -> crate::Result<()> { let mut schema_builder = Schema::builder(); - let facet_field = schema_builder.add_facet_field("facets", INDEXED); + let facet_field = schema_builder.add_facet_field("facets", FacetOptions::default()); let schema = schema_builder.build(); let index = Index::create_in_ram(schema); let mut index_writer = index.writer_for_tests()?; diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 3ee60c8752..67b059b2dc 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -796,6 +796,7 @@ mod tests { use crate::query::TermQuery; use crate::schema::Cardinality; use crate::schema::Facet; + use crate::schema::FacetOptions; use crate::schema::IntOptions; use crate::schema::TextFieldIndexing; use crate::schema::TextOptions; @@ -1417,7 +1418,7 @@ mod tests { .set_fast(Cardinality::MultiValues) .set_stored(), ); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); let settings = if sort_index { IndexSettings { diff --git a/src/indexer/merger.rs b/src/indexer/merger.rs index 6d1f559190..637fdb0bf0 100644 --- a/src/indexer/merger.rs +++ b/src/indexer/merger.rs @@ -1118,13 +1118,13 @@ mod tests { use crate::query::BooleanQuery; use crate::query::Scorer; use crate::query::TermQuery; - use crate::schema::Document; use crate::schema::Facet; use crate::schema::IndexRecordOption; use crate::schema::IntOptions; use crate::schema::Term; use crate::schema::TextFieldIndexing; use crate::schema::{Cardinality, TEXT}; + use crate::schema::{Document, FacetOptions}; use crate::DocAddress; use crate::IndexSettings; use crate::IndexSortByField; @@ -1650,7 +1650,7 @@ mod tests { // ranges between segments so that merge algorithm can't apply certain optimizations fn test_merge_facets(index_settings: Option, force_segment_value_overlap: bool) { let mut schema_builder = schema::Schema::builder(); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let int_options = IntOptions::default() .set_fast(Cardinality::SingleValue) .set_indexed(); diff --git a/src/indexer/merger_sorted_index_test.rs b/src/indexer/merger_sorted_index_test.rs index 091e42e7c8..2f6921565c 100644 --- a/src/indexer/merger_sorted_index_test.rs +++ b/src/indexer/merger_sorted_index_test.rs @@ -1,22 +1,17 @@ #[cfg(test)] mod tests { + use crate::collector::TopDocs; + use crate::core::Index; + use crate::fastfield::MultiValuedFastFieldReader; use crate::fastfield::{AliveBitSet, FastFieldReader}; - use crate::schema::IndexRecordOption; - use crate::{ - collector::TopDocs, - schema::{Cardinality, TextFieldIndexing}, - }; - use crate::{core::Index, fastfield::MultiValuedFastFieldReader}; - use crate::{ - query::QueryParser, - schema::{IntOptions, TextOptions}, - }; - use crate::{schema::Facet, IndexSortByField}; - use crate::{schema::INDEXED, Order}; - use crate::{ - schema::{self, BytesOptions}, - DocAddress, + use crate::query::QueryParser; + use crate::schema::{ + self, BytesOptions, Cardinality, Facet, FacetOptions, IndexRecordOption, TextFieldIndexing, }; + use crate::schema::{IntOptions, TextOptions}; + use crate::DocAddress; + use crate::IndexSortByField; + use crate::Order; use crate::{DocSet, IndexSettings, Postings, Term}; use futures::executor::block_on; @@ -27,7 +22,7 @@ mod tests { .set_indexed(); let int_field = schema_builder.add_u64_field("intval", int_options); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let schema = schema_builder.build(); @@ -79,7 +74,7 @@ mod tests { let bytes_options = BytesOptions::default().set_fast().set_indexed(); let bytes_field = schema_builder.add_bytes_field("bytes", bytes_options); - let facet_field = schema_builder.add_facet_field("facet", INDEXED); + let facet_field = schema_builder.add_facet_field("facet", FacetOptions::default()); let multi_numbers = schema_builder.add_u64_field( "multi_numbers", diff --git a/src/indexer/segment_writer.rs b/src/indexer/segment_writer.rs index b88a03421a..7625d0cfba 100644 --- a/src/indexer/segment_writer.rs +++ b/src/indexer/segment_writer.rs @@ -175,16 +175,9 @@ impl SegmentWriter { match *field_entry.field_type() { FieldType::HierarchicalFacet(_) => { term_buffer.set_field(field); - let facets = - field_values - .iter() - .flat_map(|field_value| match *field_value.value() { - Value::Facet(ref facet) => Some(facet.encoded_str()), - _ => { - panic!("Expected hierarchical facet"); - } - }); - for facet_str in facets { + for field_value in field_values { + let facet = field_value.value().facet().ok_or_else(make_schema_error)?; + let facet_str = facet.encoded_str(); let mut unordered_term_id_opt = None; FacetTokenizer .token_stream(facet_str) diff --git a/src/postings/mod.rs b/src/postings/mod.rs index 7108d64ef2..69ecda7a90 100644 --- a/src/postings/mod.rs +++ b/src/postings/mod.rs @@ -47,7 +47,6 @@ pub mod tests { use crate::fieldnorm::FieldNormReader; use crate::indexer::operation::AddOperation; use crate::indexer::SegmentWriter; - use crate::merge_policy::NoMergePolicy; use crate::query::Scorer; use crate::schema::{Field, TextOptions}; use crate::schema::{IndexRecordOption, TextFieldIndexing}; diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index 259e78258f..086c52c0b7 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -587,6 +587,7 @@ mod test { use super::QueryParser; use super::QueryParserError; use crate::query::Query; + use crate::schema::FacetOptions; use crate::schema::Field; use crate::schema::{IndexRecordOption, TextFieldIndexing, TextOptions}; use crate::schema::{Schema, Term, INDEXED, STORED, STRING, TEXT}; @@ -615,8 +616,7 @@ mod test { schema_builder.add_text_field("with_stop_words", text_options); schema_builder.add_date_field("date", INDEXED); schema_builder.add_f64_field("float", INDEXED); - schema_builder.add_facet_field("facet", INDEXED); - schema_builder.add_facet_field("facet_not_indexed", STORED); + schema_builder.add_facet_field("facet", FacetOptions::default()); schema_builder.add_bytes_field("bytes", INDEXED); schema_builder.add_bytes_field("bytes_not_indexed", STORED); schema_builder.build() @@ -669,13 +669,6 @@ mod test { ); } - #[test] - fn test_parse_query_facet_not_indexed() { - let error = - parse_query_to_logical_ast("facet_not_indexed:/root/branch/leaf", false).unwrap_err(); - assert!(matches!(error, QueryParserError::FieldNotIndexed(_))); - } - #[test] pub fn test_parse_query_with_boost() { let mut query_parser = make_query_parser(); @@ -817,7 +810,7 @@ mod test { fn test_parse_bytes() { test_parse_query_to_logical_ast_helper( "bytes:YnVidQ==", - "Term(field=13,bytes=[98, 117, 98, 117])", + "Term(field=12,bytes=[98, 117, 98, 117])", false, ); } @@ -832,7 +825,7 @@ mod test { fn test_parse_bytes_phrase() { test_parse_query_to_logical_ast_helper( "bytes:\"YnVidQ==\"", - "Term(field=13,bytes=[98, 117, 98, 117])", + "Term(field=12,bytes=[98, 117, 98, 117])", false, ); } diff --git a/src/schema/facet_options.rs b/src/schema/facet_options.rs index 59fbf5a589..f43a0bf964 100644 --- a/src/schema/facet_options.rs +++ b/src/schema/facet_options.rs @@ -1,11 +1,12 @@ -use crate::schema::flags::{IndexedFlag, SchemaFlagList, StoredFlag}; +use crate::schema::flags::{SchemaFlagList, StoredFlag}; use serde::{Deserialize, Serialize}; use std::ops::BitOr; /// Define how a facet field should be handled by tantivy. +/// +/// Note that a Facet is always indexed and stored as a fastfield. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize, Default)] pub struct FacetOptions { - indexed: bool, stored: bool, } @@ -15,11 +16,6 @@ impl FacetOptions { self.stored } - /// Returns true iff the value is indexed. - pub fn is_indexed(&self) -> bool { - self.indexed - } - /// Set the field as stored. /// /// Only the fields that are set as *stored* are @@ -28,15 +24,6 @@ impl FacetOptions { self.stored = true; self } - - /// Set the field as indexed. - /// - /// Setting a facet as indexed will generate - /// a walkable path. - pub fn set_indexed(mut self) -> FacetOptions { - self.indexed = true; - self - } } impl From<()> for FacetOptions { @@ -47,19 +34,7 @@ impl From<()> for FacetOptions { impl From for FacetOptions { fn from(_: StoredFlag) -> Self { - FacetOptions { - indexed: false, - stored: true, - } - } -} - -impl From for FacetOptions { - fn from(_: IndexedFlag) -> Self { - FacetOptions { - indexed: true, - stored: false, - } + FacetOptions { stored: true } } } @@ -69,7 +44,6 @@ impl> BitOr for FacetOptions { fn bitor(self, other: T) -> FacetOptions { let other = other.into(); FacetOptions { - indexed: self.indexed | other.indexed, stored: self.stored | other.stored, } } diff --git a/src/schema/field_type.rs b/src/schema/field_type.rs index 6cc0f227e3..c2ce3d1272 100644 --- a/src/schema/field_type.rs +++ b/src/schema/field_type.rs @@ -92,7 +92,7 @@ impl FieldType { | FieldType::I64(ref int_options) | FieldType::F64(ref int_options) => int_options.is_indexed(), FieldType::Date(ref date_options) => date_options.is_indexed(), - FieldType::HierarchicalFacet(ref facet_options) => facet_options.is_indexed(), + FieldType::HierarchicalFacet(ref _facet_options) => true, FieldType::Bytes(ref bytes_options) => bytes_options.is_indexed(), } } @@ -116,13 +116,7 @@ impl FieldType { None } } - FieldType::HierarchicalFacet(ref facet_options) => { - if facet_options.is_indexed() { - Some(IndexRecordOption::Basic) - } else { - None - } - } + FieldType::HierarchicalFacet(ref _facet_options) => Some(IndexRecordOption::Basic), FieldType::Bytes(ref bytes_options) => { if bytes_options.is_indexed() { Some(IndexRecordOption::Basic) diff --git a/src/schema/value.rs b/src/schema/value.rs index b3b49a8eb5..73f643a89c 100644 --- a/src/schema/value.rs +++ b/src/schema/value.rs @@ -137,19 +137,18 @@ impl Value { } } - /// Returns the path value, provided the value is of the `Facet` type. + /// Returns the facet value, provided the value is of the `Facet` type. /// (Returns None if the value is not of the `Facet` type). - pub fn path(&self) -> Option { + pub fn facet(&self) -> Option<&Facet> { if let Value::Facet(facet) = self { - Some(facet.to_path_string()) + Some(facet) } else { None } } /// Returns the tokenized text, provided the value is of the `PreTokStr` type. - /// - /// Returns None if the value is not of the `PreTokStr` type. + /// (Returns None if the value is not of the `PreTokStr` type.) pub fn tokenized_text(&self) -> Option<&PreTokenizedString> { if let Value::PreTokStr(tokenized_text) = self { Some(tokenized_text) @@ -159,8 +158,7 @@ impl Value { } /// Returns the u64-value, provided the value is of the `U64` type. - /// - /// Returns None if the value is not of the `U64` type. + /// (Returns None if the value is not of the `U64` type) pub fn u64_value(&self) -> Option { if let Value::U64(val) = self { Some(*val)