diff --git a/src/query/query_parser/query_parser.rs b/src/query/query_parser/query_parser.rs index c703cdef67..0b1d3e0a03 100644 --- a/src/query/query_parser/query_parser.rs +++ b/src/query/query_parser/query_parser.rs @@ -183,7 +183,6 @@ pub struct QueryParser { conjunction_by_default: bool, tokenizer_manager: TokenizerManager, boost: HashMap, - field_names: HashMap, } fn all_negative(ast: &LogicalAst) -> bool { @@ -196,31 +195,6 @@ fn all_negative(ast: &LogicalAst) -> bool { } } -// Returns the position (in byte offsets) of the unescaped '.' in the `field_path`. -// -// This function operates directly on bytes (as opposed to codepoint), relying -// on a encoding property of utf-8 for its correctness. -fn locate_splitting_dots(field_path: &str) -> Vec { - let mut splitting_dots_pos = Vec::new(); - let mut escape_state = false; - for (pos, b) in field_path.bytes().enumerate() { - if escape_state { - escape_state = false; - continue; - } - match b { - b'\\' => { - escape_state = true; - } - b'.' => { - splitting_dots_pos.push(pos); - } - _ => {} - } - } - splitting_dots_pos -} - impl QueryParser { /// Creates a `QueryParser`, given /// * schema - index Schema @@ -230,34 +204,19 @@ impl QueryParser { default_fields: Vec, tokenizer_manager: TokenizerManager, ) -> QueryParser { - let field_names = schema - .fields() - .map(|(field, field_entry)| (field_entry.name().to_string(), field)) - .collect(); QueryParser { schema, default_fields, tokenizer_manager, conjunction_by_default: false, boost: Default::default(), - field_names, } } // Splits a full_path as written in a query, into a field name and a // json path. 
pub(crate) fn split_full_path<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> { - if let Some(field) = self.field_names.get(full_path) { - return Some((*field, "")); - } - let mut splitting_period_pos: Vec = locate_splitting_dots(full_path); - while let Some(pos) = splitting_period_pos.pop() { - let (prefix, suffix) = full_path.split_at(pos); - if let Some(field) = self.field_names.get(prefix) { - return Some((*field, &suffix[1..])); - } - } - None + self.schema.find_field(full_path) } /// Creates a `QueryParser`, given @@ -1566,13 +1525,6 @@ mod test { assert_eq!(query_parser.split_full_path("firsty"), None); } - #[test] - fn test_locate_splitting_dots() { - assert_eq!(&super::locate_splitting_dots("a.b.c"), &[1, 3]); - assert_eq!(&super::locate_splitting_dots(r#"a\.b.c"#), &[4]); - assert_eq!(&super::locate_splitting_dots(r#"a\..b.c"#), &[3, 5]); - } - #[test] pub fn test_phrase_slop() { test_parse_query_to_logical_ast_helper( diff --git a/src/schema/schema.rs b/src/schema/schema.rs index 0650a78f2b..fbc6e50c65 100644 --- a/src/schema/schema.rs +++ b/src/schema/schema.rs @@ -252,6 +252,31 @@ impl Eq for InnerSchema {} #[derive(Clone, Eq, PartialEq, Debug)] pub struct Schema(Arc); +// Returns the positions (in byte offsets) of the unescaped '.' in the `field_path`. +// +// This function operates directly on bytes (as opposed to codepoints), relying +// on an encoding property of utf-8 for its correctness. +fn locate_splitting_dots(field_path: &str) -> Vec { + let mut splitting_dots_pos = Vec::new(); + let mut escape_state = false; + for (pos, b) in field_path.bytes().enumerate() { + if escape_state { + escape_state = false; + continue; + } + match b { + b'\\' => { + escape_state = true; + } + b'.' => { + splitting_dots_pos.push(pos); + } + _ => {} + } + } + splitting_dots_pos +} + impl Schema { /// Return the `FieldEntry` associated with a `Field`.
pub fn get_field_entry(&self, field: Field) -> &FieldEntry { @@ -358,6 +383,28 @@ impl Schema { } Ok(doc) } + + /// Searches for a full_path in the schema, returning the field and a JSON path. + /// + /// This function works by checking if the field exists for the exact given full_path. + /// If it does not, it splits the full_path at non-escaped '.' chars and tries to match the + /// prefix with the field names, favoring the longest field names. + /// + /// This does not check if the field is a JSON field. It is possible for this function to + /// return a non-empty JSON path with a non-JSON field. + pub fn find_field<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> { + if let Some(field) = self.0.fields_map.get(full_path) { + return Some((*field, "")); + } + let mut splitting_period_pos: Vec = locate_splitting_dots(full_path); + while let Some(pos) = splitting_period_pos.pop() { + let (prefix, suffix) = full_path.split_at(pos); + if let Some(field) = self.0.fields_map.get(prefix) { + return Some((*field, &suffix[1..])); + } + } + None + } } impl Serialize for Schema { @@ -436,6 +483,13 @@ mod tests { use crate::schema::schema::DocParsingError::InvalidJson; use crate::schema::*; + #[test] + fn test_locate_splitting_dots() { + assert_eq!(&super::locate_splitting_dots("a.b.c"), &[1, 3]); + assert_eq!(&super::locate_splitting_dots(r#"a\.b.c"#), &[4]); + assert_eq!(&super::locate_splitting_dots(r#"a\..b.c"#), &[3, 5]); + } + #[test] pub fn is_indexed_test() { let mut schema_builder = Schema::builder(); @@ -936,4 +990,46 @@ mod tests { ]"#; assert_eq!(schema_json, expected); } + + #[test] + fn test_find_field() { + let mut schema_builder = Schema::builder(); + schema_builder.add_json_field("foo", STRING); + + schema_builder.add_text_field("bar", STRING); + schema_builder.add_text_field("foo.bar", STRING); + schema_builder.add_text_field("foo.bar.baz", STRING); + schema_builder.add_text_field("bar.a.b.c", STRING); + let schema = schema_builder.build(); +
assert_eq!( + schema.find_field("foo.bar"), + Some((schema.get_field("foo.bar").unwrap(), "")) + ); + assert_eq!( + schema.find_field("foo.bar.bar"), + Some((schema.get_field("foo.bar").unwrap(), "bar")) + ); + assert_eq!( + schema.find_field("foo.bar.baz"), + Some((schema.get_field("foo.bar.baz").unwrap(), "")) + ); + assert_eq!( + schema.find_field("foo.toto"), + Some((schema.get_field("foo").unwrap(), "toto")) + ); + assert_eq!( + schema.find_field("foo.bar"), + Some((schema.get_field("foo.bar").unwrap(), "")) + ); + assert_eq!( + schema.find_field("bar.toto.titi"), + Some((schema.get_field("bar").unwrap(), "toto.titi")) + ); + + assert_eq!(schema.find_field("hello"), None); + assert_eq!(schema.find_field(""), None); + assert_eq!(schema.find_field("thiswouldbeareallylongfieldname"), None); + assert_eq!(schema.find_field("baz.bar.foo"), None); + } }