Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move split_full_path to Schema #1692

Merged
merged 5 commits into from Nov 29, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
50 changes: 1 addition & 49 deletions src/query/query_parser/query_parser.rs
Expand Up @@ -183,7 +183,6 @@ pub struct QueryParser {
conjunction_by_default: bool,
tokenizer_manager: TokenizerManager,
boost: HashMap<Field, Score>,
field_names: HashMap<String, Field>,
}

fn all_negative(ast: &LogicalAst) -> bool {
Expand All @@ -196,31 +195,6 @@ fn all_negative(ast: &LogicalAst) -> bool {
}
}

// Collects the byte offsets of every unescaped '.' found in `field_path`.
//
// A backslash escapes the single byte that follows it, so `\.` is never
// reported as a splitting dot.
//
// The scan works on raw bytes instead of codepoints. This relies on an
// encoding property of utf-8: no byte of a multi-byte sequence can be
// mistaken for the ASCII bytes `.` or `\`.
fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
    // Set right after a backslash; the next byte is then skipped unconditionally.
    let mut skip_next = false;
    field_path
        .bytes()
        .enumerate()
        .filter_map(|(offset, byte)| {
            if skip_next {
                skip_next = false;
                return None;
            }
            match byte {
                b'\\' => {
                    skip_next = true;
                    None
                }
                b'.' => Some(offset),
                _ => None,
            }
        })
        .collect()
}

impl QueryParser {
/// Creates a `QueryParser`, given
/// * schema - index Schema
Expand All @@ -230,34 +204,19 @@ impl QueryParser {
default_fields: Vec<Field>,
tokenizer_manager: TokenizerManager,
) -> QueryParser {
let field_names = schema
.fields()
.map(|(field, field_entry)| (field_entry.name().to_string(), field))
.collect();
QueryParser {
schema,
default_fields,
tokenizer_manager,
conjunction_by_default: false,
boost: Default::default(),
field_names,
}
}

// Splits a full_path as written in a query, into a field name and a
// json path.
pub(crate) fn split_full_path<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> {
if let Some(field) = self.field_names.get(full_path) {
return Some((*field, ""));
}
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos);
if let Some(field) = self.field_names.get(prefix) {
return Some((*field, &suffix[1..]));
}
}
None
self.schema.find_field(full_path)
}

/// Creates a `QueryParser`, given
Expand Down Expand Up @@ -1566,13 +1525,6 @@ mod test {
assert_eq!(query_parser.split_full_path("firsty"), None);
}

#[test]
fn test_locate_splitting_dots() {
    // Each case pairs a field path with the byte offsets of its unescaped dots.
    let cases: [(&str, &[usize]); 3] = [
        ("a.b.c", &[1, 3]),
        (r#"a\.b.c"#, &[4]),
        (r#"a\..b.c"#, &[3, 5]),
    ];
    for (path, expected) in cases.iter() {
        assert_eq!(super::locate_splitting_dots(path), *expected);
    }
}

#[test]
pub fn test_phrase_slop() {
test_parse_query_to_logical_ast_helper(
Expand Down
47 changes: 47 additions & 0 deletions src/schema/schema.rs
Expand Up @@ -252,6 +252,31 @@ impl Eq for InnerSchema {}
#[derive(Clone, Eq, PartialEq, Debug)]
pub struct Schema(Arc<InnerSchema>);

// Collects the byte offsets of every unescaped '.' found in `field_path`.
//
// A backslash escapes the single byte that follows it, so `\.` is never
// reported as a splitting dot.
//
// The scan works on raw bytes instead of codepoints. This relies on an
// encoding property of utf-8: no byte of a multi-byte sequence can be
// mistaken for the ASCII bytes `.` or `\`.
fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
    // Set right after a backslash; the next byte is then skipped unconditionally.
    let mut skip_next = false;
    field_path
        .bytes()
        .enumerate()
        .filter_map(|(offset, byte)| {
            if skip_next {
                skip_next = false;
                return None;
            }
            match byte {
                b'\\' => {
                    skip_next = true;
                    None
                }
                b'.' => Some(offset),
                _ => None,
            }
        })
        .collect()
}

impl Schema {
/// Return the `FieldEntry` associated with a `Field`.
pub fn get_field_entry(&self, field: Field) -> &FieldEntry {
Expand Down Expand Up @@ -358,6 +383,21 @@ impl Schema {
}
Ok(doc)
}

/// Searches the schema for `full_path`, returning the matching `Field` and the remaining JSON path.
boraarslan marked this conversation as resolved.
Show resolved Hide resolved
pub fn find_field<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> {
if let Some(field) = self.0.fields_map.get(full_path) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No unit tests for find_field!? Can we add some?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mostly copied the same unit test on QueryParser. I can remove the test from QueryParser or add more tests to Schema if that's what we want.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes please that would be awesome!

return Some((*field, ""));
}
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path);
while let Some(pos) = splitting_period_pos.pop() {
let (prefix, suffix) = full_path.split_at(pos);
if let Some(field) = self.0.fields_map.get(prefix) {
return Some((*field, &suffix[1..]));
}
}
None
}
}

impl Serialize for Schema {
Expand Down Expand Up @@ -436,6 +476,13 @@ mod tests {
use crate::schema::schema::DocParsingError::InvalidJson;
use crate::schema::*;

#[test]
fn test_locate_splitting_dots() {
    // Each case pairs a field path with the byte offsets of its unescaped dots.
    let cases: [(&str, &[usize]); 3] = [
        ("a.b.c", &[1, 3]),
        (r#"a\.b.c"#, &[4]),
        (r#"a\..b.c"#, &[3, 5]),
    ];
    for (path, expected) in cases.iter() {
        assert_eq!(super::locate_splitting_dots(path), *expected);
    }
}

#[test]
pub fn is_indexed_test() {
let mut schema_builder = Schema::builder();
Expand Down