New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Move `split_full_path` to `Schema`
#1692
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -252,6 +252,31 @@ impl Eq for InnerSchema {} | |
#[derive(Clone, Eq, PartialEq, Debug)] | ||
pub struct Schema(Arc<InnerSchema>); | ||
|
||
/// Collects the byte offsets of every unescaped `.` in `field_path`.
///
/// A backslash escapes the byte that immediately follows it, so the `.` in
/// `\.` is not reported, and the second backslash of `\\` does not itself
/// start an escape. Offsets are returned in ascending order.
///
/// Scanning bytes rather than chars is sound because UTF-8 never reuses an
/// ASCII byte value (such as `\` or `.`) inside a multi-byte sequence.
fn locate_splitting_dots(field_path: &str) -> Vec<usize> {
    let mut skip_next = false;
    field_path
        .bytes()
        .enumerate()
        .filter_map(|(offset, byte)| {
            if skip_next {
                // This byte was escaped by the preceding backslash.
                skip_next = false;
                return None;
            }
            match byte {
                b'\\' => {
                    skip_next = true;
                    None
                }
                b'.' => Some(offset),
                _ => None,
            }
        })
        .collect()
}
|
||
impl Schema { | ||
/// Return the `FieldEntry` associated with a `Field`. | ||
pub fn get_field_entry(&self, field: Field) -> &FieldEntry { | ||
|
@@ -358,6 +383,28 @@ impl Schema { | |
} | ||
Ok(doc) | ||
} | ||
|
||
/// Searches for a full_path in the schema, returning the matching `Field` and a JSON path. | ||
/// | ||
/// This function first checks whether a field exists for the exact given full_path, in | ||
/// which case the returned JSON path is empty. | ||
/// If it does not, it splits the full_path at non-escaped '.' chars and tries to match the | ||
/// prefix with the field names, favoring the longest field names. The text after the | ||
/// matching '.' is returned as the JSON path; it is a raw slice of `full_path`, so any | ||
/// escape characters it contains are left as-is. | ||
/// | ||
/// Returns `None` if neither the full_path nor any of its prefixes is a field name. | ||
/// | ||
/// This does not check if the field is a JSON field. It is possible for this function to | ||
/// return a non-empty JSON path with a non-JSON field. | ||
pub fn find_field<'a>(&self, full_path: &'a str) -> Option<(Field, &'a str)> { | ||
// Exact match: the whole path is a field name. | ||
if let Some(field) = self.0.fields_map.get(full_path) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No unit tests for find_field!? Can we add some? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mostly copied the same unit test on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes please that would be awesome! |
||
return Some((*field, "")); | ||
} | ||
let mut splitting_period_pos: Vec<usize> = locate_splitting_dots(full_path); | ||
// Offsets are in ascending order, so popping from the back tries the longest prefix first. | ||
while let Some(pos) = splitting_period_pos.pop() { | ||
let (prefix, suffix) = full_path.split_at(pos); | ||
if let Some(field) = self.0.fields_map.get(prefix) { | ||
// `suffix` begins with the one-byte '.' separator; skip it. | ||
return Some((*field, &suffix[1..])); | ||
} | ||
} | ||
None | ||
} | ||
} | ||
|
||
impl Serialize for Schema { | ||
|
@@ -436,6 +483,13 @@ mod tests { | |
use crate::schema::schema::DocParsingError::InvalidJson; | ||
use crate::schema::*; | ||
|
||
#[test]
fn test_locate_splitting_dots() {
    // Each case pairs a path with the byte offsets of its unescaped dots.
    let cases: [(&str, Vec<usize>); 3] = [
        ("a.b.c", vec![1, 3]),
        (r"a\.b.c", vec![4]),
        (r"a\..b.c", vec![3, 5]),
    ];
    for (path, expected_offsets) in &cases {
        assert_eq!(&super::locate_splitting_dots(path), expected_offsets);
    }
}
|
||
#[test] | ||
pub fn is_indexed_test() { | ||
let mut schema_builder = Schema::builder(); | ||
|
@@ -936,4 +990,46 @@ mod tests { | |
]"#; | ||
assert_eq!(schema_json, expected); | ||
} | ||
|
||
#[test]
fn test_find_field() {
    let mut schema_builder = Schema::builder();
    schema_builder.add_json_field("foo", STRING);
    schema_builder.add_text_field("bar", STRING);
    schema_builder.add_text_field("foo.bar", STRING);
    schema_builder.add_text_field("foo.bar.baz", STRING);
    schema_builder.add_text_field("bar.a.b.c", STRING);
    let schema = schema_builder.build();

    // (queried path, name of the field that must match, remaining JSON path)
    let matching_paths = [
        // A path that is exactly a field name matches with an empty JSON path.
        ("foo.bar", "foo.bar", ""),
        ("foo.bar.baz", "foo.bar.baz", ""),
        // Otherwise the longest registered prefix wins, even when that field
        // is not a JSON field.
        ("foo.bar.bar", "foo.bar", "bar"),
        ("foo.toto", "foo", "toto"),
        ("bar.toto.titi", "bar", "toto.titi"),
    ];
    for &(full_path, field_name, json_path) in &matching_paths {
        let expected_field = schema.get_field(field_name).unwrap();
        assert_eq!(
            schema.find_field(full_path),
            Some((expected_field, json_path)),
            "full_path: {:?}",
            full_path
        );
    }

    // Paths for which neither the whole path nor any prefix is a field name.
    let unknown_paths = [
        "hello",
        "",
        "thiswouldbeareallylongfieldname",
        "baz.bar.foo",
    ];
    for &full_path in &unknown_paths {
        assert_eq!(schema.find_field(full_path), None, "full_path: {:?}", full_path);
    }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to improve the spec and avoid this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might be. I changed the function to continue when the prefix is not a JSON field, and all of the tests passed (probably does not mean it is completely ok to change), but I think it might be better to open a separate PR for it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you open a ticket?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
#1699