Skip to content

Commit

Permalink
Handle escaped dot in json path in the QueryParser. (#1682)
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Nov 15, 2022
1 parent ca62311 commit 2a39289
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 6 deletions.
83 changes: 80 additions & 3 deletions src/indexer/json_term_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,39 @@ pub struct JsonTermWriter<'a> {
path_stack: Vec<usize>,
}

/// Breaks a query-parser json path into its segments, honouring `\` as an
/// escape character.
///
/// An unescaped `.` ends the current segment. A `\` is dropped and the
/// character that follows it is copied verbatim into the current segment,
/// so `\.` yields a literal dot and `\\` yields a literal backslash.
///
/// Examples:
/// - `k8s.node` becomes `["k8s", "node"]`.
/// - `k8s\.node` becomes `["k8s.node"]`.
///
/// The result always holds at least one segment (possibly empty); a `\`
/// that is the last character of the input contributes nothing.
fn split_json_path(json_path: &str) -> Vec<String> {
    let mut segments: Vec<String> = Vec::new();
    let mut current = String::new();
    let mut chars = json_path.chars();
    while let Some(ch) = chars.next() {
        if ch == '\\' {
            // Consume the escaped character right away; nothing follows a
            // trailing backslash, in which case there is nothing to copy.
            if let Some(escaped) = chars.next() {
                current.push(escaped);
            }
        } else if ch == '.' {
            // Segment boundary: hand over the buffer and start a fresh one.
            segments.push(std::mem::take(&mut current));
        } else {
            current.push(ch);
        }
    }
    segments.push(current);
    segments
}

impl<'a> JsonTermWriter<'a> {
pub fn from_field_and_json_path(
field: Field,
Expand All @@ -269,8 +302,8 @@ impl<'a> JsonTermWriter<'a> {
) -> Self {
term_buffer.set_field_and_type(field, Type::Json);
let mut json_term_writer = Self::wrap(term_buffer);
for segment in json_path.split('.') {
json_term_writer.push_path_segment(segment);
for segment in split_json_path(json_path) {
json_term_writer.push_path_segment(&segment);
}
json_term_writer
}
Expand Down Expand Up @@ -350,7 +383,7 @@ impl<'a> JsonTermWriter<'a> {

#[cfg(test)]
mod tests {
use super::JsonTermWriter;
use super::{split_json_path, JsonTermWriter};
use crate::schema::{Field, Type};
use crate::Term;

Expand Down Expand Up @@ -495,4 +528,48 @@ mod tests {
json_writer.set_str("pink");
assert_eq!(json_writer.path(), b"color\x01hue");
}

/// One unescaped dot produces exactly two segments.
#[test]
fn test_split_json_path_simple() {
    assert_eq!(split_json_path("titi.toto"), ["titi", "toto"]);
}

/// A path without any dot is returned as a single segment.
#[test]
fn test_split_json_path_single_segment() {
    assert_eq!(split_json_path("toto"), ["toto"]);
}

/// A dot at the very end leaves an empty final segment.
#[test]
fn test_split_json_path_trailing_dot() {
    assert_eq!(split_json_path("toto."), ["toto", ""]);
}

/// A dot at the very start leaves an empty first segment.
#[test]
fn test_split_json_path_heading_dot() {
    assert_eq!(split_json_path(".toto"), ["", "toto"]);
}

/// Dots preceded by a backslash stay inside the segment instead of
/// splitting it.
#[test]
fn test_split_json_path_escaped_dot() {
    let cases = [
        (r#"toto\.titi"#, "toto.titi"),
        (r#"k8s\.container\.name"#, "k8s.container.name"),
    ];
    for (raw_path, expected_segment) in cases {
        assert_eq!(split_json_path(raw_path), [expected_segment]);
    }
}

/// A doubled backslash collapses into one literal backslash.
#[test]
fn test_split_json_path_escaped_backslash() {
    assert_eq!(split_json_path(r#"toto\\titi"#), [r#"toto\titi"#]);
}

/// Escaping an ordinary letter just drops the backslash.
#[test]
fn test_split_json_path_escaped_normal_letter() {
    assert_eq!(split_json_path(r#"toto\titi"#), [r#"tototiti"#]);
}
}
27 changes: 24 additions & 3 deletions src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,15 @@ type AddBatchReceiver = channel::Receiver<AddBatch>;
#[cfg(feature = "mmap")]
#[cfg(test)]
mod tests_mmap {
use crate::schema::{self, Schema};
use crate::collector::Count;
use crate::query::QueryParser;
use crate::schema::{Schema, STORED, TEXT};
use crate::{Index, Term};

#[test]
fn test_advance_delete_bug() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
let text_field = schema_builder.add_text_field("text", schema::TEXT);
let text_field = schema_builder.add_text_field("text", TEXT);
let index = Index::create_from_tempdir(schema_builder.build())?;
let mut index_writer = index.writer_for_tests()?;
// there must be one deleted document in the segment
Expand All @@ -75,7 +77,26 @@ mod tests_mmap {
index_writer.add_document(doc!(text_field=>"c"))?;
}
index_writer.commit()?;
index_writer.commit()?;
Ok(())
}

/// End-to-end check that a JSON key containing literal dots can be queried
/// by escaping those dots with `\.` in the query string.
#[test]
fn test_json_field_espace() {
// Schema with a single JSON field named "json".
let mut schema_builder = Schema::builder();
let json_field = schema_builder.add_json_field("json", TEXT | STORED);
let index = Index::create_in_ram(schema_builder.build());
let mut index_writer = index.writer_for_tests().unwrap();
// The key "k8s.container.name" is a single key that itself contains dots.
let json = serde_json::json!({"k8s.container.name": "prometheus", "val": "hello"});
index_writer.add_document(doc!(json_field=>json)).unwrap();
index_writer.commit().unwrap();
// The reader is opened after the commit above.
let reader = index.reader().unwrap();
let searcher = reader.searcher();
assert_eq!(searcher.num_docs(), 1);
let parse_query = QueryParser::for_index(&index, Vec::new());
// `json.` selects the field; the escaped dots keep the rest as one path segment.
let query = parse_query
.parse_query(r#"json.k8s\.container\.name:prometheus"#)
.unwrap();
let num_docs = searcher.search(&query, &Count).unwrap();
// The single indexed document must match the escaped-path query.
assert_eq!(num_docs, 1);
}
}
22 changes: 22 additions & 0 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,28 @@ mod test {
);
}

/// Test helper: parses `query` and returns the value bytes of its term as a
/// UTF-8 string.
///
/// Panics if parsing fails, if the logical AST is not a single leaf, if that
/// leaf is not a term literal, or if the term's value bytes are not valid
/// UTF-8.
fn extract_query_term_json_path(query: &str) -> String {
    let logical_ast = parse_query_to_logical_ast(query, false).unwrap();
    let literal = match logical_ast {
        LogicalAst::Leaf(literal) => literal,
        _ => panic!(),
    };
    let term = match *literal {
        LogicalLiteral::Term(term) => term,
        _ => panic!(),
    };
    let value_str = std::str::from_utf8(term.value_bytes()).unwrap();
    value_str.to_string()
}

/// Unescaped dots in the query path become `\u{1}` separators in the term
/// value, while escaped dots are kept as literal `.` characters.
#[test]
fn test_json_field_query_with_espaced_dot() {
    let unescaped = extract_query_term_json_path(r#"json.k8s.node.name:hello"#);
    assert_eq!(unescaped, "k8s\u{1}node\u{1}name\0shello");

    let escaped = extract_query_term_json_path(r#"json.k8s\.node\.name:hello"#);
    assert_eq!(escaped, "k8s.node.name\0shello");
}

#[test]
fn test_json_field_possibly_a_number() {
test_parse_query_to_logical_ast_helper(
Expand Down

0 comments on commit 2a39289

Please sign in to comment.