Skip to content

Commit

Permalink
add support for TermSetQuery in query parser (#1683)
Browse files Browse the repository at this point in the history
  • Loading branch information
trinity-1686a committed Nov 17, 2022
1 parent 2a39289 commit e758080
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 4 deletions.
23 changes: 22 additions & 1 deletion query-grammar/src/query_grammar.rs
Expand Up @@ -5,7 +5,8 @@ use combine::parser::range::{take_while, take_while1};
use combine::parser::repeat::escaped;
use combine::parser::Parser;
use combine::{
attempt, choice, eof, many, many1, one_of, optional, parser, satisfy, skip_many1, value,
attempt, between, choice, eof, many, many1, one_of, optional, parser, satisfy, sep_by,
skip_many1, value,
};
use once_cell::sync::Lazy;
use regex::Regex;
Expand Down Expand Up @@ -264,6 +265,17 @@ fn range<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
})
}

/// Parses a term set out of a stream.
///
/// Supports sets like: `IN [val1 val2 val3]`, optionally preceded by a field
/// name as in `abc: IN [a b c]`. An empty list (`IN []`) is accepted.
///
/// NOTE(review): whitespace directly after `[` or directly before `]` does not
/// look like it is tolerated by the `sep_by` separator — confirm before
/// relying on inputs such as `IN [ a b ]`.
fn set<'a>() -> impl Parser<&'a str, Output = UserInputLeaf> {
    // Bracket-delimited list of term values, with `spaces()` as the separator.
    let bracketed_terms = between(char('['), char(']'), sep_by(term_val(), spaces()));

    // The `IN` keyword and the whitespace after it are matched, then dropped;
    // only the list of elements is kept.
    let in_clause = (string("IN"), spaces()).with(bracketed_terms);

    // The field name is optional; `attempt` lets the parser rewind when the
    // input does not start with one.
    let field_prefix = optional(attempt(field_name().skip(spaces())));

    (field_prefix, in_clause).map(|(field, elements)| UserInputLeaf::Set { field, elements })
}

/// Negates `expr` by turning it into a unary clause carrying `Occur::MustNot`.
fn negate(expr: UserInputAst) -> UserInputAst {
expr.unary(Occur::MustNot)
}
Expand All @@ -278,6 +290,7 @@ fn leaf<'a>() -> impl Parser<&'a str, Output = UserInputAst> {
string("NOT").skip(spaces1()).with(leaf()).map(negate),
))
.or(attempt(range().map(UserInputAst::from)))
.or(attempt(set().map(UserInputAst::from)))
.or(literal().map(UserInputAst::from))
.parse_stream(input)
.into_result()
Expand Down Expand Up @@ -747,6 +760,14 @@ mod test {
test_parse_query_to_ast_helper("+(a b) +d", "(+(*\"a\" *\"b\") +\"d\")");
}

#[test]
fn test_parse_test_query_set() {
    // Each entry pairs a query string with the expected string handed to the
    // AST helper: multi-element, single-element, empty, and field-less sets.
    let cases = [
        ("abc: IN [a b c]", r#""abc": IN ["a" "b" "c"]"#),
        ("abc: IN [1]", r#""abc": IN ["1"]"#),
        ("abc: IN []", r#""abc": IN []"#),
        ("IN [1 2]", r#"IN ["1" "2"]"#),
    ];
    for &(query, expected_ast) in cases.iter() {
        test_parse_query_to_ast_helper(query, expected_ast);
    }
}

#[test]
fn test_parse_test_query_other() {
test_parse_query_to_ast_helper("(+a +b) d", "(*(+\"a\" +\"b\") *\"d\")");
Expand Down
17 changes: 17 additions & 0 deletions query-grammar/src/user_input_ast.rs
Expand Up @@ -12,6 +12,10 @@ pub enum UserInputLeaf {
lower: UserInputBound,
upper: UserInputBound,
},
Set {
field: Option<String>,
elements: Vec<String>,
},
}

impl Debug for UserInputLeaf {
Expand All @@ -31,6 +35,19 @@ impl Debug for UserInputLeaf {
upper.display_upper(formatter)?;
Ok(())
}
UserInputLeaf::Set { field, elements } => {
if let Some(ref field) = field {
write!(formatter, "\"{}\": ", field)?;
}
write!(formatter, "IN [")?;
for (i, element) in elements.iter().enumerate() {
if i != 0 {
write!(formatter, " ")?;
}
write!(formatter, "\"{}\"", element)?;
}
write!(formatter, "]")
}
UserInputLeaf::All => write!(formatter, "*"),
}
}
Expand Down
26 changes: 26 additions & 0 deletions src/query/query_parser/logical_ast.rs
Expand Up @@ -15,6 +15,11 @@ pub enum LogicalLiteral {
lower: Bound<Term>,
upper: Bound<Term>,
},
Set {
field: Field,
value_type: Type,
elements: Vec<Term>,
},
All,
}

Expand Down Expand Up @@ -87,6 +92,27 @@ impl fmt::Debug for LogicalLiteral {
ref upper,
..
} => write!(formatter, "({:?} TO {:?})", lower, upper),
LogicalLiteral::Set { ref elements, .. } => {
const MAX_DISPLAYED: usize = 10;

write!(formatter, "IN [")?;
for (i, element) in elements.iter().enumerate() {
if i == 0 {
write!(formatter, "{:?}", element)?;
} else if i == MAX_DISPLAYED - 1 {
write!(
formatter,
", {:?}, ... ({} more)",
element,
elements.len() - i - 1
)?;
break;
} else {
write!(formatter, ", {:?}", element)?;
}
}
write!(formatter, "]")
}
LogicalLiteral::All => write!(formatter, "*"),
}
}
Expand Down
53 changes: 52 additions & 1 deletion src/query/query_parser/query_parser.rs
Expand Up @@ -13,7 +13,7 @@ use crate::indexer::{
};
use crate::query::{
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, Occur, PhraseQuery, Query, RangeQuery,
TermQuery,
TermQuery, TermSetQuery,
};
use crate::schema::{
Facet, FacetParseError, Field, FieldType, IndexRecordOption, IntoIpv6Addr, Schema, Term, Type,
Expand Down Expand Up @@ -685,6 +685,31 @@ impl QueryParser {
}));
Ok(logical_ast)
}
UserInputLeaf::Set {
field: full_field_opt,
elements,
} => {
let full_path = full_field_opt.ok_or_else(|| {
QueryParserError::UnsupportedQuery(
"Set query need to target a specific field.".to_string(),
)
})?;
let (field, json_path) = self
.split_full_path(&full_path)
.ok_or_else(|| QueryParserError::FieldDoesNotExist(full_path.clone()))?;
let field_entry = self.schema.get_field_entry(field);
let value_type = field_entry.field_type().value_type();
let logical_ast = LogicalAst::Leaf(Box::new(LogicalLiteral::Set {
elements: elements
.into_iter()
.map(|element| self.compute_boundary_term(field, json_path, &element))
.collect::<Result<Vec<_>, _>>()?,

field,
value_type,
}));
Ok(logical_ast)
}
}
}
}
Expand All @@ -703,6 +728,7 @@ fn convert_literal_to_query(logical_literal: LogicalLiteral) -> Box<dyn Query> {
} => Box::new(RangeQuery::new_term_bounds(
field, value_type, &lower, &upper,
)),
LogicalLiteral::Set { elements, .. } => Box::new(TermSetQuery::new(elements)),
LogicalLiteral::All => Box::new(AllQuery),
}
}
Expand Down Expand Up @@ -1563,4 +1589,29 @@ mod test {
false,
);
}

#[test]
pub fn test_term_set_query() {
    // Each entry pairs a set query with the expected logical AST string,
    // covering str, bytes, signed integer and float fields.
    let cases = [
        (
            "title: IN [a b cd]",
            r#"IN [Term(type=Str, field=0, "a"), Term(type=Str, field=0, "b"), Term(type=Str, field=0, "cd")]"#,
        ),
        (
            "bytes: IN [AA== ABA= ABCD]",
            r#"IN [Term(type=Bytes, field=12, [0]), Term(type=Bytes, field=12, [0, 16]), Term(type=Bytes, field=12, [0, 16, 131])]"#,
        ),
        (
            "signed: IN [1 2 -3]",
            r#"IN [Term(type=I64, field=2, 1), Term(type=I64, field=2, 2), Term(type=I64, field=2, -3)]"#,
        ),
        (
            "float: IN [1.1 2.2 -3.3]",
            r#"IN [Term(type=F64, field=10, 1.1), Term(type=F64, field=10, 2.2), Term(type=F64, field=10, -3.3)]"#,
        ),
    ];
    for &(query, expected_logical_ast) in cases.iter() {
        test_parse_query_to_logical_ast_helper(query, expected_logical_ast, false);
    }
}
}
30 changes: 28 additions & 2 deletions src/query/set_query.rs
Expand Up @@ -101,9 +101,8 @@ impl Automaton for SetDfaWrapper {

#[cfg(test)]
mod tests {

use crate::collector::TopDocs;
use crate::query::TermSetQuery;
use crate::query::{QueryParser, TermSetQuery};
use crate::schema::{Schema, TEXT};
use crate::{assert_nearly_equals, Index, Term};

Expand Down Expand Up @@ -215,4 +214,31 @@ mod tests {

Ok(())
}

#[test]
fn test_term_set_query_parser() -> crate::Result<()> {
    // Schema with a single text field, backing an in-RAM index.
    let schema = {
        let mut builder = Schema::builder();
        builder.add_text_field("field", TEXT);
        builder.build()
    };
    let index = Index::create_in_ram(schema.clone());
    let mut writer = index.writer_for_tests()?;
    let field = schema.get_field("field").unwrap();

    // Index three documents, one value each, then commit them.
    for &text in ["val1", "val2", "val3"].iter() {
        writer.add_document(doc!(
            field => text,
        ))?;
    }
    writer.commit()?;

    let reader = index.reader()?;
    let searcher = reader.searcher();

    // The set names two of the three indexed values, so two hits are expected.
    let parser = QueryParser::for_index(&index, vec![]);
    let set_query = parser.parse_query("field: IN [val1 val2]")?;
    let hits = searcher.search(&set_query, &TopDocs::with_limit(3))?;
    assert_eq!(hits.len(), 2);
    Ok(())
}
}

0 comments on commit e758080

Please sign in to comment.