Skip to content

Commit

Permalink
Slop support for phrase queries (#1241)
Browse files Browse the repository at this point in the history
Closes #1068
  • Loading branch information
halvorboe committed Mar 7, 2022
1 parent d31f045 commit cedced5
Show file tree
Hide file tree
Showing 4 changed files with 255 additions and 60 deletions.
84 changes: 84 additions & 0 deletions src/query/phrase_query/mod.rs
Expand Up @@ -181,6 +181,90 @@ pub mod tests {
Ok(())
}

/// Scores the two-term phrase `a b` with a slop of 1 against a two-document
/// index and compares the first two returned scores with reference values.
///
/// The test carries `#[ignore]`, so it only runs when ignored tests are
/// explicitly requested.
#[ignore]
#[test]
pub fn test_phrase_score_with_slop() -> crate::Result<()> {
    let index = create_index(&["a c b", "a b c a b"])?;
    let text_field = index.schema().get_field("text").unwrap();
    let searcher = index.reader().unwrap().searcher();

    // Builds a slop-1 phrase query from `words` and returns the collected scores.
    let slop_one_scores = |words: Vec<&str>| {
        let mut terms: Vec<Term> = Vec::with_capacity(words.len());
        for word in words {
            terms.push(Term::from_field_text(text_field, word));
        }
        let mut query = PhraseQuery::new(terms);
        query.set_slop(1);
        let fruit = searcher
            .search(&query, &TEST_COLLECTOR_WITH_SCORE)
            .expect("search should succeed");
        fruit.scores().to_vec()
    };

    let scores = slop_one_scores(vec!["a", "b"]);
    assert_nearly_equals!(scores[0], 0.40618482);
    assert_nearly_equals!(scores[1], 0.46844664);
    Ok(())
}

/// Scores the phrase `a c` with a slop of 3 against three documents in which
/// the two terms are increasingly far apart, and compares the first two
/// returned scores with reference values.
#[test]
pub fn test_phrase_score_with_slop_size() -> crate::Result<()> {
    let index = create_index(&["a b e c", "a e e e c", "a e e e e c"])?;
    let text_field = index.schema().get_field("text").unwrap();
    let searcher = index.reader().unwrap().searcher();

    // Turns the given words into a slop-3 phrase query and collects its scores.
    let slop_three_scores = |words: Vec<&str>| {
        let to_term = |word: &&str| Term::from_field_text(text_field, word);
        let phrase: Vec<Term> = words.iter().map(to_term).collect();
        let mut query = PhraseQuery::new(phrase);
        query.set_slop(3);
        let outcome = searcher.search(&query, &TEST_COLLECTOR_WITH_SCORE);
        outcome.expect("search should succeed").scores().to_vec()
    };

    let scores = slop_three_scores(vec!["a", "c"]);
    assert_nearly_equals!(scores[0], 0.29086056);
    assert_nearly_equals!(scores[1], 0.26706287);
    Ok(())
}

/// Scores the three-term phrase `a b c` with a slop of 3 against documents
/// that contain the terms in various orders and at various distances, and
/// compares the first two returned scores with reference values.
#[test]
pub fn test_phrase_score_with_slop_ordering() -> crate::Result<()> {
    let documents = [
        "a e b e c",
        "a e e e e e b e e e e c",
        "a c b",
        "a c e b e",
        "a e c b",
        "a e b c",
    ];
    let index = create_index(&documents)?;
    let text_field = index.schema().get_field("text").unwrap();
    let searcher = index.reader().unwrap().searcher();

    // Runs a slop-3 phrase query made of `words` and hands back the scores.
    let slop_three_scores = |words: Vec<&str>| {
        let mut query = PhraseQuery::new(
            words
                .iter()
                .map(|word| Term::from_field_text(text_field, word))
                .collect::<Vec<Term>>(),
        );
        query.set_slop(3);
        let collected = searcher
            .search(&query, &TEST_COLLECTOR_WITH_SCORE)
            .expect("search should succeed");
        collected.scores().to_vec()
    };

    let scores = slop_three_scores(vec!["a", "b", "c"]);
    // The first and last matches.
    assert_nearly_equals!(scores[0], 0.23091172);
    assert_nearly_equals!(scores[1], 0.25024384);
    Ok(())
}

#[test] // motivated by #234
pub fn test_phrase_query_docfreq_order() -> crate::Result<()> {
let mut schema_builder = Schema::builder();
Expand Down
17 changes: 12 additions & 5 deletions src/query/phrase_query/phrase_query.rs
Expand Up @@ -23,6 +23,7 @@ use crate::schema::{Field, IndexRecordOption, Term};
pub struct PhraseQuery {
// The field this phrase query is targeting.
field: Field,
// The phrase terms, each paired with a `usize`.
// NOTE(review): the `usize` is presumably the term's offset within the phrase — confirm against the constructor.
phrase_terms: Vec<(usize, Term)>,
// Slop allowed for the phrase; the constructor initializes it to 0 and `set_slop` overwrites it.
slop: u32,
}

impl PhraseQuery {
Expand Down Expand Up @@ -53,9 +54,15 @@ impl PhraseQuery {
PhraseQuery {
field,
phrase_terms: terms,
slop: 0,
}
}

/// Sets the slop allowed for the phrase.
///
/// Overwrites the slop stored on this query; a freshly constructed query
/// starts with a slop of 0.
///
/// NOTE(review): slop is presumably the maximum number of extra positions
/// tolerated between the phrase terms (as with Lucene's phrase slop) —
/// confirm against the scorer before relying on the exact definition.
pub fn set_slop(&mut self, value: u32) {
self.slop = value;
}

/// The `Field` this `PhraseQuery` is targeting.
pub fn field(&self) -> Field {
self.field
Expand Down Expand Up @@ -94,11 +101,11 @@ impl PhraseQuery {
}
let terms = self.phrase_terms();
let bm25_weight = Bm25Weight::for_terms(searcher, &terms)?;
Ok(PhraseWeight::new(
self.phrase_terms.clone(),
bm25_weight,
scoring_enabled,
))
let mut weight = PhraseWeight::new(self.phrase_terms.clone(), bm25_weight, scoring_enabled);
if self.slop > 0 {
weight.slop(self.slop);
}
Ok(weight)
}
}

Expand Down

0 comments on commit cedced5

Please sign in to comment.