From af839753e0b8e6b5faa556626ad49626ed0aedc7 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 26 Oct 2022 09:12:13 +0800 Subject: [PATCH 1/4] No score calls if score is not requested --- CHANGELOG.md | 1 + src/collector/mod.rs | 4 ++-- src/indexer/index_writer.rs | 10 ++++++---- src/query/boolean_query/boolean_weight.rs | 5 +++-- src/query/term_query/term_weight.rs | 3 ++- src/query/weight.rs | 18 ++++++++++++++---- 6 files changed, 28 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2208b8b38e..168c84345e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.19 ================================ +- Skip score calculation, when no scoring is required [#1646](https://github.com/quickwit-oss/tantivy/pull/1646) (@PSeitz) - Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz) - Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz) - Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 81a9315ed4..970affb0ad 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -173,13 +173,13 @@ pub trait Collector: Sync + Send { let mut segment_collector = self.for_segment(segment_ord as u32, reader)?; if let Some(alive_bitset) = reader.alive_bitset() { - weight.for_each(reader, &mut |doc, score| { + weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { if alive_bitset.is_alive(doc) { segment_collector.collect(doc, score); } })?; } else { - weight.for_each(reader, &mut |doc, score| { + weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { segment_collector.collect(doc, score); })?; } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 272300549e..b46809bc5b 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -93,14 +93,16 @@ fn compute_deleted_bitset( // A delete operation should only affect // document that were inserted before it. - delete_op - .target - .for_each(segment_reader, &mut |doc_matching_delete_query, _| { + delete_op.target.for_each( + segment_reader, + false, // requires_scoring + &mut |doc_matching_delete_query, _| { if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) { alive_bitset.remove(doc_matching_delete_query); might_have_changed = true; } - })?; + }, + )?; delete_cursor.advance(); } Ok(might_have_changed) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 859c3dde44..13e0a89d82 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -204,16 +204,17 @@ impl Weight for BooleanWeight crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; match scorer { SpecializedScorer::TermUnion(term_scorers) => { let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); - for_each_scorer(&mut union_scorer, callback); + for_each_scorer(&mut union_scorer, requires_scoring, callback); } SpecializedScorer::Other(mut scorer) => { - for_each_scorer(scorer.as_mut(), callback); + for_each_scorer(scorer.as_mut(), requires_scoring, callback); } } Ok(()) diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 4e742bc444..59fea26485 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -49,10 +49,11 @@ impl Weight for TermWeight { fn for_each( &self, reader: &SegmentReader, + requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.specialized_scorer(reader, 1.0)?; - for_each_scorer(&mut scorer, callback); + for_each_scorer(&mut scorer, requires_scoring, callback); Ok(()) } diff --git a/src/query/weight.rs b/src/query/weight.rs index 210ffb30f4..8e57b1dc52 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -7,12 +7,21 @@ use crate::{DocId, Score, TERMINATED}; /// `DocSet` and push the scored documents to the collector. pub(crate) fn for_each_scorer( scorer: &mut TScorer, + requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) { let mut doc = scorer.doc(); - while doc != TERMINATED { - callback(doc, scorer.score()); - doc = scorer.advance(); + + if requires_scoring { + while doc != TERMINATED { + callback(doc, scorer.score()); + doc = scorer.advance(); + } + } else { + while doc != TERMINATED { + callback(doc, 0.0); + doc = scorer.advance(); + } } } @@ -71,10 +80,11 @@ pub trait Weight: Send + Sync + 'static { fn for_each( &self, reader: &SegmentReader, + requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.scorer(reader, 1.0)?; - for_each_scorer(scorer.as_mut(), callback); + for_each_scorer(scorer.as_mut(), requires_scoring, callback); Ok(()) } From dfab201191c5979c7263bc7d77fe1ab34a750be8 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 26 Oct 2022 17:23:56 +0800 Subject: [PATCH 2/4] for_each_docset to iterate without score --- src/collector/mod.rs | 34 +++++++++++++----- src/indexer/index_writer.rs | 10 +++--- src/query/boolean_query/boolean_weight.rs | 25 ++++++++++--- src/query/term_query/term_weight.rs | 17 +++++++-- src/query/weight.rs | 44 ++++++++++++++--------- 5 files changed, 92 insertions(+), 38 deletions(-) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 970affb0ad..b0a08d48da 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -172,17 +172,33 @@ pub trait Collector: Sync + Send { ) -> crate::Result<::Fruit> { let mut segment_collector = self.for_segment(segment_ord as u32, reader)?; - if let Some(alive_bitset) = reader.alive_bitset() { - weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { - if alive_bitset.is_alive(doc) { + match (reader.alive_bitset(), self.requires_scoring()) { + (Some(alive_bitset), true) => { + weight.for_each(reader, &mut |doc, score| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, score); + } + })?; + } + (Some(alive_bitset), false) => { + weight.for_each_no_score(reader, &mut |doc| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, 0.0); + } + })?; + } + (None, true) => { + weight.for_each(reader, &mut |doc, score| { segment_collector.collect(doc, score); - } - })?; - } else { - weight.for_each(reader, self.requires_scoring(), &mut |doc, score| { - segment_collector.collect(doc, score); - })?; + })?; + } + (None, false) => { + weight.for_each_no_score(reader, &mut |doc| { + segment_collector.collect(doc, 0.0); + })?; + } } + Ok(segment_collector.harvest()) } } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index b46809bc5b..9869cd08a5 100644 --- a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -93,16 +93,14 @@ fn compute_deleted_bitset( // A delete operation should only affect // document that were inserted before it. - delete_op.target.for_each( - segment_reader, - false, // requires_scoring - &mut |doc_matching_delete_query, _| { + delete_op + .target + .for_each_no_score(segment_reader, &mut |doc_matching_delete_query| { if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) { alive_bitset.remove(doc_matching_delete_query); might_have_changed = true; } - }, - )?; + })?; delete_cursor.advance(); } Ok(might_have_changed) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 13e0a89d82..dd43270d77 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -5,7 +5,7 @@ use crate::postings::FreqReadingOption; use crate::query::explanation::does_not_match; use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::term_query::TermScorer; -use crate::query::weight::{for_each_pruning_scorer, for_each_scorer}; +use crate::query::weight::{for_each_docset, for_each_pruning_scorer, for_each_scorer}; use crate::query::{ intersect_scorers, EmptyScorer, Exclude, Explanation, Occur, RequiredOptionalScorer, Scorer, Union, Weight, @@ -204,17 +204,34 @@ impl Weight for BooleanWeight crate::Result<()> { let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; match scorer { SpecializedScorer::TermUnion(term_scorers) => { let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); - for_each_scorer(&mut union_scorer, requires_scoring, callback); + for_each_scorer(&mut union_scorer, callback); } SpecializedScorer::Other(mut scorer) => { - for_each_scorer(scorer.as_mut(), requires_scoring, callback); + for_each_scorer(scorer.as_mut(), callback); + } + } + Ok(()) + } + + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; + match scorer { + SpecializedScorer::TermUnion(term_scorers) => { + let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); + for_each_docset(&mut union_scorer, callback); + } + SpecializedScorer::Other(mut scorer) => { + for_each_docset(scorer.as_mut(), callback); } } Ok(()) diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 59fea26485..abe1835dc8 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -5,7 +5,7 @@ use crate::fieldnorm::FieldNormReader; use crate::postings::SegmentPostings; use crate::query::bm25::Bm25Weight; use crate::query::explanation::does_not_match; -use crate::query::weight::for_each_scorer; +use crate::query::weight::{for_each_docset, for_each_scorer}; use crate::query::{Explanation, Scorer, Weight}; use crate::schema::IndexRecordOption; use crate::{DocId, Score, Term}; @@ -49,11 +49,22 @@ impl Weight for TermWeight { fn for_each( &self, reader: &SegmentReader, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.specialized_scorer(reader, 1.0)?; - for_each_scorer(&mut scorer, requires_scoring, callback); + for_each_scorer(&mut scorer, callback); + Ok(()) + } + + /// Iterates through all of the document matched by the DocSet + /// `DocSet` and push the scored documents to the collector. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut scorer = self.specialized_scorer(reader, 1.0)?; + for_each_docset(&mut scorer, callback); Ok(()) } diff --git a/src/query/weight.rs b/src/query/weight.rs index 8e57b1dc52..cb4c7af957 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -1,27 +1,28 @@ use super::Scorer; use crate::core::SegmentReader; use crate::query::Explanation; -use crate::{DocId, Score, TERMINATED}; +use crate::{DocId, DocSet, Score, TERMINATED}; -/// Iterates through all of the document matched by the DocSet -/// `DocSet` and push the scored documents to the collector. +/// Iterates through all of the documents and scores matched by the DocSet +/// `DocSet`. pub(crate) fn for_each_scorer( scorer: &mut TScorer, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) { let mut doc = scorer.doc(); + while doc != TERMINATED { + callback(doc, scorer.score()); + doc = scorer.advance(); + } +} - if requires_scoring { - while doc != TERMINATED { - callback(doc, scorer.score()); - doc = scorer.advance(); - } - } else { - while doc != TERMINATED { - callback(doc, 0.0); - doc = scorer.advance(); - } +/// Iterates through all of the documents matched by the DocSet +/// `DocSet`. +pub(crate) fn for_each_docset(scorer: &mut T, callback: &mut dyn FnMut(DocId)) { + let mut doc = scorer.doc(); + while doc != TERMINATED { + callback(doc); + doc = scorer.advance(); } } @@ -80,11 +81,22 @@ pub trait Weight: Send + Sync + 'static { fn for_each( &self, reader: &SegmentReader, - requires_scoring: bool, callback: &mut dyn FnMut(DocId, Score), ) -> crate::Result<()> { let mut scorer = self.scorer(reader, 1.0)?; - for_each_scorer(scorer.as_mut(), requires_scoring, callback); + for_each_scorer(scorer.as_mut(), callback); + Ok(()) + } + + /// Iterates through all of the document matched by the DocSet + /// `DocSet` and push the scored documents to the collector. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut scorer = self.scorer(reader, 1.0)?; + for_each_docset(scorer.as_mut(), callback); Ok(()) } From 43df356010bfdd12adb8fa5a5d3e820604f7b696 Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Thu, 27 Oct 2022 16:53:38 +0800 Subject: [PATCH 3/4] rename to docset --- src/query/weight.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/query/weight.rs b/src/query/weight.rs index cb4c7af957..19a12b39a6 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -18,11 +18,11 @@ pub(crate) fn for_each_scorer( /// Iterates through all of the documents matched by the DocSet /// `DocSet`. -pub(crate) fn for_each_docset(scorer: &mut T, callback: &mut dyn FnMut(DocId)) { - let mut doc = scorer.doc(); +pub(crate) fn for_each_docset(docset: &mut T, callback: &mut dyn FnMut(DocId)) { + let mut doc = docset.doc(); while doc != TERMINATED { callback(doc); - doc = scorer.advance(); + doc = docset.advance(); } } @@ -95,8 +95,8 @@ pub trait Weight: Send + Sync + 'static { reader: &SegmentReader, callback: &mut dyn FnMut(DocId), ) -> crate::Result<()> { - let mut scorer = self.scorer(reader, 1.0)?; - for_each_docset(scorer.as_mut(), callback); + let mut docset = self.scorer(reader, 1.0)?; + for_each_docset(docset.as_mut(), callback); Ok(()) } From 2af6b01c177a7a1135d4a6566ea961a43b482d2f Mon Sep 17 00:00:00 2001 From: PSeitz Date: Tue, 1 Nov 2022 08:33:58 +0100 Subject: [PATCH 4/4] Update src/query/boolean_query/boolean_weight.rs Co-authored-by: Paul Masurel --- src/query/boolean_query/boolean_weight.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index dd43270d77..20c4f46710 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -224,7 +224,7 @@ impl Weight for BooleanWeight crate::Result<()> { - let scorer = self.complex_scorer(reader, 1.0, &self.score_combiner_fn)?; + let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?; match scorer { SpecializedScorer::TermUnion(term_scorers) => { let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn);