diff --git a/CHANGELOG.md b/CHANGELOG.md index 2208b8b38e..168c84345e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ Tantivy 0.19 ================================ +- Skip score calculation, when no scoring is required [#1646](https://github.com/quickwit-oss/tantivy/pull/1646) (@PSeitz) - Limit fast fields to u32 (`get_val(u32)`) [#1644](https://github.com/quickwit-oss/tantivy/pull/1644) (@PSeitz) - Major bugfix: Fix missing fieldnorms for u64, i64, f64, bool, bytes and date [#1620](https://github.com/quickwit-oss/tantivy/pull/1620) (@PSeitz) - Updated [Date Field Type](https://github.com/quickwit-oss/tantivy/pull/1396) diff --git a/src/collector/mod.rs b/src/collector/mod.rs index 81a9315ed4..b0a08d48da 100644 --- a/src/collector/mod.rs +++ b/src/collector/mod.rs @@ -172,17 +172,33 @@ pub trait Collector: Sync + Send { ) -> crate::Result<::Fruit> { let mut segment_collector = self.for_segment(segment_ord as u32, reader)?; - if let Some(alive_bitset) = reader.alive_bitset() { - weight.for_each(reader, &mut |doc, score| { - if alive_bitset.is_alive(doc) { + match (reader.alive_bitset(), self.requires_scoring()) { + (Some(alive_bitset), true) => { + weight.for_each(reader, &mut |doc, score| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, score); + } + })?; + } + (Some(alive_bitset), false) => { + weight.for_each_no_score(reader, &mut |doc| { + if alive_bitset.is_alive(doc) { + segment_collector.collect(doc, 0.0); + } + })?; + } + (None, true) => { + weight.for_each(reader, &mut |doc, score| { segment_collector.collect(doc, score); - } - })?; - } else { - weight.for_each(reader, &mut |doc, score| { - segment_collector.collect(doc, score); - })?; + })?; + } + (None, false) => { + weight.for_each_no_score(reader, &mut |doc| { + segment_collector.collect(doc, 0.0); + })?; + } } + Ok(segment_collector.harvest()) } } diff --git a/src/indexer/index_writer.rs b/src/indexer/index_writer.rs index 272300549e..9869cd08a5 100644 --- 
a/src/indexer/index_writer.rs +++ b/src/indexer/index_writer.rs @@ -95,7 +95,7 @@ fn compute_deleted_bitset( // document that were inserted before it. delete_op .target - .for_each(segment_reader, &mut |doc_matching_delete_query, _| { + .for_each_no_score(segment_reader, &mut |doc_matching_delete_query| { if doc_opstamps.is_deleted(doc_matching_delete_query, delete_op.opstamp) { alive_bitset.remove(doc_matching_delete_query); might_have_changed = true; diff --git a/src/query/boolean_query/boolean_weight.rs b/src/query/boolean_query/boolean_weight.rs index 859c3dde44..20c4f46710 100644 --- a/src/query/boolean_query/boolean_weight.rs +++ b/src/query/boolean_query/boolean_weight.rs @@ -5,7 +5,7 @@ use crate::postings::FreqReadingOption; use crate::query::explanation::does_not_match; use crate::query::score_combiner::{DoNothingCombiner, ScoreCombiner}; use crate::query::term_query::TermScorer; -use crate::query::weight::{for_each_pruning_scorer, for_each_scorer}; +use crate::query::weight::{for_each_docset, for_each_pruning_scorer, for_each_scorer}; use crate::query::{ intersect_scorers, EmptyScorer, Exclude, Explanation, Occur, RequiredOptionalScorer, Scorer, Union, Weight, @@ -219,6 +219,24 @@ impl Weight for BooleanWeight crate::Result<()> { + let scorer = self.complex_scorer(reader, 1.0, || DoNothingCombiner)?; + match scorer { + SpecializedScorer::TermUnion(term_scorers) => { + let mut union_scorer = Union::build(term_scorers, &self.score_combiner_fn); + for_each_docset(&mut union_scorer, callback); + } + SpecializedScorer::Other(mut scorer) => { + for_each_docset(scorer.as_mut(), callback); + } + } + Ok(()) + } + /// Calls `callback` with all of the `(doc, score)` for which score /// is exceeding a given threshold. 
/// diff --git a/src/query/term_query/term_weight.rs b/src/query/term_query/term_weight.rs index 4e742bc444..abe1835dc8 100644 --- a/src/query/term_query/term_weight.rs +++ b/src/query/term_query/term_weight.rs @@ -5,7 +5,7 @@ use crate::fieldnorm::FieldNormReader; use crate::postings::SegmentPostings; use crate::query::bm25::Bm25Weight; use crate::query::explanation::does_not_match; -use crate::query::weight::for_each_scorer; +use crate::query::weight::{for_each_docset, for_each_scorer}; use crate::query::{Explanation, Scorer, Weight}; use crate::schema::IndexRecordOption; use crate::{DocId, Score, Term}; @@ -56,6 +56,18 @@ impl Weight for TermWeight { Ok(()) } + /// Iterates through all of the documents matched by this weight's scorer + /// and passes each `DocId` to `callback`, without computing scores. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut scorer = self.specialized_scorer(reader, 1.0)?; + for_each_docset(&mut scorer, callback); + Ok(()) + } + /// Calls `callback` with all of the `(doc, score)` for which score /// is exceeding a given threshold. /// diff --git a/src/query/weight.rs b/src/query/weight.rs index 210ffb30f4..19a12b39a6 100644 --- a/src/query/weight.rs +++ b/src/query/weight.rs @@ -1,10 +1,10 @@ use super::Scorer; use crate::core::SegmentReader; use crate::query::Explanation; -use crate::{DocId, Score, TERMINATED}; +use crate::{DocId, DocSet, Score, TERMINATED}; -/// Iterates through all of the document matched by the DocSet -/// `DocSet` and push the scored documents to the collector. +/// Iterates through all of the documents and scores matched by the DocSet +/// `DocSet`. pub(crate) fn for_each_scorer( scorer: &mut TScorer, callback: &mut dyn FnMut(DocId, Score), @@ -16,6 +16,16 @@ pub(crate) fn for_each_scorer( } } +/// Iterates through all of the documents matched by the DocSet +/// `DocSet`. 
+pub(crate) fn for_each_docset(docset: &mut T, callback: &mut dyn FnMut(DocId)) { + let mut doc = docset.doc(); + while doc != TERMINATED { + callback(doc); + doc = docset.advance(); + } +} + /// Calls `callback` with all of the `(doc, score)` for which score /// is exceeding a given threshold. /// @@ -78,6 +88,18 @@ pub trait Weight: Send + Sync + 'static { Ok(()) } + /// Iterates through all of the documents matched by this weight's scorer + /// and passes each `DocId` to `callback`, without computing scores. + fn for_each_no_score( + &self, + reader: &SegmentReader, + callback: &mut dyn FnMut(DocId), + ) -> crate::Result<()> { + let mut docset = self.scorer(reader, 1.0)?; + for_each_docset(docset.as_mut(), callback); + Ok(()) + } + /// Calls `callback` with all of the `(doc, score)` for which score /// is exceeding a given threshold. ///