-
-
Notifications
You must be signed in to change notification settings - Fork 609
/
term_weight.rs
132 lines (123 loc) · 4.41 KB
/
term_weight.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
use super::term_scorer::TermScorer;
use crate::core::SegmentReader;
use crate::docset::DocSet;
use crate::fieldnorm::FieldNormReader;
use crate::postings::SegmentPostings;
use crate::query::bm25::Bm25Weight;
use crate::query::explanation::does_not_match;
use crate::query::weight::for_each_scorer;
use crate::query::{Explanation, Scorer, Weight};
use crate::schema::IndexRecordOption;
use crate::{DocId, Score, Term};
/// Per-query state needed to build a `TermScorer` for a single term
/// on any segment.
pub struct TermWeight {
/// Term being matched; its field selects the inverted index and the
/// fieldnorm reader, and the term itself selects the postings list.
term: Term,
/// Passed to `read_postings` when the term's postings are loaded.
index_record_option: IndexRecordOption,
/// BM25 weight; it is boosted via `boost_by` before being handed to the
/// scorer.
similarity_weight: Bm25Weight,
/// When `false`, the fieldnorms are not looked up and a constant
/// fieldnorm reader (value `1`) is used instead.
scoring_enabled: bool,
}
impl Weight for TermWeight {
    /// Builds the term scorer for `reader` and returns it as a boxed
    /// `Scorer` trait object.
    fn scorer(&self, reader: &SegmentReader, boost: Score) -> crate::Result<Box<dyn Scorer>> {
        self.specialized_scorer(reader, boost)
            .map(|term_scorer| Box::new(term_scorer) as Box<dyn Scorer>)
    }

    /// Explains the score of `doc` for this term.
    ///
    /// Returns the `does_not_match` error when the scorer cannot be
    /// positioned exactly on `doc`.
    fn explain(&self, reader: &SegmentReader, doc: DocId) -> crate::Result<Explanation> {
        let mut term_scorer = self.specialized_scorer(reader, 1.0)?;
        // `seek` is only attempted when the scorer has not already moved past `doc`.
        let lands_on_doc = term_scorer.doc() <= doc && term_scorer.seek(doc) == doc;
        if !lands_on_doc {
            return Err(does_not_match(doc));
        }
        let mut explanation = term_scorer.explain();
        explanation.add_context(format!("Term={:?}", self.term));
        Ok(explanation)
    }

    /// Counts the documents of `reader` matching this term.
    ///
    /// If the segment exposes an alive bitset, the count is delegated to the
    /// scorer together with that bitset. Otherwise the `doc_freq` recorded in
    /// the term dictionary is returned directly (`0` if the term is absent).
    fn count(&self, reader: &SegmentReader) -> crate::Result<u32> {
        match reader.alive_bitset() {
            Some(alive_bitset) => {
                let mut scorer = self.scorer(reader, 1.0)?;
                Ok(scorer.count(alive_bitset))
            }
            None => {
                let inv_index = reader.inverted_index(self.term.field())?;
                let term_info_opt = inv_index.get_term_info(&self.term)?;
                Ok(term_info_opt.map_or(0, |term_info| term_info.doc_freq))
            }
        }
    }

    /// Runs `callback` through `for_each_scorer` over the documents matched
    /// by this term in `reader`, using a boost of `1.0`.
    fn for_each(
        &self,
        reader: &SegmentReader,
        requires_scoring: bool,
        callback: &mut dyn FnMut(DocId, Score),
    ) -> crate::Result<()> {
        let mut term_scorer = self.specialized_scorer(reader, 1.0)?;
        for_each_scorer(&mut term_scorer, requires_scoring, callback);
        Ok(())
    }

    /// Calls `callback` with the `(doc, score)` pairs whose score exceeds
    /// a given threshold.
    ///
    /// This is the entry point meant for the TopDocs collector. The term
    /// scorer is built with a boost of `1.0` and handed, by value, to the
    /// single-scorer block-WAND routine along with `threshold` and
    /// `callback`.
    fn for_each_pruning(
        &self,
        threshold: Score,
        reader: &SegmentReader,
        callback: &mut dyn FnMut(DocId, Score) -> Score,
    ) -> crate::Result<()> {
        let term_scorer = self.specialized_scorer(reader, 1.0)?;
        crate::query::boolean_query::block_wand_single_scorer(term_scorer, threshold, callback);
        Ok(())
    }
}
impl TermWeight {
    /// Creates a weight for `term` from its already-computed parts.
    pub fn new(
        term: Term,
        index_record_option: IndexRecordOption,
        similarity_weight: Bm25Weight,
        scoring_enabled: bool,
    ) -> TermWeight {
        Self {
            term,
            index_record_option,
            similarity_weight,
            scoring_enabled,
        }
    }

    /// Returns the term this weight was built for.
    pub fn term(&self) -> &Term {
        &self.term
    }

    /// Builds the concrete `TermScorer` for this term on the given segment.
    ///
    /// * The fieldnorm reader of the term's field is used when scoring is
    ///   enabled and the field has one; otherwise a constant reader with
    ///   value `1` is substituted.
    /// * The similarity weight is boosted by `boost`.
    /// * If the term has no postings in this segment, the scorer is built
    ///   over empty postings.
    ///
    /// Errors from opening the inverted index, fetching the fieldnorms, or
    /// reading the postings are propagated.
    pub(crate) fn specialized_scorer(
        &self,
        reader: &SegmentReader,
        boost: Score,
    ) -> crate::Result<TermScorer> {
        let field = self.term.field();
        let inverted_index = reader.inverted_index(field)?;

        // Fieldnorms are only looked up when scoring is enabled.
        let loaded_fieldnorms = if self.scoring_enabled {
            reader.fieldnorms_readers().get_field(field)?
        } else {
            None
        };
        let fieldnorm_reader = match loaded_fieldnorms {
            Some(fieldnorms) => fieldnorms,
            None => FieldNormReader::constant(reader.max_doc(), 1),
        };

        let boosted_weight = self.similarity_weight.boost_by(boost);

        // A term with no postings in this segment yields an empty postings list.
        let segment_postings: SegmentPostings = inverted_index
            .read_postings(&self.term, self.index_record_option)?
            .unwrap_or_else(SegmentPostings::empty);

        Ok(TermScorer::new(
            segment_postings,
            fieldnorm_reader,
            boosted_weight,
        ))
    }
}