Skip to content

Commit

Permalink
fix delta position in merge and index sorting
Browse files Browse the repository at this point in the history
fixes #1125
  • Loading branch information
PSeitz committed Aug 3, 2021
1 parent 85d23e8 commit 96a2239
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 5 deletions.
5 changes: 3 additions & 2 deletions src/indexer/merger.rs
Expand Up @@ -958,12 +958,13 @@ impl IndexMerger {
}
if !doc_id_mapping.is_trivial() {
doc_id_and_positions.sort_unstable_by_key(|&(doc_id, _, _)| doc_id);

for (doc_id, term_freq, positions) in &doc_id_and_positions {
field_serializer.write_doc(*doc_id, *term_freq, positions);
let delta_positions = delta_computer.compute_delta(&positions);
field_serializer.write_doc(*doc_id, *term_freq, delta_positions);
}
doc_id_and_positions.clear();
}

// closing the term.
field_serializer.close_term()?;
}
Expand Down
95 changes: 92 additions & 3 deletions src/indexer/merger_sorted_index_test.rs
@@ -1,6 +1,7 @@
#[cfg(test)]
mod tests {
use crate::fastfield::FastFieldReader;
use crate::schema::IndexRecordOption;
use crate::{
collector::TopDocs,
schema::{Cardinality, TextFieldIndexing},
Expand All @@ -16,7 +17,7 @@ mod tests {
schema::{self, BytesOptions},
DocAddress,
};
use crate::{IndexSettings, Term};
use crate::{DocSet, IndexSettings, Postings, Term};
use futures::executor::block_on;

fn create_test_index_posting_list_issue(index_settings: Option<IndexSettings>) -> Index {
Expand Down Expand Up @@ -106,7 +107,7 @@ mod tests {
);
index_writer.add_document(doc!(int_field=>1_u64, text_field=> "deleteme"));
index_writer.add_document(
doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64),
doc!(int_field=>2_u64, multi_numbers => 2_u64, multi_numbers => 3_u64, text_field => "ok text more text"),
);

assert!(index_writer.commit().is_ok());
Expand Down Expand Up @@ -243,6 +244,24 @@ mod tests {
assert_eq!(do_search("biggest"), vec![0]);
}

// postings file
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let term_a = Term::from_field_text(my_text_field, "text");
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap()
.unwrap();
let mut output = vec![];
postings.positions(&mut output);
assert_eq!(output, vec![1]);
postings.advance();

postings.positions(&mut output);
assert_eq!(output, vec![1, 3]);
}

// access doc store
{
let blubber_pos = if force_disjunct_segment_sort_values {
Expand All @@ -260,6 +279,58 @@ mod tests {
}
}

/// Checks search results and term positions after building the test index with default
/// (unsorted) `IndexSettings`.
///
/// The index is expected to consist of exactly one segment. The test then verifies
/// that
/// * term queries on `text_field` resolve to the expected doc ids, and
/// * the positions stored for the term `"text"` are read back as absolute token
///   positions (`[1]` for the first matching doc, `[1, 3]` for the second one, whose
///   text is "ok text more text").
#[test]
fn test_merge_unsorted_index() {
    // NOTE(review): the second argument is presumably `force_disjunct_segment_sort_values`
    // — confirm against `create_test_index`.
    let index = create_test_index(Some(IndexSettings::default()), false);

    // A single reader/searcher pair is used for both the queries and the postings
    // checks, so every assertion below observes the same view of the index.
    let reader = index.reader().unwrap();
    let searcher = reader.searcher();
    assert_eq!(searcher.segment_readers().len(), 1);
    let segment_reader = searcher.segment_readers().last().unwrap();

    let my_text_field = index.schema().get_field("text_field").unwrap();

    // search: each term must hit exactly the expected document
    {
        let do_search = |term: &str| {
            let query = QueryParser::for_index(&index, vec![my_text_field])
                .parse_query(term)
                .unwrap();
            let top_docs: Vec<(f32, DocAddress)> =
                searcher.search(&query, &TopDocs::with_limit(3)).unwrap();

            top_docs
                .iter()
                .map(|(_score, doc_address)| doc_address.doc_id)
                .collect::<Vec<_>>()
        };

        assert_eq!(do_search("some"), vec![1]);
        assert_eq!(do_search("blubber"), vec![3]);
        assert_eq!(do_search("biggest"), vec![4]);
    }

    // postings file
    {
        let term_a = Term::from_field_text(my_text_field, "text");
        let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
        let mut postings = inverted_index
            .read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
            .unwrap()
            .unwrap();

        // First document containing "text": a single occurrence at position 1.
        let mut output = vec![];
        postings.positions(&mut output);
        assert_eq!(output, vec![1]);

        // Second document ("ok text more text"): occurrences at positions 1 and 3.
        postings.advance();
        postings.positions(&mut output);
        assert_eq!(output, vec![1, 3]);
    }
}

#[test]
fn test_merge_sorted_index_asc() {
let index = create_test_index(
Expand Down Expand Up @@ -314,7 +385,7 @@ mod tests {
let my_text_field = index.schema().get_field("text_field").unwrap();
let fieldnorm_reader = segment_reader.get_fieldnorms_reader(my_text_field).unwrap();
assert_eq!(fieldnorm_reader.fieldnorm(0), 0);
assert_eq!(fieldnorm_reader.fieldnorm(1), 0);
assert_eq!(fieldnorm_reader.fieldnorm(1), 4);
assert_eq!(fieldnorm_reader.fieldnorm(2), 2); // some text
assert_eq!(fieldnorm_reader.fieldnorm(3), 1);
assert_eq!(fieldnorm_reader.fieldnorm(5), 3); // the biggest num
Expand All @@ -339,6 +410,24 @@ mod tests {
assert_eq!(do_search("biggest"), vec![5]);
}

// postings file
{
let my_text_field = index.schema().get_field("text_field").unwrap();
let term_a = Term::from_field_text(my_text_field, "text");
let inverted_index = segment_reader.inverted_index(my_text_field).unwrap();
let mut postings = inverted_index
.read_postings(&term_a, IndexRecordOption::WithFreqsAndPositions)
.unwrap()
.unwrap();
let mut output = vec![];
postings.positions(&mut output);
assert_eq!(output, vec![1, 3]);
postings.advance();

postings.positions(&mut output);
assert_eq!(output, vec![1]);
}

// access doc store
{
let doc = searcher.doc(DocAddress::new(0, 0)).unwrap();
Expand Down

0 comments on commit 96a2239

Please sign in to comment.