Skip to content

Commit

Permalink
fix #1151 (#1152)
Browse files Browse the repository at this point in the history
* fix #1151

Fixes a off by one error in the stats for the index fast field in the multi value fast field.
When retrieving the data range for a docid, `get(doc)..get(docid+1)` is requested. On creation
the num_vals statistic was set to doc instead of docid + 1. In the multivaluelinearinterpol fast
field the last value was therefore not serialized (and would return 0 instead in most cases).
So the last document get(lastdoc)..get(lastdoc + 1) would return the invalid range `value..0`.

This PR adds a proptest to cover this scenario. A combination of a large number values, since multilinear
interpolation is only active for more than 5_000 values, and a merge is required.
  • Loading branch information
PSeitz committed Sep 10, 2021
1 parent 319609e commit 3bc177e
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 6 deletions.
2 changes: 2 additions & 0 deletions Cargo.toml
Expand Up @@ -65,6 +65,8 @@ maplit = "1.0.2"
matches = "0.1.8"
proptest = "1.0"
criterion = "0.3.5"
test-env-log = "0.2.7"
env_logger = "0.9.0"

[dev-dependencies.fail]
version = "0.4"
Expand Down
113 changes: 113 additions & 0 deletions src/fastfield/multivalued/mod.rs
Expand Up @@ -8,14 +8,22 @@ pub use self::writer::MultiValuedFastFieldWriter;
mod tests {

use crate::collector::TopDocs;
use crate::indexer::NoMergePolicy;
use crate::query::QueryParser;
use crate::schema::Cardinality;
use crate::schema::Facet;
use crate::schema::IntOptions;
use crate::schema::Schema;
use crate::schema::INDEXED;
use crate::Document;
use crate::Index;
use crate::Term;
use chrono::Duration;
use futures::executor::block_on;
use proptest::prop_oneof;
use proptest::proptest;
use proptest::strategy::Strategy;
use test_env_log::test;

#[test]
fn test_multivalued_u64() {
Expand Down Expand Up @@ -225,6 +233,111 @@ mod tests {
multi_value_reader.get_vals(3, &mut vals);
assert_eq!(&vals, &[-5i64, -20i64, 1i64]);
}

fn test_multivalued_no_panic(ops: &[IndexingOp]) {
let mut schema_builder = Schema::builder();
let field = schema_builder.add_u64_field(
"multifield",
IntOptions::default()
.set_fast(Cardinality::MultiValues)
.set_indexed(),
);
let schema = schema_builder.build();
let index = Index::create_in_ram(schema);
let mut index_writer = index.writer_for_tests().unwrap();
index_writer.set_merge_policy(Box::new(NoMergePolicy));

for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
match id % 3 {
0 => {
index_writer.add_document(doc!());
}
1 => {
let mut doc = Document::new();
for _ in 0..5001 {
doc.add_u64(field, id as u64);
}
index_writer.add_document(doc);
}
_ => {
let mut doc = Document::new();
doc.add_u64(field, id as u64);
index_writer.add_document(doc);
}
};
}
IndexingOp::DeleteDoc { id } => {
index_writer.delete_term(Term::from_field_u64(field, id as u64));
}
IndexingOp::Commit => {
index_writer.commit().unwrap();
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
}
}
}
}

assert!(index_writer.commit().is_ok());

// Merging the segments
{
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if !segment_ids.is_empty() {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.wait_merging_threads().is_ok());
}
}
}

#[derive(Debug, Clone, Copy)]
enum IndexingOp {
AddDoc { id: u32 },
DeleteDoc { id: u32 },
Commit,
Merge,
}

fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
prop_oneof![
(0u32..10u32).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u32..10u32).prop_map(|id| IndexingOp::AddDoc { id }),
(0u32..2u32).prop_map(|_| IndexingOp::Commit),
(0u32..1u32).prop_map(|_| IndexingOp::Merge),
]
}

proptest! {
#[test]
fn test_multivalued_proptest(ops in proptest::collection::vec(operation_strategy(), 1..10)) {
test_multivalued_no_panic(&ops[..]);
}
}

#[test]
fn test_multivalued_proptest_off_by_one_bug_1151() {
use IndexingOp::*;
let ops = [
AddDoc { id: 3 },
AddDoc { id: 1 },
AddDoc { id: 3 },
Commit,
Merge,
];

test_multivalued_no_panic(&ops[..]);
}

#[test]
#[ignore]
fn test_many_facets() {
Expand Down
17 changes: 15 additions & 2 deletions src/indexer/index_writer.rs
Expand Up @@ -1361,13 +1361,15 @@ mod tests {
AddDoc { id: u64 },
DeleteDoc { id: u64 },
Commit,
Merge,
}

fn operation_strategy() -> impl Strategy<Value = IndexingOp> {
prop_oneof![
(0u64..10u64).prop_map(|id| IndexingOp::DeleteDoc { id }),
(0u64..10u64).prop_map(|id| IndexingOp::AddDoc { id }),
(0u64..2u64).prop_map(|_| IndexingOp::Commit),
(0u64..1u64).prop_map(|_| IndexingOp::Merge),
]
}

Expand All @@ -1393,7 +1395,7 @@ mod tests {
fn test_operation_strategy(
ops: &[IndexingOp],
sort_index: bool,
force_merge: bool,
force_end_merge: bool,
) -> crate::Result<()> {
let mut schema_builder = schema::Schema::builder();
let id_field = schema_builder.add_u64_field("id", FAST | INDEXED | STORED);
Expand Down Expand Up @@ -1435,6 +1437,8 @@ mod tests {
.settings(settings)
.create_in_ram()?;
let mut index_writer = index.writer_for_tests()?;
index_writer.set_merge_policy(Box::new(NoMergePolicy));

for &op in ops {
match op {
IndexingOp::AddDoc { id } => {
Expand All @@ -1448,12 +1452,21 @@ mod tests {
IndexingOp::Commit => {
index_writer.commit()?;
}
IndexingOp::Merge => {
let segment_ids = index
.searchable_segment_ids()
.expect("Searchable segments failed.");
if segment_ids.len() >= 2 {
block_on(index_writer.merge(&segment_ids)).unwrap();
assert!(index_writer.segment_updater().wait_merging_thread().is_ok());
}
}
}
}
index_writer.commit()?;

let searcher = index.reader()?.searcher();
if force_merge {
if force_end_merge {
index_writer.wait_merging_threads()?;
let mut index_writer = index.writer_for_tests()?;
let segment_ids = index
Expand Down
9 changes: 5 additions & 4 deletions src/indexer/merger.rs
Expand Up @@ -501,25 +501,26 @@ impl IndexMerger {
//
// This is required by the bitpacker, as it needs to know
// what should be the bit length use for bitpacking.
let mut idx_num_vals = 0;
let mut num_docs = 0;
for (reader, u64s_reader) in reader_and_field_accessors.iter() {
if let Some(delete_bitset) = reader.delete_bitset() {
idx_num_vals += reader.max_doc() as u64 - delete_bitset.len() as u64;
num_docs += reader.max_doc() as u64 - delete_bitset.len() as u64;
for doc in 0u32..reader.max_doc() {
if delete_bitset.is_alive(doc) {
let num_vals = u64s_reader.get_len(doc) as u64;
total_num_vals += num_vals;
}
}
} else {
idx_num_vals += reader.max_doc() as u64;
num_docs += reader.max_doc() as u64;
total_num_vals += u64s_reader.get_total_len();
}
}

let stats = FastFieldStats {
max_value: total_num_vals,
num_vals: idx_num_vals,
// The fastfield offset index contains (num_docs + 1) values.
num_vals: num_docs + 1,
min_value: 0,
};
// We can now create our `idx` serializer, and in a second pass,
Expand Down

0 comments on commit 3bc177e

Please sign in to comment.