// Reconstructed from the patch hunk:
//   diff --git a/src/indexer/flat_map_with_buffer.rs b/src/indexer/flat_map_with_buffer.rs
//   new file mode 100644 (index 0000000000..88b509cdbe)
//
// The generic parameter lists had been stripped from the hunk; they are
// restored here from how each type is used.

/// Initial capacity of the scratch buffer owned by [`FlatMapWithBuffer`].
const INITIAL_BUFFER_CAPACITY: usize = 10;

/// Iterator adaptor created by [`FlatMapWithBufferIter::flat_map_with_buffer`].
///
/// For every item of the underlying iterator, `fill_buffer` is called with
/// that item and a `&mut Vec<T>`; the values it pushes are then yielded one
/// by one, in the order they were pushed. The same `Vec` is handed to every
/// call, so its allocation is reused instead of building a fresh collection
/// per item.
///
/// Type parameters:
/// * `T` - element type produced by the adaptor,
/// * `F` - the buffer-filling closure,
/// * `Iter` - the underlying iterator.
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct FlatMapWithBuffer<T, F, Iter> {
    /// Pending values, stored in reverse order so that `pop` yields them in
    /// the order the closure pushed them.
    buffer: Vec<T>,
    /// Closure that appends the values derived from one underlying item.
    fill_buffer: F,
    /// Source of the items handed to `fill_buffer`.
    underlying_it: Iter,
}

impl<T, F, Iter, I> Iterator for FlatMapWithBuffer<T, F, Iter>
where
    Iter: Iterator<Item = I>,
    F: FnMut(I, &mut Vec<T>),
{
    type Item = T;

    /// Returns the next buffered value, refilling the buffer from the
    /// underlying iterator as often as needed.
    ///
    /// Returns `None` once the buffer is empty and the underlying iterator
    /// is exhausted. Items for which the closure pushes nothing are skipped
    /// rather than ending the iteration.
    fn next(&mut self) -> Option<T> {
        // The closure is only ever invoked with an empty buffer, because the
        // loop condition guarantees all previous values were drained.
        while self.buffer.is_empty() {
            let next_el = self.underlying_it.next()?;
            (self.fill_buffer)(next_el, &mut self.buffer);
            // We will pop elements, so we reverse the buffer first.
            self.buffer.reverse();
        }
        self.buffer.pop()
    }
}

/// Extension trait adding [`flat_map_with_buffer`] to every iterator.
///
/// [`flat_map_with_buffer`]: FlatMapWithBufferIter::flat_map_with_buffer
pub trait FlatMapWithBufferIter: Iterator {
    /// Function similar to `flat_map`, but allows reusing a shared `Vec`.
    ///
    /// `fill_buffer` receives each item together with an empty `Vec` and
    /// pushes the values that item expands to. It may push nothing, in which
    /// case the item contributes no output. Like the closure of
    /// `Iterator::flat_map`, it may carry mutable state (`FnMut`).
    fn flat_map_with_buffer<F, T>(self, fill_buffer: F) -> FlatMapWithBuffer<T, F, Self>
    where
        F: FnMut(Self::Item, &mut Vec<T>),
        Self: Sized,
    {
        FlatMapWithBuffer {
            buffer: Vec::with_capacity(INITIAL_BUFFER_CAPACITY),
            fill_buffer,
            underlying_it: self,
        }
    }
}

impl<T: ?Sized> FlatMapWithBufferIter for T where T: Iterator {}

#[cfg(test)]
mod tests {
    use super::FlatMapWithBufferIter;

    #[test]
    fn test_flat_map_with_buffer_empty() {
        let mut empty_iter = std::iter::empty::<usize>()
            .flat_map_with_buffer(|_val: usize, _buffer: &mut Vec<usize>| {});
        assert!(empty_iter.next().is_none());
    }

    #[test]
    fn test_flat_map_with_buffer_simple() {
        let vals: Vec<usize> = (1..5)
            .flat_map_with_buffer(|val: usize, buffer: &mut Vec<usize>| buffer.extend(0..val))
            .collect();
        assert_eq!(&[0, 0, 1, 0, 1, 2, 0, 1, 2, 3], &vals[..]);
    }

    #[test]
    fn test_flat_map_filling_no_elements_does_not_stop_iterator() {
        // `vec!` keeps `into_iter()` yielding owned values on every edition.
        let vals: Vec<usize> = vec![2, 0, 0, 3]
            .into_iter()
            .flat_map_with_buffer(|val: usize, buffer: &mut Vec<usize>| buffer.extend(0..val))
            .collect();
        assert_eq!(&[0, 1, 0, 1, 2], &vals[..]);
    }

    #[test]
    fn test_flat_map_with_buffer_accepts_stateful_closure() {
        let mut num_calls = 0usize;
        let vals: Vec<usize> = (0..3)
            .flat_map_with_buffer(|val: usize, buffer: &mut Vec<usize>| {
                num_calls += 1;
                buffer.push(val);
            })
            .collect();
        assert_eq!(&[0, 1, 2], &vals[..]);
        assert_eq!(num_calls, 3);
    }
}

// Start of the next patch header, continued on the following line:
// diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index fa4db5e660..c557350cf1 100644 ---
a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -3,6 +3,7 @@ pub mod delete_queue; pub mod demuxer; pub mod doc_id_mapping; mod doc_opstamp_mapping; +mod flat_map_with_buffer; pub mod index_writer; mod index_writer_status; mod json_term_writer; diff --git a/src/indexer/sorted_doc_id_multivalue_column.rs b/src/indexer/sorted_doc_id_multivalue_column.rs index 1e126af815..45df329f54 100644 --- a/src/indexer/sorted_doc_id_multivalue_column.rs +++ b/src/indexer/sorted_doc_id_multivalue_column.rs @@ -2,10 +2,11 @@ use std::cmp; use fastfield_codecs::Column; +use super::flat_map_with_buffer::FlatMapWithBufferIter; use crate::fastfield::MultiValuedFastFieldReader; use crate::indexer::doc_id_mapping::SegmentDocIdMapping; use crate::schema::Field; -use crate::SegmentReader; +use crate::{DocAddress, SegmentReader}; pub(crate) struct RemappedDocIdMultiValueColumn<'a> { doc_id_mapping: &'a SegmentDocIdMapping, @@ -74,11 +75,9 @@ impl<'a> Column for RemappedDocIdMultiValueColumn<'a> { Box::new( self.doc_id_mapping .iter_old_doc_addrs() - .flat_map(|old_doc_addr| { + .flat_map_with_buffer(|old_doc_addr: DocAddress, buffer| { let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; - let mut vals = Vec::new(); - ff_reader.get_vals(old_doc_addr.doc_id, &mut vals); - vals.into_iter() + ff_reader.get_vals(old_doc_addr.doc_id, buffer); }), ) }