From d77cf4d4a6ada99142e92262226ac500731bc08f Mon Sep 17 00:00:00 2001 From: Pascal Seitz Date: Wed, 5 Oct 2022 17:30:41 +0800 Subject: [PATCH] add flat_map_with_buffer to Iterator trait --- src/indexer/flat_map_with_buffer.rs | 59 +++++++++---------- src/indexer/mod.rs | 1 - .../sorted_doc_id_multivalue_column.rs | 18 +++--- 3 files changed, 37 insertions(+), 41 deletions(-) diff --git a/src/indexer/flat_map_with_buffer.rs b/src/indexer/flat_map_with_buffer.rs index 2e9194018c..88b509cdbe 100644 --- a/src/indexer/flat_map_with_buffer.rs +++ b/src/indexer/flat_map_with_buffer.rs @@ -1,10 +1,10 @@ -struct FlatMapWithgBuffer { +pub struct FlatMapWithBuffer { buffer: Vec, fill_buffer: F, underlying_it: Iter, } -impl Iterator for FlatMapWithgBuffer +impl Iterator for FlatMapWithBuffer where Iter: Iterator, F: Fn(I, &mut Vec), @@ -22,53 +22,48 @@ where } } -/// FUnction similar to `flat_map`, but the generating function fills a buffer -/// instead of returning an Iterator. -pub fn flat_map_with_buffer( - underlying_it: Iter, - fill_buffer: F, -) -> impl Iterator -where - F: Fn(I, &mut Vec), - Iter: Iterator, -{ - FlatMapWithgBuffer { - buffer: Vec::with_capacity(10), - fill_buffer, - underlying_it, +pub trait FlatMapWithBufferIter: Iterator { + /// Function similar to `flat_map`, but allows reusing a shared `Vec`. + fn flat_map_with_buffer(self, fill_buffer: F) -> FlatMapWithBuffer + where + F: Fn(Self::Item, &mut Vec), + Self: Sized, + { + FlatMapWithBuffer { + buffer: Vec::with_capacity(10), + fill_buffer, + underlying_it: self, + } } } +impl FlatMapWithBufferIter for T where T: Iterator {} + #[cfg(test)] mod tests { - use super::flat_map_with_buffer; + use crate::indexer::flat_map_with_buffer::FlatMapWithBufferIter; #[test] fn test_flat_map_with_buffer_empty() { - let vals: Vec = flat_map_with_buffer( - std::iter::empty::(), - |_val: usize, _buffer: &mut Vec| {}, - ) - .collect(); - assert!(vals.is_empty()); + let mut empty_iter = std::iter::empty::() + .flat_map_with_buffer(|_val: usize, _buffer: &mut Vec| {}); + assert!(empty_iter.next().is_none()); } #[test] fn test_flat_map_with_buffer_simple() { - let vals: Vec = flat_map_with_buffer(1..5, |val: usize, buffer: &mut Vec| { - buffer.extend(0..val) - }) - .collect(); + let vals: Vec = (1..5) + .flat_map_with_buffer(|val: usize, buffer: &mut Vec| buffer.extend(0..val)) + .collect(); assert_eq!(&[0, 0, 1, 0, 1, 2, 0, 1, 2, 3], &vals[..]); } #[test] fn test_flat_map_filling_no_elements_does_not_stop_iterator() { - let vals: Vec = flat_map_with_buffer( - [2, 0, 0, 3].into_iter(), - |val: usize, buffer: &mut Vec| buffer.extend(0..val), - ) - .collect(); + let vals: Vec = [2, 0, 0, 3] + .into_iter() + .flat_map_with_buffer(|val: usize, buffer: &mut Vec| buffer.extend(0..val)) + .collect(); assert_eq!(&[0, 1, 0, 1, 2], &vals[..]); } } diff --git a/src/indexer/mod.rs b/src/indexer/mod.rs index 28e845e636..c557350cf1 100644 --- a/src/indexer/mod.rs +++ b/src/indexer/mod.rs @@ -27,7 +27,6 @@ mod stamper; use crossbeam_channel as channel; use smallvec::SmallVec; -pub use self::flat_map_with_buffer::flat_map_with_buffer; pub use self::index_writer::IndexWriter; pub(crate) use self::json_term_writer::{ convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter, diff --git a/src/indexer/sorted_doc_id_multivalue_column.rs b/src/indexer/sorted_doc_id_multivalue_column.rs index c960a01bd2..725b8e4f4f 100644 --- a/src/indexer/sorted_doc_id_multivalue_column.rs +++ b/src/indexer/sorted_doc_id_multivalue_column.rs @@ -4,10 +4,11 @@ use fastfield_codecs::Column; use crate::fastfield::MultiValuedFastFieldReader; use crate::indexer::doc_id_mapping::SegmentDocIdMapping; -use crate::indexer::flat_map_with_buffer; use crate::schema::Field; use crate::{DocAddress, SegmentReader}; +use super::flat_map_with_buffer::FlatMapWithBufferIter; + pub(crate) struct RemappedDocIdMultiValueColumn<'a> { doc_id_mapping: &'a SegmentDocIdMapping, fast_field_readers: Vec>, @@ -72,13 +73,14 @@ impl<'a> Column for RemappedDocIdMultiValueColumn<'a> { } fn iter(&self) -> Box + '_> { - Box::new(flat_map_with_buffer( - self.doc_id_mapping.iter_old_doc_addrs(), - |old_doc_addr: DocAddress, buffer| { - let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; - ff_reader.get_vals(old_doc_addr.doc_id, buffer); - }, - )) + Box::new( + self.doc_id_mapping + .iter_old_doc_addrs() + .flat_map_with_buffer(|old_doc_addr: DocAddress, buffer| { + let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize]; + ff_reader.get_vals(old_doc_addr.doc_id, buffer); + }), + ) } fn min_value(&self) -> u64 { self.min_value