Skip to content

Commit

Permalink
add flat_map_with_buffer to Iterator trait
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz committed Oct 5, 2022
1 parent 7baa6e3 commit d77cf4d
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 41 deletions.
59 changes: 27 additions & 32 deletions src/indexer/flat_map_with_buffer.rs
@@ -1,10 +1,10 @@
struct FlatMapWithgBuffer<T, F, Iter> {
/// Iterator adaptor returned by `FlatMapWithBufferIter::flat_map_with_buffer`.
///
/// It bundles a source iterator, a fill callback, and the `Vec<T>` whose type
/// matches the callback's `&mut Vec<T>` parameter.
pub struct FlatMapWithBuffer<T, F, Iter> {
/// `Vec<T>` allocated once by the constructor (with a small initial
/// capacity) so that it can be reused instead of reallocated.
buffer: Vec<T>,
/// Callback taking an item of `underlying_it` and a `&mut Vec<T>` to fill.
fill_buffer: F,
/// Source iterator supplying the items given to `fill_buffer`.
underlying_it: Iter,
}

impl<T, F, Iter, I> Iterator for FlatMapWithgBuffer<T, F, Iter>
impl<T, F, Iter, I> Iterator for FlatMapWithBuffer<T, F, Iter>
where
Iter: Iterator<Item = I>,
F: Fn(I, &mut Vec<T>),
Expand All @@ -22,53 +22,48 @@ where
}
}

/// Function similar to `flat_map`, but the generating function fills a buffer
/// instead of returning an Iterator.
pub fn flat_map_with_buffer<T, F, I, Iter>(
underlying_it: Iter,
fill_buffer: F,
) -> impl Iterator<Item = T>
where
F: Fn(I, &mut Vec<T>),
Iter: Iterator<Item = I>,
{
FlatMapWithgBuffer {
buffer: Vec::with_capacity(10),
fill_buffer,
underlying_it,
/// Extension trait that adds `flat_map_with_buffer` to iterators.
pub trait FlatMapWithBufferIter: Iterator {
    /// Works like `flat_map`, except that the callback does not return an
    /// iterator: it receives each item together with a `&mut Vec<T>` to write
    /// into, which lets a single `Vec` be reused.
    ///
    /// The returned adaptor takes ownership of `self` and of `fill_buffer`, and
    /// starts with an empty buffer pre-allocated for 10 elements.
    fn flat_map_with_buffer<F, T>(self, fill_buffer: F) -> FlatMapWithBuffer<T, F, Self>
    where
        Self: Sized,
        F: Fn(Self::Item, &mut Vec<T>),
    {
        // Allocate the buffer once, up front; the adaptor owns it afterwards.
        let buffer = Vec::with_capacity(10);
        FlatMapWithBuffer {
            buffer,
            fill_buffer,
            underlying_it: self,
        }
    }
}

// Blanket implementation: every type implementing `Iterator` (the `?Sized`
// bound also admits unsized ones) implements `FlatMapWithBufferIter`. The
// method itself still requires `Self: Sized` to be callable.
impl<T: ?Sized> FlatMapWithBufferIter for T where T: Iterator {}

#[cfg(test)]
mod tests {
use super::flat_map_with_buffer;
use crate::indexer::flat_map_with_buffer::FlatMapWithBufferIter;

#[test]
fn test_flat_map_with_buffer_empty() {
let vals: Vec<usize> = flat_map_with_buffer(
std::iter::empty::<usize>(),
|_val: usize, _buffer: &mut Vec<usize>| {},
)
.collect();
assert!(vals.is_empty());
let mut empty_iter = std::iter::empty::<usize>()
.flat_map_with_buffer(|_val: usize, _buffer: &mut Vec<usize>| {});
assert!(empty_iter.next().is_none());
}

#[test]
fn test_flat_map_with_buffer_simple() {
let vals: Vec<usize> = flat_map_with_buffer(1..5, |val: usize, buffer: &mut Vec<usize>| {
buffer.extend(0..val)
})
.collect();
let vals: Vec<usize> = (1..5)
.flat_map_with_buffer(|val: usize, buffer: &mut Vec<usize>| buffer.extend(0..val))
.collect();
assert_eq!(&[0, 0, 1, 0, 1, 2, 0, 1, 2, 3], &vals[..]);
}

#[test]
fn test_flat_map_filling_no_elements_does_not_stop_iterator() {
let vals: Vec<usize> = flat_map_with_buffer(
[2, 0, 0, 3].into_iter(),
|val: usize, buffer: &mut Vec<usize>| buffer.extend(0..val),
)
.collect();
let vals: Vec<usize> = [2, 0, 0, 3]
.into_iter()
.flat_map_with_buffer(|val: usize, buffer: &mut Vec<usize>| buffer.extend(0..val))
.collect();
assert_eq!(&[0, 1, 0, 1, 2], &vals[..]);
}
}
1 change: 0 additions & 1 deletion src/indexer/mod.rs
Expand Up @@ -27,7 +27,6 @@ mod stamper;
use crossbeam_channel as channel;
use smallvec::SmallVec;

pub use self::flat_map_with_buffer::flat_map_with_buffer;
pub use self::index_writer::IndexWriter;
pub(crate) use self::json_term_writer::{
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
Expand Down
18 changes: 10 additions & 8 deletions src/indexer/sorted_doc_id_multivalue_column.rs
Expand Up @@ -4,10 +4,11 @@ use fastfield_codecs::Column;

use crate::fastfield::MultiValuedFastFieldReader;
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
use crate::indexer::flat_map_with_buffer;
use crate::schema::Field;
use crate::{DocAddress, SegmentReader};

use super::flat_map_with_buffer::FlatMapWithBufferIter;

pub(crate) struct RemappedDocIdMultiValueColumn<'a> {
doc_id_mapping: &'a SegmentDocIdMapping,
fast_field_readers: Vec<MultiValuedFastFieldReader<u64>>,
Expand Down Expand Up @@ -72,13 +73,14 @@ impl<'a> Column for RemappedDocIdMultiValueColumn<'a> {
}

fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
Box::new(flat_map_with_buffer(
self.doc_id_mapping.iter_old_doc_addrs(),
|old_doc_addr: DocAddress, buffer| {
let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize];
ff_reader.get_vals(old_doc_addr.doc_id, buffer);
},
))
Box::new(
self.doc_id_mapping
.iter_old_doc_addrs()
.flat_map_with_buffer(|old_doc_addr: DocAddress, buffer| {
let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize];
ff_reader.get_vals(old_doc_addr.doc_id, buffer);
}),
)
}
fn min_value(&self) -> u64 {
self.min_value
Expand Down

0 comments on commit d77cf4d

Please sign in to comment.