Skip to content

Commit

Permalink
Removing alloc on all .next() in MultiValueColumn
Browse files Browse the repository at this point in the history
  • Loading branch information
fulmicoton committed Oct 5, 2022
1 parent 0dc8c45 commit 7baa6e3
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 11 deletions.
74 changes: 74 additions & 0 deletions src/indexer/flat_map_with_buffer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/// Iterator adapter backing [`flat_map_with_buffer`].
///
/// It owns a single `Vec<T>` that is handed to `fill_buffer` once per item of the
/// underlying iterator, so the vector's capacity is reused across groups.
struct FlatMapWithBuffer<T, F, Iter> {
    /// Items of the current group that have not been yielded yet.
    ///
    /// They are stored in reverse order so that the next item to yield is the last one
    /// and can be removed with `Vec::pop`.
    buffer: Vec<T>,
    /// Callback that appends the items generated by one underlying element to the buffer.
    fill_buffer: F,
    /// Source of the elements that get expanded.
    underlying_it: Iter,
}

impl<T, F, Iter, I> Iterator for FlatMapWithBuffer<T, F, Iter>
where
    Iter: Iterator<Item = I>,
    F: FnMut(I, &mut Vec<T>),
{
    type Item = T;

    fn next(&mut self) -> Option<Self::Item> {
        // A group may be empty, so keep pulling from the underlying iterator until
        // something lands in the buffer or the underlying iterator is exhausted.
        while self.buffer.is_empty() {
            let next_el = self.underlying_it.next()?;
            (self.fill_buffer)(next_el, &mut self.buffer);
            // Items are yielded with `pop`, which takes from the end: reversing once per
            // group restores the order in which `fill_buffer` pushed them.
            self.buffer.reverse();
        }
        self.buffer.pop()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        // Everything already buffered is guaranteed to be yielded.
        let pending = self.buffer.len();
        match self.underlying_it.size_hint() {
            // Nothing left upstream: the buffered items are all that remains.
            (_, Some(0)) => (pending, Some(pending)),
            // Each remaining upstream element can expand to any number of items.
            _ => (pending, None),
        }
    }
}

/// Function similar to `flat_map`, but the generating function fills a buffer
/// instead of returning an `Iterator`.
///
/// For every element of `underlying_it`, `fill_buffer` is called with that element and
/// an empty `Vec<T>`. The items it appends are then yielded in insertion order before
/// the next element is pulled. Elements for which nothing is appended are skipped.
///
/// The same vector is passed to every call, so its allocation is reused.
/// `fill_buffer` may carry mutable state (`FnMut`).
pub fn flat_map_with_buffer<T, F, I, Iter>(
    underlying_it: Iter,
    fill_buffer: F,
) -> impl Iterator<Item = T>
where
    F: FnMut(I, &mut Vec<T>),
    Iter: Iterator<Item = I>,
{
    FlatMapWithBuffer {
        buffer: Vec::with_capacity(10),
        fill_buffer,
        underlying_it,
    }
}

#[cfg(test)]
mod tests {
    use super::flat_map_with_buffer;

    /// Fill callback shared by the tests: appends `0..count` to `out`.
    fn push_range(count: usize, out: &mut Vec<usize>) {
        out.extend(0..count)
    }

    /// With no underlying elements, nothing is yielded.
    #[test]
    fn test_flat_map_with_buffer_empty() {
        let source = std::iter::empty::<usize>();
        let flattened = flat_map_with_buffer(source, |_val: usize, _buffer: &mut Vec<usize>| {});
        let vals: Vec<usize> = flattened.collect();
        assert_eq!(vals.len(), 0);
    }

    /// Each element `n` of `1..5` expands to `0..n`, in order.
    #[test]
    fn test_flat_map_with_buffer_simple() {
        let flattened = flat_map_with_buffer(1..5, push_range);
        let vals: Vec<usize> = flattened.collect();
        let expected: Vec<usize> = vec![0, 0, 1, 0, 1, 2, 0, 1, 2, 3];
        assert_eq!(expected, vals);
    }

    /// Elements that generate nothing must be skipped rather than ending the iteration.
    #[test]
    fn test_flat_map_filling_no_elements_does_not_stop_iterator() {
        let source = [2, 0, 0, 3].into_iter();
        let flattened = flat_map_with_buffer(source, push_range);
        let vals: Vec<usize> = flattened.collect();
        let expected: Vec<usize> = vec![0, 1, 0, 1, 2];
        assert_eq!(expected, vals);
    }
}
2 changes: 2 additions & 0 deletions src/indexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ pub mod delete_queue;
pub mod demuxer;
pub mod doc_id_mapping;
mod doc_opstamp_mapping;
mod flat_map_with_buffer;
pub mod index_writer;
mod index_writer_status;
mod json_term_writer;
Expand All @@ -26,6 +27,7 @@ mod stamper;
use crossbeam_channel as channel;
use smallvec::SmallVec;

pub use self::flat_map_with_buffer::flat_map_with_buffer;
pub use self::index_writer::IndexWriter;
pub(crate) use self::json_term_writer::{
convert_to_fast_value_and_get_term, set_string_and_get_terms, JsonTermWriter,
Expand Down
20 changes: 9 additions & 11 deletions src/indexer/sorted_doc_id_multivalue_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@ use fastfield_codecs::Column;

use crate::fastfield::MultiValuedFastFieldReader;
use crate::indexer::doc_id_mapping::SegmentDocIdMapping;
use crate::indexer::flat_map_with_buffer;
use crate::schema::Field;
use crate::SegmentReader;
use crate::{DocAddress, SegmentReader};

pub(crate) struct RemappedDocIdMultiValueColumn<'a> {
doc_id_mapping: &'a SegmentDocIdMapping,
Expand Down Expand Up @@ -71,16 +72,13 @@ impl<'a> Column for RemappedDocIdMultiValueColumn<'a> {
}

/// Returns a boxed iterator over the `u64` values of this column, walking the document
/// addresses produced by `doc_id_mapping.iter_old_doc_addrs()` and emitting the values read
/// from the fast field reader of each address's segment.
// NOTE(review): this span is a rendered diff hunk without +/- markers. The first `Box::new(...)`
// expression appears to be the implementation removed by the commit and the second one the
// implementation that replaces it — confirm against the repository before relying on it.
fn iter(&self) -> Box<dyn Iterator<Item = u64> + '_> {
// Earlier form: a fresh `Vec` is created for every document address and consumed by `flat_map`.
Box::new(
self.doc_id_mapping
.iter_old_doc_addrs()
.flat_map(|old_doc_addr| {
// `segment_ord` selects the reader of the segment the document lives in.
let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize];
let mut vals = Vec::new();
ff_reader.get_vals(old_doc_addr.doc_id, &mut vals);
vals.into_iter()
}),
)
// Later form: `flat_map_with_buffer` supplies the vector to fill, so the closure no longer
// creates one per document address.
Box::new(flat_map_with_buffer(
self.doc_id_mapping.iter_old_doc_addrs(),
|old_doc_addr: DocAddress, buffer| {
// `segment_ord` selects the reader of the segment the document lives in.
let ff_reader = &self.fast_field_readers[old_doc_addr.segment_ord as usize];
// Presumably stores the values of `doc_id` into `buffer` — verify against `get_vals`.
ff_reader.get_vals(old_doc_addr.doc_id, buffer);
},
))
}
fn min_value(&self) -> u64 {
self.min_value
Expand Down

0 comments on commit 7baa6e3

Please sign in to comment.