From 7627627be8e82850a7e64414c1a347cbda18053b Mon Sep 17 00:00:00 2001 From: yangjiang Date: Tue, 8 Nov 2022 20:06:22 +0800 Subject: [PATCH] refactor avoid pub mod --- parquet/src/arrow/arrow_reader/mod.rs | 62 ++++++++++++++++++++- parquet/src/arrow/arrow_reader/selection.rs | 61 +------------------- 2 files changed, 62 insertions(+), 61 deletions(-) diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index ce76bde9070..de9e187ddc2 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -40,7 +40,7 @@ use crate::file::serialized_reader::ReadOptionsBuilder; use crate::schema::types::SchemaDescriptor; mod filter; -pub mod selection; +mod selection; pub use filter::{ArrowPredicate, ArrowPredicateFn, RowFilter}; pub use selection::{RowSelection, RowSelector}; @@ -590,6 +590,66 @@ impl ParquetRecordBatchReader { } } +// Combine two lists of `RowSelection` return the intersection of them +// For example: +// self: NNYYYYNNYYNYN +// other: NYNNNNNNY +// +// returned: NNNNNNNNYYNYN +pub fn intersect_row_selections( + left: Vec, + right: Vec, +) -> Vec { + let mut res = Vec::with_capacity(left.len()); + let mut l_iter = left.into_iter().peekable(); + let mut r_iter = right.into_iter().peekable(); + + while let (Some(a), Some(b)) = (l_iter.peek_mut(), r_iter.peek_mut()) { + if a.row_count == 0 { + l_iter.next().unwrap(); + continue; + } + if b.row_count == 0 { + r_iter.next().unwrap(); + continue; + } + match (a.skip, b.skip) { + // Keep both ranges + (false, false) => { + if a.row_count < b.row_count { + res.push(RowSelector::select(a.row_count)); + b.row_count -= a.row_count; + l_iter.next().unwrap(); + } else { + res.push(RowSelector::select(b.row_count)); + a.row_count -= b.row_count; + r_iter.next().unwrap(); + } + } + // skip at least one + _ => { + if a.row_count < b.row_count { + res.push(RowSelector::skip(a.row_count)); + b.row_count -= a.row_count; + l_iter.next().unwrap(); + } else { + res.push(RowSelector::skip(b.row_count)); + a.row_count -= b.row_count; + r_iter.next().unwrap(); + } + } + } + } + + if l_iter.peek().is_some() { + res.extend(l_iter); + } + if r_iter.peek().is_some() { + res.extend(r_iter); + } + res +} + /// Returns `true` if `selection` is `None` or selects some rows pub(crate) fn selects_any(selection: Option<&RowSelection>) -> bool { selection.map(|x| x.selects_any()).unwrap_or(true) diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 362a42da77d..7a04a0ff9e3 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -349,66 +349,6 @@ impl From for VecDeque { } } -// Combine two lists of `RowSelection` return the intersection of them -// For example: -// self: NNYYYYNNYYNYN -// other: NYNNNNNNY -// -// returned: NNNNNNNNYYNYN -pub fn intersect_row_selections( - left: Vec, - right: Vec, -) -> Vec { - let mut res = Vec::with_capacity(left.len()); - let mut l_iter = left.into_iter().peekable(); - let mut r_iter = right.into_iter().peekable(); - - while let (Some(a), Some(b)) = (l_iter.peek_mut(), r_iter.peek_mut()) { - if a.row_count == 0 { - l_iter.next().unwrap(); - continue; - } - if b.row_count == 0 { - r_iter.next().unwrap(); - continue; - } - match (a.skip, b.skip) { - // Keep both ranges - (false, false) => { - if a.row_count < b.row_count { - res.push(RowSelector::select(a.row_count)); - b.row_count -= a.row_count; - l_iter.next().unwrap(); - } else { - res.push(RowSelector::select(b.row_count)); - a.row_count -= b.row_count; - r_iter.next().unwrap(); - } - } - // skip at least one - _ => { - if a.row_count < b.row_count { - res.push(RowSelector::skip(a.row_count)); - b.row_count -= a.row_count; - l_iter.next().unwrap(); - } else { - res.push(RowSelector::skip(b.row_count)); - a.row_count -= b.row_count; - r_iter.next().unwrap(); - } - } - } - } - - if l_iter.peek().is_some() { - res.extend(l_iter); - } - if r_iter.peek().is_some() { - res.extend(r_iter); - } - res -} - fn add_selector(skip: bool, sum_row: usize, combined_result: &mut Vec) { let selector = if skip { RowSelector::skip(sum_row) @@ -421,6 +361,7 @@ fn add_selector(skip: bool, sum_row: usize, combined_result: &mut Vec