From 36a07b10ad60212b54c1dc22d63a42bb2e44468a Mon Sep 17 00:00:00 2001 From: yangjiang Date: Thu, 4 Aug 2022 13:39:01 +0800 Subject: [PATCH 1/2] Make skip_records in complex_object_array skip across row groups --- .../array_reader/complex_object_array.rs | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/parquet/src/arrow/array_reader/complex_object_array.rs b/parquet/src/arrow/array_reader/complex_object_array.rs index 79b53733176..650934e483c 100644 --- a/parquet/src/arrow/array_reader/complex_object_array.rs +++ b/parquet/src/arrow/array_reader/complex_object_array.rs @@ -197,19 +197,25 @@ where } fn skip_records(&mut self, num_records: usize) -> Result { - match self.column_reader.as_mut() { - Some(reader) => reader.skip_records(num_records), - None => { - if self.next_column_reader()? { - self.column_reader - .as_mut() - .unwrap() - .skip_records(num_records) - } else { - Ok(0) - } + let mut num_read = 0; + while (self.column_reader.is_some() || self.next_column_reader()?) + && num_read < num_records + { + let remain_to_skip = num_records - num_read; + let skip = self + .column_reader + .as_mut() + .unwrap() + .skip_records(remain_to_skip) + .unwrap(); + num_read += skip; + // skip < remain_to_skip means end of row group + // self.next_column_reader() == false means end of file + if skip < remain_to_skip && !self.next_column_reader()? 
{ + break; } } + Ok(num_read) } fn get_def_levels(&self) -> Option<&[i16]> { From 84259a1272ddedc51ea2f47fa5e32c5143086625 Mon Sep 17 00:00:00 2001 From: Yang Jiang Date: Sat, 6 Aug 2022 17:53:53 +0800 Subject: [PATCH 2/2] Update parquet/src/arrow/array_reader/complex_object_array.rs Co-authored-by: Kun Liu --- parquet/src/arrow/array_reader/complex_object_array.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/parquet/src/arrow/array_reader/complex_object_array.rs b/parquet/src/arrow/array_reader/complex_object_array.rs index 650934e483c..4f958fea446 100644 --- a/parquet/src/arrow/array_reader/complex_object_array.rs +++ b/parquet/src/arrow/array_reader/complex_object_array.rs @@ -206,8 +206,7 @@ where .column_reader .as_mut() .unwrap() - .skip_records(remain_to_skip) - .unwrap(); + .skip_records(remain_to_skip)?; num_read += skip; // skip < remain_to_skip means end of row group // self.next_column_reader() == false means end of file