Skip to content

Commit

Permalink
fix: Fix skip error in calculate_row_count. (#2329)
Browse files Browse the repository at this point in the history
* Fix skip error in calculate_row_count

* Fix unit test
  • Loading branch information
Ted-Jiang committed Aug 5, 2022
1 parent 8e30d06 commit b6eaf22
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 2 deletions.
4 changes: 3 additions & 1 deletion parquet/src/file/serialized_reader.rs
Expand Up @@ -1512,7 +1512,9 @@ mod tests {
if i != 351 {
assert!((meta.num_rows == 21) || (meta.num_rows == 20));
} else {
assert_eq!(meta.num_rows, 11);
// The last page's first row index is 7290 and the total row count is 7300.
// Row indexes start at zero, so the last page holds 7300 - 7290 = 10 rows.
assert_eq!(meta.num_rows, 10);
}
assert!(!meta.is_dict);
vec.push(meta);
Expand Down
44 changes: 43 additions & 1 deletion parquet/src/util/page_util.rs
Expand Up @@ -25,7 +25,8 @@ use crate::file::reader::ChunkReader;
/// Use column chunk's offset index to get the `page_num` page row count.
pub(crate) fn calculate_row_count(indexes: &[PageLocation], page_num: usize, total_row_count: i64) -> Result<usize> {
if page_num == indexes.len() - 1 {
Ok((total_row_count - indexes[page_num].first_row_index + 1) as usize)
// first_row_index start with 0, so no need to plus one additional.
Ok((total_row_count - indexes[page_num].first_row_index) as usize)
} else {
Ok((indexes[page_num + 1].first_row_index - indexes[page_num].first_row_index) as usize)
}
Expand All @@ -52,3 +53,44 @@ pub(crate) fn get_pages_readable_slices<T: Read + Send, R: ChunkReader<T=T>>(col
}
Ok((page_readers, has_dictionary_page))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `calculate_row_count` against the offset index that
    /// parquet-tools reports for `./test.parquet`.
    ///
    /// `parquet-tools meta ./test.parquet` got:
    ///
    /// ```text
    /// file schema: test_schema
    /// --------------------------------------------------------------------------------
    /// leaf: REQUIRED INT64 R:0 D:
    /// row group 1: RC:256 TS:2216 OFFSET:4
    /// --------------------------------------------------------------------------------
    /// leaf: INT64 UNCOMPRESSED DO:0 FPO:4 SZ:2216/2216/1.00 VC:256 ENC:PLAIN,RLE ST:[min: 0, max: 255, num_nulls not defined
    /// ```
    ///
    /// `parquet-tools column-index -c leaf ./test.parquet` got:
    ///
    /// ```text
    /// offset index for column leaf:
    ///          offset   compressed size   first row index
    /// page-0        4               554                 0
    /// page-1      558               554                64
    /// page-2     1112               554               128
    /// page-3     1666               554               192
    /// ```
    #[test]
    fn test_calculate_row_count() {
        let total_row_count = 256;
        let indexes = vec![
            PageLocation::new(4, 554, 0),
            PageLocation::new(558, 554, 64),
            PageLocation::new(1112, 554, 128),
            PageLocation::new(1666, 554, 192),
        ];
        // 256 rows split across four pages: every page should have 64 rows.
        for page_num in 0..indexes.len() {
            let row_count =
                calculate_row_count(&indexes, page_num, total_row_count).unwrap();
            assert_eq!(64, row_count);
        }
    }
}

0 comments on commit b6eaf22

Please sign in to comment.