Skip to content

Commit

Permalink
Add more tests of apache#2025
Browse files (browse the repository at this point in the history)
  • Loading branch information
tustvold committed Jul 8, 2022
1 parent 5a76697 commit 1d58fa4
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 10 deletions.
31 changes: 22 additions & 9 deletions parquet/src/arrow/arrow_reader.rs
Expand Up @@ -1529,8 +1529,7 @@ mod tests {
assert_eq!(total_rows, expected_rows);
}

#[test]
fn test_row_group_exact_multiple() {
fn test_row_group_batch(row_group_size: usize, batch_size: usize) {
let schema = Arc::new(Schema::new(vec![Field::new(
"list",
ArrowDataType::List(Box::new(Field::new("item", ArrowDataType::Int32, true))),
Expand All @@ -1544,14 +1543,14 @@ mod tests {
schema.clone(),
Some(
WriterProperties::builder()
.set_max_row_group_size(8)
.set_max_row_group_size(row_group_size)
.build(),
),
)
.unwrap();
for _ in 0..2 {
let mut list_builder = ListBuilder::new(Int32Builder::new(10));
for _ in 0..10 {
let mut list_builder = ListBuilder::new(Int32Builder::new(row_group_size));
for _ in 0..(batch_size) {
list_builder.append(true).unwrap();
}
let batch = RecordBatch::try_new(
Expand All @@ -1564,9 +1563,23 @@ mod tests {
writer.close().unwrap();

let mut file_reader = ParquetFileArrowReader::try_new(Bytes::from(buf)).unwrap();
let mut record_reader = file_reader.get_record_reader(8).unwrap();
assert_eq!(8, record_reader.next().unwrap().unwrap().num_rows());
assert_eq!(8, record_reader.next().unwrap().unwrap().num_rows());
assert_eq!(4, record_reader.next().unwrap().unwrap().num_rows());
let mut record_reader = file_reader.get_record_reader(batch_size).unwrap();
assert_eq!(
batch_size,
record_reader.next().unwrap().unwrap().num_rows()
);
assert_eq!(
batch_size,
record_reader.next().unwrap().unwrap().num_rows()
);
}

/// Runs `test_row_group_batch` over several `(row_group_size, batch_size)`
/// combinations chosen around `MIN_BATCH_SIZE`.
#[test]
fn test_row_group_exact_multiple() {
use crate::arrow::record_reader::MIN_BATCH_SIZE;
// Row group size equal to the batch size, using the small literal value 8.
test_row_group_batch(8, 8);
// Row group size and batch size both exactly MIN_BATCH_SIZE.
test_row_group_batch(MIN_BATCH_SIZE, MIN_BATCH_SIZE);
// Row group size one larger than a batch size of MIN_BATCH_SIZE.
test_row_group_batch(MIN_BATCH_SIZE + 1, MIN_BATCH_SIZE);
// Batch size one smaller than a row group size of MIN_BATCH_SIZE.
test_row_group_batch(MIN_BATCH_SIZE, MIN_BATCH_SIZE - 1);
}
}
3 changes: 2 additions & 1 deletion parquet/src/arrow/record_reader/mod.rs
Expand Up @@ -38,7 +38,8 @@ use crate::schema::types::ColumnDescPtr;
pub(crate) mod buffer;
mod definition_levels;

const MIN_BATCH_SIZE: usize = 1024;
/// The minimum number of levels read when reading a repeated field
pub(crate) const MIN_BATCH_SIZE: usize = 1024;

/// A `RecordReader` is a stateful column reader that delimits semantic records.
pub type RecordReader<T> =
Expand Down

0 comments on commit 1d58fa4

Please sign in to comment.