From 42b15a82e242060e696a998c58765e8845a098b3 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Mon, 1 Aug 2022 12:16:05 +0100
Subject: [PATCH] Add tests for nested decimal arrays (#2254)

---
 parquet/src/arrow/arrow_reader.rs | 82 +++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/parquet/src/arrow/arrow_reader.rs b/parquet/src/arrow/arrow_reader.rs
index 19985818d17..26305cd41ba 100644
--- a/parquet/src/arrow/arrow_reader.rs
+++ b/parquet/src/arrow/arrow_reader.rs
@@ -397,6 +397,7 @@ mod tests {
     use tempfile::tempfile;
 
     use arrow::array::*;
+    use arrow::buffer::Buffer;
     use arrow::datatypes::{DataType as ArrowDataType, Field, Schema};
     use arrow::error::Result as ArrowResult;
     use arrow::record_batch::{RecordBatch, RecordBatchReader};
@@ -729,6 +730,87 @@ mod tests {
         }
     }
 
+    #[test]
+    fn test_decimal_nullable_struct() {
+        let decimals = Decimal128Array::from_iter_values([1, 2, 3, 4, 5, 6, 7, 8]);
+
+        let data = ArrayDataBuilder::new(ArrowDataType::Struct(vec![Field::new(
+            "decimals",
+            decimals.data_type().clone(),
+            false,
+        )]))
+        .len(8)
+        .null_bit_buffer(Some(Buffer::from(&[0b11101111])))
+        .child_data(vec![decimals.into_data()])
+        .build()
+        .unwrap();
+
+        let written = RecordBatch::try_from_iter([(
+            "struct",
+            Arc::new(StructArray::from(data)) as ArrayRef,
+        )])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer =
+            ArrowWriter::try_new(&mut buffer, written.schema(), None).unwrap();
+        writer.write(&written).unwrap();
+        writer.close().unwrap();
+
+        let read = ParquetFileArrowReader::try_new(Bytes::from(buffer))
+            .unwrap()
+            .get_record_reader(3)
+            .unwrap()
+            .collect::<ArrowResult<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(&written.slice(0, 3), &read[0]);
+        assert_eq!(&written.slice(3, 3), &read[1]);
+        assert_eq!(&written.slice(6, 2), &read[2]);
+    }
+
+    #[test]
+    #[ignore] // https://github.com/apache/arrow-rs/issues/2253
+    fn test_decimal_list() {
+        let decimals = Decimal128Array::from_iter_values([1, 2, 3, 4, 5, 6, 7, 8]);
+
+        // [[], [1], [2, 3], null, [4], null, [6, 7, 8]]
+        let data = ArrayDataBuilder::new(ArrowDataType::List(Box::new(Field::new(
+            "item",
+            decimals.data_type().clone(),
+            false,
+        ))))
+        .len(7)
+        .add_buffer(Buffer::from_iter([0_i32, 0, 1, 3, 3, 4, 5, 8]))
+        .null_bit_buffer(Some(Buffer::from(&[0b01010111])))
+        .child_data(vec![decimals.into_data()])
+        .build()
+        .unwrap();
+
+        let written = RecordBatch::try_from_iter([(
+            "list",
+            Arc::new(ListArray::from(data)) as ArrayRef,
+        )])
+        .unwrap();
+
+        let mut buffer = Vec::with_capacity(1024);
+        let mut writer =
+            ArrowWriter::try_new(&mut buffer, written.schema(), None).unwrap();
+        writer.write(&written).unwrap();
+        writer.close().unwrap();
+
+        let read = ParquetFileArrowReader::try_new(Bytes::from(buffer))
+            .unwrap()
+            .get_record_reader(3)
+            .unwrap()
+            .collect::<ArrowResult<Vec<_>>>()
+            .unwrap();
+
+        assert_eq!(&written.slice(0, 3), &read[0]);
+        assert_eq!(&written.slice(3, 3), &read[1]);
+        assert_eq!(&written.slice(6, 1), &read[2]);
+    }
+
     #[test]
     fn test_read_decimal_file() {
         use arrow::array::Decimal128Array;