diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs index dd21e0d5176..12c6978107d 100644 --- a/arrow/src/array/array_binary.rs +++ b/arrow/src/array/array_binary.rs @@ -236,7 +236,7 @@ impl<'a, T: OffsetSizeTrait> GenericBinaryArray { impl fmt::Debug for GenericBinaryArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" }; + let prefix = OffsetSize::PREFIX; write!(f, "{}BinaryArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { @@ -608,11 +608,9 @@ mod tests { .unwrap(); let binary_array1 = GenericBinaryArray::::from(array_data1); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt8, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt8, false), + )); let array_data2 = ArrayData::builder(data_type) .len(3) @@ -660,11 +658,9 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref(&[0b101]); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt8, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt8, false), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -707,11 +703,9 @@ mod tests { .unwrap(); let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt8, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt8, false), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) diff --git a/arrow/src/array/array_list.rs b/arrow/src/array/array_list.rs index 543cd1fec81..b9c05014c3f 100644 --- a/arrow/src/array/array_list.rs +++ b/arrow/src/array/array_list.rs @@ -34,14 +34,17 @@ use crate::{ /// trait declaring an offset size, relevant for i32 vs i64 array types. pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer { const IS_LARGE: bool; + const PREFIX: &'static str; } impl OffsetSizeTrait for i32 { const IS_LARGE: bool = false; + const PREFIX: &'static str = ""; } impl OffsetSizeTrait for i64 { const IS_LARGE: bool = true; + const PREFIX: &'static str = "Large"; } /// Generic struct for a variable-size list array. @@ -57,6 +60,16 @@ pub struct GenericListArray { } impl GenericListArray { + /// The data type constructor of list array. + /// The input is the schema of the child array and + /// the output is the [`DataType`], List or LargeList. + pub const DATA_TYPE_CONSTRUCTOR: fn(Box) -> DataType = if OffsetSize::IS_LARGE + { + DataType::LargeList + } else { + DataType::List + }; + /// Returns a reference to the values of this list. pub fn values(&self) -> ArrayRef { self.values.clone() @@ -170,11 +183,7 @@ impl GenericListArray { .collect(); let field = Box::new(Field::new("item", T::DATA_TYPE, true)); - let data_type = if OffsetSize::IS_LARGE { - DataType::LargeList(field) - } else { - DataType::List(field) - }; + let data_type = Self::DATA_TYPE_CONSTRUCTOR(field); let array_data = ArrayData::builder(data_type) .len(null_buf.len()) .add_buffer(offsets.into()) @@ -274,7 +283,7 @@ impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray fmt::Debug for GenericListArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" }; + let prefix = OffsetSize::PREFIX; write!(f, "{}ListArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { diff --git a/arrow/src/array/array_string.rs b/arrow/src/array/array_string.rs index 1bb99fce7ed..b72152cc4ac 100644 --- a/arrow/src/array/array_string.rs +++ b/arrow/src/array/array_string.rs @@ -294,7 +294,7 @@ impl<'a, T: OffsetSizeTrait> GenericStringArray { impl fmt::Debug for GenericStringArray { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" }; + let prefix = OffsetSize::PREFIX; write!(f, "{}StringArray\n[\n", prefix)?; print_long_array(self, f, |array, index, f| { @@ -707,11 +707,9 @@ mod tests { let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap()); let null_buffer = Buffer::from_slice_ref(&[0b101]); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt8, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt8, false), + )); // [None, Some("Parquet")] let array_data = ArrayData::builder(data_type) @@ -754,11 +752,9 @@ mod tests { .unwrap(); let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap()); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt8, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt8, false), + )); // [None, Some(b"Parquet")] let array_data = ArrayData::builder(data_type) @@ -792,11 +788,9 @@ mod tests { .unwrap(); let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap()); - let data_type = if O::IS_LARGE { - DataType::LargeList - } else { - DataType::List - }(Box::new(Field::new("item", DataType::UInt16, false))); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::UInt16, false), + )); let array_data = ArrayData::builder(data_type) .len(2) diff --git a/arrow/src/array/builder/generic_list_builder.rs b/arrow/src/array/builder/generic_list_builder.rs index 911182f6571..686156df13b 100644 --- a/arrow/src/array/builder/generic_list_builder.rs +++ b/arrow/src/array/builder/generic_list_builder.rs @@ -22,7 +22,6 @@ use crate::array::ArrayData; use crate::array::ArrayRef; use crate::array::GenericListArray; use crate::array::OffsetSizeTrait; -use crate::datatypes::DataType; use crate::datatypes::Field; use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder}; @@ -135,11 +134,7 @@ where values_data.data_type().clone(), true, // TODO: find a consistent way of getting this )); - let data_type = if OffsetSize::IS_LARGE { - DataType::LargeList(field) - } else { - DataType::List(field) - }; + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(field); let array_data_builder = ArrayData::builder(data_type) .len(len) .add_buffer(offset_buffer) @@ -163,6 +158,7 @@ mod tests { use crate::array::builder::ListBuilder; use crate::array::{Array, Int32Array, Int32Builder}; use crate::buffer::Buffer; + use crate::datatypes::DataType; fn _test_generic_list_array_builder() { let values_builder = Int32Builder::new(10); diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index 564ef444a1d..409f2dd143e 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -313,11 +313,7 @@ fn preallocate_offset_and_binary_buffer( // offsets let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::()); // safety: `unsafe` code assumes that this buffer is initialized with one element - if Offset::IS_LARGE { - buffer.push(0i64); - } else { - buffer.push(0i32) - } + buffer.push(Offset::zero()); [ buffer, diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs index 29a90b65c23..974af9593e3 100644 --- a/arrow/src/compute/util.rs +++ b/arrow/src/compute/util.rs @@ -351,9 +351,7 @@ pub(super) mod tests { T: ArrowPrimitiveType, PrimitiveArray: From>>, { - use std::any::TypeId; - - let mut offset = vec![0]; + let mut offset = vec![S::zero()]; let mut values = vec![]; let list_len = data.len(); @@ -367,34 +365,18 @@ pub(super) mod tests { list_null_count += 1; bit_util::unset_bit(list_bitmap.as_slice_mut(), idx); } - offset.push(values.len() as i64); + offset.push(S::from_usize(values.len()).unwrap()); } let value_data = PrimitiveArray::::from(values).into_data(); - let (list_data_type, value_offsets) = if TypeId::of::() == TypeId::of::() - { - ( - DataType::List(Box::new(Field::new( - "item", - T::DATA_TYPE, - list_null_count == 0, - ))), - Buffer::from_slice_ref( - &offset.into_iter().map(|x| x as i32).collect::>(), - ), - ) - } else if TypeId::of::() == TypeId::of::() { - ( - DataType::LargeList(Box::new(Field::new( - "item", - T::DATA_TYPE, - list_null_count == 0, - ))), - Buffer::from_slice_ref(&offset), - ) - } else { - unreachable!() - }; + let (list_data_type, value_offsets) = ( + GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new(Field::new( + "item", + T::DATA_TYPE, + list_null_count == 0, + ))), + Buffer::from_slice_ref(&offset), + ); let list_data = ArrayData::builder(list_data_type) .len(list_len) diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs index 6c182cf1a8c..0716a49d634 100644 --- a/arrow/src/ffi.rs +++ b/arrow/src/ffi.rs @@ -1034,12 +1034,9 @@ mod tests { .collect::(); // Construct a list array from the above two - let list_data_type = match std::mem::size_of::() { - 4 => DataType::List(Box::new(Field::new("item", DataType::Int32, false))), - _ => { - DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false))) - } - }; + let list_data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(Box::new( + Field::new("item", DataType::Int32, false), + )); let list_data = ArrayData::builder(list_data_type) .len(3) diff --git a/parquet/src/arrow/array_reader/list_array.rs b/parquet/src/arrow/array_reader/list_array.rs index c245c61312f..94770794cb7 100644 --- a/parquet/src/arrow/array_reader/list_array.rs +++ b/parquet/src/arrow/array_reader/list_array.rs @@ -268,10 +268,7 @@ mod tests { item_nullable: bool, ) -> ArrowType { let field = Box::new(Field::new("item", data_type, item_nullable)); - match OffsetSize::IS_LARGE { - true => ArrowType::LargeList(field), - false => ArrowType::List(field), - } + GenericListArray::::DATA_TYPE_CONSTRUCTOR(field) } fn downcast(