Skip to content

Commit

Permalink
more const evaluations for list array (#2327)
Browse files Browse the repository at this point in the history
Signed-off-by: remzi <13716567376yh@gmail.com>
  • Loading branch information
HaoYang670 committed Aug 5, 2022
1 parent 3ed0e28 commit b1e2bd9
Show file tree
Hide file tree
Showing 8 changed files with 52 additions and 87 deletions.
26 changes: 10 additions & 16 deletions arrow/src/array/array_binary.rs
Expand Up @@ -236,7 +236,7 @@ impl<'a, T: OffsetSizeTrait> GenericBinaryArray<T> {

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericBinaryArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
let prefix = OffsetSize::PREFIX;

write!(f, "{}BinaryArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
Expand Down Expand Up @@ -608,11 +608,9 @@ mod tests {
.unwrap();
let binary_array1 = GenericBinaryArray::<O>::from(array_data1);

let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt8, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt8, false),
));

let array_data2 = ArrayData::builder(data_type)
.len(3)
Expand Down Expand Up @@ -660,11 +658,9 @@ mod tests {

let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
let null_buffer = Buffer::from_slice_ref(&[0b101]);
let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt8, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt8, false),
));

// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
Expand Down Expand Up @@ -707,11 +703,9 @@ mod tests {
.unwrap();

let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt8, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt8, false),
));

// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
Expand Down
21 changes: 15 additions & 6 deletions arrow/src/array/array_list.rs
Expand Up @@ -34,14 +34,17 @@ use crate::{
/// trait declaring an offset size, relevant for i32 vs i64 array types.
pub trait OffsetSizeTrait: ArrowNativeType + std::ops::AddAssign + Integer {
const IS_LARGE: bool;
const PREFIX: &'static str;
}

impl OffsetSizeTrait for i32 {
const IS_LARGE: bool = false;
const PREFIX: &'static str = "";
}

impl OffsetSizeTrait for i64 {
const IS_LARGE: bool = true;
const PREFIX: &'static str = "Large";
}

/// Generic struct for a variable-size list array.
Expand All @@ -57,6 +60,16 @@ pub struct GenericListArray<OffsetSize> {
}

impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
/// The data type constructor of list array.
/// The input is the schema of the child array and
/// the output is the [`DataType`], List or LargeList.
pub const DATA_TYPE_CONSTRUCTOR: fn(Box<Field>) -> DataType = if OffsetSize::IS_LARGE
{
DataType::LargeList
} else {
DataType::List
};

/// Returns a reference to the values of this list.
pub fn values(&self) -> ArrayRef {
self.values.clone()
Expand Down Expand Up @@ -170,11 +183,7 @@ impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> {
.collect();

let field = Box::new(Field::new("item", T::DATA_TYPE, true));
let data_type = if OffsetSize::IS_LARGE {
DataType::LargeList(field)
} else {
DataType::List(field)
};
let data_type = Self::DATA_TYPE_CONSTRUCTOR(field);
let array_data = ArrayData::builder(data_type)
.len(null_buf.len())
.add_buffer(offsets.into())
Expand Down Expand Up @@ -274,7 +283,7 @@ impl<'a, OffsetSize: OffsetSizeTrait> ArrayAccessor for &'a GenericListArray<Off

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericListArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
let prefix = OffsetSize::PREFIX;

write!(f, "{}ListArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
Expand Down
26 changes: 10 additions & 16 deletions arrow/src/array/array_string.rs
Expand Up @@ -294,7 +294,7 @@ impl<'a, T: OffsetSizeTrait> GenericStringArray<T> {

impl<OffsetSize: OffsetSizeTrait> fmt::Debug for GenericStringArray<OffsetSize> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let prefix = if OffsetSize::IS_LARGE { "Large" } else { "" };
let prefix = OffsetSize::PREFIX;

write!(f, "{}StringArray\n[\n", prefix)?;
print_long_array(self, f, |array, index, f| {
Expand Down Expand Up @@ -707,11 +707,9 @@ mod tests {

let offsets = [0, 5, 8, 15].map(|n| O::from_usize(n).unwrap());
let null_buffer = Buffer::from_slice_ref(&[0b101]);
let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt8, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt8, false),
));

// [None, Some("Parquet")]
let array_data = ArrayData::builder(data_type)
Expand Down Expand Up @@ -754,11 +752,9 @@ mod tests {
.unwrap();

let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt8, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt8, false),
));

// [None, Some(b"Parquet")]
let array_data = ArrayData::builder(data_type)
Expand Down Expand Up @@ -792,11 +788,9 @@ mod tests {
.unwrap();

let offsets = [0, 2, 3].map(|n| O::from_usize(n).unwrap());
let data_type = if O::IS_LARGE {
DataType::LargeList
} else {
DataType::List
}(Box::new(Field::new("item", DataType::UInt16, false)));
let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::UInt16, false),
));

let array_data = ArrayData::builder(data_type)
.len(2)
Expand Down
8 changes: 2 additions & 6 deletions arrow/src/array/builder/generic_list_builder.rs
Expand Up @@ -22,7 +22,6 @@ use crate::array::ArrayData;
use crate::array::ArrayRef;
use crate::array::GenericListArray;
use crate::array::OffsetSizeTrait;
use crate::datatypes::DataType;
use crate::datatypes::Field;

use super::{ArrayBuilder, BufferBuilder, NullBufferBuilder};
Expand Down Expand Up @@ -135,11 +134,7 @@ where
values_data.data_type().clone(),
true, // TODO: find a consistent way of getting this
));
let data_type = if OffsetSize::IS_LARGE {
DataType::LargeList(field)
} else {
DataType::List(field)
};
let data_type = GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field);
let array_data_builder = ArrayData::builder(data_type)
.len(len)
.add_buffer(offset_buffer)
Expand All @@ -163,6 +158,7 @@ mod tests {
use crate::array::builder::ListBuilder;
use crate::array::{Array, Int32Array, Int32Builder};
use crate::buffer::Buffer;
use crate::datatypes::DataType;

fn _test_generic_list_array_builder<O: OffsetSizeTrait>() {
let values_builder = Int32Builder::new(10);
Expand Down
6 changes: 1 addition & 5 deletions arrow/src/array/transform/mod.rs
Expand Up @@ -313,11 +313,7 @@ fn preallocate_offset_and_binary_buffer<Offset: OffsetSizeTrait>(
// offsets
let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<Offset>());
// safety: `unsafe` code assumes that this buffer is initialized with one element
if Offset::IS_LARGE {
buffer.push(0i64);
} else {
buffer.push(0i32)
}
buffer.push(Offset::zero());

[
buffer,
Expand Down
38 changes: 10 additions & 28 deletions arrow/src/compute/util.rs
Expand Up @@ -351,9 +351,7 @@ pub(super) mod tests {
T: ArrowPrimitiveType,
PrimitiveArray<T>: From<Vec<Option<T::Native>>>,
{
use std::any::TypeId;

let mut offset = vec![0];
let mut offset = vec![S::zero()];
let mut values = vec![];

let list_len = data.len();
Expand All @@ -367,34 +365,18 @@ pub(super) mod tests {
list_null_count += 1;
bit_util::unset_bit(list_bitmap.as_slice_mut(), idx);
}
offset.push(values.len() as i64);
offset.push(S::from_usize(values.len()).unwrap());
}

let value_data = PrimitiveArray::<T>::from(values).into_data();
let (list_data_type, value_offsets) = if TypeId::of::<S>() == TypeId::of::<i32>()
{
(
DataType::List(Box::new(Field::new(
"item",
T::DATA_TYPE,
list_null_count == 0,
))),
Buffer::from_slice_ref(
&offset.into_iter().map(|x| x as i32).collect::<Vec<i32>>(),
),
)
} else if TypeId::of::<S>() == TypeId::of::<i64>() {
(
DataType::LargeList(Box::new(Field::new(
"item",
T::DATA_TYPE,
list_null_count == 0,
))),
Buffer::from_slice_ref(&offset),
)
} else {
unreachable!()
};
let (list_data_type, value_offsets) = (
GenericListArray::<S>::DATA_TYPE_CONSTRUCTOR(Box::new(Field::new(
"item",
T::DATA_TYPE,
list_null_count == 0,
))),
Buffer::from_slice_ref(&offset),
);

let list_data = ArrayData::builder(list_data_type)
.len(list_len)
Expand Down
9 changes: 3 additions & 6 deletions arrow/src/ffi.rs
Expand Up @@ -1034,12 +1034,9 @@ mod tests {
.collect::<Buffer>();

// Construct a list array from the above two
let list_data_type = match std::mem::size_of::<Offset>() {
4 => DataType::List(Box::new(Field::new("item", DataType::Int32, false))),
_ => {
DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)))
}
};
let list_data_type = GenericListArray::<Offset>::DATA_TYPE_CONSTRUCTOR(Box::new(
Field::new("item", DataType::Int32, false),
));

let list_data = ArrayData::builder(list_data_type)
.len(3)
Expand Down
5 changes: 1 addition & 4 deletions parquet/src/arrow/array_reader/list_array.rs
Expand Up @@ -268,10 +268,7 @@ mod tests {
item_nullable: bool,
) -> ArrowType {
let field = Box::new(Field::new("item", data_type, item_nullable));
match OffsetSize::IS_LARGE {
true => ArrowType::LargeList(field),
false => ArrowType::List(field),
}
GenericListArray::<OffsetSize>::DATA_TYPE_CONSTRUCTOR(field)
}

fn downcast<OffsetSize: OffsetSizeTrait>(
Expand Down

0 comments on commit b1e2bd9

Please sign in to comment.