Skip to content

Commit

Permalink
Use FixedSizeListArray::new in FixedSizeListBuilder (#5612)
Browse files Browse the repository at this point in the history
* Use FixedSizeListArray::new in FixedSizeListBuilder

* Fix handling of entirely null zero-sized list array (#5614)

* Fix
  • Loading branch information
tustvold committed Apr 9, 2024
1 parent 12c0d00 commit 755616f
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 106 deletions.
37 changes: 26 additions & 11 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,16 +152,22 @@ impl FixedSizeListArray {
ArrowError::InvalidArgumentError(format!("Size cannot be negative, got {}", size))
})?;

let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
let len = match s {
0 => nulls.as_ref().map(|x| x.len()).unwrap_or_default(),
_ => {
let len = values.len() / s.max(1);
if let Some(n) = nulls.as_ref() {
if n.len() != len {
return Err(ArrowError::InvalidArgumentError(format!(
"Incorrect length of null buffer for FixedSizeListArray, expected {} got {}",
len,
n.len(),
)));
}
}
len
}
}
};

if field.data_type() != values.data_type() {
return Err(ArrowError::InvalidArgumentError(format!(
Expand Down Expand Up @@ -460,7 +466,7 @@ mod tests {

use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::Int32Array;
use crate::{new_empty_array, Int32Array};

use super::*;

Expand Down Expand Up @@ -656,7 +662,7 @@ mod tests {
);

let list = FixedSizeListArray::new(field.clone(), 0, values.clone(), None);
assert_eq!(list.len(), 6);
assert_eq!(list.len(), 0);

let nulls = NullBuffer::new_null(2);
let err = FixedSizeListArray::try_new(field, 2, values.clone(), Some(nulls)).unwrap_err();
Expand All @@ -674,4 +680,13 @@ mod tests {
let err = FixedSizeListArray::try_new(field, 2, values, None).unwrap_err();
assert_eq!(err.to_string(), "Invalid argument error: FixedSizeListArray expected data type Int64 got Int32 for \"item\"");
}

#[test]
fn empty_fixed_size_list() {
let field = Arc::new(Field::new("item", DataType::Int32, true));
let nulls = NullBuffer::new_null(2);
let values = new_empty_array(&DataType::Int32);
let list = FixedSizeListArray::new(field.clone(), 0, values, Some(nulls));
assert_eq!(list.len(), 2);
}
}
121 changes: 26 additions & 95 deletions arrow-array/src/builder/fixed_size_list_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@
use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field, FieldRef};
use arrow_schema::{Field, FieldRef};
use std::any::Any;
use std::sync::Arc;

Expand Down Expand Up @@ -94,9 +93,9 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
}
}

/// Override the field passed to [`ArrayData::builder`]
/// Override the field passed to [`FixedSizeListArray::new`]
///
/// By default a nullable field is created with the name `item`
/// By default, a nullable field is created with the name `item`
///
/// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
/// field's data type does not match that of `T`
Expand Down Expand Up @@ -169,116 +168,52 @@ where
/// Builds the [`FixedSizeListBuilder`] and reset this builder.
pub fn finish(&mut self) -> FixedSizeListArray {
let len = self.len();
let values_arr = self.values_builder.finish();
let values_data = values_arr.to_data();
let values = self.values_builder.finish();
let nulls = self.null_buffer_builder.finish();

assert_eq!(
values_data.len(), len * self.list_len as usize,
values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
values_data.len(),
values.len(),
self.list_len,
len,
);

let nulls = self.null_buffer_builder.finish();
let field = self
.field
.clone()
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));

let field = match &self.field {
Some(f) => {
let size = self.value_length();
assert_eq!(
f.data_type(),
values_data.data_type(),
"DataType of field ({}) should be the same as the values_builder DataType ({})",
f.data_type(),
values_data.data_type()
);

if let Some(a) = values_arr.logical_nulls() {
let nulls_valid = f.is_nullable()
|| nulls
.as_ref()
.map(|n| n.expand(size as _).contains(&a))
.unwrap_or_default();

assert!(
nulls_valid,
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
f.name()
);
}
f.clone()
}
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
};

let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let array_data = unsafe { array_data.build_unchecked() };

FixedSizeListArray::from(array_data)
FixedSizeListArray::new(field, self.list_len, values, nulls)
}

/// Builds the [`FixedSizeListBuilder`] without resetting the builder.
pub fn finish_cloned(&self) -> FixedSizeListArray {
let len = self.len();
let values_arr = self.values_builder.finish_cloned();
let values_data = values_arr.to_data();
let values = self.values_builder.finish_cloned();
let nulls = self.null_buffer_builder.finish_cloned();

assert_eq!(
values_data.len(), len * self.list_len as usize,
values.len(), len * self.list_len as usize,
"Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
values_data.len(),
values.len(),
self.list_len,
len,
);

let nulls = self.null_buffer_builder.finish_cloned();
let field = self
.field
.clone()
.unwrap_or_else(|| Arc::new(Field::new("item", values.data_type().clone(), true)));

let field = match &self.field {
Some(f) => {
let size = self.value_length();
assert_eq!(
f.data_type(),
values_data.data_type(),
"DataType of field ({}) should be the same as the values_builder DataType ({})",
f.data_type(),
values_data.data_type()
);
if let Some(a) = values_arr.logical_nulls() {
let nulls_valid = f.is_nullable()
|| nulls
.as_ref()
.map(|n| n.expand(size as _).contains(&a))
.unwrap_or_default();

assert!(
nulls_valid,
"Found unmasked nulls for non-nullable FixedSizeListBuilder field {:?}",
f.name()
);
}
f.clone()
}
None => Arc::new(Field::new("item", values_data.data_type().clone(), true)),
};

let array_data = ArrayData::builder(DataType::FixedSizeList(field, self.list_len))
.len(len)
.add_child_data(values_data)
.nulls(nulls);

let array_data = unsafe { array_data.build_unchecked() };

FixedSizeListArray::from(array_data)
FixedSizeListArray::new(field, self.list_len, values, nulls)
}
}

#[cfg(test)]
mod tests {
use super::*;
use arrow_schema::DataType;

use crate::builder::Int32Builder;
use crate::Array;
Expand Down Expand Up @@ -368,7 +303,7 @@ mod tests {
}

#[test]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
fn test_fixed_size_list_array_builder_with_field_null_panic() {
let builder = make_list_builder(true, true);
let mut builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
Expand All @@ -377,9 +312,7 @@ mod tests {
}

#[test]
#[should_panic(
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
)]
#[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
fn test_fixed_size_list_array_builder_with_field_type_panic() {
let values_builder = Int32Builder::new();
let builder = FixedSizeListBuilder::new(values_builder, 3);
Expand Down Expand Up @@ -417,7 +350,7 @@ mod tests {
}

#[test]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListBuilder field")]
#[should_panic(expected = "Found unmasked nulls for non-nullable FixedSizeListArray")]
fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
let builder = make_list_builder(true, true);
let builder = builder.with_field(Field::new("list_item", DataType::Int32, false));
Expand All @@ -439,9 +372,7 @@ mod tests {
}

#[test]
#[should_panic(
expected = "DataType of field (Int64) should be the same as the values_builder DataType (Int32)"
)]
#[should_panic(expected = "FixedSizeListArray expected data type Int64 got Int32")]
fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
let builder = make_list_builder(false, false);
let builder = builder.with_field(Field::new("list_item", DataType::Int64, true));
Expand Down

0 comments on commit 755616f

Please sign in to comment.