diff --git a/arrow-pyarrow-integration-testing/src/c_stream.rs b/arrow-pyarrow-integration-testing/src/c_stream.rs index 4f10f5cb4c5..1c512040880 100644 --- a/arrow-pyarrow-integration-testing/src/c_stream.rs +++ b/arrow-pyarrow-integration-testing/src/c_stream.rs @@ -32,7 +32,7 @@ pub fn to_rust_iterator(ob: PyObject, py: Python) -> PyResult> { pub fn from_rust_iterator(py: Python) -> PyResult { // initialize an array let array = Int32Array::from(&[Some(2), None, Some(1), None]); - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(vec![Field::new("a", array.data_type().clone(), true)]), vec![array.boxed()], None, diff --git a/benches/filter_kernels.rs b/benches/filter_kernels.rs index 921950595f6..3d61a0cbb03 100644 --- a/benches/filter_kernels.rs +++ b/benches/filter_kernels.rs @@ -37,7 +37,7 @@ fn add_benchmark(c: &mut Criterion) { let filter_array = create_boolean_array(size, 0.0, 0.9); let filter_array = - BooleanArray::from_data(DataType::Boolean, filter_array.values().clone(), None); + BooleanArray::new(DataType::Boolean, filter_array.values().clone(), None); let arr_a = create_primitive_array::(size, 0.0); c.bench_function(&format!("filter 2^{} f32", log2_size), |b| { diff --git a/benches/iter_list.rs b/benches/iter_list.rs index f77c9536e6d..44482567dde 100644 --- a/benches/iter_list.rs +++ b/benches/iter_list.rs @@ -14,7 +14,7 @@ fn add_benchmark(c: &mut Criterion) { let size = 2usize.pow(log2_size); let values = Buffer::from_iter(0..size as i32); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let offsets = (0..=size as i32).step_by(2).collect::>(); @@ -23,7 +23,7 @@ fn add_benchmark(c: &mut Criterion) { .collect::(); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, offsets.try_into().unwrap(), Box::new(values), diff --git a/guide/src/high_level.md b/guide/src/high_level.md index 78d71215db4..008bdd83dbe 100644 --- a/guide/src/high_level.md +++ b/guide/src/high_level.md @@ -253,7 +253,7 @@ where let values = array.values().iter().map(|v| op(*v)).collect::>(); // create the new array, cloning its validity - PrimitiveArray::::from_data(data_type.clone(), values.into(), array.validity().cloned()) + PrimitiveArray::::new(data_type.clone(), values.into(), array.validity().cloned()) } ``` diff --git a/src/array/binary/mod.rs b/src/array/binary/mod.rs index 1a5abdcc330..0c26d6e4c5c 100644 --- a/src/array/binary/mod.rs +++ b/src/array/binary/mod.rs @@ -6,7 +6,7 @@ use crate::{ buffer::Buffer, datatypes::DataType, error::Error, - offset::{Offset, OffsetsBuffer}, + offset::{Offset, Offsets, OffsetsBuffer}, trusted_len::TrustedLen, }; @@ -273,12 +273,15 @@ impl BinaryArray { mutable_values.into(), Some(mutable_bitmap.into()), )), - (Some(values), Some(offsets)) => Right(MutableBinaryArray::from_data( - self.data_type, - offsets, - values, - Some(mutable_bitmap), - )), + (Some(values), Some(offsets)) => Right( + MutableBinaryArray::try_new( + self.data_type, + offsets, + values, + Some(mutable_bitmap), + ) + .unwrap(), + ), }, } } else { @@ -304,12 +307,9 @@ impl BinaryArray { values.into(), None, )), - (Some(values), Some(offsets)) => Right(MutableBinaryArray::from_data( - self.data_type, - offsets, - values, - None, - )), + (Some(values), Some(offsets)) => Right( + MutableBinaryArray::try_new(self.data_type, offsets, values, None).unwrap(), + ), } } } @@ -324,7 +324,7 @@ impl BinaryArray { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - vec![O::default(); 1 + length].try_into().unwrap(), + Offsets::new_zeroed(length).into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) @@ -413,16 +413,6 @@ impl BinaryArray { // soundness: I: TrustedLen unsafe { Self::try_from_trusted_len_iter_unchecked(iter) } } - - /// Alias for `new` - pub fn from_data( - data_type: DataType, - offsets: OffsetsBuffer, - values: Buffer, - validity: Option, - ) -> Self { - Self::new(data_type, offsets, values, validity) - } } impl Array for BinaryArray { diff --git a/src/array/binary/mutable.rs b/src/array/binary/mutable.rs index 0f8655e33bf..a4b6e2abddc 100644 --- a/src/array/binary/mutable.rs +++ b/src/array/binary/mutable.rs @@ -160,16 +160,6 @@ impl MutableBinaryArray { validity.shrink_to_fit() } } - - /// Equivalent to `Self::try_new(...).unwrap()` - pub fn from_data( - data_type: DataType, - offsets: Offsets, - values: Vec, - validity: Option, - ) -> Self { - Self::try_new(data_type, offsets, values, validity).unwrap() - } } impl MutableBinaryArray { @@ -204,25 +194,13 @@ impl MutableArray for MutableBinaryArray { } fn as_box(&mut self) -> Box { - let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner(); - BinaryArray::new( - data_type, - offsets.into(), - values.into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - .boxed() + let array: BinaryArray = std::mem::take(self).into(); + array.boxed() } fn as_arc(&mut self) -> Arc { - let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner(); - BinaryArray::new( - data_type, - offsets.into(), - values.into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - .arced() + let array: BinaryArray = std::mem::take(self).into(); + array.arced() } fn data_type(&self) -> &DataType { @@ -270,7 +248,7 @@ impl MutableBinaryArray { { let (validity, offsets, values) = trusted_len_unzip(iterator); - Self::from_data(Self::default_data_type(), offsets, values, validity) + Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap() } /// Creates a [`MutableBinaryArray`] from an iterator of trusted length. @@ -293,7 +271,7 @@ impl MutableBinaryArray { iterator: I, ) -> Self { let (offsets, values) = trusted_len_values_iter(iterator); - Self::from_data(Self::default_data_type(), offsets, values, None) + Self::try_new(Self::default_data_type(), offsets, values, None).unwrap() } /// Creates a new [`BinaryArray`] from a [`TrustedLen`] of `&[u8]`. @@ -326,12 +304,7 @@ impl MutableBinaryArray { validity = None; } - Ok(Self::from_data( - Self::default_data_type(), - offsets, - values, - validity, - )) + Ok(Self::try_new(Self::default_data_type(), offsets, values, validity).unwrap()) } /// Creates a [`MutableBinaryArray`] from an falible iterator of trusted length. @@ -427,7 +400,7 @@ impl MutableBinaryArray { /// Creates a new [`MutableBinaryArray`] from a [`Iterator`] of `&[u8]`. pub fn from_iter_values, I: Iterator>(iterator: I) -> Self { let (offsets, values) = values_iter(iterator); - Self::from_data(Self::default_data_type(), offsets, values, None) + Self::try_new(Self::default_data_type(), offsets, values, None).unwrap() } } diff --git a/src/array/binary/mutable_values.rs b/src/array/binary/mutable_values.rs index 56179456112..59f42b238fe 100644 --- a/src/array/binary/mutable_values.rs +++ b/src/array/binary/mutable_values.rs @@ -37,7 +37,8 @@ impl From> for BinaryArray { impl From> for MutableBinaryArray { fn from(other: MutableBinaryValuesArray) -> Self { - MutableBinaryArray::::from_data(other.data_type, other.offsets, other.values, None) + MutableBinaryArray::::try_new(other.data_type, other.offsets, other.values, None) + .expect("MutableBinaryValuesArray is consistent with MutableBinaryArray") } } diff --git a/src/array/boolean/mod.rs b/src/array/boolean/mod.rs index 1d364cfe567..01e12f83ca8 100644 --- a/src/array/boolean/mod.rs +++ b/src/array/boolean/mod.rs @@ -85,6 +85,11 @@ impl BooleanArray { }) } + /// Alias to `Self::try_new().unwrap()` + pub fn new(data_type: DataType, values: Bitmap, validity: Option) -> Self { + Self::try_new(data_type, values, validity).unwrap() + } + /// Returns an iterator over the optional values of this [`BooleanArray`]. #[inline] pub fn iter(&self) -> ZipValidity { @@ -246,21 +251,18 @@ impl BooleanArray { immutable, Some(mutable_bitmap.into()), )), - Right(mutable) => Right(MutableBooleanArray::from_data( - self.data_type, - mutable, - Some(mutable_bitmap), - )), + Right(mutable) => Right( + MutableBooleanArray::try_new(self.data_type, mutable, Some(mutable_bitmap)) + .unwrap(), + ), }, } } else { match self.values.into_mut() { Left(immutable) => Left(BooleanArray::new(self.data_type, immutable, None)), - Right(mutable) => Right(MutableBooleanArray::from_data( - self.data_type, - mutable, - None, - )), + Right(mutable) => { + Right(MutableBooleanArray::try_new(self.data_type, mutable, None).unwrap()) + } } } } @@ -369,20 +371,6 @@ impl BooleanArray { } = self; (data_type, values, validity) } - - /// The canonical method to create a [`BooleanArray`] - /// # Panics - /// This function errors iff: - /// * The validity is not `None` and its length is different from `values`'s length - /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. - pub fn new(data_type: DataType, values: Bitmap, validity: Option) -> Self { - Self::try_new(data_type, values, validity).unwrap() - } - - /// Alias for `new` - pub fn from_data(data_type: DataType, values: Bitmap, validity: Option) -> Self { - Self::new(data_type, values, validity) - } } impl Array for BooleanArray { diff --git a/src/array/boolean/mutable.rs b/src/array/boolean/mutable.rs index f6336cda3a4..4a4ed3f9cee 100644 --- a/src/array/boolean/mutable.rs +++ b/src/array/boolean/mutable.rs @@ -8,7 +8,7 @@ use crate::{ }, bitmap::MutableBitmap, datatypes::{DataType, PhysicalType}, - error::Result, + error::Error, trusted_len::TrustedLen, }; @@ -54,6 +54,38 @@ impl MutableBooleanArray { Self::with_capacity(0) } + /// The canonical method to create a [`MutableBooleanArray`] out of low-end APIs. + /// # Errors + /// This function errors iff: + /// * The validity is not `None` and its length is different from `values`'s length + /// * The `data_type`'s [`PhysicalType`] is not equal to [`PhysicalType::Boolean`]. + pub fn try_new( + data_type: DataType, + values: MutableBitmap, + validity: Option, + ) -> Result { + if validity + .as_ref() + .map_or(false, |validity| validity.len() != values.len()) + { + return Err(Error::oos( + "validity mask length must match the number of values", + )); + } + + if data_type.to_physical_type() != PhysicalType::Boolean { + return Err(Error::oos( + "MutableBooleanArray can only be initialized with a DataType whose physical type is Boolean", + )); + } + + Ok(Self { + data_type, + values, + validity, + }) + } + /// Creates an new [`MutableBooleanArray`] with a capacity of values. pub fn with_capacity(capacity: usize) -> Self { Self { @@ -71,22 +103,6 @@ impl MutableBooleanArray { } } - /// Canonical method to create a new [`MutableBooleanArray`]. - pub fn from_data( - data_type: DataType, - values: MutableBitmap, - validity: Option, - ) -> Self { - if data_type.to_physical_type() != PhysicalType::Boolean { - panic!("MutableBooleanArray can only be initialized with DataType::Boolean") - } - Self { - data_type, - values, - validity, - } - } - /// Pushes a new entry to [`MutableBooleanArray`]. pub fn push(&mut self, value: Option) { match value { @@ -232,11 +248,12 @@ impl MutableBooleanArray { /// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`. #[inline] pub fn from_trusted_len_values_iter>(iterator: I) -> Self { - Self::from_data( + Self::try_new( DataType::Boolean, MutableBitmap::from_trusted_len_iter(iterator), None, ) + .unwrap() } /// Creates a new [`MutableBooleanArray`] from an [`TrustedLen`] of `bool`. @@ -251,7 +268,7 @@ impl MutableBooleanArray { ) -> Self { let mut mutable = MutableBitmap::new(); mutable.extend_from_trusted_len_iter_unchecked(iterator); - MutableBooleanArray::from_data(DataType::Boolean, mutable, None) + MutableBooleanArray::try_new(DataType::Boolean, mutable, None).unwrap() } /// Creates a new [`MutableBooleanArray`] from a slice of `bool`. @@ -274,7 +291,7 @@ impl MutableBooleanArray { { let (validity, values) = trusted_len_unzip(iterator); - Self::from_data(DataType::Boolean, values, validity) + Self::try_new(DataType::Boolean, values, validity).unwrap() } /// Creates a [`BooleanArray`] from a [`TrustedLen`]. @@ -308,7 +325,7 @@ impl MutableBooleanArray { None }; - Ok(Self::from_data(DataType::Boolean, values, validity)) + Ok(Self::try_new(DataType::Boolean, values, validity).unwrap()) } /// Creates a [`BooleanArray`] from a [`TrustedLen`]. @@ -452,7 +469,13 @@ impl>> FromIterator for MutableBoolea }) .collect(); - MutableBooleanArray::from_data(DataType::Boolean, values, validity.into()) + let validity = if validity.unset_bits() > 0 { + Some(validity) + } else { + None + }; + + MutableBooleanArray::try_new(DataType::Boolean, values, validity).unwrap() } } @@ -466,19 +489,13 @@ impl MutableArray for MutableBooleanArray { } fn as_box(&mut self) -> Box { - Box::new(BooleanArray::from_data( - self.data_type.clone(), - std::mem::take(&mut self.values).into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + let array: BooleanArray = std::mem::take(self).into(); + array.boxed() } fn as_arc(&mut self) -> Arc { - Arc::new(BooleanArray::from_data( - self.data_type.clone(), - std::mem::take(&mut self.values).into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - )) + let array: BooleanArray = std::mem::take(self).into(); + array.arced() } fn data_type(&self) -> &DataType { @@ -517,7 +534,7 @@ impl Extend> for MutableBooleanArray { impl TryExtend> for MutableBooleanArray { /// This is infalible and is implemented for consistency with all other types - fn try_extend>>(&mut self, iter: I) -> Result<()> { + fn try_extend>>(&mut self, iter: I) -> Result<(), Error> { self.extend(iter); Ok(()) } @@ -525,7 +542,7 @@ impl TryExtend> for MutableBooleanArray { impl TryPush> for MutableBooleanArray { /// This is infalible and is implemented for consistency with all other types - fn try_push(&mut self, item: Option) -> Result<()> { + fn try_push(&mut self, item: Option) -> Result<(), Error> { self.push(item); Ok(()) } @@ -538,7 +555,7 @@ impl PartialEq for MutableBooleanArray { } impl TryExtendFromSelf for MutableBooleanArray { - fn try_extend_from_self(&mut self, other: &Self) -> Result<()> { + fn try_extend_from_self(&mut self, other: &Self) -> Result<(), Error> { extend_validity(self.len(), &mut self.validity, &other.validity); let slice = other.values.as_slice(); diff --git a/src/array/fixed_size_binary/mod.rs b/src/array/fixed_size_binary/mod.rs index 441b58d2fcc..8eea1c6c191 100644 --- a/src/array/fixed_size_binary/mod.rs +++ b/src/array/fixed_size_binary/mod.rs @@ -69,11 +69,6 @@ impl FixedSizeBinaryArray { Self::try_new(data_type, values, validity).unwrap() } - /// Alias for `new` - pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { - Self::new(data_type, values, validity) - } - /// Returns a new empty [`FixedSizeBinaryArray`]. pub fn new_empty(data_type: DataType) -> Self { Self::new(data_type, Buffer::new(), None) diff --git a/src/array/fixed_size_binary/mutable.rs b/src/array/fixed_size_binary/mutable.rs index 9e3cce628e3..b6d60bade58 100644 --- a/src/array/fixed_size_binary/mutable.rs +++ b/src/array/fixed_size_binary/mutable.rs @@ -4,7 +4,7 @@ use crate::{ array::{physical_binary::extend_validity, Array, MutableArray, TryExtendFromSelf}, bitmap::MutableBitmap, datatypes::DataType, - error::{Error, Result}, + error::Error, }; use super::{FixedSizeBinaryArray, FixedSizeBinaryValues}; @@ -32,31 +32,44 @@ impl From for FixedSizeBinaryArray { } impl MutableFixedSizeBinaryArray { - /// Canonical method to create a new [`MutableFixedSizeBinaryArray`]. - pub fn from_data( + /// Creates a new [`MutableFixedSizeBinaryArray`]. + /// + /// # Errors + /// This function returns an error iff: + /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`] + /// * The length of `values` is not a multiple of `size` in `data_type` + /// * the validity's length is not equal to `values.len() / size`. + pub fn try_new( data_type: DataType, values: Vec, validity: Option, - ) -> Self { - let size = FixedSizeBinaryArray::get_size(&data_type); - assert_eq!( - values.len() % size, - 0, - "The len of values must be a multiple of size" - ); - if let Some(validity) = &validity { - assert_eq!( - validity.len(), - values.len() / size, - "The len of the validity must be equal to values / size" - ); + ) -> Result { + let size = FixedSizeBinaryArray::maybe_get_size(&data_type)?; + + if values.len() % size != 0 { + return Err(Error::oos(format!( + "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.", + values.len(), + size + ))); } - Self { - data_type, + let len = values.len() / size; + + if validity + .as_ref() + .map_or(false, |validity| validity.len() != len) + { + return Err(Error::oos( + "validity mask length must be equal to the number of values divided by size", + )); + } + + Ok(Self { size, + data_type, values, validity, - } + }) } /// Creates a new empty [`MutableFixedSizeBinaryArray`]. @@ -66,11 +79,12 @@ impl MutableFixedSizeBinaryArray { /// Creates a new [`MutableFixedSizeBinaryArray`] with capacity for `capacity` entries. pub fn with_capacity(size: usize, capacity: usize) -> Self { - Self::from_data( + Self::try_new( DataType::FixedSizeBinary(size), Vec::::with_capacity(capacity * size), None, ) + .unwrap() } /// Creates a new [`MutableFixedSizeBinaryArray`] from a slice of optional `[u8]`. @@ -87,14 +101,14 @@ impl MutableFixedSizeBinaryArray { .iter() .map(|x| x.is_some()) .collect::(); - Self::from_data(DataType::FixedSizeBinary(N), values, validity.into()) + Self::try_new(DataType::FixedSizeBinary(N), values, validity.into()).unwrap() } /// tries to push a new entry to [`MutableFixedSizeBinaryArray`]. /// # Error /// Errors iff the size of `value` is not equal to its own size. #[inline] - pub fn try_push>(&mut self, value: Option

) -> Result<()> { + pub fn try_push>(&mut self, value: Option

) -> Result<(), Error> { match value { Some(bytes) => { let bytes = bytes.as_ref(); @@ -156,7 +170,7 @@ impl MutableFixedSizeBinaryArray { pub fn try_from_iter, I: IntoIterator>>( iter: I, size: usize, - ) -> Result { + ) -> Result { let iterator = iter.into_iter(); let (lower, _) = iterator.size_hint(); let mut primitive = Self::with_capacity(size, lower); @@ -238,19 +252,21 @@ impl MutableArray for MutableFixedSizeBinaryArray { } fn as_box(&mut self) -> Box { - Box::new(FixedSizeBinaryArray::new( + FixedSizeBinaryArray::new( DataType::FixedSizeBinary(self.size), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .boxed() } fn as_arc(&mut self) -> Arc { - Arc::new(FixedSizeBinaryArray::new( + FixedSizeBinaryArray::new( DataType::FixedSizeBinary(self.size), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .arced() } fn data_type(&self) -> &DataType { @@ -297,7 +313,7 @@ impl PartialEq for MutableFixedSizeBinaryArray { } impl TryExtendFromSelf for MutableFixedSizeBinaryArray { - fn try_extend_from_self(&mut self, other: &Self) -> Result<()> { + fn try_extend_from_self(&mut self, other: &Self) -> Result<(), Error> { extend_validity(self.len(), &mut self.validity, &other.validity); let slice = other.values.as_slice(); diff --git a/src/array/fixed_size_list/mod.rs b/src/array/fixed_size_list/mod.rs index c603f6bf6aa..298f11716dc 100644 --- a/src/array/fixed_size_list/mod.rs +++ b/src/array/fixed_size_list/mod.rs @@ -74,13 +74,7 @@ impl FixedSizeListArray { }) } - /// Creates a new [`FixedSizeListArray`]. - /// # Panics - /// This function panics iff: - /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`] - /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. - /// * The length of `values` is not a multiple of `size` in `data_type` - /// * the validity's length is not equal to `values.len() / size`. + /// Alias to `Self::try_new(...).unwrap()` pub fn new(data_type: DataType, values: Box, validity: Option) -> Self { Self::try_new(data_type, values, validity).unwrap() } @@ -90,15 +84,6 @@ impl FixedSizeListArray { self.size } - /// Alias for `new` - pub fn from_data( - data_type: DataType, - values: Box, - validity: Option, - ) -> Self { - Self::new(data_type, values, validity) - } - /// Returns a new empty [`FixedSizeListArray`]. pub fn new_empty(data_type: DataType) -> Self { let values = new_empty_array(Self::get_child_and_size(&data_type).0.data_type().clone()); diff --git a/src/array/fixed_size_list/mutable.rs b/src/array/fixed_size_list/mutable.rs index 17d401b7bab..0c6c09fe3d7 100644 --- a/src/array/fixed_size_list/mutable.rs +++ b/src/array/fixed_size_list/mutable.rs @@ -140,19 +140,21 @@ impl MutableArray for MutableFixedSizeListArray { } fn as_box(&mut self) -> Box { - Box::new(FixedSizeListArray::new( + FixedSizeListArray::new( self.data_type.clone(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .boxed() } fn as_arc(&mut self) -> Arc { - Arc::new(FixedSizeListArray::new( + FixedSizeListArray::new( self.data_type.clone(), self.values.as_box(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .arced() } fn data_type(&self) -> &DataType { diff --git a/src/array/growable/binary.rs b/src/array/growable/binary.rs index aebfb1580cd..5b43c9fcdf4 100644 --- a/src/array/growable/binary.rs +++ b/src/array/growable/binary.rs @@ -82,11 +82,11 @@ impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { } fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) + self.to().arced() } fn as_box(&mut self) -> Box { - Box::new(self.to()) + self.to().boxed() } } diff --git a/src/array/list/mod.rs b/src/array/list/mod.rs index 7c7e96c9c04..9cb1157978e 100644 --- a/src/array/list/mod.rs +++ b/src/array/list/mod.rs @@ -2,7 +2,7 @@ use crate::{ bitmap::Bitmap, datatypes::{DataType, Field}, error::Error, - offset::{Offset, OffsetsBuffer}, + offset::{Offset, Offsets, OffsetsBuffer}, }; use std::sync::Arc; @@ -87,16 +87,6 @@ impl ListArray { Self::try_new(data_type, offsets, values, validity).unwrap() } - /// Alias of `new` - pub fn from_data( - data_type: DataType, - offsets: OffsetsBuffer, - values: Box, - validity: Option, - ) -> Self { - Self::new(data_type, offsets, values, validity) - } - /// Returns a new empty [`ListArray`]. pub fn new_empty(data_type: DataType) -> Self { let values = new_empty_array(Self::get_child_type(&data_type).clone()); @@ -109,7 +99,7 @@ impl ListArray { let child = Self::get_child_type(&data_type).clone(); Self::new( data_type, - vec![O::zero(); 1 + length].try_into().unwrap(), + Offsets::new_zeroed(length).into(), new_empty_array(child), Some(Bitmap::new_zeroed(length)), ) diff --git a/src/array/map/mod.rs b/src/array/map/mod.rs index fe8adb1deab..92e4ef90315 100644 --- a/src/array/map/mod.rs +++ b/src/array/map/mod.rs @@ -88,16 +88,6 @@ impl MapArray { Self::try_new(data_type, offsets, field, validity).unwrap() } - /// Alias for `new` - pub fn from_data( - data_type: DataType, - offsets: OffsetsBuffer, - field: Box, - validity: Option, - ) -> Self { - Self::new(data_type, offsets, field, validity) - } - /// Returns a new null [`MapArray`] of `length`. pub fn new_null(data_type: DataType, length: usize) -> Self { let field = new_empty_array(Self::get_field(&data_type).data_type().clone()); diff --git a/src/array/null.rs b/src/array/null.rs index 041b59ada1d..75bd6b6dcbe 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -37,11 +37,6 @@ impl NullArray { Self::try_new(data_type, length).unwrap() } - /// Alias for `new` - pub fn from_data(data_type: DataType, length: usize) -> Self { - Self::new(data_type, length) - } - /// Returns a new empty [`NullArray`]. pub fn new_empty(data_type: DataType) -> Self { Self::new(data_type, 0) diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 6b62fe220cd..d5d96df6422 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -53,15 +53,12 @@ pub struct PrimitiveArray { validity: Option, } -fn check( +pub(super) fn check( data_type: &DataType, values: &[T], - validity: &Option, + validity_len: Option, ) -> Result<(), Error> { - if validity - .as_ref() - .map_or(false, |validity| validity.len() != values.len()) - { + if validity_len.map_or(false, |len| len != values.len()) { return Err(Error::oos( "validity mask length must match the number of values", )); @@ -89,7 +86,7 @@ impl PrimitiveArray { values: Buffer, validity: Option, ) -> Result { - check(&data_type, &values, &validity)?; + check(&data_type, &values, validity.as_ref().map(|v| v.len()))?; Ok(Self { data_type, values, @@ -117,7 +114,12 @@ impl PrimitiveArray { #[inline] #[must_use] pub fn to(self, data_type: DataType) -> Self { - check(&data_type, &self.values, &self.validity).unwrap(); + check( + &data_type, + &self.values, + self.validity.as_ref().map(|v| v.len()), + ) + .unwrap(); Self { data_type, values: self.values, @@ -325,11 +327,14 @@ impl PrimitiveArray { Some(bitmap), )), Right(mutable_bitmap) => match self.values.get_mut().map(std::mem::take) { - Some(values) => Right(MutablePrimitiveArray::from_data( - self.data_type, - values, - Some(mutable_bitmap), - )), + Some(values) => Right( + MutablePrimitiveArray::try_new( + self.data_type, + values, + Some(mutable_bitmap), + ) + .unwrap(), + ), None => Left(PrimitiveArray::new( self.data_type, self.values, @@ -339,11 +344,9 @@ impl PrimitiveArray { } } else { match self.values.get_mut().map(std::mem::take) { - Some(values) => Right(MutablePrimitiveArray::from_data( - self.data_type, - values, - None, - )), + Some(values) => { + Right(MutablePrimitiveArray::try_new(self.data_type, values, None).unwrap()) + } None => Left(PrimitiveArray::new(self.data_type, self.values, None)), } } @@ -428,11 +431,6 @@ impl PrimitiveArray { pub fn new(data_type: DataType, values: Buffer, validity: Option) -> Self { Self::try_new(data_type, values, validity).unwrap() } - - /// Alias for `Self::try_new(..).unwrap()`. - pub fn from_data(data_type: DataType, values: Buffer, validity: Option) -> Self { - Self::new(data_type, values, validity) - } } impl Array for PrimitiveArray { diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index c71c41c164e..ce6c8fa3c95 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -7,12 +7,12 @@ use crate::{ array::{Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, datatypes::DataType, - error::{Error, Result}, + error::Error, trusted_len::TrustedLen, types::NativeType, }; -use super::PrimitiveArray; +use super::{check, PrimitiveArray}; /// The Arrow's equivalent to `Vec>` where `T` is byte-size (e.g. `i32`). /// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`. @@ -55,32 +55,29 @@ impl MutablePrimitiveArray { Self::with_capacity_from(capacity, T::PRIMITIVE.into()) } - /// Create a [`MutablePrimitiveArray`] out of low-end APIs. - /// # Panics - /// This function panics iff: - /// * `data_type` is not supported by the physical type - /// * The validity is not `None` and its length is different from the `values`'s length - pub fn from_data(data_type: DataType, values: Vec, validity: Option) -> Self { - if !data_type.to_physical_type().eq_primitive(T::PRIMITIVE) { - Err(Error::InvalidArgumentError(format!( - "Type {} does not support logical type {:?}", - std::any::type_name::(), - data_type - ))) - .unwrap() - } - if let Some(ref validity) = validity { - assert_eq!(values.len(), validity.len()); - } - Self { + /// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components. + /// # Implementation + /// This function is `O(1)`. + /// + /// # Errors + /// This function errors iff: + /// * The validity is not `None` and its length is different from `values`'s length + /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`] + pub fn try_new( + data_type: DataType, + values: Vec, + validity: Option, + ) -> Result { + check(&data_type, &values, validity.as_ref().map(|x| x.len()))?; + Ok(Self { data_type, values, validity, - } + }) } /// Extract the low-end APIs from the [`MutablePrimitiveArray`]. - pub fn into_data(self) -> (DataType, Vec, Option) { + pub fn into_inner(self) -> (DataType, Vec, Option) { (self.data_type, self.values, self.validity) } @@ -267,7 +264,7 @@ impl MutablePrimitiveArray { /// This operation is `O(1)`. #[inline] pub fn to(self, data_type: DataType) -> Self { - Self::from_data(data_type, self.values, self.validity) + Self::try_new(data_type, self.values, self.validity).unwrap() } /// Converts itself into an [`Array`]. @@ -366,7 +363,7 @@ impl Extend> for MutablePrimitiveArray { impl TryExtend> for MutablePrimitiveArray { /// This is infalible and is implemented for consistency with all other types - fn try_extend>>(&mut self, iter: I) -> Result<()> { + fn try_extend>>(&mut self, iter: I) -> Result<(), Error> { self.extend(iter); Ok(()) } @@ -374,7 +371,7 @@ impl TryExtend> for MutablePrimitiveArray { impl TryPush> for MutablePrimitiveArray { /// This is infalible and is implemented for consistency with all other types - fn try_push(&mut self, item: Option) -> Result<()> { + fn try_push(&mut self, item: Option) -> Result<(), Error> { self.push(item); Ok(()) } @@ -390,19 +387,21 @@ impl MutableArray for MutablePrimitiveArray { } fn as_box(&mut self) -> Box { - Box::new(PrimitiveArray::new( + PrimitiveArray::new( self.data_type.clone(), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .boxed() } fn as_arc(&mut self) -> Arc { - Arc::new(PrimitiveArray::new( + PrimitiveArray::new( self.data_type.clone(), std::mem::take(&mut self.values).into(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .arced() } fn data_type(&self) -> &DataType { @@ -510,7 +509,7 @@ impl MutablePrimitiveArray { /// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values. /// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`]. pub fn from_vec(values: Vec) -> Self { - Self::from_data(T::PRIMITIVE.into(), values, None) + Self::try_new(T::PRIMITIVE.into(), values, None).unwrap() } /// Creates a new [`MutablePrimitiveArray`] from an iterator over values @@ -659,7 +658,7 @@ impl PartialEq for MutablePrimitiveArray { } impl TryExtendFromSelf for MutablePrimitiveArray { - fn try_extend_from_self(&mut self, other: &Self) -> Result<()> { + fn try_extend_from_self(&mut self, other: &Self) -> Result<(), Error> { extend_validity(self.len(), &mut self.validity, &other.validity); let slice = other.values.as_slice(); diff --git a/src/array/struct_/mod.rs b/src/array/struct_/mod.rs index a371147f3d8..19383386592 100644 --- a/src/array/struct_/mod.rs +++ b/src/array/struct_/mod.rs @@ -120,15 +120,6 @@ impl StructArray { Self::try_new(data_type, values, validity).unwrap() } - /// Alias for `new` - pub fn from_data( - data_type: DataType, - values: Vec>, - validity: Option, - ) -> Self { - Self::new(data_type, values, validity) - } - /// Creates an empty [`StructArray`]. pub fn new_empty(data_type: DataType) -> Self { if let DataType::Struct(fields) = &data_type { diff --git a/src/array/struct_/mutable.rs b/src/array/struct_/mutable.rs index b35e1064b94..c13323da89a 100644 --- a/src/array/struct_/mutable.rs +++ b/src/array/struct_/mutable.rs @@ -4,6 +4,7 @@ use crate::{ array::{Array, MutableArray}, bitmap::MutableBitmap, datatypes::DataType, + error::Error, }; use super::StructArray; @@ -16,6 +17,60 @@ pub struct MutableStructArray { validity: Option, } +fn check( + data_type: &DataType, + values: &[Box], + validity: Option, +) -> Result<(), Error> { + let fields = StructArray::try_get_fields(data_type)?; + if fields.is_empty() { + return Err(Error::oos("A StructArray must contain at least one field")); + } + if fields.len() != values.len() { + return Err(Error::oos( + "A StructArray must have a number of fields in its DataType equal to the number of child values", + )); + } + + fields + .iter().map(|a| &a.data_type) + .zip(values.iter().map(|a| a.data_type())) + .enumerate() + .try_for_each(|(index, (data_type, child))| { + if data_type != child { + Err(Error::oos(format!( + "The children DataTypes of a StructArray must equal the children data types. + However, the field {index} has data type {data_type:?} but the value has data type {child:?}" + ))) + } else { + Ok(()) + } + })?; + + let len = values[0].len(); + values + .iter() + .map(|a| a.len()) + .enumerate() + .try_for_each(|(index, a_len)| { + if a_len != len { + Err(Error::oos(format!( + "The children must have an equal number of values. + However, the values at index {index} have a length of {a_len}, which is different from values at index 0, {len}." + ))) + } else { + Ok(()) + } + })?; + + if validity.map_or(false, |validity| validity != len) { + return Err(Error::oos( + "The validity length of a StructArray must match its number of elements", + )); + } + Ok(()) +} + impl From for StructArray { fn from(other: MutableStructArray) -> Self { let validity = if other.validity.as_ref().map(|x| x.unset_bits()).unwrap_or(0) > 0 { @@ -24,7 +79,7 @@ impl From for StructArray { None }; - StructArray::from_data( + StructArray::new( other.data_type, other.values.into_iter().map(|mut v| v.as_box()).collect(), validity, @@ -35,51 +90,30 @@ impl From for StructArray { impl MutableStructArray { /// Creates a new [`MutableStructArray`]. pub fn new(data_type: DataType, values: Vec>) -> Self { - Self::from_data(data_type, values, None) + Self::try_new(data_type, values, None).unwrap() } /// Create a [`MutableStructArray`] out of low-end APIs. - /// # Panics - /// This function panics iff: + /// # Errors + /// This function errors iff: /// * `data_type` is not [`DataType::Struct`] /// * The inner types of `data_type` are not equal to those of `values` /// * `validity` is not `None` and its length is different from the `values`'s length - pub fn from_data( + pub fn try_new( data_type: DataType, values: Vec>, validity: Option, - ) -> Self { - match data_type.to_logical_type() { - DataType::Struct(ref fields) => assert!(fields - .iter() - .map(|f| f.data_type()) - .eq(values.iter().map(|f| f.data_type()))), - _ => panic!("StructArray must be initialized with DataType::Struct"), - }; - let self_ = Self { + ) -> Result { + check(&data_type, &values, validity.as_ref().map(|x| x.len()))?; + Ok(Self { data_type, values, validity, - }; - self_.assert_lengths(); - self_ - } - - fn assert_lengths(&self) { - let first_len = self.values.first().map(|v| v.len()); - if let Some(len) = first_len { - if !self.values.iter().all(|x| x.len() == len) { - let lengths: Vec<_> = self.values.iter().map(|v| v.len()).collect(); - panic!("StructArray child lengths differ: {:?}", lengths); - } - } - if let Some(validity) = &self.validity { - assert_eq!(first_len.unwrap_or(0), validity.len()); - } + }) } /// Extract the low-end APIs from the [`MutableStructArray`]. - pub fn into_data(self) -> (DataType, Vec>, Option) { + pub fn into_inner(self) -> (DataType, Vec>, Option) { (self.data_type, self.values, self.validity) } @@ -165,25 +199,27 @@ impl MutableArray for MutableStructArray { } fn as_box(&mut self) -> Box { - Box::new(StructArray::from_data( + StructArray::new( self.data_type.clone(), std::mem::take(&mut self.values) .into_iter() .map(|mut v| v.as_box()) .collect(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .boxed() } fn as_arc(&mut self) -> Arc { - Arc::new(StructArray::from_data( + StructArray::new( self.data_type.clone(), std::mem::take(&mut self.values) .into_iter() .map(|mut v| v.as_box()) .collect(), std::mem::take(&mut self.validity).map(|x| x.into()), - )) + ) + .arced() } fn data_type(&self) -> &DataType { diff --git a/src/array/union/mod.rs b/src/array/union/mod.rs index 3bda5915688..70028d6cf2c 100644 --- a/src/array/union/mod.rs +++ b/src/array/union/mod.rs @@ -117,16 +117,6 @@ impl UnionArray { Self::try_new(data_type, types, fields, offsets).unwrap() } - /// Alias for `new` - pub fn from_data( - data_type: DataType, - types: Buffer, - fields: Vec>, - offsets: Option>, - ) -> Self { - Self::new(data_type, types, fields, offsets) - } - /// Creates a new null [`UnionArray`]. pub fn new_null(data_type: DataType, length: usize) -> Self { if let DataType::Union(f, _, mode) = &data_type { diff --git a/src/array/utf8/mod.rs b/src/array/utf8/mod.rs index f8b8b86a8b8..7643ad58c07 100644 --- a/src/array/utf8/mod.rs +++ b/src/array/utf8/mod.rs @@ -6,7 +6,7 @@ use crate::{ buffer::Buffer, datatypes::DataType, error::{Error, Result}, - offset::{Offset, OffsetsBuffer}, + offset::{Offset, Offsets, OffsetsBuffer}, trusted_len::TrustedLen, }; @@ -312,7 +312,7 @@ impl Utf8Array { }) } (Some(values), Some(offsets)) => Right(unsafe { - MutableUtf8Array::from_data_unchecked( + MutableUtf8Array::new_unchecked( self.data_type, offsets, values, @@ -336,7 +336,7 @@ impl Utf8Array { Utf8Array::new_unchecked(self.data_type, self.offsets, values.into(), None) }), (Some(values), Some(offsets)) => Right(unsafe { - MutableUtf8Array::from_data_unchecked(self.data_type, offsets, values, None) + MutableUtf8Array::new_unchecked(self.data_type, offsets, values, None) }), } } @@ -347,7 +347,7 @@ impl Utf8Array { /// The array is guaranteed to have no elements nor validity. #[inline] pub fn new_empty(data_type: DataType) -> Self { - unsafe { Self::from_data_unchecked(data_type, OffsetsBuffer::new(), Buffer::new(), None) } + unsafe { Self::new_unchecked(data_type, OffsetsBuffer::new(), Buffer::new(), None) } } /// Returns a new [`Utf8Array`] whose all slots are null / `None`. @@ -355,7 +355,7 @@ impl Utf8Array { pub fn new_null(data_type: DataType, length: usize) -> Self { Self::new( data_type, - vec![O::default(); 1 + length].try_into().unwrap(), + Offsets::new_zeroed(length).into(), Buffer::new(), Some(Bitmap::new_zeroed(length)), ) @@ -515,29 +515,6 @@ impl Utf8Array { { MutableUtf8Array::::try_from_trusted_len_iter(iter).map(|x| x.into()) } - - /// Alias for `new` - pub fn from_data( - data_type: DataType, - offsets: OffsetsBuffer, - values: Buffer, - validity: Option, - ) -> Self { - Self::new(data_type, offsets, values, validity) - } - - /// Alias for [`Self::new_unchecked`] - /// # Safety - /// This function is unsafe iff: - /// * The `values` between two consecutive `offsets` are not valid utf8 - pub unsafe fn from_data_unchecked( - data_type: DataType, - offsets: OffsetsBuffer, - values: Buffer, - validity: Option, - ) -> Self { - Self::new_unchecked(data_type, offsets, values, validity) - } } impl Array for Utf8Array { diff --git a/src/array/utf8/mutable.rs b/src/array/utf8/mutable.rs index cb66f056dd1..a967615e8bc 100644 --- a/src/array/utf8/mutable.rs +++ b/src/array/utf8/mutable.rs @@ -99,33 +99,6 @@ impl MutableUtf8Array { Self { values, validity } } - /// Alias of `new_unchecked` - /// # Safety - /// The caller must ensure that every value between offsets is a valid utf8. - pub unsafe fn from_data_unchecked( - data_type: DataType, - offsets: Offsets, - values: Vec, - validity: Option, - ) -> Self { - Self::new_unchecked(data_type, offsets, values, validity) - } - - /// The canonical method to create a [`MutableUtf8Array`] out of low-end APIs. - /// # Panics - /// This function panics iff: - /// * The `offsets` and `values` are inconsistent - /// * The `values` between `offsets` are not utf8 encoded - /// * The validity is not `None` and its length is different from `offsets`'s length minus one. - pub fn from_data( - data_type: DataType, - offsets: Offsets, - values: Vec, - validity: Option, - ) -> Self { - Self::try_new(data_type, offsets, values, validity).unwrap() - } - /// Creates a new [`MutableUtf8Array`] from a slice of optional `&[u8]`. // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it. pub fn from, P: AsRef<[Option]>>(slice: P) -> Self { @@ -263,35 +236,13 @@ impl MutableArray for MutableUtf8Array { } fn as_box(&mut self) -> Box { - // Safety: - // `MutableUtf8Array` has the same invariants as `Utf8Array` and thus - // `Utf8Array` can be safely created from `MutableUtf8Array` without checks. - let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner(); - unsafe { - Utf8Array::from_data_unchecked( - data_type, - offsets.into(), - values.into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - } - .boxed() + let array: Utf8Array = std::mem::take(self).into(); + array.boxed() } fn as_arc(&mut self) -> Arc { - // Safety: - // `MutableUtf8Array` has the same invariants as `Utf8Array` and thus - // `Utf8Array` can be safely created from `MutableUtf8Array` without checks. - let (data_type, offsets, values) = std::mem::take(&mut self.values).into_inner(); - unsafe { - Utf8Array::from_data_unchecked( - data_type, - offsets.into(), - values.into(), - std::mem::take(&mut self.validity).map(|x| x.into()), - ) - } - .arced() + let array: Utf8Array = std::mem::take(self).into(); + array.arced() } fn data_type(&self) -> &DataType { @@ -422,7 +373,7 @@ impl MutableUtf8Array { let (validity, offsets, values) = trusted_len_unzip(iterator); // soundness: P is `str` - Self::from_data_unchecked(Self::default_data_type(), offsets, values, validity) + Self::new_unchecked(Self::default_data_type(), offsets, values, validity) } /// Creates a [`MutableUtf8Array`] from an iterator of trusted length. @@ -488,7 +439,7 @@ impl MutableUtf8Array { let (validity, offsets, values) = try_trusted_len_unzip(iterator)?; // soundness: P is `str` - Ok(Self::from_data_unchecked( + Ok(Self::new_unchecked( Self::default_data_type(), offsets, values, diff --git a/src/array/utf8/mutable_values.rs b/src/array/utf8/mutable_values.rs index edb5dcfd89e..0cbc846b906 100644 --- a/src/array/utf8/mutable_values.rs +++ b/src/array/utf8/mutable_values.rs @@ -30,7 +30,7 @@ impl From> for Utf8Array { // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. unsafe { - Utf8Array::::from_data_unchecked( + Utf8Array::::new_unchecked( other.data_type, other.offsets.into(), other.values.into(), @@ -45,12 +45,7 @@ impl From> for MutableUtf8Array { // Safety: // `MutableUtf8ValuesArray` has the same invariants as `MutableUtf8Array` unsafe { - MutableUtf8Array::::from_data_unchecked( - other.data_type, - other.offsets, - other.values, - None, - ) + MutableUtf8Array::::new_unchecked(other.data_type, other.offsets, other.values, None) } } } @@ -244,21 +239,13 @@ impl MutableArray for MutableUtf8ValuesArray { } fn as_box(&mut self) -> Box { - // Safety: - // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus - // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. - let (data_type, offsets, values) = std::mem::take(self).into_inner(); - unsafe { Utf8Array::from_data_unchecked(data_type, offsets.into(), values.into(), None) } - .boxed() + let array: Utf8Array = std::mem::take(self).into(); + array.boxed() } fn as_arc(&mut self) -> Arc { - // Safety: - // `MutableUtf8ValuesArray` has the same invariants as `Utf8Array` and thus - // `Utf8Array` can be safely created from `MutableUtf8ValuesArray` without checks. - let (data_type, offsets, values) = std::mem::take(self).into_inner(); - unsafe { Utf8Array::from_data_unchecked(data_type, offsets.into(), values.into(), None) } - .arced() + let array: Utf8Array = std::mem::take(self).into(); + array.arced() } fn data_type(&self) -> &DataType { diff --git a/src/compute/cast/utf8_to.rs b/src/compute/cast/utf8_to.rs index 165c24a1025..6ee38588696 100644 --- a/src/compute/cast/utf8_to.rs +++ b/src/compute/cast/utf8_to.rs @@ -151,7 +151,7 @@ pub fn utf8_to_large_utf8(from: &Utf8Array) -> Utf8Array { let offsets = from.offsets().into(); // Safety: sound because `values` fulfills the same invariants as `from.values()` - unsafe { Utf8Array::::from_data_unchecked(data_type, offsets, values, validity) } + unsafe { Utf8Array::::new_unchecked(data_type, offsets, values, validity) } } /// Conversion of utf8 @@ -162,7 +162,7 @@ pub fn utf8_large_to_utf8(from: &Utf8Array) -> Result> { let offsets = from.offsets().try_into()?; // Safety: sound because `values` fulfills the same invariants as `from.values()` - Ok(unsafe { Utf8Array::::from_data_unchecked(data_type, offsets, values, validity) }) + Ok(unsafe { Utf8Array::::new_unchecked(data_type, offsets, values, validity) }) } /// Conversion to binary diff --git a/src/compute/take/utf8.rs b/src/compute/take/utf8.rs index 490e76bf4b8..1422132170a 100644 --- a/src/compute/take/utf8.rs +++ b/src/compute/take/utf8.rs @@ -38,7 +38,7 @@ pub fn take( (false, true) => take_indices_validity(values.offsets(), values.values(), indices), (true, true) => take_values_indices_validity(values, indices), }; - unsafe { Utf8Array::::from_data_unchecked(data_type, offsets, values, validity) } + unsafe { Utf8Array::::new_unchecked(data_type, offsets, values, validity) } } #[cfg(test)] diff --git a/src/io/odbc/read/deserialize.rs b/src/io/odbc/read/deserialize.rs index 7ebf79b8b9a..3e18fa279b7 100644 --- a/src/io/odbc/read/deserialize.rs +++ b/src/io/odbc/read/deserialize.rs @@ -92,7 +92,7 @@ fn bitmap(values: &[isize]) -> Option { } fn primitive(data_type: DataType, values: &[T]) -> PrimitiveArray { - PrimitiveArray::from_data(data_type, values.to_vec().into(), None) + PrimitiveArray::new(data_type, values.to_vec().into(), None) } fn primitive_optional( @@ -101,20 +101,20 @@ fn primitive_optional( indicators: &[isize], ) -> PrimitiveArray { let validity = bitmap(indicators); - PrimitiveArray::from_data(data_type, values.to_vec().into(), validity) + PrimitiveArray::new(data_type, values.to_vec().into(), validity) } fn bool(data_type: DataType, values: &[Bit]) -> BooleanArray { let values = values.iter().map(|x| x.as_bool()); let values = Bitmap::from_trusted_len_iter(values); - BooleanArray::from_data(data_type, values, None) + BooleanArray::new(data_type, values, None) } fn bool_optional(data_type: DataType, values: &[Bit], indicators: &[isize]) -> BooleanArray { let validity = bitmap(indicators); let values = values.iter().map(|x| x.as_bool()); let values = Bitmap::from_trusted_len_iter(values); - BooleanArray::from_data(data_type, values, validity) + BooleanArray::new(data_type, values, validity) } fn binary_generic<'a>( @@ -143,21 +143,19 @@ fn binary_generic<'a>( fn binary(data_type: DataType, view: BinColumnView) -> BinaryArray { let (offsets, values, validity) = binary_generic(view.iter()); - - // this O(N) check is not necessary - BinaryArray::from_data(data_type, offsets, values, validity) + BinaryArray::new(data_type, offsets, values, validity) } fn utf8(data_type: DataType, view: TextColumnView) -> Utf8Array { let (offsets, values, validity) = binary_generic(view.iter()); // this O(N) check is necessary for the utf8 validity - Utf8Array::from_data(data_type, offsets, values, validity) + Utf8Array::new(data_type, offsets, values, validity) } fn date(data_type: DataType, values: &[odbc_api::sys::Date]) -> PrimitiveArray { let values = values.iter().map(days_since_epoch).collect::>(); - PrimitiveArray::from_data(data_type, values.into(), None) + PrimitiveArray::new(data_type, values.into(), None) } fn date_optional( @@ -167,7 +165,7 @@ fn date_optional( ) -> PrimitiveArray { let values = values.iter().map(days_since_epoch).collect::>(); let validity = bitmap(indicators); - PrimitiveArray::from_data(data_type, values.into(), validity) + PrimitiveArray::new(data_type, values.into(), validity) } fn days_since_epoch(date: &odbc_api::sys::Date) -> i32 { @@ -180,7 +178,7 @@ fn days_since_epoch(date: &odbc_api::sys::Date) -> i32 { fn time(data_type: DataType, values: &[odbc_api::sys::Time]) -> PrimitiveArray { let values = values.iter().map(time_since_midnight).collect::>(); - PrimitiveArray::from_data(data_type, values.into(), None) + PrimitiveArray::new(data_type, values.into(), None) } fn time_since_midnight(date: &odbc_api::sys::Time) -> i32 { @@ -194,7 +192,7 @@ fn time_optional( ) -> PrimitiveArray { let values = values.iter().map(time_since_midnight).collect::>(); let validity = bitmap(indicators); - PrimitiveArray::from_data(data_type, values.into(), validity) + PrimitiveArray::new(data_type, values.into(), validity) } fn timestamp(data_type: DataType, values: &[odbc_api::sys::Timestamp]) -> PrimitiveArray { @@ -209,7 +207,7 @@ fn timestamp(data_type: DataType, values: &[odbc_api::sys::Timestamp]) -> Primit TimeUnit::Microsecond => values.iter().map(timestamp_us).collect::>(), TimeUnit::Nanosecond => values.iter().map(timestamp_ns).collect::>(), }; - PrimitiveArray::from_data(data_type, values.into(), None) + PrimitiveArray::new(data_type, values.into(), None) } fn timestamp_optional( @@ -229,7 +227,7 @@ fn timestamp_optional( TimeUnit::Nanosecond => values.iter().map(timestamp_ns).collect::>(), }; let validity = bitmap(indicators); - PrimitiveArray::from_data(data_type, values.into(), validity) + PrimitiveArray::new(data_type, values.into(), validity) } fn timestamp_to_naive(timestamp: &odbc_api::sys::Timestamp) -> Option { diff --git a/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs b/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs index 70b4ccab699..680834ad270 100644 --- a/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs +++ b/src/io/parquet/read/deserialize/fixed_size_binary/dictionary.rs @@ -54,7 +54,9 @@ fn read_dict(data_type: DataType, dict: &DictPage) -> Box { let values = dict.buffer.clone(); - FixedSizeBinaryArray::from_data(data_type, values.into(), None).boxed() + FixedSizeBinaryArray::try_new(data_type, values.into(), None) + .unwrap() + .boxed() } impl Iterator for DictIter diff --git a/src/io/parquet/read/deserialize/primitive/basic.rs b/src/io/parquet/read/deserialize/primitive/basic.rs index 34a3702f3d8..cc41c3159d6 100644 --- a/src/io/parquet/read/deserialize/primitive/basic.rs +++ b/src/io/parquet/read/deserialize/primitive/basic.rs @@ -281,7 +281,7 @@ pub(super) fn finish( } else { Some(validity) }; - MutablePrimitiveArray::from_data(data_type.clone(), values, validity) + MutablePrimitiveArray::try_new(data_type.clone(), values, validity).unwrap() } /// An [`Iterator`] adapter over [`Pages`] assumed to be encoded as primitive arrays diff --git a/src/io/parquet/read/deserialize/primitive/nested.rs b/src/io/parquet/read/deserialize/primitive/nested.rs index 4798409acb3..7b890bf7507 100644 --- a/src/io/parquet/read/deserialize/primitive/nested.rs +++ b/src/io/parquet/read/deserialize/primitive/nested.rs @@ -164,7 +164,7 @@ fn finish( values: Vec, validity: MutableBitmap, ) -> PrimitiveArray { - PrimitiveArray::from_data(data_type.clone(), values.into(), validity.into()) + PrimitiveArray::new(data_type.clone(), values.into(), validity.into()) } /// An iterator adapter over [`Pages`] assumed to be encoded as boolean arrays diff --git a/src/io/parquet/read/deserialize/struct_.rs b/src/io/parquet/read/deserialize/struct_.rs index dea07ad18dd..9fb1242b953 100644 --- a/src/io/parquet/read/deserialize/struct_.rs +++ b/src/io/parquet/read/deserialize/struct_.rs @@ -47,7 +47,7 @@ impl<'a> Iterator for StructIterator<'a> { Some(Ok(( nested, - Box::new(StructArray::from_data( + Box::new(StructArray::new( DataType::Struct(self.fields.clone()), new_values, validity.and_then(|x| x.into()), diff --git a/src/io/parquet/read/indexes/fixed_len_binary.rs b/src/io/parquet/read/indexes/fixed_len_binary.rs index 12ef77667ba..16e3348746c 100644 --- a/src/io/parquet/read/indexes/fixed_len_binary.rs +++ b/src/io/parquet/read/indexes/fixed_len_binary.rs @@ -43,11 +43,12 @@ fn deserialize_binary_iter<'a, I: TrustedLen>>>( }))) } _ => { - let mut a = MutableFixedSizeBinaryArray::from_data( + let mut a = MutableFixedSizeBinaryArray::try_new( data_type, Vec::with_capacity(iter.size_hint().0), None, - ); + ) + .unwrap(); for item in iter { a.push(item); } diff --git a/src/io/parquet/read/statistics/mod.rs b/src/io/parquet/read/statistics/mod.rs index 8b136360b15..ad30859cfa5 100644 --- a/src/io/parquet/read/statistics/mod.rs +++ b/src/io/parquet/read/statistics/mod.rs @@ -174,11 +174,10 @@ fn make_mutable(data_type: &DataType, capacity: usize) -> Result { Box::new(MutableUtf8Array::::with_capacity(capacity)) as Box } - PhysicalType::FixedSizeBinary => Box::new(MutableFixedSizeBinaryArray::from_data( - data_type.clone(), - vec![], - None, - )) as _, + PhysicalType::FixedSizeBinary => { + Box::new(MutableFixedSizeBinaryArray::try_new(data_type.clone(), vec![], None).unwrap()) + as _ + } PhysicalType::LargeList | PhysicalType::List => Box::new( DynMutableListArray::try_with_capacity(data_type.clone(), capacity)?, ) as Box, diff --git a/src/io/parquet/write/mod.rs b/src/io/parquet/write/mod.rs index 54815ccbd3e..150211f7c22 100644 --- a/src/io/parquet/write/mod.rs +++ b/src/io/parquet/write/mod.rs @@ -450,7 +450,7 @@ pub fn array_to_page_simple( values.extend_from_slice(&[0; 4]); // months values.extend_from_slice(bytes); // days and seconds }); - let array = FixedSizeBinaryArray::from_data( + let array = FixedSizeBinaryArray::new( DataType::FixedSizeBinary(12), values.into(), array.validity().cloned(), @@ -518,7 +518,7 @@ pub fn array_to_page_simple( let bytes = &x.to_be_bytes()[16 - size..]; values.extend_from_slice(bytes) }); - let array = FixedSizeBinaryArray::from_data( + let array = FixedSizeBinaryArray::new( DataType::FixedSizeBinary(size), values.into(), array.validity().cloned(), diff --git a/src/io/parquet/write/pages.rs b/src/io/parquet/write/pages.rs index b83335aa570..1e4bcbe9071 100644 --- a/src/io/parquet/write/pages.rs +++ b/src/io/parquet/write/pages.rs @@ -245,7 +245,7 @@ mod tests { Field::new("c", DataType::Int32, false), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), @@ -309,7 +309,7 @@ mod tests { Field::new("c", DataType::Int32, false), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), @@ -320,7 +320,7 @@ mod tests { Field::new("c", array.data_type().clone(), true), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![Box::new(array.clone()), Box::new(array)], None, @@ -412,7 +412,7 @@ mod tests { Field::new("c", DataType::Int32, false), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), diff --git a/src/offset.rs b/src/offset.rs index 113358ca54b..7096b443400 100644 --- a/src/offset.rs +++ b/src/offset.rs @@ -63,6 +63,12 @@ impl Offsets { Self(vec![O::zero()]) } + /// Returns an [`Offsets`] whose all lengths are zero. + #[inline] + pub fn new_zeroed(length: usize) -> Self { + Self(vec![O::zero(); length + 1]) + } + /// Creates a new [`Offsets`] from an iterator of lengths #[inline] pub fn try_from_iter>(iter: I) -> Result { diff --git a/tests/it/array/binary/mod.rs b/tests/it/array/binary/mod.rs index 7609058afa5..2dc185bde9e 100644 --- a/tests/it/array/binary/mod.rs +++ b/tests/it/array/binary/mod.rs @@ -30,7 +30,7 @@ fn basics() { assert!(!array.is_valid(1)); assert!(array.is_valid(2)); - let array2 = BinaryArray::::from_data( + let array2 = BinaryArray::::new( DataType::Binary, array.offsets().clone(), array.values().clone(), @@ -101,7 +101,7 @@ fn with_validity() { fn wrong_offsets() { let offsets = vec![0, 5, 4].try_into().unwrap(); // invalid offsets let values = Buffer::from(b"abbbbb".to_vec()); - BinaryArray::::from_data(DataType::Binary, offsets, values, None); + BinaryArray::::new(DataType::Binary, offsets, values, None); } #[test] @@ -109,7 +109,7 @@ fn wrong_offsets() { fn wrong_data_type() { let offsets = vec![0, 4].try_into().unwrap(); let values = Buffer::from(b"abbb".to_vec()); - BinaryArray::::from_data(DataType::Int8, offsets, values, None); + BinaryArray::::new(DataType::Int8, offsets, values, None); } #[test] @@ -118,7 +118,7 @@ fn value_with_wrong_offsets_panics() { let offsets = vec![0, 10, 11, 4].try_into().unwrap(); let values = Buffer::from(b"abbb".to_vec()); // the 10-11 is not checked - let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); + let array = BinaryArray::::new(DataType::Binary, offsets, values, None); // but access is still checked (and panics) // without checks, this would result in reading beyond bounds @@ -130,7 +130,7 @@ fn value_with_wrong_offsets_panics() { fn index_out_of_bounds_panics() { let offsets = vec![0, 1, 2, 4].try_into().unwrap(); let values = Buffer::from(b"abbb".to_vec()); - let array = BinaryArray::::from_data(DataType::Utf8, offsets, values, None); + let array = BinaryArray::::new(DataType::Utf8, offsets, values, None); array.value(3); } @@ -141,7 +141,7 @@ fn value_unchecked_with_wrong_offsets_panics() { let offsets = vec![0, 10, 11, 4].try_into().unwrap(); let values = Buffer::from(b"abbb".to_vec()); // the 10-11 is not checked - let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); + let array = BinaryArray::::new(DataType::Binary, offsets, values, None); // but access is still checked (and panics) // without checks, this would result in reading beyond bounds, @@ -162,7 +162,7 @@ fn into_mut_1() { let values = Buffer::from(b"a".to_vec()); let a = values.clone(); // cloned values assert_eq!(a, values); - let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); + let array = BinaryArray::::new(DataType::Binary, offsets, values, None); assert!(array.into_mut().is_left()); } @@ -172,7 +172,7 @@ fn into_mut_2() { let values = Buffer::from(b"a".to_vec()); let a = offsets.clone(); // cloned offsets assert_eq!(a, offsets); - let array = BinaryArray::::from_data(DataType::Binary, offsets, values, None); + let array = BinaryArray::::new(DataType::Binary, offsets, values, None); assert!(array.into_mut().is_left()); } diff --git a/tests/it/array/boolean/mod.rs b/tests/it/array/boolean/mod.rs index a38b59a07cb..c9fa33906f8 100644 --- a/tests/it/array/boolean/mod.rs +++ b/tests/it/array/boolean/mod.rs @@ -27,7 +27,7 @@ fn basics() { assert!(!array.is_valid(1)); assert!(array.is_valid(2)); - let array2 = BooleanArray::from_data( + let array2 = BooleanArray::new( DataType::Boolean, array.values().clone(), array.validity().cloned(), @@ -51,7 +51,7 @@ fn try_new_invalid() { #[test] fn with_validity() { let bitmap = Bitmap::from([true, false, true]); - let a = BooleanArray::from_data(DataType::Boolean, bitmap, None); + let a = BooleanArray::new(DataType::Boolean, bitmap, None); let a = a.with_validity(Some(Bitmap::from([true, false, true]))); assert!(a.validity().is_some()); } @@ -65,12 +65,12 @@ fn debug() { #[test] fn into_mut_valid() { let bitmap = Bitmap::from([true, false, true]); - let a = BooleanArray::from_data(DataType::Boolean, bitmap, None); + let a = BooleanArray::new(DataType::Boolean, bitmap, None); let _ = a.into_mut().right().unwrap(); let bitmap = Bitmap::from([true, false, true]); let validity = Bitmap::from([true, false, true]); - let a = BooleanArray::from_data(DataType::Boolean, bitmap, Some(validity)); + let a = BooleanArray::new(DataType::Boolean, bitmap, Some(validity)); let _ = a.into_mut().right().unwrap(); } @@ -78,13 +78,13 @@ fn into_mut_valid() { fn into_mut_invalid() { let bitmap = Bitmap::from([true, false, true]); let _other = bitmap.clone(); // values is shared - let a = BooleanArray::from_data(DataType::Boolean, bitmap, None); + let a = BooleanArray::new(DataType::Boolean, bitmap, None); let _ = a.into_mut().left().unwrap(); let bitmap = Bitmap::from([true, false, true]); let validity = Bitmap::from([true, false, true]); let _other = validity.clone(); // validity is shared - let a = BooleanArray::from_data(DataType::Boolean, bitmap, Some(validity)); + let a = BooleanArray::new(DataType::Boolean, bitmap, Some(validity)); let _ = a.into_mut().left().unwrap(); } diff --git a/tests/it/array/boolean/mutable.rs b/tests/it/array/boolean/mutable.rs index de960b7a77f..1f1d85631a5 100644 --- a/tests/it/array/boolean/mutable.rs +++ b/tests/it/array/boolean/mutable.rs @@ -99,11 +99,12 @@ fn try_from_trusted_len_iter() { #[test] fn reserve() { - let mut a = MutableBooleanArray::from_data( + let mut a = MutableBooleanArray::try_new( DataType::Boolean, MutableBitmap::new(), Some(MutableBitmap::new()), - ); + ) + .unwrap(); a.reserve(10); assert!(a.validity().unwrap().capacity() > 0); diff --git a/tests/it/array/equal/list.rs b/tests/it/array/equal/list.rs index 67a458017b4..bf2ccf22d8b 100644 --- a/tests/it/array/equal/list.rs +++ b/tests/it/array/equal/list.rs @@ -77,14 +77,14 @@ fn test_bla() { Some(6), ])); let validity = Bitmap::from([true, false, true]); - let lhs = ListArray::::from_data(data_type, offsets, values, Some(validity)); + let lhs = ListArray::::new(data_type, offsets, values, Some(validity)); let lhs = lhs.slice(1, 2); let offsets = vec![0, 0, 3].try_into().unwrap(); let data_type = ListArray::::default_datatype(DataType::Int32); let values = Box::new(Int32Array::from([Some(4), None, Some(6)])); let validity = Bitmap::from([false, true]); - let rhs = ListArray::::from_data(data_type, offsets, values, Some(validity)); + let rhs = ListArray::::new(data_type, offsets, values, Some(validity)); assert_eq!(lhs, rhs); } diff --git a/tests/it/array/fixed_size_binary/mod.rs b/tests/it/array/fixed_size_binary/mod.rs index 3163f669732..920b5925b28 100644 --- a/tests/it/array/fixed_size_binary/mod.rs +++ b/tests/it/array/fixed_size_binary/mod.rs @@ -4,7 +4,7 @@ mod mutable; #[test] fn basics() { - let array = FixedSizeBinaryArray::from_data( + let array = FixedSizeBinaryArray::new( DataType::FixedSizeBinary(2), Buffer::from(vec![1, 2, 3, 4, 5, 6]), Some(Bitmap::from([true, false, true])), @@ -23,18 +23,20 @@ fn basics() { #[test] fn with_validity() { - let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]); - let a = FixedSizeBinaryArray::new(DataType::FixedSizeBinary(2), values, None); + let a = FixedSizeBinaryArray::new( + DataType::FixedSizeBinary(2), + vec![1, 2, 3, 4, 5, 6].into(), + None, + ); let a = a.with_validity(Some(Bitmap::from([true, false, true]))); assert!(a.validity().is_some()); } #[test] fn debug() { - let values = Buffer::from(vec![1, 2, 3, 4, 5, 6]); - let a = FixedSizeBinaryArray::from_data( + let a = FixedSizeBinaryArray::new( DataType::FixedSizeBinary(2), - values, + vec![1, 2, 3, 4, 5, 6].into(), Some(Bitmap::from([true, false, true])), ); assert_eq!( diff --git a/tests/it/array/fixed_size_binary/mutable.rs b/tests/it/array/fixed_size_binary/mutable.rs index 3cffcaa899a..ad2ea25b3fd 100644 --- a/tests/it/array/fixed_size_binary/mutable.rs +++ b/tests/it/array/fixed_size_binary/mutable.rs @@ -4,11 +4,12 @@ use arrow2::datatypes::DataType; #[test] fn basic() { - let a = MutableFixedSizeBinaryArray::from_data( + let a = MutableFixedSizeBinaryArray::try_new( DataType::FixedSizeBinary(2), Vec::from([1, 2, 3, 4]), None, - ); + ) + .unwrap(); assert_eq!(a.len(), 2); assert_eq!(a.data_type(), &DataType::FixedSizeBinary(2)); assert_eq!(a.values(), &Vec::from([1, 2, 3, 4])); @@ -20,29 +21,30 @@ fn basic() { #[allow(clippy::eq_op)] #[test] fn equal() { - let a = MutableFixedSizeBinaryArray::from_data( + let a = MutableFixedSizeBinaryArray::try_new( DataType::FixedSizeBinary(2), Vec::from([1, 2, 3, 4]), None, - ); + ) + .unwrap(); assert_eq!(a, a); - let b = MutableFixedSizeBinaryArray::from_data( - DataType::FixedSizeBinary(2), - Vec::from([1, 2]), - None, - ); + let b = + MutableFixedSizeBinaryArray::try_new(DataType::FixedSizeBinary(2), Vec::from([1, 2]), None) + .unwrap(); assert_eq!(b, b); assert!(a != b); - let a = MutableFixedSizeBinaryArray::from_data( + let a = MutableFixedSizeBinaryArray::try_new( DataType::FixedSizeBinary(2), Vec::from([1, 2, 3, 4]), Some(MutableBitmap::from([true, false])), - ); - let b = MutableFixedSizeBinaryArray::from_data( + ) + .unwrap(); + let b = MutableFixedSizeBinaryArray::try_new( DataType::FixedSizeBinary(2), Vec::from([1, 2, 3, 4]), Some(MutableBitmap::from([false, true])), - ); + ) + .unwrap(); assert_eq!(a, a); assert_eq!(b, b); assert!(a != b); diff --git a/tests/it/array/growable/null.rs b/tests/it/array/growable/null.rs index 47e25ed3b9b..7e26e71cdda 100644 --- a/tests/it/array/growable/null.rs +++ b/tests/it/array/growable/null.rs @@ -15,6 +15,6 @@ fn null() { let result: NullArray = mutable.into(); - let expected = NullArray::from_data(DataType::Null, 3); + let expected = NullArray::new(DataType::Null, 3); assert_eq!(result, expected); } diff --git a/tests/it/array/growable/struct_.rs b/tests/it/array/growable/struct_.rs index 55b5f4b0979..e31058e112f 100644 --- a/tests/it/array/growable/struct_.rs +++ b/tests/it/array/growable/struct_.rs @@ -31,14 +31,14 @@ fn some_values() -> (DataType, Vec>) { fn basic() { let (fields, values) = some_values(); - let array = StructArray::from_data(fields.clone(), values.clone(), None); + let array = StructArray::new(fields.clone(), values.clone(), None); let mut a = GrowableStruct::new(vec![&array], false, 0); a.extend(0, 1, 2); let result: StructArray = a.into(); - let expected = StructArray::from_data( + let expected = StructArray::new( fields, vec![values[0].slice(1, 2), values[1].slice(1, 2)], None, @@ -50,14 +50,14 @@ fn basic() { fn offset() { let (fields, values) = some_values(); - let array = StructArray::from_data(fields.clone(), values.clone(), None).slice(1, 3); + let array = StructArray::new(fields.clone(), values.clone(), None).slice(1, 3); let mut a = GrowableStruct::new(vec![&array], false, 0); a.extend(0, 1, 2); let result: StructArray = a.into(); - let expected = StructArray::from_data( + let expected = StructArray::new( fields, vec![values[0].slice(2, 2), values[1].slice(2, 2)], None, @@ -70,7 +70,7 @@ fn offset() { fn nulls() { let (fields, values) = some_values(); - let array = StructArray::from_data( + let array = StructArray::new( fields.clone(), values.clone(), Some(Bitmap::from_u8_slice([0b00000010], 5)), @@ -81,7 +81,7 @@ fn nulls() { a.extend(0, 1, 2); let result: StructArray = a.into(); - let expected = StructArray::from_data( + let expected = StructArray::new( fields, vec![values[0].slice(1, 2), values[1].slice(1, 2)], Some(Bitmap::from_u8_slice([0b00000010], 5).slice(1, 2)), @@ -94,7 +94,7 @@ fn nulls() { fn many() { let (fields, values) = some_values(); - let array = StructArray::from_data(fields.clone(), values.clone(), None); + let array = StructArray::new(fields.clone(), values.clone(), None); let mut mutable = GrowableStruct::new(vec![&array, &array], true, 0); @@ -118,7 +118,7 @@ fn many() { None, ])); - let expected = StructArray::from_data( + let expected = StructArray::new( fields, vec![expected_string, expected_int], Some(Bitmap::from([true, true, true, true, false])), diff --git a/tests/it/array/growable/union.rs b/tests/it/array/growable/union.rs index d4581f4685c..607ef13d3e5 100644 --- a/tests/it/array/growable/union.rs +++ b/tests/it/array/growable/union.rs @@ -19,7 +19,7 @@ fn sparse() -> Result<()> { Int32Array::from(&[Some(1), None, Some(2)]).boxed(), Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields, None); + let array = UnionArray::new(data_type, types, fields, None); for length in 1..2 { for index in 0..(array.len() - length + 1) { @@ -51,7 +51,7 @@ fn dense() -> Result<()> { ]; let offsets = Some(vec![0, 1, 0].into()); - let array = UnionArray::from_data(data_type, types, fields, offsets); + let array = UnionArray::new(data_type, types, fields, offsets); for length in 1..2 { for index in 0..(array.len() - length + 1) { diff --git a/tests/it/array/list/mod.rs b/tests/it/array/list/mod.rs index cf8aec30f9f..bc3caef921f 100644 --- a/tests/it/array/list/mod.rs +++ b/tests/it/array/list/mod.rs @@ -7,10 +7,10 @@ mod mutable; #[test] fn debug() { let values = Buffer::from(vec![1, 2, 3, 4, 5]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, vec![0, 2, 2, 3, 5].try_into().unwrap(), Box::new(values), @@ -24,10 +24,10 @@ fn debug() { #[should_panic] fn test_nested_panic() { let values = Buffer::from(vec![1, 2, 3, 4, 5]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type.clone(), vec![0, 2, 2, 3, 5].try_into().unwrap(), Box::new(values), @@ -36,7 +36,7 @@ fn test_nested_panic() { // The datatype for the nested array has to be created considering // the nested structure of the child data - let _ = ListArray::::from_data( + let _ = ListArray::::new( data_type, vec![0, 2, 4].try_into().unwrap(), Box::new(array), @@ -47,10 +47,10 @@ fn test_nested_panic() { #[test] fn test_nested_display() { let values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, vec![0, 2, 4, 7, 7, 8, 10].try_into().unwrap(), Box::new(values), @@ -58,7 +58,7 @@ fn test_nested_display() { ); let data_type = ListArray::::default_datatype(array.data_type().clone()); - let nested = ListArray::::from_data( + let nested = ListArray::::new( data_type, vec![0, 2, 5, 6].try_into().unwrap(), Box::new(array), diff --git a/tests/it/array/list/mutable.rs b/tests/it/array/list/mutable.rs index a5732315ea2..e72167277fa 100644 --- a/tests/it/array/list/mutable.rs +++ b/tests/it/array/list/mutable.rs @@ -12,14 +12,14 @@ fn basics() { array.try_extend(data).unwrap(); let array: ListArray = array.into(); - let values = PrimitiveArray::::from_data( + let values = PrimitiveArray::::new( DataType::Int32, Buffer::from(vec![1, 2, 3, 4, 0, 6]), Some(Bitmap::from([true, true, true, true, false, true])), ); let data_type = ListArray::::default_datatype(DataType::Int32); - let expected = ListArray::::from_data( + let expected = ListArray::::new( data_type, vec![0, 3, 3, 6].try_into().unwrap(), Box::new(values), diff --git a/tests/it/array/primitive/mod.rs b/tests/it/array/primitive/mod.rs index 270a141742c..c3e3369f3ef 100644 --- a/tests/it/array/primitive/mod.rs +++ b/tests/it/array/primitive/mod.rs @@ -24,7 +24,7 @@ fn basics() { assert!(!array.is_valid(1)); assert!(array.is_valid(2)); - let array2 = Int32Array::from_data( + let array2 = Int32Array::new( DataType::Int32, array.values().clone(), array.validity().cloned(), diff --git a/tests/it/array/primitive/mutable.rs b/tests/it/array/primitive/mutable.rs index 46f9c190254..d7e0b86c061 100644 --- a/tests/it/array/primitive/mutable.rs +++ b/tests/it/array/primitive/mutable.rs @@ -8,13 +8,14 @@ use std::iter::FromIterator; #[test] fn from_and_into_data() { - let a = MutablePrimitiveArray::from_data( + let a = MutablePrimitiveArray::try_new( DataType::Int32, vec![1i32, 0], Some(MutableBitmap::from([true, false])), - ); + ) + .unwrap(); assert_eq!(a.len(), 2); - let (a, b, c) = a.into_data(); + let (a, b, c) = a.into_inner(); assert_eq!(a, DataType::Int32); assert_eq!(b, Vec::from([1i32, 0])); assert_eq!(c, Some(MutableBitmap::from([true, false]))); @@ -28,22 +29,24 @@ fn from_vec() { #[test] fn to() { - let a = MutablePrimitiveArray::from_data( + let a = MutablePrimitiveArray::try_new( DataType::Int32, vec![1i32, 0], Some(MutableBitmap::from([true, false])), - ); + ) + .unwrap(); let a = a.to(DataType::Date32); assert_eq!(a.data_type(), &DataType::Date32); } #[test] fn values_mut_slice() { - let mut a = MutablePrimitiveArray::from_data( + let mut a = MutablePrimitiveArray::try_new( DataType::Int32, vec![1i32, 0], Some(MutableBitmap::from([true, false])), - ); + ) + .unwrap(); let values = a.values_mut_slice(); values[0] = 10; @@ -311,10 +314,8 @@ fn try_from_trusted_len_iter() { } #[test] -#[should_panic] fn wrong_data_type() { - let values = vec![1u8]; - MutablePrimitiveArray::from_data(DataType::Utf8, values, None); + assert!(MutablePrimitiveArray::::try_new(DataType::Utf8, vec![], None).is_err()); } #[test] diff --git a/tests/it/array/primitive/to_mutable.rs b/tests/it/array/primitive/to_mutable.rs index eadd5fb853d..ee3a1f8ee34 100644 --- a/tests/it/array/primitive/to_mutable.rs +++ b/tests/it/array/primitive/to_mutable.rs @@ -6,7 +6,7 @@ use either::Either; #[test] fn array_to_mutable() { let data = vec![1, 2, 3]; - let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); + let arr = PrimitiveArray::new(DataType::Int32, data.into(), None); // to mutable push and freeze again let mut mut_arr = arr.into_mut().unwrap_right(); @@ -24,7 +24,7 @@ fn array_to_mutable() { #[test] fn array_to_mutable_not_owned() { let data = vec![1, 2, 3]; - let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), None); + let arr = PrimitiveArray::new(DataType::Int32, data.into(), None); let arr2 = arr.clone(); // to the `to_mutable` should fail and we should get back the original array @@ -43,11 +43,11 @@ fn array_to_mutable_validity() { // both have a single reference should be ok let bitmap = Bitmap::from_iter([true, false, true]); - let arr = PrimitiveArray::from_data(DataType::Int32, data.clone().into(), Some(bitmap)); + let arr = PrimitiveArray::new(DataType::Int32, data.clone().into(), Some(bitmap)); assert!(matches!(arr.into_mut(), Either::Right(_))); // now we clone the bitmap increasing the ref count let bitmap = Bitmap::from_iter([true, false, true]); - let arr = PrimitiveArray::from_data(DataType::Int32, data.into(), Some(bitmap.clone())); + let arr = PrimitiveArray::new(DataType::Int32, data.into(), Some(bitmap.clone())); assert!(matches!(arr.into_mut(), Either::Left(_))); } diff --git a/tests/it/array/struct_/iterator.rs b/tests/it/array/struct_/iterator.rs index 4c1638390c2..be4a5eefbb4 100644 --- a/tests/it/array/struct_/iterator.rs +++ b/tests/it/array/struct_/iterator.rs @@ -12,7 +12,7 @@ fn test_simple_iter() { Field::new("c", DataType::Int32, false), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![boolean.clone(), int.clone()], None, diff --git a/tests/it/array/struct_/mod.rs b/tests/it/array/struct_/mod.rs index 5e8495ca3c7..3eb2f6b5cbc 100644 --- a/tests/it/array/struct_/mod.rs +++ b/tests/it/array/struct_/mod.rs @@ -15,7 +15,7 @@ fn debug() { Field::new("c", DataType::Int32, false), ]; - let array = StructArray::from_data( + let array = StructArray::new( DataType::Struct(fields), vec![boolean.clone(), int.clone()], Some(Bitmap::from([true, true, false, true])), diff --git a/tests/it/array/union.rs b/tests/it/array/union.rs index 93622b415ad..69ba887d250 100644 --- a/tests/it/array/union.rs +++ b/tests/it/array/union.rs @@ -32,7 +32,7 @@ fn sparse_debug() -> Result<()> { Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields, None); + let array = UnionArray::new(data_type, types, fields, None); assert_eq!(format!("{:?}", array), "UnionArray[1, None, c]"); @@ -53,7 +53,7 @@ fn dense_debug() -> Result<()> { ]; let offsets = Some(vec![0, 1, 0].into()); - let array = UnionArray::from_data(data_type, types, fields, offsets); + let array = UnionArray::new(data_type, types, fields, offsets); assert_eq!(format!("{:?}", array), "UnionArray[1, None, c]"); @@ -73,7 +73,7 @@ fn slice() -> Result<()> { Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type.clone(), types, fields.clone(), None); + let array = UnionArray::new(data_type.clone(), types, fields.clone(), None); let result = array.slice(1, 2); @@ -82,7 +82,7 @@ fn slice() -> Result<()> { Int32Array::from(&[None, Some(2)]).boxed(), Utf8Array::::from([Some("b"), Some("c")]).boxed(), ]; - let expected = UnionArray::from_data(data_type, sliced_types, sliced_fields, None); + let expected = UnionArray::new(data_type, sliced_types, sliced_fields, None); assert_eq!(expected, result); Ok(()) @@ -101,7 +101,7 @@ fn iter_sparse() -> Result<()> { Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields.clone(), None); + let array = UnionArray::new(data_type, types, fields.clone(), None); let mut iter = array.iter(); assert_eq!( @@ -135,7 +135,7 @@ fn iter_dense() -> Result<()> { Utf8Array::::from([Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields.clone(), Some(offsets)); + let array = UnionArray::new(data_type, types, fields.clone(), Some(offsets)); let mut iter = array.iter(); assert_eq!( @@ -168,7 +168,7 @@ fn iter_sparse_slice() -> Result<()> { Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields.clone(), None); + let array = UnionArray::new(data_type, types, fields.clone(), None); let array_slice = array.slice(1, 1); let mut iter = array_slice.iter(); @@ -195,7 +195,7 @@ fn iter_dense_slice() -> Result<()> { Utf8Array::::from([Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields.clone(), Some(offsets)); + let array = UnionArray::new(data_type, types, fields.clone(), Some(offsets)); let array_slice = array.slice(1, 1); let mut iter = array_slice.iter(); @@ -222,7 +222,7 @@ fn scalar() -> Result<()> { Utf8Array::::from([Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields.clone(), Some(offsets)); + let array = UnionArray::new(data_type, types, fields.clone(), Some(offsets)); let scalar = new_scalar(&array, 0); let union_scalar = scalar.as_any().downcast_ref::().unwrap(); diff --git a/tests/it/array/utf8/mod.rs b/tests/it/array/utf8/mod.rs index e60a9e28b83..d16479c1740 100644 --- a/tests/it/array/utf8/mod.rs +++ b/tests/it/array/utf8/mod.rs @@ -27,7 +27,7 @@ fn basics() { assert!(!array.is_valid(1)); assert!(array.is_valid(2)); - let array2 = Utf8Array::::from_data( + let array2 = Utf8Array::::new( DataType::Utf8, array.offsets().clone(), array.values().clone(), @@ -67,7 +67,7 @@ fn from_slice() { let values = b"abcc".to_vec().into(); assert_eq!( b, - Utf8Array::::from_data(DataType::Utf8, offsets, values, None) + Utf8Array::::new(DataType::Utf8, offsets, values, None) ); } @@ -79,7 +79,7 @@ fn from_iter_values() { let values = b"abcc".to_vec().into(); assert_eq!( b, - Utf8Array::::from_data(DataType::Utf8, offsets, values, None) + Utf8Array::::new(DataType::Utf8, offsets, values, None) ); } @@ -92,7 +92,7 @@ fn from_trusted_len_iter() { let values = b"abcc".to_vec().into(); assert_eq!( b, - Utf8Array::::from_data(DataType::Utf8, offsets, values, None) + Utf8Array::::new(DataType::Utf8, offsets, values, None) ); } @@ -109,7 +109,7 @@ fn try_from_trusted_len_iter() { let values = b"abcc".to_vec().into(); assert_eq!( b, - Utf8Array::::from_data(DataType::Utf8, offsets, values, None) + Utf8Array::::new(DataType::Utf8, offsets, values, None) ); } @@ -147,7 +147,7 @@ fn out_of_bounds_offsets_panics() { fn index_out_of_bounds_panics() { let offsets = vec![0, 1, 2, 4].try_into().unwrap(); let values = b"abbb".to_vec().into(); - let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); + let array = Utf8Array::::new(DataType::Utf8, offsets, values, None); array.value(3); } @@ -165,7 +165,7 @@ fn into_mut_1() { let values = Buffer::from(b"a".to_vec()); let a = values.clone(); // cloned values assert_eq!(a, values); - let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); + let array = Utf8Array::::new(DataType::Utf8, offsets, values, None); assert!(array.into_mut().is_left()); } @@ -175,7 +175,7 @@ fn into_mut_2() { let values = b"a".to_vec().into(); let a = offsets.clone(); // cloned offsets assert_eq!(a, offsets); - let array = Utf8Array::::from_data(DataType::Utf8, offsets, values, None); + let array = Utf8Array::::new(DataType::Utf8, offsets, values, None); assert!(array.into_mut().is_left()); } diff --git a/tests/it/array/utf8/mutable.rs b/tests/it/array/utf8/mutable.rs index faa4868a58f..964337e7b2c 100644 --- a/tests/it/array/utf8/mutable.rs +++ b/tests/it/array/utf8/mutable.rs @@ -67,19 +67,17 @@ fn pop_all_some() { /// Safety guarantee #[test] -#[should_panic] fn not_utf8() { let offsets = vec![0, 4].try_into().unwrap(); let values = vec![0, 159, 146, 150]; // invalid utf8 - MutableUtf8Array::::from_data(DataType::Utf8, offsets, values, None); + assert!(MutableUtf8Array::::try_new(DataType::Utf8, offsets, values, None).is_err()); } #[test] -#[should_panic] fn wrong_data_type() { let offsets = vec![0, 4].try_into().unwrap(); let values = vec![1, 2, 3, 4]; - MutableUtf8Array::::from_data(DataType::Int8, offsets, values, None); + assert!(MutableUtf8Array::::try_new(DataType::Int8, offsets, values, None).is_err()); } #[test] diff --git a/tests/it/compute/take.rs b/tests/it/compute/take.rs index 75b55d76f53..39fcf94da6e 100644 --- a/tests/it/compute/take.rs +++ b/tests/it/compute/take.rs @@ -71,7 +71,7 @@ fn create_test_struct() -> StructArray { Field::new("a", DataType::Boolean, true), Field::new("b", DataType::Int32, true), ]; - StructArray::from_data( + StructArray::new( DataType::Struct(fields), vec![boolean.boxed(), int.boxed()], validity, @@ -92,7 +92,7 @@ fn test_struct_with_nulls() { .into_iter() .collect::() .into(); - let expected = StructArray::from_data( + let expected = StructArray::new( array.data_type().clone(), vec![boolean.boxed(), int.boxed()], validity, @@ -171,10 +171,10 @@ fn unsigned_take() { #[test] fn list_with_no_none() { let values = Buffer::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, vec![0, 2, 2, 6, 9, 10].try_into().unwrap(), Box::new(values), @@ -185,9 +185,9 @@ fn list_with_no_none() { let result = take(&array, &indices).unwrap(); let expected_values = Buffer::from(vec![9, 6, 7, 8]); - let expected_values = PrimitiveArray::::from_data(DataType::Int32, expected_values, None); + let expected_values = PrimitiveArray::::new(DataType::Int32, expected_values, None); let expected_type = ListArray::::default_datatype(DataType::Int32); - let expected = ListArray::::from_data( + let expected = ListArray::::new( expected_type, vec![0, 1, 1, 4].try_into().unwrap(), Box::new(expected_values), @@ -200,13 +200,13 @@ fn list_with_no_none() { #[test] fn list_with_none() { let values = Buffer::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let validity_values = vec![true, false, true, true, true]; let validity = Bitmap::from_trusted_len_iter(validity_values.into_iter()); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, vec![0, 2, 2, 6, 9, 10].try_into().unwrap(), Box::new(values), @@ -262,10 +262,10 @@ fn list_both_validity() { #[test] fn test_nested() { let values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); - let values = PrimitiveArray::::from_data(DataType::Int32, values, None); + let values = PrimitiveArray::::new(DataType::Int32, values, None); let data_type = ListArray::::default_datatype(DataType::Int32); - let array = ListArray::::from_data( + let array = ListArray::::new( data_type, vec![0, 2, 4, 7, 7, 8, 10].try_into().unwrap(), Box::new(values), @@ -273,7 +273,7 @@ fn test_nested() { ); let data_type = ListArray::::default_datatype(array.data_type().clone()); - let nested = ListArray::::from_data( + let nested = ListArray::::new( data_type, vec![0, 2, 5, 6].try_into().unwrap(), Box::new(array), @@ -285,10 +285,10 @@ fn test_nested() { // expected data let expected_values = Buffer::from(vec![1, 2, 3, 4, 5, 6, 7, 8]); - let expected_values = PrimitiveArray::::from_data(DataType::Int32, expected_values, None); + let expected_values = PrimitiveArray::::new(DataType::Int32, expected_values, None); let expected_data_type = ListArray::::default_datatype(DataType::Int32); - let expected_array = ListArray::::from_data( + let expected_array = ListArray::::new( expected_data_type, vec![0, 2, 4, 7, 7, 8].try_into().unwrap(), Box::new(expected_values), @@ -296,7 +296,7 @@ fn test_nested() { ); let expected_data_type = ListArray::::default_datatype(expected_array.data_type().clone()); - let expected = ListArray::::from_data( + let expected = ListArray::::new( expected_data_type, vec![0, 2, 5].try_into().unwrap(), Box::new(expected_array), diff --git a/tests/it/ffi/data.rs b/tests/it/ffi/data.rs index 165c7107926..bed327e8348 100644 --- a/tests/it/ffi/data.rs +++ b/tests/it/ffi/data.rs @@ -293,7 +293,7 @@ fn struct_() -> Result<()> { let values = vec![Int32Array::from([Some(1), None, Some(3)]).boxed()]; let validity = Bitmap::from([true, false, true]); - let array = StructArray::from_data(data_type, values, validity.into()); + let array = StructArray::new(data_type, values, validity.into()); test_round_trip(array) } diff --git a/tests/it/io/avro/read.rs b/tests/it/io/avro/read.rs index 1deb984a852..90aefbf4240 100644 --- a/tests/it/io/avro/read.rs +++ b/tests/it/io/avro/read.rs @@ -116,24 +116,24 @@ pub(super) fn data() -> Chunk> { BooleanArray::from_slice([true, false]).boxed(), Utf8Array::::from([Some("foo"), None]).boxed(), array.into_box(), - StructArray::from_data( + StructArray::new( DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), - vec![Box::new(PrimitiveArray::::from_slice([1.0, 2.0]))], + vec![PrimitiveArray::::from_slice([1.0, 2.0]).boxed()], None, ) .boxed(), DictionaryArray::try_from_keys( Int32Array::from_slice([1, 0]), - Box::new(Utf8Array::::from_slice(["SPADES", "HEARTS"])), + Utf8Array::::from_slice(["SPADES", "HEARTS"]).boxed(), ) .unwrap() .boxed(), PrimitiveArray::::from_slice([12345678i128, -12345678i128]) .to(DataType::Decimal(18, 5)) .boxed(), - StructArray::from_data( + StructArray::new( DataType::Struct(vec![Field::new("e", DataType::Float64, false)]), - vec![Box::new(PrimitiveArray::::from_slice([1.0, 0.0]))], + vec![PrimitiveArray::::from_slice([1.0, 0.0]).boxed()], Some([true, false].into()), ) .boxed(), diff --git a/tests/it/io/json/read.rs b/tests/it/io/json/read.rs index f2fb8570b8c..2c84f0460df 100644 --- a/tests/it/io/json/read.rs +++ b/tests/it/io/json/read.rs @@ -23,7 +23,7 @@ fn read_json() -> Result<()> { let result = read::deserialize(&json, data_type)?; - let expected = StructArray::from_data( + let expected = StructArray::new( DataType::Struct(vec![Field::new("a", DataType::Int64, true)]), vec![Box::new(Int64Array::from_slice([1, 2, 3])) as _], None, diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs index 3b0695cf631..ae7daf9bf95 100644 --- a/tests/it/io/json/write.rs +++ b/tests/it/io/json/write.rs @@ -19,7 +19,6 @@ macro_rules! test { #[test] fn int32() -> Result<()> { let array = Int32Array::from([Some(1), Some(2), Some(3), None, Some(5)]); - //let b = Utf8Array::::from(&vec![Some("a"), Some("b"), Some("c"), Some("d"), None]); let expected = r#"[1,2,3,null,5]"#; @@ -62,7 +61,7 @@ fn struct_() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); let expected = r#"[{"c1":1,"c2":"a"},{"c1":2,"c2":"b"},{"c1":3,"c2":"c"},{"c1":null,"c2":"d"},{"c1":5,"c2":null}]"#; @@ -80,18 +79,19 @@ fn nested_struct_with_validity() -> Result<()> { Field::new("c12", DataType::Struct(inner.clone()), false), ]; - let c1 = StructArray::from_data( + let c1 = StructArray::new( DataType::Struct(fields), vec![ - Box::new(Int32Array::from(&[Some(1), None, Some(5)])), - Box::new(StructArray::from_data( + Int32Array::from(&[Some(1), None, Some(5)]).boxed(), + StructArray::new( DataType::Struct(inner), vec![ - Box::new(Utf8Array::::from(&vec![None, Some("f"), Some("g")])), - Box::new(Int32Array::from(&[Some(20), None, Some(43)])), + Utf8Array::::from(&vec![None, Some("f"), Some("g")]).boxed(), + Int32Array::from(&[Some(20), None, Some(43)]).boxed(), ], Some(Bitmap::from([false, true, true])), - )), + ) + .boxed(), ], Some(Bitmap::from([true, true, false])), ); @@ -101,7 +101,7 @@ fn nested_struct_with_validity() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":null},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f","c122":null}},"c2":"b"},{"c1":null,"c2":"c"}]"#; @@ -116,11 +116,11 @@ fn nested_struct() -> Result<()> { Field::new("c12", DataType::Struct(vec![c121.clone()]), false), ]; - let c1 = StructArray::from_data( + let c1 = StructArray::new( DataType::Struct(fields), vec![ - Box::new(Int32Array::from(&[Some(1), None, Some(5)])), - Box::new(StructArray::from_data( + Int32Array::from(&[Some(1), None, Some(5)]).boxed(), + StructArray::new( DataType::Struct(vec![c121]), vec![Box::new(Utf8Array::::from(&vec![ Some("e"), @@ -128,7 +128,8 @@ fn nested_struct() -> Result<()> { Some("g"), ]))], None, - )), + ) + .boxed(), ], None, ); @@ -139,7 +140,7 @@ fn nested_struct() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":{"c11":1,"c12":{"c121":"e"}},"c2":"a"},{"c1":{"c11":null,"c12":{"c121":"f"}},"c2":"b"},{"c1":{"c11":5,"c12":{"c121":"g"}},"c2":"c"}]"#; @@ -168,7 +169,7 @@ fn struct_with_list_field() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":["a","a1"],"c2":1},{"c1":["b"],"c2":2},{"c1":["c"],"c2":3},{"c1":["d"],"c2":4},{"c1":["e"],"c2":5}]"#; @@ -203,7 +204,7 @@ fn nested_list() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":[[1,2],[3]],"c2":"foo"},{"c1":[],"c2":"bar"},{"c1":[[4,5,6]],"c2":null}]"#; @@ -298,11 +299,11 @@ fn list_of_struct() -> Result<()> { false, ))); - let s = StructArray::from_data( + let s = StructArray::new( DataType::Struct(fields), vec![ - Box::new(Int32Array::from(&[Some(1), None, Some(5)])), - Box::new(StructArray::from_data( + Int32Array::from(&[Some(1), None, Some(5)]).boxed(), + StructArray::new( DataType::Struct(inner), vec![Box::new(Utf8Array::::from(&vec![ Some("e"), @@ -310,7 +311,8 @@ fn list_of_struct() -> Result<()> { Some("g"), ]))], Some(Bitmap::from([false, true, true])), - )), + ) + .boxed(), ], Some(Bitmap::from([true, true, false])), ); @@ -319,11 +321,11 @@ fn list_of_struct() -> Result<()> { // [{"c11": 1, "c12": {"c121": "e"}}, {"c12": {"c121": "f"}}], // null, // [{"c11": 5, "c12": {"c121": "g"}}] - let c1 = ListArray::::from_data( + let c1 = ListArray::::new( c1_datatype, Buffer::from(vec![0, 2, 2, 3]).try_into().unwrap(), - Box::new(s), - Some(Bitmap::from_u8_slice([0b00000101], 3)), + s.boxed(), + Some(Bitmap::from([true, false, true])), ); let c2 = Int32Array::from_slice([1, 2, 3]); @@ -332,7 +334,7 @@ fn list_of_struct() -> Result<()> { Field::new("c1", c1.data_type().clone(), true), Field::new("c2", c2.data_type().clone(), true), ]); - let array = StructArray::from_data(data_type, vec![Box::new(c1) as _, Box::new(c2)], None); + let array = StructArray::new(data_type, vec![c1.boxed(), c2.boxed()], None); let expected = r#"[{"c1":[{"c11":1,"c12":null},{"c11":null,"c12":{"c121":"f"}}],"c2":1},{"c1":null,"c2":2},{"c1":[null],"c2":3}]"#; @@ -359,8 +361,7 @@ fn escaped_quotation_marks_in_utf8() -> Result<()> { #[test] fn write_date32() -> Result<()> { - let array = - PrimitiveArray::from_data(DataType::Date32, vec![1000i32, 8000, 10000].into(), None); + let array = PrimitiveArray::new(DataType::Date32, vec![1000i32, 8000, 10000].into(), None); let expected = r#"["1972-09-27","1991-11-27","1997-05-19"]"#; @@ -369,7 +370,7 @@ fn write_date32() -> Result<()> { #[test] fn write_timestamp() -> Result<()> { - let array = PrimitiveArray::from_data( + let array = PrimitiveArray::new( DataType::Timestamp(TimeUnit::Second, None), vec![10i64, 1 << 32, 1 << 33].into(), None, diff --git a/tests/it/io/ndjson/mod.rs b/tests/it/io/ndjson/mod.rs index b2e2b2fc895..ecdaefc2e21 100644 --- a/tests/it/io/ndjson/mod.rs +++ b/tests/it/io/ndjson/mod.rs @@ -82,7 +82,7 @@ fn case_list() -> (String, Box) { let d = Utf8Array::::from([Some("4"), None, Some("text")]); - let array = StructArray::from_data( + let array = StructArray::new( data_type, vec![a.boxed(), b.boxed(), c.boxed(), d.boxed()], None, @@ -130,11 +130,7 @@ fn case_dict() -> (String, Box) { ( data, - Box::new(StructArray::from_data( - DataType::Struct(fields), - vec![array.boxed()], - None, - )), + StructArray::new(DataType::Struct(fields), vec![array.boxed()], None).boxed(), ) } @@ -149,7 +145,7 @@ fn case_basics() -> (String, Box) { Field::new("c", DataType::Boolean, true), Field::new("d", DataType::Utf8, true), ]); - let array = StructArray::from_data( + let array = StructArray::new( data_type, vec![ Int64Array::from_slice([1, -10, 100000000]).boxed(), @@ -159,7 +155,7 @@ fn case_basics() -> (String, Box) { ], None, ); - (data, Box::new(array)) + (data, array.boxed()) } fn case_projection() -> (String, Box) { @@ -174,7 +170,7 @@ fn case_projection() -> (String, Box) { // note how "d" is not here Field::new("e", DataType::Binary, true), ]); - let array = StructArray::from_data( + let array = StructArray::new( data_type, vec![ UInt32Array::from_slice([1, 10, 100000000]).boxed(), @@ -184,7 +180,7 @@ fn case_projection() -> (String, Box) { ], None, ); - (data, Box::new(array)) + (data, array.boxed()) } fn case_struct() -> (String, Box) { @@ -208,29 +204,25 @@ fn case_struct() -> (String, Box) { // build expected output let d = Utf8Array::::from(&vec![Some("text"), None, Some("text"), None]); - let c = StructArray::from_data( + let c = StructArray::new( DataType::Struct(vec![d_field]), - vec![Box::new(d)], - Some(Bitmap::from_u8_slice([0b11111101], 4)), + vec![d.boxed()], + Some([true, false, true, true].into()), ); let b = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]); let inner = DataType::Struct(vec![Field::new("b", DataType::Boolean, true), c_field]); - let expected = StructArray::from_data( + let expected = StructArray::new( inner, - vec![Box::new(b), Box::new(c)], - Some(Bitmap::from_u8_slice([0b11110111], 4)), + vec![b.boxed(), c.boxed()], + Some([true, true, true, false].into()), ); let data_type = DataType::Struct(fields); ( data, - Box::new(StructArray::from_data( - data_type, - vec![expected.boxed()], - None, - )), + StructArray::new(data_type, vec![expected.boxed()], None).boxed(), ) } @@ -264,7 +256,7 @@ fn case_nested_list() -> (String, Box) { None, ]); - let c = StructArray::from_data( + let c = StructArray::new( DataType::Struct(vec![d_field]), vec![d.boxed()], Some(Bitmap::from_u8_slice([0b11111011], 6)), @@ -278,23 +270,24 @@ fn case_nested_list() -> (String, Box) { None, Some(true), ]); - let a_struct = StructArray::from_data( + let a_struct = StructArray::new( DataType::Struct(vec![b_field, c_field]), vec![b.boxed(), c.boxed()], None, ); - let expected = ListArray::from_data( + let expected = ListArray::new( a_list_data_type, vec![0i32, 2, 3, 6, 6, 6].try_into().unwrap(), a_struct.boxed(), - Some(Bitmap::from_u8_slice([0b00010111], 5)), + Some([true, true, true, false, true].into()), ); - let array = Box::new(StructArray::from_data( + let array = StructArray::new( DataType::Struct(vec![a_field]), - vec![Box::new(expected)], + vec![expected.boxed()], None, - )); + ) + .boxed(); (data, array) } diff --git a/tests/it/io/ndjson/read.rs b/tests/it/io/ndjson/read.rs index d7aa3b3feb2..a4afd53afe7 100644 --- a/tests/it/io/ndjson/read.rs +++ b/tests/it/io/ndjson/read.rs @@ -103,9 +103,9 @@ fn case_nested_struct() -> (String, Box) { BooleanArray::from([None, None, Some(true), None]).boxed(), ]; - let values = vec![StructArray::from_data(inner, values, None).boxed()]; + let values = vec![StructArray::new(inner, values, None).boxed()]; - let array = Box::new(StructArray::from_data(data_type, values, None)); + let array = StructArray::new(data_type, values, None).boxed(); (ndjson.to_string(), array) } diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 4790479cc2f..f373a590b0d 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -201,9 +201,7 @@ pub fn pyarrow_nested_nullable(column: &str) -> Box { ])), "list_nested_i64" | "list_nested_inner_required_i64" - | "list_nested_inner_required_required_i64" => { - Box::new(NullArray::from_data(DataType::Null, 1)) - } + | "list_nested_inner_required_required_i64" => Box::new(NullArray::new(DataType::Null, 1)), other => unreachable!("{}", other), }; @@ -211,16 +209,12 @@ pub fn pyarrow_nested_nullable(column: &str) -> Box { "list_int64_required_required" => { // [[0, 1], [], [2, 0, 3], [4, 5, 6], [], [7, 8, 9], [], [10]] let data_type = DataType::List(Box::new(Field::new("item", DataType::Int64, false))); - Box::new(ListArray::::from_data( - data_type, offsets, values, None, - )) + ListArray::::new(data_type, offsets, values, None).boxed() } "list_int64_optional_required" => { // [[0, 1], [], [2, 0, 3], [4, 5, 6], [], [7, 8, 9], [], [10]] let data_type = DataType::List(Box::new(Field::new("item", DataType::Int64, true))); - Box::new(ListArray::::from_data( - data_type, offsets, values, None, - )) + ListArray::::new(data_type, offsets, values, None).boxed() } "list_nested_i64" => { // [[0, 1]], None, [[2, None], [3]], [[4, 5], [6]], [], [[7], None, [9]], [[], [None], None], [[10]] @@ -295,9 +289,7 @@ pub fn pyarrow_nested_nullable(column: &str) -> Box { // [0, 2, 2, 5, 8, 8, 11, 11, 12] // [[a1, a2], None, [a3, a4, a5], [a6, a7, a8], [], [a9, a10, a11], None, [a12]] let data_type = DataType::List(Box::new(field)); - Box::new(ListArray::::from_data( - data_type, offsets, values, validity, - )) + ListArray::::new(data_type, offsets, values, validity).boxed() } } } @@ -1197,7 +1189,7 @@ fn generic_data() -> Result<(Schema, Chunk>)> { let values = BinaryArray::::from_slice([b"ab", b"ac"]).boxed(); let array4 = DictionaryArray::try_from_keys(indices.clone(), values).unwrap(); - let values = FixedSizeBinaryArray::from_data( + let values = FixedSizeBinaryArray::new( DataType::FixedSizeBinary(2), vec![b'a', b'b', b'a', b'c'].into(), None, diff --git a/tests/it/io/print.rs b/tests/it/io/print.rs index f96af000fee..206d33f181d 100644 --- a/tests/it/io/print.rs +++ b/tests/it/io/print.rs @@ -327,7 +327,7 @@ fn write_struct() -> Result<()> { let validity = Some(Bitmap::from(&[true, false, true])); - let array = StructArray::from_data(DataType::Struct(fields), values, validity); + let array = StructArray::new(DataType::Struct(fields), values, validity); let columns = Chunk::new(vec![&array as &dyn Array]); @@ -363,7 +363,7 @@ fn write_union() -> Result<()> { Utf8Array::::from([Some("a"), Some("b"), Some("c")]).boxed(), ]; - let array = UnionArray::from_data(data_type, types, fields, None); + let array = UnionArray::new(data_type, types, fields, None); let batch = Chunk::new(vec![&array as &dyn Array]);