diff --git a/arrow-array/src/builder/boolean_builder.rs b/arrow-array/src/builder/boolean_builder.rs index 96711dd1f6f..96f436253c5 100644 --- a/arrow-array/src/builder/boolean_builder.rs +++ b/arrow-array/src/builder/boolean_builder.rs @@ -18,6 +18,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BooleanBufferBuilder}; use crate::{ArrayRef, BooleanArray}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; use std::any::Any; @@ -154,6 +155,23 @@ impl BooleanBuilder { let array_data = unsafe { builder.build_unchecked() }; BooleanArray::from(array_data) } + + /// Builds the [BooleanArray] without resetting the builder. + pub fn finish_cloned(&self) -> BooleanArray { + let len = self.len(); + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + let value_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); + let builder = ArrayData::builder(DataType::Boolean) + .len(len) + .add_buffer(value_buffer) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { builder.build_unchecked() }; + BooleanArray::from(array_data) + } } impl ArrayBuilder for BooleanBuilder { @@ -186,6 +204,11 @@ impl ArrayBuilder for BooleanBuilder { fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. 
+ fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } #[cfg(test)] @@ -259,4 +282,26 @@ mod tests { assert_eq!(0, array.null_count()); assert!(array.data().null_buffer().is_none()); } + + #[test] + fn test_boolean_array_builder_finish_cloned() { + let mut builder = BooleanArray::builder(16); + builder.append_option(Some(true)); + builder.append_value(false); + builder.append_slice(&[true, false, true]); + let mut array = builder.finish_cloned(); + assert_eq!(3, array.true_count()); + assert_eq!(2, array.false_count()); + + builder + .append_values(&[false, false, true], &[true, true, true]) + .unwrap(); + + array = builder.finish(); + assert_eq!(4, array.true_count()); + assert_eq!(4, array.false_count()); + + assert_eq!(0, array.null_count()); + assert!(array.data().null_buffer().is_none()); + } } diff --git a/arrow-array/src/builder/fixed_size_binary_builder.rs b/arrow-array/src/builder/fixed_size_binary_builder.rs index 15b840d0a95..e9581922cca 100644 --- a/arrow-array/src/builder/fixed_size_binary_builder.rs +++ b/arrow-array/src/builder/fixed_size_binary_builder.rs @@ -18,6 +18,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, UInt8BufferBuilder}; use crate::{ArrayRef, FixedSizeBinaryArray}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType}; use std::any::Any; @@ -87,6 +88,23 @@ impl FixedSizeBinaryBuilder { let array_data = unsafe { array_data_builder.build_unchecked() }; FixedSizeBinaryArray::from(array_data) } + + /// Builds the [`FixedSizeBinaryArray`] without resetting the builder. 
+ pub fn finish_cloned(&self) -> FixedSizeBinaryArray { + let array_length = self.len(); + let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); + let array_data_builder = + ArrayData::builder(DataType::FixedSizeBinary(self.value_length)) + .add_buffer(values_buffer) + .null_bit_buffer( + self.null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref), + ) + .len(array_length); + let array_data = unsafe { array_data_builder.build_unchecked() }; + FixedSizeBinaryArray::from(array_data) + } } impl ArrayBuilder for FixedSizeBinaryBuilder { @@ -119,6 +137,11 @@ impl ArrayBuilder for FixedSizeBinaryBuilder { fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } #[cfg(test)] @@ -146,6 +169,36 @@ mod tests { assert_eq!(5, array.value_length()); } + #[test] + fn test_fixed_size_binary_builder_finish_cloned() { + let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5); + + // [b"hello", null, "arrow"] + builder.append_value(b"hello").unwrap(); + builder.append_null(); + builder.append_value(b"arrow").unwrap(); + let mut array: FixedSizeBinaryArray = builder.finish_cloned(); + + assert_eq!(&DataType::FixedSizeBinary(5), array.data_type()); + assert_eq!(3, array.len()); + assert_eq!(1, array.null_count()); + assert_eq!(10, array.value_offset(2)); + assert_eq!(5, array.value_length()); + + // [b"finis", null, "clone"] + builder.append_value(b"finis").unwrap(); + builder.append_null(); + builder.append_value(b"clone").unwrap(); + + array = builder.finish(); + + assert_eq!(&DataType::FixedSizeBinary(5), array.data_type()); + assert_eq!(6, array.len()); + assert_eq!(2, array.null_count()); + assert_eq!(25, array.value_offset(5)); + assert_eq!(5, array.value_length()); + } + #[test] fn test_fixed_size_binary_builder_with_zero_value_length() { let mut builder = FixedSizeBinaryBuilder::new(0); diff --git 
a/arrow-array/src/builder/fixed_size_list_builder.rs b/arrow-array/src/builder/fixed_size_list_builder.rs index f6388d7899b..516c2292578 100644 --- a/arrow-array/src/builder/fixed_size_list_builder.rs +++ b/arrow-array/src/builder/fixed_size_list_builder.rs @@ -18,6 +18,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::ArrayBuilder; use crate::{ArrayRef, FixedSizeListArray}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{DataType, Field}; use std::any::Any; @@ -84,6 +85,11 @@ where fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } impl FixedSizeListBuilder @@ -135,6 +141,37 @@ where FixedSizeListArray::from(array_data) } + + /// Builds the [`FixedSizeListArray`] without resetting the builder. + pub fn finish_cloned(&self) -> FixedSizeListArray { + let len = self.len(); + let values_arr = self.values_builder.finish_cloned(); + let values_data = values_arr.data(); + + assert_eq!( + values_data.len(), len * self.list_len as usize, + "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).", + values_data.len(), + self.list_len, + len, + ); + + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + let array_data = ArrayData::builder(DataType::FixedSizeList( + Box::new(Field::new("item", values_data.data_type().clone(), true)), + self.list_len, + )) + .len(len) + .add_child_data(values_data.clone()) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { array_data.build_unchecked() }; + + FixedSizeListArray::from(array_data) + } } #[cfg(test)] @@ -176,6 +213,48 @@ mod tests { assert_eq!(3, list_array.value_length()); } + #[test] + fn test_fixed_size_list_array_builder_finish_cloned() { + let values_builder = Int32Builder::new(); + let mut builder = 
FixedSizeListBuilder::new(values_builder, 3); + + // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true); + builder.values().append_null(); + builder.values().append_null(); + builder.values().append_null(); + builder.append(false); + builder.values().append_value(3); + builder.values().append_null(); + builder.values().append_value(5); + builder.append(true); + let mut list_array = builder.finish_cloned(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(3, list_array.value_length()); + + builder.values().append_value(6); + builder.values().append_value(7); + builder.values().append_null(); + builder.append(true); + builder.values().append_null(); + builder.values().append_null(); + builder.values().append_null(); + builder.append(false); + list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(5, list_array.len()); + assert_eq!(2, list_array.null_count()); + assert_eq!(6, list_array.value_offset(2)); + assert_eq!(3, list_array.value_length()); + } + #[test] fn test_fixed_size_list_array_builder_empty() { let values_builder = Int32Array::builder(5); diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index fa0a31ad79e..9f9078c708c 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder}; use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType}; use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait}; -use arrow_buffer::ArrowNativeType; +use arrow_buffer::{ArrowNativeType, Buffer}; use arrow_data::ArrayDataBuilder; use 
std::any::Any; use std::sync::Arc; @@ -94,6 +94,25 @@ impl GenericByteBuilder { GenericByteArray::from(array_data) } + /// Builds the [`GenericByteArray`] without resetting the builder. + pub fn finish_cloned(&self) -> GenericByteArray { + let array_type = T::DATA_TYPE; + let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + let value_buffer = Buffer::from_slice_ref(self.value_builder.as_slice()); + let array_builder = ArrayDataBuilder::new(array_type) + .len(self.len()) + .add_buffer(offset_buffer) + .add_buffer(value_buffer) + .null_bit_buffer( + self.null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref), + ); + + let array_data = unsafe { array_builder.build_unchecked() }; + GenericByteArray::from(array_data) + } + /// Returns the current values buffer as a slice pub fn values_slice(&self) -> &[u8] { self.value_builder.as_slice() @@ -138,6 +157,11 @@ impl ArrayBuilder for GenericByteBuilder { Arc::new(self.finish()) } + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } + /// Returns the builder as a non-mutable `Any` reference. 
fn as_any(&self) -> &dyn Any { self @@ -325,4 +349,34 @@ mod tests { fn test_large_string_array_builder_finish() { _test_generic_string_array_builder_finish::() } + + fn _test_generic_string_array_builder_finish_cloned() { + let mut builder = GenericStringBuilder::::with_capacity(3, 11); + + builder.append_value("hello"); + builder.append_value("rust"); + builder.append_null(); + + let mut arr = builder.finish_cloned(); + assert!(!builder.is_empty()); + assert_eq!(3, arr.len()); + + builder.append_value("arrow"); + builder.append_value("parquet"); + arr = builder.finish(); + + assert!(arr.data().null_buffer().is_some()); + assert_eq!(&[O::zero()], builder.offsets_slice()); + assert_eq!(5, arr.len()); + } + + #[test] + fn test_string_array_builder_finish_cloned() { + _test_generic_string_array_builder_finish_cloned::() + } + + #[test] + fn test_large_string_array_builder_finish_cloned() { + _test_generic_string_array_builder_finish_cloned::() + } } diff --git a/arrow-array/src/builder/generic_list_builder.rs b/arrow-array/src/builder/generic_list_builder.rs index 11656786454..8f3f881c4b3 100644 --- a/arrow-array/src/builder/generic_list_builder.rs +++ b/arrow-array/src/builder/generic_list_builder.rs @@ -18,6 +18,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BufferBuilder}; use crate::{ArrayRef, GenericListArray, OffsetSizeTrait}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::Field; use std::any::Any; @@ -85,6 +86,11 @@ where fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } impl GenericListBuilder @@ -138,6 +144,34 @@ where GenericListArray::::from(array_data) } + /// Builds the [`GenericListArray`] without resetting the builder. 
+ pub fn finish_cloned(&self) -> GenericListArray { + let len = self.len(); + let values_arr = self.values_builder.finish_cloned(); + let values_data = values_arr.data(); + + let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + let field = Box::new(Field::new( + "item", + values_data.data_type().clone(), + true, // TODO: find a consistent way of getting this + )); + let data_type = GenericListArray::::DATA_TYPE_CONSTRUCTOR(field); + let array_data_builder = ArrayData::builder(data_type) + .len(len) + .add_buffer(offset_buffer) + .add_child_data(values_data.clone()) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { array_data_builder.build_unchecked() }; + + GenericListArray::::from(array_data) + } + /// Returns the current offsets buffer as a slice pub fn offsets_slice(&self) -> &[OffsetSize] { self.offsets_builder.as_slice() @@ -255,6 +289,27 @@ mod tests { assert!(builder.is_empty()); } + #[test] + fn test_list_array_builder_finish_cloned() { + let values_builder = Int32Array::builder(5); + let mut builder = ListBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true); + + let mut arr = builder.finish_cloned(); + assert_eq!(2, arr.len()); + assert!(!builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true); + arr = builder.finish(); + assert_eq!(3, arr.len()); + assert!(builder.is_empty()); + } + #[test] fn test_list_list_array_builder() { let primitive_builder = Int32Builder::with_capacity(10); diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index 4b75972482b..5602f88636c 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -18,6 +18,7 @@ use 
crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BufferBuilder}; use crate::{Array, ArrayRef, MapArray, StructArray}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field}; use std::any::Any; @@ -142,6 +143,48 @@ impl MapBuilder { MapArray::from(array_data) } + + pub fn finish_cloned(&self) -> MapArray { + let len = self.len(); + + // Build the keys and values + let keys_arr = self.key_builder.finish_cloned(); + let values_arr = self.value_builder.finish_cloned(); + + let keys_field = Field::new( + self.field_names.key.as_str(), + keys_arr.data_type().clone(), + false, // always non-nullable + ); + let values_field = Field::new( + self.field_names.value.as_str(), + values_arr.data_type().clone(), + true, + ); + + let struct_array = + StructArray::from(vec![(keys_field, keys_arr), (values_field, values_arr)]); + + let offset_buffer = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + let map_field = Box::new(Field::new( + self.field_names.entry.as_str(), + struct_array.data_type().clone(), + false, // always non-nullable + )); + let array_data = ArrayData::builder(DataType::Map(map_field, false)) // TODO: support sorted keys + .len(len) + .add_buffer(offset_buffer) + .add_child_data(struct_array.into_data()) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { array_data.build_unchecked() }; + + MapArray::from(array_data) + } } impl ArrayBuilder for MapBuilder { @@ -157,6 +200,11 @@ impl ArrayBuilder for MapBuilder { Arc::new(self.finish()) } + /// Builds the array without resetting the builder. 
+ fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } + fn as_any(&self) -> &dyn Any { self } diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index a5c1e3d4b2f..eaf8243973b 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -107,6 +107,9 @@ pub trait ArrayBuilder: Any + Send { /// Builds the array fn finish(&mut self) -> ArrayRef; + /// Builds the array without resetting the underlying builder. + fn finish_cloned(&self) -> ArrayRef; + /// Returns the builder as a non-mutable `Any` reference. /// /// This is most useful when one wants to call non-mutable APIs on a specific builder diff --git a/arrow-array/src/builder/null_buffer_builder.rs b/arrow-array/src/builder/null_buffer_builder.rs index fef7214d5aa..b3c788fe599 100644 --- a/arrow-array/src/builder/null_buffer_builder.rs +++ b/arrow-array/src/builder/null_buffer_builder.rs @@ -135,7 +135,11 @@ impl NullBufferBuilder { buf } - #[inline] + /// Returns the inner bitmap builder as slice + pub fn as_slice(&self) -> Option<&[u8]> { + Some(self.bitmap_builder.as_ref()?.as_slice()) + } + fn materialize_if_needed(&mut self) { if self.bitmap_builder.is_none() { self.materialize() diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index 55d8bac0189..7a1fbafc76f 100644 --- a/arrow-array/src/builder/primitive_builder.rs +++ b/arrow-array/src/builder/primitive_builder.rs @@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::{ArrayBuilder, BufferBuilder}; use crate::types::*; use crate::{ArrayRef, ArrowPrimitiveType, PrimitiveArray}; -use arrow_buffer::MutableBuffer; +use arrow_buffer::{Buffer, MutableBuffer}; use arrow_data::ArrayData; use std::any::Any; use std::sync::Arc; @@ -93,6 +93,11 @@ impl ArrayBuilder for PrimitiveBuilder { fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the 
builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } impl Default for PrimitiveBuilder { @@ -219,6 +224,23 @@ impl PrimitiveBuilder { PrimitiveArray::::from(array_data) } + /// Builds the [`PrimitiveArray`] without resetting the builder. + pub fn finish_cloned(&self) -> PrimitiveArray { + let len = self.len(); + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + let values_buffer = Buffer::from_slice_ref(self.values_builder.as_slice()); + let builder = ArrayData::builder(T::DATA_TYPE) + .len(len) + .add_buffer(values_buffer) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { builder.build_unchecked() }; + PrimitiveArray::::from(array_data) + } + /// Returns the current values buffer as a slice pub fn values_slice(&self) -> &[T::Native] { self.values_builder.as_slice() @@ -431,4 +453,26 @@ mod tests { assert_eq!(5, arr.len()); assert_eq!(0, builder.len()); } + + #[test] + fn test_primitive_array_builder_finish_cloned() { + let mut builder = Int32Builder::new(); + builder.append_value(23); + builder.append_value(45); + let result = builder.finish_cloned(); + assert_eq!(result, Int32Array::from(vec![23, 45])); + builder.append_value(56); + assert_eq!(builder.finish_cloned(), Int32Array::from(vec![23, 45, 56])); + + builder.append_slice(&[2, 4, 6, 8]); + let mut arr = builder.finish(); + assert_eq!(7, arr.len()); + assert_eq!(arr, Int32Array::from(vec![23, 45, 56, 2, 4, 6, 8])); + assert_eq!(0, builder.len()); + + builder.append_slice(&[1, 3, 5, 7, 9]); + arr = builder.finish(); + assert_eq!(5, arr.len()); + assert_eq!(0, builder.len()); + } } diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs index c43416e5af3..5b8a7283528 100644 --- a/arrow-array/src/builder/primitive_dictionary_builder.rs +++ b/arrow-array/src/builder/primitive_dictionary_builder.rs @@ -160,6 +160,11 @@ where fn finish(&mut 
self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } impl PrimitiveDictionaryBuilder @@ -210,6 +215,23 @@ where DictionaryArray::from(unsafe { builder.build_unchecked() }) } + + /// Builds the `DictionaryArray` without resetting the builder. + pub fn finish_cloned(&self) -> DictionaryArray { + let values = self.values_builder.finish_cloned(); + let keys = self.keys_builder.finish_cloned(); + + let data_type = + DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE)); + + let builder = keys + .into_data() + .into_builder() + .data_type(data_type) + .child_data(vec![values.into_data()]); + + DictionaryArray::from(unsafe { builder.build_unchecked() }) + } } #[cfg(test)] diff --git a/arrow-array/src/builder/string_dictionary_builder.rs b/arrow-array/src/builder/string_dictionary_builder.rs index e41086c872f..f44756b6bcc 100644 --- a/arrow-array/src/builder/string_dictionary_builder.rs +++ b/arrow-array/src/builder/string_dictionary_builder.rs @@ -222,6 +222,11 @@ where fn finish(&mut self) -> ArrayRef { Arc::new(self.finish()) } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } } impl StringDictionaryBuilder @@ -287,6 +292,23 @@ where DictionaryArray::from(unsafe { builder.build_unchecked() }) } + + /// Builds the `DictionaryArray` without resetting the builder. 
+ pub fn finish_cloned(&self) -> DictionaryArray { + let values = self.values_builder.finish_cloned(); + let keys = self.keys_builder.finish_cloned(); + + let data_type = + DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8)); + + let builder = keys + .into_data() + .into_builder() + .data_type(data_type) + .child_data(vec![values.into_data()]); + + DictionaryArray::from(unsafe { builder.build_unchecked() }) + } } fn get_bytes<'a, K: ArrowNativeType>(values: &'a StringBuilder, key: &K) -> &'a [u8] { @@ -331,6 +353,57 @@ mod tests { assert_eq!(ava.value(1), "def"); } + #[test] + fn test_string_dictionary_builder_finish_cloned() { + let mut builder = StringDictionaryBuilder::::new(); + builder.append("abc").unwrap(); + builder.append_null(); + builder.append("def").unwrap(); + builder.append("def").unwrap(); + builder.append("abc").unwrap(); + let mut array = builder.finish_cloned(); + + assert_eq!( + array.keys(), + &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) + ); + + // Values are polymorphic and so require a downcast. + let av = array.values(); + let ava: &StringArray = av.as_any().downcast_ref::().unwrap(); + + assert_eq!(ava.value(0), "abc"); + assert_eq!(ava.value(1), "def"); + + builder.append("abc").unwrap(); + builder.append("ghi").unwrap(); + builder.append("def").unwrap(); + + array = builder.finish(); + + assert_eq!( + array.keys(), + &Int8Array::from(vec![ + Some(0), + None, + Some(1), + Some(1), + Some(0), + Some(0), + Some(2), + Some(1) + ]) + ); + + // Values are polymorphic and so require a downcast. 
+ let av2 = array.values(); + let ava2: &StringArray = av2.as_any().downcast_ref::().unwrap(); + + assert_eq!(ava2.value(0), "abc"); + assert_eq!(ava2.value(1), "def"); + assert_eq!(ava2.value(2), "ghi"); + } + #[test] fn test_string_dictionary_builder_with_existing_dictionary() { let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index f00f81d1a5c..98d0e1a1d27 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -18,6 +18,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder; use crate::builder::*; use crate::{Array, ArrayRef, StructArray}; +use arrow_buffer::Buffer; use arrow_data::ArrayData; use arrow_schema::{DataType, Field, IntervalUnit, TimeUnit}; use std::any::Any; @@ -63,6 +64,11 @@ impl ArrayBuilder for StructBuilder { Arc::new(self.finish()) } + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } + /// Returns the builder as a non-mutable `Any` reference. /// /// This is most useful when one wants to call non-mutable APIs on a specific builder @@ -230,6 +236,30 @@ impl StructBuilder { StructArray::from(array_data) } + /// Builds the `StructArray` without resetting the builder. 
+ pub fn finish_cloned(&self) -> StructArray { + self.validate_content(); + + let mut child_data = Vec::with_capacity(self.field_builders.len()); + for f in &self.field_builders { + let arr = f.finish_cloned(); + child_data.push(arr.data().clone()); + } + let length = self.len(); + let null_bit_buffer = self + .null_buffer_builder + .as_slice() + .map(Buffer::from_slice_ref); + + let builder = ArrayData::builder(DataType::Struct(self.fields.clone())) + .len(length) + .child_data(child_data) + .null_bit_buffer(null_bit_buffer); + + let array_data = unsafe { builder.build_unchecked() }; + StructArray::from(array_data) + } + /// Constructs and validates contents in the builder to ensure that /// - fields and field_builders are of equal length /// - the number of items in individual field_builders are equal to self.len() @@ -374,6 +404,64 @@ mod tests { assert_eq!(0, builder.len()); } + #[test] + fn test_struct_array_builder_finish_cloned() { + let int_builder = Int32Builder::new(); + let bool_builder = BooleanBuilder::new(); + + let mut fields = Vec::new(); + let mut field_builders = Vec::new(); + fields.push(Field::new("f1", DataType::Int32, false)); + field_builders.push(Box::new(int_builder) as Box); + fields.push(Field::new("f2", DataType::Boolean, false)); + field_builders.push(Box::new(bool_builder) as Box); + + let mut builder = StructBuilder::new(fields, field_builders); + builder + .field_builder::(0) + .unwrap() + .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + builder + .field_builder::(1) + .unwrap() + .append_slice(&[ + false, true, false, true, false, true, false, true, false, true, + ]); + + // Append slot values - all are valid. 
+ for _ in 0..10 { + builder.append(true); + } + + assert_eq!(10, builder.len()); + + let mut arr = builder.finish_cloned(); + + assert_eq!(10, arr.len()); + assert_eq!(10, builder.len()); + + builder + .field_builder::(0) + .unwrap() + .append_slice(&[1, 3, 5, 7, 9]); + builder + .field_builder::(1) + .unwrap() + .append_slice(&[false, true, false, true, false]); + + // Append slot values - all are valid. + for _ in 0..5 { + builder.append(true); + } + + assert_eq!(15, builder.len()); + + arr = builder.finish(); + + assert_eq!(15, arr.len()); + assert_eq!(0, builder.len()); + } + #[test] fn test_struct_array_builder_from_schema() { let mut fields = vec![