Skip to content

Commit

Permalink
Add finish_cloned to ArrayBuilder (#3158)
Browse files Browse the repository at this point in the history
* add finish_cloned to PrimitiveBuilder

* Add finish_cloned on array builders

* incorporate PR comments and other PR merges

* remove build_clone from union builder

Co-authored-by: askoa <askoa@local>
  • Loading branch information
askoa and askoa committed Nov 23, 2022
1 parent 12a67b9 commit 6c466af
Show file tree
Hide file tree
Showing 12 changed files with 571 additions and 3 deletions.
45 changes: 45 additions & 0 deletions arrow-array/src/builder/boolean_builder.rs
Expand Up @@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BooleanBufferBuilder};
use crate::{ArrayRef, BooleanArray};
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
Expand Down Expand Up @@ -154,6 +155,23 @@ impl BooleanBuilder {
let array_data = unsafe { builder.build_unchecked() };
BooleanArray::from(array_data)
}

/// Creates a [`BooleanArray`] from the values appended so far.
///
/// The builder is only borrowed, so its contents stay in place and further
/// values can be appended afterwards.
pub fn finish_cloned(&self) -> BooleanArray {
    // `Buffer::from_slice_ref` builds new buffers from borrowed slices, so the
    // builder keeps its own value bits.
    let values = Buffer::from_slice_ref(self.values_builder.as_slice());
    // The validity bitmap is optional; it is only attached when the null
    // buffer builder exposes one.
    let validity = self
        .null_buffer_builder
        .as_slice()
        .map(Buffer::from_slice_ref);

    let data_builder = ArrayData::builder(DataType::Boolean)
        .len(self.len())
        .add_buffer(values)
        .null_bit_buffer(validity);

    // NOTE(review): `build_unchecked` skips validation; this relies on the
    // builder keeping its length, value bits and validity bits consistent —
    // confirm against the append methods.
    let data = unsafe { data_builder.build_unchecked() };
    BooleanArray::from(data)
}
}

impl ArrayBuilder for BooleanBuilder {
Expand Down Expand Up @@ -186,6 +204,11 @@ impl ArrayBuilder for BooleanBuilder {
fn finish(&mut self) -> ArrayRef {
// Wraps the array returned by `self.finish()` in an `Arc` so it can be handed out as an `ArrayRef`.
// NOTE(review): `self.finish()` presumably resolves to the inherent `BooleanBuilder::finish`,
// which is not fully visible in this hunk — confirm.
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
///
/// Wraps the [`BooleanArray`] produced by the inherent
/// `BooleanBuilder::finish_cloned` in an [`Arc`] and returns it as an [`ArrayRef`].
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
}

#[cfg(test)]
Expand Down Expand Up @@ -259,4 +282,26 @@ mod tests {
assert_eq!(0, array.null_count());
assert!(array.data().null_buffer().is_none());
}

#[test]
fn test_boolean_array_builder_finish_cloned() {
    let mut builder = BooleanArray::builder(16);

    // Five values in total: true, false, true, false, true.
    builder.append_option(Some(true));
    builder.append_value(false);
    builder.append_slice(&[true, false, true]);

    // The snapshot reflects everything appended up to this point.
    let snapshot = builder.finish_cloned();
    assert_eq!(3, snapshot.true_count());
    assert_eq!(2, snapshot.false_count());

    // The builder is still usable after `finish_cloned`; the counts below
    // show that the earlier five values are still part of the final array.
    builder
        .append_values(&[false, false, true], &[true, true, true])
        .unwrap();

    let array = builder.finish();
    assert_eq!(4, array.true_count());
    assert_eq!(4, array.false_count());

    // No null was ever appended, so no validity bitmap is expected.
    assert_eq!(0, array.null_count());
    assert!(array.data().null_buffer().is_none());
}
}
53 changes: 53 additions & 0 deletions arrow-array/src/builder/fixed_size_binary_builder.rs
Expand Up @@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, UInt8BufferBuilder};
use crate::{ArrayRef, FixedSizeBinaryArray};
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
use std::any::Any;
Expand Down Expand Up @@ -87,6 +88,23 @@ impl FixedSizeBinaryBuilder {
let array_data = unsafe { array_data_builder.build_unchecked() };
FixedSizeBinaryArray::from(array_data)
}

/// Creates a [`FixedSizeBinaryArray`] from the values appended so far.
///
/// The builder is only borrowed, so its contents stay in place and further
/// values can be appended afterwards.
pub fn finish_cloned(&self) -> FixedSizeBinaryArray {
    let data_type = DataType::FixedSizeBinary(self.value_length);

    // New buffers are built from borrowed slices; the builder keeps its own.
    let values = Buffer::from_slice_ref(self.values_builder.as_slice());
    // The validity bitmap is optional; it is only attached when the null
    // buffer builder exposes one.
    let validity = self
        .null_buffer_builder
        .as_slice()
        .map(Buffer::from_slice_ref);

    let data_builder = ArrayData::builder(data_type)
        .len(self.len())
        .add_buffer(values)
        .null_bit_buffer(validity);

    // NOTE(review): `build_unchecked` skips validation; this relies on the
    // builder keeping its length, value bytes and validity bits consistent —
    // confirm against the append methods.
    let data = unsafe { data_builder.build_unchecked() };
    FixedSizeBinaryArray::from(data)
}
}

impl ArrayBuilder for FixedSizeBinaryBuilder {
Expand Down Expand Up @@ -119,6 +137,11 @@ impl ArrayBuilder for FixedSizeBinaryBuilder {
fn finish(&mut self) -> ArrayRef {
// Wraps the array returned by `self.finish()` in an `Arc` so it can be handed out as an `ArrayRef`.
// NOTE(review): `self.finish()` presumably resolves to the inherent `FixedSizeBinaryBuilder::finish`,
// which is not fully visible in this hunk — confirm.
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
///
/// Wraps the [`FixedSizeBinaryArray`] produced by the inherent
/// `FixedSizeBinaryBuilder::finish_cloned` in an [`Arc`] and returns it as an [`ArrayRef`].
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
}

#[cfg(test)]
Expand Down Expand Up @@ -146,6 +169,36 @@ mod tests {
assert_eq!(5, array.value_length());
}

#[test]
fn test_fixed_size_binary_builder_finish_cloned() {
    let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

    // First batch: [b"hello", null, b"arrow"]
    builder.append_value(b"hello").unwrap();
    builder.append_null();
    builder.append_value(b"arrow").unwrap();

    // The snapshot holds exactly the first batch.
    let snapshot: FixedSizeBinaryArray = builder.finish_cloned();
    assert_eq!(&DataType::FixedSizeBinary(5), snapshot.data_type());
    assert_eq!(3, snapshot.len());
    assert_eq!(1, snapshot.null_count());
    assert_eq!(10, snapshot.value_offset(2));
    assert_eq!(5, snapshot.value_length());

    // Second batch, appended to the same builder: [b"finis", null, b"clone"]
    builder.append_value(b"finis").unwrap();
    builder.append_null();
    builder.append_value(b"clone").unwrap();

    // `finish` returns both batches, so the first one was not discarded by
    // `finish_cloned`.
    let array: FixedSizeBinaryArray = builder.finish();
    assert_eq!(&DataType::FixedSizeBinary(5), array.data_type());
    assert_eq!(6, array.len());
    assert_eq!(2, array.null_count());
    assert_eq!(25, array.value_offset(5));
    assert_eq!(5, array.value_length());
}

#[test]
fn test_fixed_size_binary_builder_with_zero_value_length() {
let mut builder = FixedSizeBinaryBuilder::new(0);
Expand Down
79 changes: 79 additions & 0 deletions arrow-array/src/builder/fixed_size_list_builder.rs
Expand Up @@ -18,6 +18,7 @@
use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::Buffer;
use arrow_data::ArrayData;
use arrow_schema::{DataType, Field};
use std::any::Any;
Expand Down Expand Up @@ -84,6 +85,11 @@ where
fn finish(&mut self) -> ArrayRef {
// Wraps the array returned by `self.finish()` in an `Arc` so it can be handed out as an `ArrayRef`.
// NOTE(review): `self.finish()` presumably resolves to the inherent `FixedSizeListBuilder::finish`,
// which is not fully visible in this hunk — confirm.
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
///
/// Wraps the [`FixedSizeListArray`] produced by the inherent
/// `FixedSizeListBuilder::finish_cloned` in an [`Arc`] and returns it as an [`ArrayRef`].
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
}

impl<T: ArrayBuilder> FixedSizeListBuilder<T>
Expand Down Expand Up @@ -135,6 +141,37 @@ where

FixedSizeListArray::from(array_data)
}

/// Builds the [`FixedSizeListArray`] without resetting the builder.
///
/// The child values are obtained through the child builder's own
/// `finish_cloned`, so neither this builder nor the child builder is consumed.
///
/// # Panics
///
/// Panics if the number of child values is not equal to
/// `self.len() * list_len`.
pub fn finish_cloned(&self) -> FixedSizeListArray {
    let len = self.len();
    let values_arr = self.values_builder.finish_cloned();
    let values_data = values_arr.data();

    // Every list slot (valid or null) must be backed by exactly `list_len`
    // child values.
    assert_eq!(
        values_data.len(), len * self.list_len as usize,
        "Length of the child array ({}) must be the multiple of the value length ({}) and the array length ({}).",
        values_data.len(),
        self.list_len,
        len,
    );

    // The validity bitmap is optional; it is only attached when the null
    // buffer builder exposes one.
    let null_bit_buffer = self
        .null_buffer_builder
        .as_slice()
        .map(Buffer::from_slice_ref);

    // The child field is always named "item" and declared nullable.
    let array_data = ArrayData::builder(DataType::FixedSizeList(
        Box::new(Field::new("item", values_data.data_type().clone(), true)),
        self.list_len,
    ))
    .len(len)
    .add_child_data(values_data.clone())
    .null_bit_buffer(null_bit_buffer);

    // NOTE(review): `build_unchecked` skips validation; the child length is
    // checked by the assertion above, the validity bitmap length is assumed
    // to match `len` — confirm against the append methods.
    let array_data = unsafe { array_data.build_unchecked() };

    FixedSizeListArray::from(array_data)
}
}

#[cfg(test)]
Expand Down Expand Up @@ -176,6 +213,48 @@ mod tests {
assert_eq!(3, list_array.value_length());
}

#[test]
fn test_fixed_size_list_array_builder_finish_cloned() {
    let values_builder = Int32Builder::new();
    let mut builder = FixedSizeListBuilder::new(values_builder, 3);

    // First batch: [[0, 1, 2], null, [3, null, 5]]
    builder.values().append_value(0);
    builder.values().append_value(1);
    builder.values().append_value(2);
    builder.append(true);
    // A null list still needs `list_len` placeholder child values.
    builder.values().append_null();
    builder.values().append_null();
    builder.values().append_null();
    builder.append(false);
    builder.values().append_value(3);
    builder.values().append_null();
    builder.values().append_value(5);
    builder.append(true);

    // The snapshot holds exactly the first batch.
    let snapshot = builder.finish_cloned();
    assert_eq!(DataType::Int32, snapshot.value_type());
    assert_eq!(3, snapshot.len());
    assert_eq!(1, snapshot.null_count());
    assert_eq!(3, snapshot.value_length());

    // Second batch, appended to the same builder: [[6, 7, null], null]
    builder.values().append_value(6);
    builder.values().append_value(7);
    builder.values().append_null();
    builder.append(true);
    builder.values().append_null();
    builder.values().append_null();
    builder.values().append_null();
    builder.append(false);

    // Final array: [[0, 1, 2], null, [3, null, 5], [6, 7, null], null]
    let list_array = builder.finish();
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(5, list_array.len());
    assert_eq!(2, list_array.null_count());
    assert_eq!(6, list_array.value_offset(2));
    assert_eq!(3, list_array.value_length());
}

#[test]
fn test_fixed_size_list_array_builder_empty() {
let values_builder = Int32Array::builder(5);
Expand Down
56 changes: 55 additions & 1 deletion arrow-array/src/builder/generic_bytes_builder.rs
Expand Up @@ -19,7 +19,7 @@ use crate::builder::null_buffer_builder::NullBufferBuilder;
use crate::builder::{ArrayBuilder, BufferBuilder, UInt8BufferBuilder};
use crate::types::{ByteArrayType, GenericBinaryType, GenericStringType};
use crate::{ArrayRef, GenericByteArray, OffsetSizeTrait};
use arrow_buffer::ArrowNativeType;
use arrow_buffer::{ArrowNativeType, Buffer};
use arrow_data::ArrayDataBuilder;
use std::any::Any;
use std::sync::Arc;
Expand Down Expand Up @@ -94,6 +94,25 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
GenericByteArray::from(array_data)
}

/// Creates a [`GenericByteArray`] from the values appended so far.
///
/// The builder is only borrowed, so its contents stay in place and further
/// values can be appended afterwards.
pub fn finish_cloned(&self) -> GenericByteArray<T> {
    // New buffers are built from borrowed slices; the builder keeps its own.
    // Buffer order matters: offsets are added first, then the value bytes.
    let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice());
    let values = Buffer::from_slice_ref(self.value_builder.as_slice());
    // The validity bitmap is optional; it is only attached when the null
    // buffer builder exposes one.
    let validity = self
        .null_buffer_builder
        .as_slice()
        .map(Buffer::from_slice_ref);

    let data_builder = ArrayDataBuilder::new(T::DATA_TYPE)
        .len(self.len())
        .add_buffer(offsets)
        .add_buffer(values)
        .null_bit_buffer(validity);

    // NOTE(review): `build_unchecked` skips validation; this relies on the
    // builder keeping its offsets, value bytes and validity bits consistent —
    // confirm against the append methods.
    let data = unsafe { data_builder.build_unchecked() };
    GenericByteArray::from(data)
}

/// Returns the current values buffer as a slice
pub fn values_slice(&self) -> &[u8] {
self.value_builder.as_slice()
Expand Down Expand Up @@ -138,6 +157,11 @@ impl<T: ByteArrayType> ArrayBuilder for GenericByteBuilder<T> {
Arc::new(self.finish())
}

/// Builds the array without resetting the builder.
///
/// Wraps the [`GenericByteArray`] produced by the inherent
/// `GenericByteBuilder::finish_cloned` in an [`Arc`] and returns it as an [`ArrayRef`].
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}

/// Returns the builder as a non-mutable `Any` reference.
fn as_any(&self) -> &dyn Any {
self
Expand Down Expand Up @@ -325,4 +349,34 @@ mod tests {
fn test_large_string_array_builder_finish() {
// Runs the shared `finish` test helper instantiated with `i64`.
_test_generic_string_array_builder_finish::<i64>()
}

/// Shared body for the `finish_cloned` string builder tests, generic over the
/// offset type `O`.
fn _test_generic_string_array_builder_finish_cloned<O: OffsetSizeTrait>() {
    let mut builder = GenericStringBuilder::<O>::with_capacity(3, 11);

    // First batch: ["hello", "rust", null]
    builder.append_value("hello");
    builder.append_value("rust");
    builder.append_null();

    // Taking a snapshot must leave the builder's contents in place.
    let snapshot = builder.finish_cloned();
    assert!(!builder.is_empty());
    assert_eq!(3, snapshot.len());

    // Second batch, appended to the same builder: ["arrow", "parquet"]
    builder.append_value("arrow");
    builder.append_value("parquet");
    let arr = builder.finish();

    // `finish` returns both batches and resets the builder's offsets to a
    // single zero entry.
    assert!(arr.data().null_buffer().is_some());
    assert_eq!(&[O::zero()], builder.offsets_slice());
    assert_eq!(5, arr.len());
}

/// Runs the generic `finish_cloned` check with `i32` as the offset type.
#[test]
fn test_string_array_builder_finish_cloned() {
_test_generic_string_array_builder_finish_cloned::<i32>()
}

/// Runs the generic `finish_cloned` check with `i64` as the offset type.
#[test]
fn test_large_string_array_builder_finish_cloned() {
_test_generic_string_array_builder_finish_cloned::<i64>()
}
}

0 comments on commit 6c466af

Please sign in to comment.