diff --git a/arrow/src/array/builder/buffer_builder.rs b/arrow/src/array/builder/buffer_builder.rs index 83b2afb44e7..9dd13839800 100644 --- a/arrow/src/array/builder/buffer_builder.rs +++ b/arrow/src/array/builder/buffer_builder.rs @@ -22,29 +22,6 @@ use crate::datatypes::ArrowNativeType; use super::PhantomData; -/// Converts a `MutableBuffer` to a `BufferBuilder`. -/// -/// `slots` is the number of array slots currently represented in the `MutableBuffer`. -pub(crate) fn mutable_buffer_to_builder( - mutable_buffer: MutableBuffer, - slots: usize, -) -> BufferBuilder { - BufferBuilder:: { - buffer: mutable_buffer, - len: slots, - _marker: PhantomData, - } -} - -/// Converts a `BufferBuilder` into its underlying `MutableBuffer`. -/// -/// `From` is not implemented because associated type bounds are unstable. -pub(crate) fn builder_to_mutable_buffer( - builder: BufferBuilder, -) -> MutableBuffer { - builder.buffer -} - /// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. /// /// A [`Buffer`](crate::buffer::Buffer) is the underlying data @@ -168,8 +145,7 @@ impl BufferBuilder { /// ``` #[inline] pub fn advance(&mut self, i: usize) { - let new_buffer_len = (self.len + i) * mem::size_of::(); - self.buffer.resize(new_buffer_len, 0); + self.buffer.extend_zeros(i * mem::size_of::()); self.len += i; } @@ -232,6 +208,24 @@ impl BufferBuilder { self.len += n; } + /// Appends `n`, zero-initialized values + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt32BufferBuilder; + /// + /// let mut builder = UInt32BufferBuilder::new(10); + /// builder.append_n_zeroed(3); + /// + /// assert_eq!(builder.len(), 3); + /// assert_eq!(builder.as_slice(), &[0, 0, 0]) + #[inline] + pub fn append_n_zeroed(&mut self, n: usize) { + self.buffer.extend_zeros(n * mem::size_of::()); + self.len += n; + } + /// Appends a slice of type `T`, growing the internal buffer as needed. /// /// # Example: @@ -250,6 +244,78 @@ impl BufferBuilder { self.len += slice.len(); } + /// View the contents of this buffer as a slice + /// + /// ``` + /// use arrow::array::Float64BufferBuilder; + /// + /// let mut builder = Float64BufferBuilder::new(10); + /// builder.append(1.3); + /// builder.append_n(2, 2.3); + /// + /// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]); + /// ``` + #[inline] + pub fn as_slice(&self) -> &[T] { + // SAFETY + // + // - MutableBuffer is aligned and initialized for len elements of T + // - MutableBuffer corresponds to a single allocation + // - MutableBuffer does not support modification whilst active immutable borrows + unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) } + } + + /// View the contents of this buffer as a mutable slice + /// + /// # Example: + /// + /// ``` + /// use arrow::array::Float32BufferBuilder; + /// + /// let mut builder = Float32BufferBuilder::new(10); + /// + /// builder.append_slice(&[1., 2., 3.4]); + /// assert_eq!(builder.as_slice(), &[1., 2., 3.4]); + /// + /// builder.as_slice_mut()[1] = 4.2; + /// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]); + /// ``` + #[inline] + pub fn as_slice_mut(&mut self) -> &mut [T] { + // SAFETY + // + // - MutableBuffer is aligned and initialized for len elements of T + // - MutableBuffer corresponds to a single allocation + // - MutableBuffer does not support modification whilst active immutable borrows + unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) } + } + + /// Shorten this BufferBuilder to `len` items + /// + /// If `len` is greater than the builder's current length, this has no effect + /// + /// # Example: + /// + /// ``` + /// use arrow::array::UInt16BufferBuilder; + /// + /// let mut builder = UInt16BufferBuilder::new(10); + /// + /// builder.append_slice(&[42, 44, 46]); + /// assert_eq!(builder.as_slice(), &[42, 44, 46]); + /// + /// builder.truncate(2); + /// assert_eq!(builder.as_slice(), &[42, 44]); + /// + /// builder.append(12); + /// assert_eq!(builder.as_slice(), &[42, 44, 12]); + /// ``` + #[inline] + pub fn truncate(&mut self, len: usize) { + self.buffer.truncate(len * mem::size_of::()); + self.len = len; + } + /// # Safety /// This requires the iterator be a trusted length. This could instead require /// the iterator implement `TrustedLen` once that is stabilized. diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index a7925358b8f..e7e9ec6a58f 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -18,19 +18,13 @@ use std::any::Any; use std::sync::Arc; -use crate::array::ArrayBuilder; use crate::array::ArrayRef; use crate::array::DecimalArray; -use crate::array::FixedSizeBinaryArray; -use crate::array::OffsetSizeTrait; use crate::array::UInt8Builder; -use crate::array::{GenericBinaryArray, GenericStringArray}; +use crate::array::{ArrayBuilder, FixedSizeListBuilder}; use crate::error::{ArrowError, Result}; -use super::{FixedSizeBinaryBuilder, FixedSizeListBuilder}; -use super::{GenericBinaryBuilder, GenericListBuilder, GenericStringBuilder}; - use crate::datatypes::validate_decimal_precision; /// Array Builder for [`DecimalArray`] @@ -48,284 +42,6 @@ pub struct DecimalBuilder { value_validation: bool, } -impl ArrayBuilder for GenericBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for GenericStringBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - let a = GenericStringBuilder::::finish(self); - Arc::new(a) - } -} - -impl ArrayBuilder for FixedSizeBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for DecimalBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl GenericBinaryBuilder { - /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Appends a single byte value into the builder's values array. - /// - /// Note, when appending individual byte values you must call `append` to delimit each - /// distinct list value. - #[inline] - pub fn append_byte(&mut self, value: u8) -> Result<()> { - self.builder.values().append_value(value)?; - Ok(()) - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Builds the `BinaryArray` and reset this builder. - pub fn finish(&mut self) -> GenericBinaryArray { - GenericBinaryArray::::from(self.builder.finish()) - } -} - -impl GenericStringBuilder { - /// Creates a new `StringBuilder`, - /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Creates a new `StringBuilder`, - /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder - /// `item_capacity` is the number of items to pre-allocate space for in this builder - pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let values_builder = UInt8Builder::new(data_capacity); - Self { - builder: GenericListBuilder::with_capacity(values_builder, item_capacity), - } - } - - /// Appends a string into the builder. - /// - /// Automatically calls the `append` method to delimit the string appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { - self.builder - .values() - .append_slice(value.as_ref().as_bytes())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Append an `Option` value to the array. - #[inline] - pub fn append_option(&mut self, value: Option>) -> Result<()> { - match value { - None => self.append_null()?, - Some(v) => self.append_value(v)?, - }; - Ok(()) - } - - /// Builds the `StringArray` and reset this builder. - pub fn finish(&mut self) -> GenericStringArray { - GenericStringArray::::from(self.builder.finish()) - } -} - -impl FixedSizeBinaryBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, byte_width: i32) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: FixedSizeListBuilder::new(values_builder, byte_width), - } - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - if self.builder.value_length() != value.as_ref().len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() - )); - } - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - let length: usize = self.builder.value_length() as usize; - self.builder.values().append_slice(&vec![0u8; length][..])?; - self.builder.append(false) - } - - /// Builds the `FixedSizeBinaryArray` and reset this builder. - pub fn finish(&mut self) -> FixedSizeBinaryArray { - FixedSizeBinaryArray::from(self.builder.finish()) - } -} - impl DecimalBuilder { /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values /// array @@ -406,6 +122,38 @@ impl DecimalBuilder { } } +impl ArrayBuilder for DecimalBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs new file mode 100644 index 00000000000..1d40b4c5bcd --- /dev/null +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -0,0 +1,99 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::array::{ + ArrayBuilder, ArrayRef, FixedSizeBinaryArray, FixedSizeListBuilder, UInt8Builder, +}; +use crate::error::{ArrowError, Result}; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug)] +pub struct FixedSizeBinaryBuilder { + builder: FixedSizeListBuilder, +} + +impl FixedSizeBinaryBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, byte_width: i32) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + } + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + if self.builder.value_length() != value.as_ref().len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() + )); + } + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `FixedSizeBinaryArray` and reset this builder. + pub fn finish(&mut self) -> FixedSizeBinaryArray { + FixedSizeBinaryArray::from(self.builder.finish()) + } +} + +impl ArrayBuilder for FixedSizeBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs new file mode 100644 index 00000000000..fc64eb0a278 --- /dev/null +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::array::{ + ArrayBuilder, ArrayRef, GenericBinaryArray, GenericListBuilder, OffsetSizeTrait, + UInt8Builder, +}; +use crate::error::Result; +use std::any::Any; +use std::sync::Arc; + +/// Array builder for `BinaryArray` +#[derive(Debug)] +pub struct GenericBinaryBuilder { + builder: GenericListBuilder, +} + +impl GenericBinaryBuilder { + /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Appends a single byte value into the builder's values array. + /// + /// Note, when appending individual byte values you must call `append` to delimit each + /// distinct list value. + #[inline] + pub fn append_byte(&mut self, value: u8) -> Result<()> { + self.builder.values().append_value(value)?; + Ok(()) + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `BinaryArray` and reset this builder. + pub fn finish(&mut self) -> GenericBinaryArray { + GenericBinaryArray::::from(self.builder.finish()) + } +} + +impl ArrayBuilder for GenericBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} diff --git a/arrow/src/array/builder/generic_string_builder.rs b/arrow/src/array/builder/generic_string_builder.rs new file mode 100644 index 00000000000..ee391c4d4f8 --- /dev/null +++ b/arrow/src/array/builder/generic_string_builder.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::array::{ + ArrayBuilder, ArrayRef, GenericListBuilder, GenericStringArray, OffsetSizeTrait, + UInt8Builder, +}; +use crate::error::Result; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug)] +pub struct GenericStringBuilder { + builder: GenericListBuilder, +} + +impl GenericStringBuilder { + /// Creates a new `StringBuilder`, + /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Creates a new `StringBuilder`, + /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder + /// `item_capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { + let values_builder = UInt8Builder::new(data_capacity); + Self { + builder: GenericListBuilder::with_capacity(values_builder, item_capacity), + } + } + + /// Appends a string into the builder. + /// + /// Automatically calls the `append` method to delimit the string appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { + self.builder + .values() + .append_slice(value.as_ref().as_bytes())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Append an `Option` value to the array. + #[inline] + pub fn append_option(&mut self, value: Option>) -> Result<()> { + match value { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Builds the `StringArray` and reset this builder. + pub fn finish(&mut self) -> GenericStringArray { + GenericStringArray::::from(self.builder.finish()) + } +} + +impl ArrayBuilder for GenericStringBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + let a = GenericStringBuilder::::finish(self); + Arc::new(a) + } +} diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs index 4cd82d9bfe3..634ef772f3c 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow/src/array/builder/mod.rs @@ -24,8 +24,11 @@ mod boolean_buffer_builder; mod boolean_builder; mod buffer_builder; mod decimal_builder; +mod fixed_size_binary_builder; mod fixed_size_list_builder; +mod generic_binary_builder; mod generic_list_builder; +mod generic_string_builder; mod map_builder; mod primitive_builder; mod primitive_dictionary_builder; @@ -38,24 +41,23 @@ use std::marker::PhantomData; use std::ops::Range; use super::ArrayRef; -use super::OffsetSizeTrait; -use super::UInt8Builder; pub use boolean_buffer_builder::BooleanBufferBuilder; pub use boolean_builder::BooleanBuilder; pub use buffer_builder::BufferBuilder; pub use decimal_builder::DecimalBuilder; +pub use fixed_size_binary_builder::FixedSizeBinaryBuilder; pub use fixed_size_list_builder::FixedSizeListBuilder; +pub use generic_binary_builder::GenericBinaryBuilder; pub use generic_list_builder::GenericListBuilder; +pub use generic_string_builder::GenericStringBuilder; pub use map_builder::MapBuilder; pub use primitive_builder::PrimitiveBuilder; pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder; pub use string_dictionary_builder::StringDictionaryBuilder; -pub use struct_builder::StructBuilder; +pub use struct_builder::{make_builder, StructBuilder}; pub use union_builder::UnionBuilder; -pub use struct_builder::make_builder; - /// Trait for dealing with different array builders at runtime /// /// # Example @@ -139,27 +141,8 @@ pub trait ArrayBuilder: Any + Send { pub type ListBuilder = GenericListBuilder; pub type LargeListBuilder = GenericListBuilder; -/// Array builder for `BinaryArray` -#[derive(Debug)] -pub struct GenericBinaryBuilder { - builder: GenericListBuilder, -} - pub type BinaryBuilder = GenericBinaryBuilder; pub type LargeBinaryBuilder = GenericBinaryBuilder; -#[derive(Debug)] -pub struct GenericStringBuilder { - builder: GenericListBuilder, -} - pub type StringBuilder = GenericStringBuilder; pub type LargeStringBuilder = GenericStringBuilder; - -#[derive(Debug)] -pub struct FixedSizeBinaryBuilder { - builder: FixedSizeListBuilder, -} - -#[cfg(test)] -mod tests {} diff --git a/arrow/src/array/builder/union_builder.rs b/arrow/src/array/builder/union_builder.rs index 78f9a3f4b43..95d9ea40a3d 100644 --- a/arrow/src/array/builder/union_builder.rs +++ b/arrow/src/array/builder/union_builder.rs @@ -15,28 +15,22 @@ // specific language governing permissions and limitations // under the License. +use std::any::Any; use std::collections::HashMap; use crate::array::ArrayDataBuilder; use crate::array::Int32BufferBuilder; use crate::array::Int8BufferBuilder; use crate::array::UnionArray; -use crate::buffer::MutableBuffer; +use crate::buffer::Buffer; -use crate::datatypes::ArrowPrimitiveType; use crate::datatypes::DataType; use crate::datatypes::Field; -use crate::datatypes::IntervalMonthDayNanoType; -use crate::datatypes::IntervalUnit; -use crate::datatypes::{Float32Type, Float64Type}; -use crate::datatypes::{Int16Type, Int32Type, Int64Type, Int8Type}; -use crate::datatypes::{UInt16Type, UInt32Type, UInt64Type, UInt8Type}; +use crate::datatypes::{ArrowNativeType, ArrowPrimitiveType}; use crate::error::{ArrowError, Result}; use super::{BooleanBufferBuilder, BufferBuilder}; -use super::buffer_builder::builder_to_mutable_buffer; -use super::buffer_builder::mutable_buffer_to_builder; use crate::array::make_array; /// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`. @@ -47,101 +41,65 @@ struct FieldData { /// The Arrow data type represented in the `values_buffer`, which is untyped data_type: DataType, /// A buffer containing the values for this field in raw bytes - values_buffer: Option, + values_buffer: Box, /// The number of array slots represented by the buffer slots: usize, /// A builder for the null bitmap bitmap_builder: BooleanBufferBuilder, } +/// A type-erased [`BufferBuilder`] used by [`FieldData`] +trait FieldDataValues: std::fmt::Debug { + fn as_mut_any(&mut self) -> &mut dyn Any; + + fn append_null(&mut self); + + fn finish(&mut self) -> Buffer; +} + +impl FieldDataValues for BufferBuilder { + fn as_mut_any(&mut self) -> &mut dyn Any { + self + } + + fn append_null(&mut self) { + self.advance(1) + } + + fn finish(&mut self) -> Buffer { + self.finish() + } +} + impl FieldData { /// Creates a new `FieldData`. - fn new(type_id: i8, data_type: DataType) -> Self { + fn new(type_id: i8, data_type: DataType) -> Self { Self { type_id, data_type, - values_buffer: Some(MutableBuffer::new(1)), slots: 0, + values_buffer: Box::new(BufferBuilder::::new(1)), bitmap_builder: BooleanBufferBuilder::new(1), } } /// Appends a single value to this `FieldData`'s `values_buffer`. - #[allow(clippy::unnecessary_wraps)] - fn append_to_values_buffer( - &mut self, - v: T::Native, - ) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - builder.append(v); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); + fn append_value(&mut self, v: T::Native) { + self.values_buffer + .as_mut_any() + .downcast_mut::>() + .expect("Tried to append unexpected type") + .append(v); - self.slots += 1; self.bitmap_builder.append(true); - Ok(()) + self.slots += 1; } /// Appends a null to this `FieldData`. - #[allow(clippy::unnecessary_wraps)] - fn append_null(&mut self) -> Result<()> { - let values_buffer = self - .values_buffer - .take() - .expect("Values buffer was never created"); - - let mut builder: BufferBuilder = - mutable_buffer_to_builder(values_buffer, self.slots); - - builder.advance(1); - let mutable_buffer = builder_to_mutable_buffer(builder); - self.values_buffer = Some(mutable_buffer); - self.slots += 1; + fn append_null(&mut self) { + self.values_buffer.append_null(); self.bitmap_builder.append(false); - Ok(()) - } - - /// Appends a null to this `FieldData` when the type is not known at compile time. - /// - /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null - /// slots to the fields that are not being appended to in the case of sparse unions. This - /// method solves this problem by appending dynamically based on `DataType`. - /// - /// Note, this method does **not** update the length of the `UnionArray` (this is done by the - /// main append operation) and assumes that it is called from a method that is generic over `T` - /// where `T` satisfies the bound `ArrowPrimitiveType`. - fn append_null_dynamic(&mut self) -> Result<()> { - match self.data_type { - DataType::Null => unimplemented!(), - DataType::Int8 => self.append_null::()?, - DataType::Int16 => self.append_null::()?, - DataType::Int32 - | DataType::Date32 - | DataType::Time32(_) - | DataType::Interval(IntervalUnit::YearMonth) => { - self.append_null::()? - } - DataType::Int64 - | DataType::Timestamp(_, _) - | DataType::Date64 - | DataType::Time64(_) - | DataType::Interval(IntervalUnit::DayTime) - | DataType::Duration(_) => self.append_null::()?, - DataType::Interval(IntervalUnit::MonthDayNano) => self.append_null::()?, - DataType::UInt8 => self.append_null::()?, - DataType::UInt16 => self.append_null::()?, - DataType::UInt32 => self.append_null::()?, - DataType::UInt64 => self.append_null::()?, - DataType::Float32 => self.append_null::()?, - DataType::Float64 => self.append_null::()?, - _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."), - }; - Ok(()) + self.slots += 1; } } @@ -257,11 +215,12 @@ impl UnionBuilder { data } None => match self.value_offset_builder { - Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE), + Some(_) => FieldData::new::(self.fields.len() as i8, T::DATA_TYPE), None => { - let mut fd = FieldData::new(self.fields.len() as i8, T::DATA_TYPE); + let mut fd = + FieldData::new::(self.fields.len() as i8, T::DATA_TYPE); for _ in 0..self.len { - fd.append_null::()?; + fd.append_null(); } fd } @@ -278,14 +237,14 @@ impl UnionBuilder { None => { for (_, fd) in self.fields.iter_mut() { // Append to all bar the FieldData currently being appended to - fd.append_null_dynamic()?; + fd.append_null(); } } } match v { - Some(v) => field_data.append_to_values_buffer::(v)?, - None => field_data.append_null::()?, + Some(v) => field_data.append_value::(v), + None => field_data.append_null(), } self.fields.insert(type_name, field_data); @@ -303,15 +262,13 @@ impl UnionBuilder { FieldData { type_id, data_type, - values_buffer, + mut values_buffer, slots, mut bitmap_builder, }, ) in self.fields.into_iter() { - let buffer = values_buffer - .expect("The `values_buffer` should only ever be None inside the `append` method.") - .into(); + let buffer = values_buffer.finish(); let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) .add_buffer(buffer) .len(slots) @@ -333,6 +290,3 @@ impl UnionBuilder { UnionArray::try_new(&type_ids, type_id_buffer, value_offsets_buffer, children) } } - -#[cfg(test)] -mod tests {}