diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index a7925358b8f..e7e9ec6a58f 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -18,19 +18,13 @@ use std::any::Any; use std::sync::Arc; -use crate::array::ArrayBuilder; use crate::array::ArrayRef; use crate::array::DecimalArray; -use crate::array::FixedSizeBinaryArray; -use crate::array::OffsetSizeTrait; use crate::array::UInt8Builder; -use crate::array::{GenericBinaryArray, GenericStringArray}; +use crate::array::{ArrayBuilder, FixedSizeListBuilder}; use crate::error::{ArrowError, Result}; -use super::{FixedSizeBinaryBuilder, FixedSizeListBuilder}; -use super::{GenericBinaryBuilder, GenericListBuilder, GenericStringBuilder}; - use crate::datatypes::validate_decimal_precision; /// Array Builder for [`DecimalArray`] @@ -48,284 +42,6 @@ pub struct DecimalBuilder { value_validation: bool, } -impl ArrayBuilder for GenericBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for GenericStringBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - let a = GenericStringBuilder::::finish(self); - Arc::new(a) - } -} - -impl ArrayBuilder for FixedSizeBinaryBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl ArrayBuilder for DecimalBuilder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. - fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl GenericBinaryBuilder { - /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Appends a single byte value into the builder's values array. - /// - /// Note, when appending individual byte values you must call `append` to delimit each - /// distinct list value. - #[inline] - pub fn append_byte(&mut self, value: u8) -> Result<()> { - self.builder.values().append_value(value)?; - Ok(()) - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Builds the `BinaryArray` and reset this builder. - pub fn finish(&mut self) -> GenericBinaryArray { - GenericBinaryArray::::from(self.builder.finish()) - } -} - -impl GenericStringBuilder { - /// Creates a new `StringBuilder`, - /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder - pub fn new(capacity: usize) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: GenericListBuilder::new(values_builder), - } - } - - /// Creates a new `StringBuilder`, - /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder - /// `item_capacity` is the number of items to pre-allocate space for in this builder - pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { - let values_builder = UInt8Builder::new(data_capacity); - Self { - builder: GenericListBuilder::with_capacity(values_builder, item_capacity), - } - } - - /// Appends a string into the builder. - /// - /// Automatically calls the `append` method to delimit the string appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { - self.builder - .values() - .append_slice(value.as_ref().as_bytes())?; - self.builder.append(true)?; - Ok(()) - } - - /// Finish the current variable-length list array slot. - #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<()> { - self.builder.append(is_valid) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - self.append(false) - } - - /// Append an `Option` value to the array. - #[inline] - pub fn append_option(&mut self, value: Option>) -> Result<()> { - match value { - None => self.append_null()?, - Some(v) => self.append_value(v)?, - }; - Ok(()) - } - - /// Builds the `StringArray` and reset this builder. - pub fn finish(&mut self) -> GenericStringArray { - GenericStringArray::::from(self.builder.finish()) - } -} - -impl FixedSizeBinaryBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, byte_width: i32) -> Self { - let values_builder = UInt8Builder::new(capacity); - Self { - builder: FixedSizeListBuilder::new(values_builder, byte_width), - } - } - - /// Appends a byte slice into the builder. - /// - /// Automatically calls the `append` method to delimit the slice appended in as a - /// distinct array element. - #[inline] - pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { - if self.builder.value_length() != value.as_ref().len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() - )); - } - self.builder.values().append_slice(value.as_ref())?; - self.builder.append(true) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) -> Result<()> { - let length: usize = self.builder.value_length() as usize; - self.builder.values().append_slice(&vec![0u8; length][..])?; - self.builder.append(false) - } - - /// Builds the `FixedSizeBinaryArray` and reset this builder. - pub fn finish(&mut self) -> FixedSizeBinaryArray { - FixedSizeBinaryArray::from(self.builder.finish()) - } -} - impl DecimalBuilder { /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values /// array @@ -406,6 +122,38 @@ impl DecimalBuilder { } } +impl ArrayBuilder for DecimalBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs new file mode 100644 index 00000000000..1238547c508 --- /dev/null +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -0,0 +1,82 @@ +use crate::array::{ + ArrayBuilder, ArrayRef, FixedSizeBinaryArray, FixedSizeListBuilder, UInt8Builder, +}; +use crate::error::{ArrowError, Result}; +use std::any::Any; +use std::sync::Arc; + +#[derive(Debug)] +pub struct FixedSizeBinaryBuilder { + builder: FixedSizeListBuilder, +} + +impl FixedSizeBinaryBuilder { + /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, byte_width: i32) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + } + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + if self.builder.value_length() != value.as_ref().len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() + )); + } + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `FixedSizeBinaryArray` and reset this builder. + pub fn finish(&mut self) -> FixedSizeBinaryArray { + FixedSizeBinaryArray::from(self.builder.finish()) + } +} + +impl ArrayBuilder for FixedSizeBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} diff --git a/arrow/src/array/builder/generic_binary_builder.rs b/arrow/src/array/builder/generic_binary_builder.rs new file mode 100644 index 00000000000..b8d8be46a4a --- /dev/null +++ b/arrow/src/array/builder/generic_binary_builder.rs @@ -0,0 +1,94 @@ +use crate::array::{ + ArrayBuilder, ArrayRef, GenericBinaryArray, GenericListBuilder, OffsetSizeTrait, + UInt8Builder, +}; +use crate::error::Result; +use std::any::Any; +use std::sync::Arc; + +/// Array builder for `BinaryArray` +#[derive(Debug)] +pub struct GenericBinaryBuilder { + builder: GenericListBuilder, +} + +impl GenericBinaryBuilder { + /// Creates a new `GenericBinaryBuilder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Appends a single byte value into the builder's values array. + /// + /// Note, when appending individual byte values you must call `append` to delimit each + /// distinct list value. + #[inline] + pub fn append_byte(&mut self, value: u8) -> Result<()> { + self.builder.values().append_value(value)?; + Ok(()) + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef<[u8]>) -> Result<()> { + self.builder.values().append_slice(value.as_ref())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Builds the `BinaryArray` and reset this builder. + pub fn finish(&mut self) -> GenericBinaryArray { + GenericBinaryArray::::from(self.builder.finish()) + } +} + +impl ArrayBuilder for GenericBinaryBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } +} diff --git a/arrow/src/array/builder/generic_string_builder.rs b/arrow/src/array/builder/generic_string_builder.rs new file mode 100644 index 00000000000..66421112142 --- /dev/null +++ b/arrow/src/array/builder/generic_string_builder.rs @@ -0,0 +1,103 @@ +use std::any::Any; +use std::sync::Arc; +use crate::array::{ArrayBuilder, ArrayRef, GenericListBuilder, GenericStringArray, OffsetSizeTrait, UInt8Builder}; +use crate::error::{Result}; + +#[derive(Debug)] +pub struct GenericStringBuilder { + builder: GenericListBuilder, +} + +impl GenericStringBuilder { + /// Creates a new `StringBuilder`, + /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder + pub fn new(capacity: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + Self { + builder: GenericListBuilder::new(values_builder), + } + } + + /// Creates a new `StringBuilder`, + /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder + /// `item_capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { + let values_builder = UInt8Builder::new(data_capacity); + Self { + builder: GenericListBuilder::with_capacity(values_builder, item_capacity), + } + } + + /// Appends a string into the builder. + /// + /// Automatically calls the `append` method to delimit the string appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: impl AsRef) -> Result<()> { + self.builder + .values() + .append_slice(value.as_ref().as_bytes())?; + self.builder.append(true)?; + Ok(()) + } + + /// Finish the current variable-length list array slot. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<()> { + self.builder.append(is_valid) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + self.append(false) + } + + /// Append an `Option` value to the array. + #[inline] + pub fn append_option(&mut self, value: Option>) -> Result<()> { + match value { + None => self.append_null()?, + Some(v) => self.append_value(v)?, + }; + Ok(()) + } + + /// Builds the `StringArray` and reset this builder. + pub fn finish(&mut self) -> GenericStringArray { + GenericStringArray::::from(self.builder.finish()) + } +} + +impl ArrayBuilder for GenericStringBuilder { + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.builder.len() + } + + /// Returns whether the number of array slots is zero + fn is_empty(&self) -> bool { + self.builder.is_empty() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + let a = GenericStringBuilder::::finish(self); + Arc::new(a) + } +} diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs index 4cd82d9bfe3..634ef772f3c 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow/src/array/builder/mod.rs @@ -24,8 +24,11 @@ mod boolean_buffer_builder; mod boolean_builder; mod buffer_builder; mod decimal_builder; +mod fixed_size_binary_builder; mod fixed_size_list_builder; +mod generic_binary_builder; mod generic_list_builder; +mod generic_string_builder; mod map_builder; mod primitive_builder; mod primitive_dictionary_builder; @@ -38,24 +41,23 @@ use std::marker::PhantomData; use std::ops::Range; use super::ArrayRef; -use super::OffsetSizeTrait; -use super::UInt8Builder; pub use boolean_buffer_builder::BooleanBufferBuilder; pub use boolean_builder::BooleanBuilder; pub use buffer_builder::BufferBuilder; pub use decimal_builder::DecimalBuilder; +pub use fixed_size_binary_builder::FixedSizeBinaryBuilder; pub use fixed_size_list_builder::FixedSizeListBuilder; +pub use generic_binary_builder::GenericBinaryBuilder; pub use generic_list_builder::GenericListBuilder; +pub use generic_string_builder::GenericStringBuilder; pub use map_builder::MapBuilder; pub use primitive_builder::PrimitiveBuilder; pub use primitive_dictionary_builder::PrimitiveDictionaryBuilder; pub use string_dictionary_builder::StringDictionaryBuilder; -pub use struct_builder::StructBuilder; +pub use struct_builder::{make_builder, StructBuilder}; pub use union_builder::UnionBuilder; -pub use struct_builder::make_builder; - /// Trait for dealing with different array builders at runtime /// /// # Example @@ -139,27 +141,8 @@ pub trait ArrayBuilder: Any + Send { pub type ListBuilder = GenericListBuilder; pub type LargeListBuilder = GenericListBuilder; -/// Array builder for `BinaryArray` -#[derive(Debug)] -pub struct GenericBinaryBuilder { - builder: GenericListBuilder, -} - pub type BinaryBuilder = GenericBinaryBuilder; pub type LargeBinaryBuilder = GenericBinaryBuilder; -#[derive(Debug)] -pub struct GenericStringBuilder { - builder: GenericListBuilder, -} - pub type StringBuilder = GenericStringBuilder; pub type LargeStringBuilder = GenericStringBuilder; - -#[derive(Debug)] -pub struct FixedSizeBinaryBuilder { - builder: FixedSizeListBuilder, -} - -#[cfg(test)] -mod tests {}