From b3fd4bd088d744a4c31cadef8d4861c3cc3215b0 Mon Sep 17 00:00:00 2001 From: Palladium Date: Fri, 19 Aug 2022 21:18:54 +0530 Subject: [PATCH] Refactor FixedSizeBinaryBuilder Constructors (#2516) * refactor FixedSizeBinaryBuilder apis * Fixing errors --- arrow/src/array/builder/decimal_builder.rs | 4 ++-- .../array/builder/fixed_size_binary_builder.rs | 17 +++++++++++------ arrow/src/array/builder/struct_builder.rs | 2 +- arrow/src/array/equal/mod.rs | 2 +- arrow/src/util/integration_util.rs | 2 +- arrow/src/util/pretty.rs | 2 +- parquet/src/arrow/arrow_writer/mod.rs | 2 +- parquet/src/arrow/buffer/converter.rs | 3 ++- 8 files changed, 20 insertions(+), 14 deletions(-) diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index 7021dce372e..988aba1ec78 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -65,7 +65,7 @@ impl Decimal128Builder { /// array pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { Self { - builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH), + builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), precision, scale, value_validation: true, @@ -159,7 +159,7 @@ impl Decimal256Builder { /// array pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { Self { - builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH), + builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), precision, scale, value_validation: true, diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index 96f90c4cae2..f0483ccfb7d 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -33,9 +33,14 @@ pub struct FixedSizeBinaryBuilder { } impl FixedSizeBinaryBuilder { + /// Creates a new [`FixedSizeBinaryBuilder`] + pub fn new(byte_width: i32) -> Self { + Self::with_capacity(1024, byte_width) + } + /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values /// buffer - pub fn new(capacity: usize, byte_width: i32) -> Self { + pub fn with_capacity(capacity: usize, byte_width: i32) -> Self { assert!( byte_width >= 0, "value length ({}) of the array must >= 0", @@ -132,7 +137,7 @@ mod tests { #[test] fn test_fixed_size_binary_builder() { - let mut builder = FixedSizeBinaryBuilder::new(15, 5); + let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5); // [b"hello", null, "arrow"] builder.append_value(b"hello").unwrap(); @@ -149,7 +154,7 @@ mod tests { #[test] fn test_fixed_size_binary_builder_with_zero_value_length() { - let mut builder = FixedSizeBinaryBuilder::new(0, 0); + let mut builder = FixedSizeBinaryBuilder::new(0); builder.append_value(b"").unwrap(); builder.append_null(); @@ -171,12 +176,12 @@ mod tests { expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths" )] fn test_fixed_size_binary_builder_with_inconsistent_value_length() { - let mut builder = FixedSizeBinaryBuilder::new(15, 4); + let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4); builder.append_value(b"hello").unwrap(); } #[test] fn test_fixed_size_binary_builder_empty() { - let mut builder = FixedSizeBinaryBuilder::new(15, 5); + let mut builder = FixedSizeBinaryBuilder::new(5); assert!(builder.is_empty()); let fixed_size_binary_array = builder.finish(); @@ -190,6 +195,6 @@ mod tests { #[test] #[should_panic(expected = "value length (-1) of the array must >= 0")] fn test_fixed_size_binary_builder_invalid_value_length() { - let _ = FixedSizeBinaryBuilder::new(15, -1); + let _ = FixedSizeBinaryBuilder::with_capacity(15, -1); } } diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index 01a792b5d19..a2f8707b39c 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -109,7 +109,7 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box Box::new(Float64Builder::new(capacity)), DataType::Binary => Box::new(BinaryBuilder::new(capacity)), DataType::FixedSizeBinary(len) => { - Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) + Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len)) } DataType::Decimal128(precision, scale) => { Box::new(Decimal128Builder::new(capacity, *precision, *scale)) diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index debb8e7240c..dd32574bbc6 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -768,7 +768,7 @@ mod tests { fn create_fixed_size_binary_array, T: AsRef<[Option]>>( data: T, ) -> ArrayData { - let mut builder = FixedSizeBinaryBuilder::new(15, 5); + let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5); for d in data.as_ref() { if let Some(v) = d { diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs index ee5c947a2ff..4b7d292590b 100644 --- a/arrow/src/util/integration_util.rs +++ b/arrow/src/util/integration_util.rs @@ -643,7 +643,7 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::FixedSizeBinary(len) => { - let mut b = FixedSizeBinaryBuilder::new(json_col.count, *len); + let mut b = FixedSizeBinaryBuilder::with_capacity(json_col.count, *len); for (is_valid, value) in json_col .validity .as_ref() diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs index b92e658f1f2..c9e432453db 100644 --- a/arrow/src/util/pretty.rs +++ b/arrow/src/util/pretty.rs @@ -317,7 +317,7 @@ mod tests { let field_type = DataType::FixedSizeBinary(3); let schema = Arc::new(Schema::new(vec![Field::new("d1", field_type, true)])); - let mut builder = FixedSizeBinaryBuilder::new(3, 3); + let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 3); builder.append_value(&[1, 2, 3]).unwrap(); builder.append_null(); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index d09cb712ea2..e7416432a1e 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1477,7 +1477,7 @@ mod tests { #[test] fn fixed_size_binary_single_column() { - let mut builder = FixedSizeBinaryBuilder::new(16, 4); + let mut builder = FixedSizeBinaryBuilder::new(4); builder.append_value(b"0123").unwrap(); builder.append_null(); builder.append_value(b"8910").unwrap(); diff --git a/parquet/src/arrow/buffer/converter.rs b/parquet/src/arrow/buffer/converter.rs index aeca548bde7..c0e6ffcd61b 100644 --- a/parquet/src/arrow/buffer/converter.rs +++ b/parquet/src/arrow/buffer/converter.rs @@ -55,7 +55,8 @@ impl Converter>, FixedSizeBinaryArray> &self, source: Vec>, ) -> Result { - let mut builder = FixedSizeBinaryBuilder::new(source.len(), self.byte_width); + let mut builder = + FixedSizeBinaryBuilder::with_capacity(source.len(), self.byte_width); for v in source { match v { Some(array) => builder.append_value(array.data())?,