diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index b6142339646..7651c9e67ed 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { let mut rng = rand::thread_rng(); - let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0); + let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0); for _ in 0..BATCH_SIZE { decimal_builder .append_value(rng.gen_range::(0..9999999999)) @@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { let mut rng = rand::thread_rng(); - let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10); + let mut decimal_builder = + Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10); for _ in 0..BATCH_SIZE { decimal_builder .append_value(&Decimal256::from(BigInt::from( diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index a7bae35e712..4e52e76842c 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef { let mut rng = seedable_rng(); - let mut builder = Decimal128Builder::new(size, precision, scale); + let mut builder = Decimal128Builder::with_capacity(size, precision, scale); for _ in 0..size { let _ = builder.append_value(rng.gen_range::(0..1000000000)); @@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef { let mut rng = seedable_rng(); - let mut builder = Decimal256Builder::new(size, precision, scale); + let mut builder = Decimal256Builder::with_capacity(size, precision, scale); let mut bytes = [0; 32]; for _ in 0..size { let num = rng.gen_range::(0..1000000000); diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index 1c9406abc78..555373e4a63 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) { fn validate_decimal128_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let size: i128 = 20000; - let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0); + let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0); for _ in 0..size { decimal_builder .append_value(rng.gen_range::(0..999999999999)) @@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) { fn validate_decimal256_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let size: i128 = 20000; - let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0); + let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0); for _ in 0..size { let v = rng.gen_range::(0..999999999999999); let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap(); diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 540024a923c..aa25e66a90c 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -571,7 +571,7 @@ mod tests { #[test] #[cfg(not(feature = "force_validate"))] fn test_decimal_append_error_value() { - let mut decimal_builder = Decimal128Builder::new(10, 5, 3); + let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3); let mut result = decimal_builder.append_value(123456); let mut error = result.unwrap_err(); assert_eq!( @@ -588,7 +588,7 @@ mod tests { let arr = decimal_builder.finish(); assert_eq!("12.345", arr.value_as_string(1)); - decimal_builder = Decimal128Builder::new(10, 2, 1); + decimal_builder = Decimal128Builder::new(2, 1); result = decimal_builder.append_value(100); error = result.unwrap_err(); assert_eq!( @@ -884,7 +884,7 @@ mod tests { #[test] fn test_decimal256_iter() { - let mut builder = Decimal256Builder::new(30, 76, 6); + let mut builder = Decimal256Builder::with_capacity(30, 76, 6); let value = BigInt::from_str_radix("12345", 10).unwrap(); let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap(); builder.append_value(&decimal1).unwrap(); diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index 988aba1ec78..b4c877c7110 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -61,9 +61,14 @@ pub struct Decimal256Builder { impl Decimal128Builder { const BYTE_LENGTH: i32 = 16; - /// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + /// Creates a new [`Decimal128Builder`] + pub fn new(precision: usize, scale: usize) -> Self { + Self::with_capacity(1024, precision, scale) + } + + /// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values + /// that can be appended without reallocating + pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self { Self { builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), precision, @@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder { impl Decimal256Builder { const BYTE_LENGTH: i32 = 32; - /// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values - /// array - pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + /// Creates a new [`Decimal256Builder`] + pub fn new(precision: usize, scale: usize) -> Self { + Self::with_capacity(1024, precision, scale) + } + + /// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values + /// that can be appended without reallocating + pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self { Self { builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), precision, @@ -245,7 +255,7 @@ mod tests { #[test] fn test_decimal_builder() { - let mut builder = Decimal128Builder::new(30, 38, 6); + let mut builder = Decimal128Builder::new(38, 6); builder.append_value(8_887_000_000_i128).unwrap(); builder.append_null(); @@ -263,7 +273,7 @@ mod tests { #[test] fn test_decimal_builder_with_decimal128() { - let mut builder = Decimal128Builder::new(30, 38, 6); + let mut builder = Decimal128Builder::new(38, 6); builder .append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128)) @@ -283,7 +293,7 @@ mod tests { #[test] fn test_decimal256_builder() { - let mut builder = Decimal256Builder::new(30, 40, 6); + let mut builder = Decimal256Builder::new(40, 6); let mut bytes = [0_u8; 32]; bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes()); @@ -327,7 +337,7 @@ mod tests { expected = "Decimal value does not have the same precision or scale as Decimal256Builder" )] fn test_decimal256_builder_unmatched_precision_scale() { - let mut builder = Decimal256Builder::new(30, 10, 6); + let mut builder = Decimal256Builder::with_capacity(30, 10, 6); let mut bytes = [0_u8; 32]; bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes()); @@ -340,7 +350,7 @@ mod tests { expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999" )] fn test_decimal256_builder_out_of_range_precision_scale() { - let mut builder = Decimal256Builder::new(30, 75, 6); + let mut builder = Decimal256Builder::new(75, 6); let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap(); let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap(); @@ -352,7 +362,7 @@ mod tests { expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999" )] fn test_decimal256_data_validation() { - let mut builder = Decimal256Builder::new(30, 75, 6); + let mut builder = Decimal256Builder::new(75, 6); // Disable validation at builder unsafe { builder.disable_value_validation(); diff --git a/arrow/src/array/builder/fixed_size_binary_builder.rs b/arrow/src/array/builder/fixed_size_binary_builder.rs index f0483ccfb7d..30c25e0a62b 100644 --- a/arrow/src/array/builder/fixed_size_binary_builder.rs +++ b/arrow/src/array/builder/fixed_size_binary_builder.rs @@ -38,8 +38,8 @@ impl FixedSizeBinaryBuilder { Self::with_capacity(1024, byte_width) } - /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values - /// buffer + /// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices + /// that can be appended without reallocating pub fn with_capacity(capacity: usize, byte_width: i32) -> Self { assert!( byte_width >= 0, @@ -47,12 +47,8 @@ impl FixedSizeBinaryBuilder { byte_width ); Self { - values_builder: UInt8BufferBuilder::new(capacity), - null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 { - capacity / byte_width as usize - } else { - 0 - }), + values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize), + null_buffer_builder: NullBufferBuilder::new(capacity), value_length: byte_width, } } @@ -137,7 +133,7 @@ mod tests { #[test] fn test_fixed_size_binary_builder() { - let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5); + let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5); // [b"hello", null, "arrow"] builder.append_value(b"hello").unwrap(); @@ -176,7 +172,7 @@ mod tests { expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths" )] fn test_fixed_size_binary_builder_with_inconsistent_value_length() { - let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4); + let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4); builder.append_value(b"hello").unwrap(); } #[test] diff --git a/arrow/src/array/builder/struct_builder.rs b/arrow/src/array/builder/struct_builder.rs index be7fa1405aa..48932f1991b 100644 --- a/arrow/src/array/builder/struct_builder.rs +++ b/arrow/src/array/builder/struct_builder.rs @@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len)) } - DataType::Decimal128(precision, scale) => { - Box::new(Decimal128Builder::new(capacity, *precision, *scale)) - } + DataType::Decimal128(precision, scale) => Box::new( + Decimal128Builder::with_capacity(capacity, *precision, *scale), + ), DataType::Utf8 => Box::new(StringBuilder::new(capacity)), DataType::Date32 => Box::new(Date32Builder::new(capacity)), DataType::Date64 => Box::new(Date64Builder::new(capacity)), diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 1a7f991accc..1ed3f01bb7c 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -2847,7 +2847,7 @@ mod tests { #[test] fn test_decimal_validation() { - let mut builder = Decimal128Builder::new(4, 10, 4); + let mut builder = Decimal128Builder::with_capacity(4, 10, 4); builder.append_value(10000).unwrap(); builder.append_value(20000).unwrap(); let array = builder.finish(); diff --git a/arrow/src/array/equal/mod.rs b/arrow/src/array/equal/mod.rs index dd32574bbc6..2984dad8972 100644 --- a/arrow/src/array/equal/mod.rs +++ b/arrow/src/array/equal/mod.rs @@ -768,7 +768,7 @@ mod tests { fn create_fixed_size_binary_array, T: AsRef<[Option]>>( data: T, ) -> ArrayData { - let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5); + let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5); for d in data.as_ref() { if let Some(v) = d { diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index ac26b377ffe..5917f6797dc 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -699,7 +699,8 @@ fn build_decimal_array( precision: usize, scale: usize, ) -> Result { - let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale); + let mut decimal_builder = + Decimal128Builder::with_capacity(rows.len(), precision, scale); for row in rows { let col_s = row.get(col_idx); match col_s { diff --git a/arrow/src/util/integration_util.rs b/arrow/src/util/integration_util.rs index 4a3cc2fcb44..5268f55d97d 100644 --- a/arrow/src/util/integration_util.rs +++ b/arrow/src/util/integration_util.rs @@ -776,7 +776,8 @@ pub fn array_from_json( } } DataType::Decimal128(precision, scale) => { - let mut b = Decimal128Builder::new(json_col.count, *precision, *scale); + let mut b = + Decimal128Builder::with_capacity(json_col.count, *precision, *scale); // C++ interop tests involve incompatible decimal values unsafe { b.disable_value_validation(); @@ -798,7 +799,8 @@ pub fn array_from_json( Ok(Arc::new(b.finish())) } DataType::Decimal256(precision, scale) => { - let mut b = Decimal256Builder::new(json_col.count, *precision, *scale); + let mut b = + Decimal256Builder::with_capacity(json_col.count, *precision, *scale); // C++ interop tests involve incompatible decimal values unsafe { b.disable_value_validation();