Skip to content

Commit

Permalink
Refactoring DecimalBuilder constructors (#2517)
Browse files Browse the repository at this point in the history
* Refactor decimal builder

* Fixing errors

* refactor decimal256builder constructor

* Capacity in terms of elements

Co-authored-by: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
  • Loading branch information
psvri and tustvold committed Aug 20, 2022
1 parent 9921cd5 commit 65cae43
Show file tree
Hide file tree
Showing 11 changed files with 49 additions and 39 deletions.
5 changes: 3 additions & 2 deletions arrow/benches/builder.rs
Expand Up @@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..9999999999))
Expand All @@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10);
let mut decimal_builder =
Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(&Decimal256::from(BigInt::from(
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/cast_kernels.rs
Expand Up @@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {

fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::new(size, precision, scale);
let mut builder = Decimal128Builder::with_capacity(size, precision, scale);

for _ in 0..size {
let _ = builder.append_value(rng.gen_range::<i128, _>(0..1000000000));
Expand All @@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR

fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal256Builder::new(size, precision, scale);
let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
let mut bytes = [0; 32];
for _ in 0..size {
let num = rng.gen_range::<i128, _>(0..1000000000);
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/decimal_validate.rs
Expand Up @@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) {
fn validate_decimal128_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
Expand All @@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -571,7 +571,7 @@ mod tests {
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
let mut decimal_builder = Decimal128Builder::new(10, 5, 3);
let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
let mut error = result.unwrap_err();
assert_eq!(
Expand All @@ -588,7 +588,7 @@ mod tests {
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(1));

decimal_builder = Decimal128Builder::new(10, 2, 1);
decimal_builder = Decimal128Builder::new(2, 1);
result = decimal_builder.append_value(100);
error = result.unwrap_err();
assert_eq!(
Expand Down Expand Up @@ -884,7 +884,7 @@ mod tests {

#[test]
fn test_decimal256_iter() {
let mut builder = Decimal256Builder::new(30, 76, 6);
let mut builder = Decimal256Builder::with_capacity(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();
Expand Down
34 changes: 22 additions & 12 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -61,9 +61,14 @@ pub struct Decimal256Builder {

impl Decimal128Builder {
const BYTE_LENGTH: i32 = 16;
/// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
/// Creates a new [`Decimal128Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder {

impl Decimal256Builder {
const BYTE_LENGTH: i32 = 32;
/// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
/// Creates a new [`Decimal256Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -245,7 +255,7 @@ mod tests {

#[test]
fn test_decimal_builder() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder.append_value(8_887_000_000_i128).unwrap();
builder.append_null();
Expand All @@ -263,7 +273,7 @@ mod tests {

#[test]
fn test_decimal_builder_with_decimal128() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder
.append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128))
Expand All @@ -283,7 +293,7 @@ mod tests {

#[test]
fn test_decimal256_builder() {
let mut builder = Decimal256Builder::new(30, 40, 6);
let mut builder = Decimal256Builder::new(40, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand Down Expand Up @@ -327,7 +337,7 @@ mod tests {
expected = "Decimal value does not have the same precision or scale as Decimal256Builder"
)]
fn test_decimal256_builder_unmatched_precision_scale() {
let mut builder = Decimal256Builder::new(30, 10, 6);
let mut builder = Decimal256Builder::with_capacity(30, 10, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand All @@ -340,7 +350,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap();
Expand All @@ -352,7 +362,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
Expand Down
16 changes: 6 additions & 10 deletions arrow/src/array/builder/fixed_size_binary_builder.rs
Expand Up @@ -38,21 +38,17 @@ impl FixedSizeBinaryBuilder {
Self::with_capacity(1024, byte_width)
}

/// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values
/// buffer
/// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
assert!(
byte_width >= 0,
"value length ({}) of the array must >= 0",
byte_width
);
Self {
values_builder: UInt8BufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
capacity / byte_width as usize
} else {
0
}),
values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
null_buffer_builder: NullBufferBuilder::new(capacity),
value_length: byte_width,
}
}
Expand Down Expand Up @@ -137,7 +133,7 @@ mod tests {

#[test]
fn test_fixed_size_binary_builder() {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

// [b"hello", null, "arrow"]
builder.append_value(b"hello").unwrap();
Expand Down Expand Up @@ -176,7 +172,7 @@ mod tests {
expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
)]
fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4);
let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
builder.append_value(b"hello").unwrap();
}
#[test]
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/builder/struct_builder.rs
Expand Up @@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
DataType::Decimal128(precision, scale) => {
Box::new(Decimal128Builder::new(capacity, *precision, *scale))
}
DataType::Decimal128(precision, scale) => Box::new(
Decimal128Builder::with_capacity(capacity, *precision, *scale),
),
DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
DataType::Date32 => Box::new(Date32Builder::new(capacity)),
DataType::Date64 => Box::new(Date64Builder::new(capacity)),
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/data.rs
Expand Up @@ -2847,7 +2847,7 @@ mod tests {

#[test]
fn test_decimal_validation() {
let mut builder = Decimal128Builder::new(4, 10, 4);
let mut builder = Decimal128Builder::with_capacity(4, 10, 4);
builder.append_value(10000).unwrap();
builder.append_value(20000).unwrap();
let array = builder.finish();
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/equal/mod.rs
Expand Up @@ -768,7 +768,7 @@ mod tests {
fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
data: T,
) -> ArrayData {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5);

for d in data.as_ref() {
if let Some(v) = d {
Expand Down
3 changes: 2 additions & 1 deletion arrow/src/csv/reader.rs
Expand Up @@ -699,7 +699,8 @@ fn build_decimal_array(
precision: usize,
scale: usize,
) -> Result<ArrayRef> {
let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale);
let mut decimal_builder =
Decimal128Builder::with_capacity(rows.len(), precision, scale);
for row in rows {
let col_s = row.get(col_idx);
match col_s {
Expand Down
6 changes: 4 additions & 2 deletions arrow/src/util/integration_util.rs
Expand Up @@ -776,7 +776,8 @@ pub fn array_from_json(
}
}
DataType::Decimal128(precision, scale) => {
let mut b = Decimal128Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal128Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand All @@ -798,7 +799,8 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Decimal256(precision, scale) => {
let mut b = Decimal256Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal256Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand Down

0 comments on commit 65cae43

Please sign in to comment.