Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring DecimalBuilder constructors #2517

Merged
merged 5 commits into from Aug 20, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions arrow/benches/builder.rs
Expand Up @@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..9999999999))
Expand All @@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10);
let mut decimal_builder =
Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(&Decimal256::from(BigInt::from(
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/cast_kernels.rs
Expand Up @@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {

fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::new(size, precision, scale);
let mut builder = Decimal128Builder::with_capacity(size, precision, scale);

for _ in 0..size {
let _ = builder.append_value(rng.gen_range::<i128, _>(0..1000000000));
Expand All @@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR

fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal256Builder::new(size, precision, scale);
let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
let mut bytes = [0; 32];
for _ in 0..size {
let num = rng.gen_range::<i128, _>(0..1000000000);
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/decimal_validate.rs
Expand Up @@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) {
fn validate_decimal128_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
Expand All @@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -571,7 +571,7 @@ mod tests {
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
let mut decimal_builder = Decimal128Builder::new(10, 5, 3);
let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
let mut error = result.unwrap_err();
assert_eq!(
Expand All @@ -588,7 +588,7 @@ mod tests {
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(1));

decimal_builder = Decimal128Builder::new(10, 2, 1);
decimal_builder = Decimal128Builder::new(2, 1);
result = decimal_builder.append_value(100);
error = result.unwrap_err();
assert_eq!(
Expand Down Expand Up @@ -884,7 +884,7 @@ mod tests {

#[test]
fn test_decimal256_iter() {
let mut builder = Decimal256Builder::new(30, 76, 6);
let mut builder = Decimal256Builder::with_capacity(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();
Expand Down
26 changes: 18 additions & 8 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -61,9 +61,14 @@ pub struct Decimal256Builder {

impl Decimal128Builder {
const BYTE_LENGTH: i32 = 16;
/// Creates a new [`Decimal128Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values
/// array
Copy link
Contributor

@tustvold tustvold Aug 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to change the definition of capacity to be the number of decimal elements, instead of the number of decimal bytes. Eliminating this inconsistency is the major motivator for #2054

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It occurs to me I didn't catch this on #2516 ....

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I took the liberty of fixing this

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you

pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder {

impl Decimal256Builder {
const BYTE_LENGTH: i32 = 32;
/// Creates a new [`Decimal256Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -245,7 +255,7 @@ mod tests {

#[test]
fn test_decimal_builder() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder.append_value(8_887_000_000_i128).unwrap();
builder.append_null();
Expand All @@ -263,7 +273,7 @@ mod tests {

#[test]
fn test_decimal_builder_with_decimal128() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder
.append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128))
Expand All @@ -283,7 +293,7 @@ mod tests {

#[test]
fn test_decimal256_builder() {
let mut builder = Decimal256Builder::new(30, 40, 6);
let mut builder = Decimal256Builder::new(40, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand Down Expand Up @@ -327,7 +337,7 @@ mod tests {
expected = "Decimal value does not have the same precision or scale as Decimal256Builder"
)]
fn test_decimal256_builder_unmatched_precision_scale() {
let mut builder = Decimal256Builder::new(30, 10, 6);
let mut builder = Decimal256Builder::with_capacity(30, 10, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand All @@ -340,7 +350,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap();
Expand All @@ -352,7 +362,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/builder/struct_builder.rs
Expand Up @@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
DataType::Decimal128(precision, scale) => {
Box::new(Decimal128Builder::new(capacity, *precision, *scale))
}
DataType::Decimal128(precision, scale) => Box::new(
Decimal128Builder::with_capacity(capacity, *precision, *scale),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out a lot of callsites were assuming this capacity was in terms of elements, when in actuality it was in terms of bytes (which was an extremely confusing API)

),
DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
DataType::Date32 => Box::new(Date32Builder::new(capacity)),
DataType::Date64 => Box::new(Date64Builder::new(capacity)),
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/data.rs
Expand Up @@ -2847,7 +2847,7 @@ mod tests {

#[test]
fn test_decimal_validation() {
let mut builder = Decimal128Builder::new(4, 10, 4);
let mut builder = Decimal128Builder::with_capacity(4, 10, 4);
builder.append_value(10000).unwrap();
builder.append_value(20000).unwrap();
let array = builder.finish();
Expand Down
3 changes: 2 additions & 1 deletion arrow/src/csv/reader.rs
Expand Up @@ -699,7 +699,8 @@ fn build_decimal_array(
precision: usize,
scale: usize,
) -> Result<ArrayRef> {
let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale);
let mut decimal_builder =
Decimal128Builder::with_capacity(rows.len(), precision, scale);
for row in rows {
let col_s = row.get(col_idx);
match col_s {
Expand Down
6 changes: 4 additions & 2 deletions arrow/src/util/integration_util.rs
Expand Up @@ -776,7 +776,8 @@ pub fn array_from_json(
}
}
DataType::Decimal128(precision, scale) => {
let mut b = Decimal128Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal128Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand All @@ -798,7 +799,8 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Decimal256(precision, scale) => {
let mut b = Decimal256Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal256Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand Down