Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring DecimalBuilder constructors #2517

Merged
merged 5 commits into from Aug 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions arrow/benches/builder.rs
Expand Up @@ -112,7 +112,7 @@ fn bench_decimal128(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal128Builder::new(BATCH_SIZE, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..9999999999))
Expand All @@ -127,7 +127,8 @@ fn bench_decimal256(c: &mut Criterion) {
c.bench_function("bench_decimal128_builder", |b| {
b.iter(|| {
let mut rng = rand::thread_rng();
let mut decimal_builder = Decimal256Builder::new(BATCH_SIZE, 76, 10);
let mut decimal_builder =
Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10);
for _ in 0..BATCH_SIZE {
decimal_builder
.append_value(&Decimal256::from(BigInt::from(
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/cast_kernels.rs
Expand Up @@ -84,7 +84,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {

fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::new(size, precision, scale);
let mut builder = Decimal128Builder::with_capacity(size, precision, scale);

for _ in 0..size {
let _ = builder.append_value(rng.gen_range::<i128, _>(0..1000000000));
Expand All @@ -94,7 +94,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR

fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal256Builder::new(size, precision, scale);
let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
let mut bytes = [0; 32];
for _ in 0..size {
let num = rng.gen_range::<i128, _>(0..1000000000);
Expand Down
4 changes: 2 additions & 2 deletions arrow/benches/decimal_validate.rs
Expand Up @@ -40,7 +40,7 @@ fn validate_decimal256_array(array: Decimal256Array) {
fn validate_decimal128_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0);
let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0);
for _ in 0..size {
decimal_builder
.append_value(rng.gen_range::<i128, _>(0..999999999999))
Expand All @@ -59,7 +59,7 @@ fn validate_decimal128_benchmark(c: &mut Criterion) {
fn validate_decimal256_benchmark(c: &mut Criterion) {
let mut rng = rand::thread_rng();
let size: i128 = 20000;
let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0);
let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0);
for _ in 0..size {
let v = rng.gen_range::<i128, _>(0..999999999999999);
let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap();
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -571,7 +571,7 @@ mod tests {
#[test]
#[cfg(not(feature = "force_validate"))]
fn test_decimal_append_error_value() {
let mut decimal_builder = Decimal128Builder::new(10, 5, 3);
let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3);
let mut result = decimal_builder.append_value(123456);
let mut error = result.unwrap_err();
assert_eq!(
Expand All @@ -588,7 +588,7 @@ mod tests {
let arr = decimal_builder.finish();
assert_eq!("12.345", arr.value_as_string(1));

decimal_builder = Decimal128Builder::new(10, 2, 1);
decimal_builder = Decimal128Builder::new(2, 1);
result = decimal_builder.append_value(100);
error = result.unwrap_err();
assert_eq!(
Expand Down Expand Up @@ -884,7 +884,7 @@ mod tests {

#[test]
fn test_decimal256_iter() {
let mut builder = Decimal256Builder::new(30, 76, 6);
let mut builder = Decimal256Builder::with_capacity(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();
Expand Down
34 changes: 22 additions & 12 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -61,9 +61,14 @@ pub struct Decimal256Builder {

impl Decimal128Builder {
const BYTE_LENGTH: i32 = 16;
/// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
/// Creates a new [`Decimal128Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -155,9 +160,14 @@ impl ArrayBuilder for Decimal128Builder {

impl Decimal256Builder {
const BYTE_LENGTH: i32 = 32;
/// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
/// Creates a new [`Decimal256Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -245,7 +255,7 @@ mod tests {

#[test]
fn test_decimal_builder() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder.append_value(8_887_000_000_i128).unwrap();
builder.append_null();
Expand All @@ -263,7 +273,7 @@ mod tests {

#[test]
fn test_decimal_builder_with_decimal128() {
let mut builder = Decimal128Builder::new(30, 38, 6);
let mut builder = Decimal128Builder::new(38, 6);

builder
.append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128))
Expand All @@ -283,7 +293,7 @@ mod tests {

#[test]
fn test_decimal256_builder() {
let mut builder = Decimal256Builder::new(30, 40, 6);
let mut builder = Decimal256Builder::new(40, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand Down Expand Up @@ -327,7 +337,7 @@ mod tests {
expected = "Decimal value does not have the same precision or scale as Decimal256Builder"
)]
fn test_decimal256_builder_unmatched_precision_scale() {
let mut builder = Decimal256Builder::new(30, 10, 6);
let mut builder = Decimal256Builder::with_capacity(30, 10, 6);

let mut bytes = [0_u8; 32];
bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes());
Expand All @@ -340,7 +350,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_builder_out_of_range_precision_scale() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);

let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap();
let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap();
Expand All @@ -352,7 +362,7 @@ mod tests {
expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999"
)]
fn test_decimal256_data_validation() {
let mut builder = Decimal256Builder::new(30, 75, 6);
let mut builder = Decimal256Builder::new(75, 6);
// Disable validation at builder
unsafe {
builder.disable_value_validation();
Expand Down
16 changes: 6 additions & 10 deletions arrow/src/array/builder/fixed_size_binary_builder.rs
Expand Up @@ -38,21 +38,17 @@ impl FixedSizeBinaryBuilder {
Self::with_capacity(1024, byte_width)
}

/// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of bytes in the values
/// buffer
/// Creates a new [`FixedSizeBinaryBuilder`], `capacity` is the number of byte slices
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, byte_width: i32) -> Self {
assert!(
byte_width >= 0,
"value length ({}) of the array must >= 0",
byte_width
);
Self {
values_builder: UInt8BufferBuilder::new(capacity),
null_buffer_builder: NullBufferBuilder::new(if byte_width > 0 {
capacity / byte_width as usize
} else {
0
}),
values_builder: UInt8BufferBuilder::new(capacity * byte_width as usize),
null_buffer_builder: NullBufferBuilder::new(capacity),
value_length: byte_width,
}
}
Expand Down Expand Up @@ -137,7 +133,7 @@ mod tests {

#[test]
fn test_fixed_size_binary_builder() {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
let mut builder = FixedSizeBinaryBuilder::with_capacity(3, 5);

// [b"hello", null, "arrow"]
builder.append_value(b"hello").unwrap();
Expand Down Expand Up @@ -176,7 +172,7 @@ mod tests {
expected = "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths"
)]
fn test_fixed_size_binary_builder_with_inconsistent_value_length() {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 4);
let mut builder = FixedSizeBinaryBuilder::with_capacity(1, 4);
builder.append_value(b"hello").unwrap();
}
#[test]
Expand Down
6 changes: 3 additions & 3 deletions arrow/src/array/builder/struct_builder.rs
Expand Up @@ -111,9 +111,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilde
DataType::FixedSizeBinary(len) => {
Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
}
DataType::Decimal128(precision, scale) => {
Box::new(Decimal128Builder::new(capacity, *precision, *scale))
}
DataType::Decimal128(precision, scale) => Box::new(
Decimal128Builder::with_capacity(capacity, *precision, *scale),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It turns out a lot of callsites were assuming this capacity was in terms of elements, when in actuality it was in terms of bytes (which was an extremely confusing API)

),
DataType::Utf8 => Box::new(StringBuilder::new(capacity)),
DataType::Date32 => Box::new(Date32Builder::new(capacity)),
DataType::Date64 => Box::new(Date64Builder::new(capacity)),
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/data.rs
Expand Up @@ -2847,7 +2847,7 @@ mod tests {

#[test]
fn test_decimal_validation() {
let mut builder = Decimal128Builder::new(4, 10, 4);
let mut builder = Decimal128Builder::with_capacity(4, 10, 4);
builder.append_value(10000).unwrap();
builder.append_value(20000).unwrap();
let array = builder.finish();
Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/equal/mod.rs
Expand Up @@ -768,7 +768,7 @@ mod tests {
fn create_fixed_size_binary_array<U: AsRef<[u8]>, T: AsRef<[Option<U>]>>(
data: T,
) -> ArrayData {
let mut builder = FixedSizeBinaryBuilder::with_capacity(15, 5);
let mut builder = FixedSizeBinaryBuilder::with_capacity(data.as_ref().len(), 5);

for d in data.as_ref() {
if let Some(v) = d {
Expand Down
3 changes: 2 additions & 1 deletion arrow/src/csv/reader.rs
Expand Up @@ -699,7 +699,8 @@ fn build_decimal_array(
precision: usize,
scale: usize,
) -> Result<ArrayRef> {
let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale);
let mut decimal_builder =
Decimal128Builder::with_capacity(rows.len(), precision, scale);
for row in rows {
let col_s = row.get(col_idx);
match col_s {
Expand Down
6 changes: 4 additions & 2 deletions arrow/src/util/integration_util.rs
Expand Up @@ -776,7 +776,8 @@ pub fn array_from_json(
}
}
DataType::Decimal128(precision, scale) => {
let mut b = Decimal128Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal128Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand All @@ -798,7 +799,8 @@ pub fn array_from_json(
Ok(Arc::new(b.finish()))
}
DataType::Decimal256(precision, scale) => {
let mut b = Decimal256Builder::new(json_col.count, *precision, *scale);
let mut b =
Decimal256Builder::with_capacity(json_col.count, *precision, *scale);
// C++ interop tests involve incompatible decimal values
unsafe {
b.disable_value_validation();
Expand Down