From 0e974911bbb5a5512c8e31d789db65e29a875a60 Mon Sep 17 00:00:00 2001 From: Kun Liu Date: Fri, 12 Aug 2022 20:45:56 +0800 Subject: [PATCH] refine validation for decimal128 array (#2428) --- arrow/benches/decimal_validate.rs | 30 +++++++++++++++------- arrow/src/array/array_decimal.rs | 12 ++++++--- arrow/src/array/builder/decimal_builder.rs | 27 +++---------------- arrow/src/array/data.rs | 6 +---- arrow/src/csv/reader.rs | 10 ++++++-- arrow/src/datatypes/datatype.rs | 4 +-- 6 files changed, 44 insertions(+), 45 deletions(-) diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index 1c726341e17..1c9406abc78 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -18,8 +18,12 @@ #[macro_use] extern crate criterion; -use arrow::array::{Array, Decimal128Array, Decimal256Array, Decimal256Builder}; +use arrow::array::{ + Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder, +}; use criterion::Criterion; +use num::BigInt; +use rand::Rng; extern crate arrow; @@ -34,7 +38,15 @@ fn validate_decimal256_array(array: Decimal256Array) { } fn validate_decimal128_benchmark(c: &mut Criterion) { - let decimal_array = Decimal128Array::from_iter_values(vec![12324; 20000]); + let mut rng = rand::thread_rng(); + let size: i128 = 20000; + let mut decimal_builder = Decimal128Builder::new(size as usize, 38, 0); + for _ in 0..size { + decimal_builder + .append_value(rng.gen_range::(0..999999999999)) + .unwrap(); + } + let decimal_array = decimal_builder.finish(); let data = decimal_array.into_data(); c.bench_function("validate_decimal128_array 20000", |b| { b.iter(|| { @@ -45,13 +57,13 @@ fn validate_decimal128_benchmark(c: &mut Criterion) { } fn validate_decimal256_benchmark(c: &mut Criterion) { - let mut decimal_builder = Decimal256Builder::new(20000, 76, 0); - let mut bytes = vec![0; 32]; - bytes[0..16].clone_from_slice(&12324_i128.to_le_bytes()); - for _ in 0..20000 { - decimal_builder - .append_value(&Decimal256::new(76, 0, &bytes)) - .unwrap(); + let mut rng = rand::thread_rng(); + let size: i128 = 20000; + let mut decimal_builder = Decimal256Builder::new(size as usize, 76, 0); + for _ in 0..size { + let v = rng.gen_range::(0..999999999999999); + let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap(); + decimal_builder.append_value(&decimal).unwrap(); } let decimal_array256_data = decimal_builder.finish(); let data = decimal_array256_data.into_data(); diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 455383124c0..0565a402725 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -273,10 +273,14 @@ impl Decimal128Array { // Validates decimal values in this array can be properly interpreted // with the specified precision. fn validate_decimal_precision(&self, precision: usize) -> Result<()> { - for v in self.iter().flatten() { - validate_decimal_precision(v.as_i128(), precision)?; - } - Ok(()) + (0..self.len()).try_for_each(|idx| { + if self.is_valid(idx) { + let decimal = unsafe { self.value_unchecked(idx) }; + validate_decimal_precision(decimal.as_i128(), precision) + } else { + Ok(()) + } + }) } /// Returns a Decimal array with the same data as self, with the diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index 5527679c3b7..9a76a31dd52 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -85,33 +85,14 @@ impl Decimal128Builder { /// Appends a decimal value into the builder. #[inline] pub fn append_value(&mut self, value: impl Into) -> Result<()> { - let value = if self.value_validation { - validate_decimal_precision(value.into(), self.precision)? - } else { - value.into() - }; - - let value_as_bytes = - Self::from_i128_to_fixed_size_bytes(value, Self::BYTE_LENGTH as usize)?; - if Self::BYTE_LENGTH != value_as_bytes.len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as Decimal128Builder value lengths".to_string() - )); + let value = value.into(); + if self.value_validation { + validate_decimal_precision(value, self.precision)? } + let value_as_bytes: [u8; 16] = value.to_le_bytes(); self.builder.append_value(value_as_bytes.as_slice()) } - pub(crate) fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result> { - if size > 16 { - return Err(ArrowError::InvalidArgumentError( - "Decimal128Builder only supports values up to 16 bytes.".to_string(), - )); - } - let res = v.to_le_bytes(); - let start_byte = 16 - size; - Ok(res[start_byte..16].to_vec()) - } - /// Append a null value to the array. #[inline] pub fn append_null(&mut self) { diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 37581b93fde..3993d51d9b8 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -2825,11 +2825,7 @@ mod tests { let byte_width = 16; let mut fixed_size_builder = FixedSizeListBuilder::new(values_builder, byte_width); - let value_as_bytes = Decimal128Builder::from_i128_to_fixed_size_bytes( - 123456, - fixed_size_builder.value_length() as usize, - ) - .unwrap(); + let value_as_bytes = 123456_i128.to_le_bytes(); fixed_size_builder .values() .append_slice(value_as_bytes.as_slice()); diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index f01ce37c739..ac26b377ffe 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -776,8 +776,14 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu if negative { result = result.neg(); } - validate_decimal_precision(result, precision) - .map_err(|e| ArrowError::ParseError(format!("parse decimal overflow: {}", e))) + + match validate_decimal_precision(result, precision) { + Ok(_) => Ok(result), + Err(e) => Err(ArrowError::ParseError(format!( + "parse decimal overflow: {}", + e + ))), + } } else { Err(ArrowError::ParseError(format!( "can't parse the string value {} to decimal", diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index 1d922c8ebe6..29f7d726930 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -989,7 +989,7 @@ pub const DECIMAL_DEFAULT_SCALE: usize = 10; /// Validates that the specified `i128` value can be properly /// interpreted as a Decimal number with precision `precision` #[inline] -pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result { +pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<()> { if precision > DECIMAL128_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal128 is {}, but got {}", @@ -1011,7 +1011,7 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul value, precision, min ))) } else { - Ok(value) + Ok(()) } }