From 86e1ededd6ec1e9da00ec87861a002b3f7ed54cf Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 8 Aug 2022 10:39:33 -0700 Subject: [PATCH] Cast between decimal128 and decimal256 --- arrow/src/array/array_decimal.rs | 2 +- arrow/src/array/builder/decimal_builder.rs | 3 +- arrow/src/array/data.rs | 13 +- arrow/src/compute/kernels/cast.rs | 260 +++++++++++++++++++-- arrow/src/datatypes/datatype.rs | 10 +- arrow/src/util/decimal.rs | 5 + 6 files changed, 259 insertions(+), 34 deletions(-) diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 9d7644befd6..7134f2767b2 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -360,7 +360,7 @@ impl BasicDecimalArray for Decimal256Array { fn validate_decimal_precision(&self, precision: usize) -> Result<()> { if precision < self.precision { for v in self.iter().flatten() { - validate_decimal256_precision(&v.to_string(), precision)?; + validate_decimal256_precision(&v.to_big_int(), precision)?; } } Ok(()) diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index 22c1490e86f..a1f4b0ed398 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -202,8 +202,7 @@ impl Decimal256Builder { let value = if self.value_validation { let raw_bytes = value.raw_value(); let integer = BigInt::from_signed_bytes_le(raw_bytes); - let value_str = integer.to_string(); - validate_decimal256_precision(&value_str, self.precision)?; + validate_decimal256_precision(&integer, self.precision)?; value } else { value diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs index 43c43b04a51..3d2057bbc45 100644 --- a/arrow/src/array/data.rs +++ b/arrow/src/array/data.rs @@ -401,11 +401,17 @@ impl ArrayData { #[inline] pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self { assert!( - matches!(self.data_type, DataType::Decimal128(_, _)), + matches!( + self.data_type, + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) + ), "only DecimalType is supported for existing type" ); assert!( - matches!(new_data_type, DataType::Decimal128(_, _)), + matches!( + new_data_type, + DataType::Decimal128(_, _) | DataType::Decimal256(_, _) + ), "only DecimalType is supported for new datatype" ); self.data_type = new_data_type; @@ -1044,8 +1050,7 @@ impl ArrayData { let offset = pos * 32; let raw_bytes = &values[offset..offset + 32]; let integer = BigInt::from_signed_bytes_le(raw_bytes); - let value_str = integer.to_string(); - validate_decimal256_precision(&value_str, *p)?; + validate_decimal256_precision(&integer, *p)?; } Ok(()) } diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index c6b8f477986..dcbe313949d 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -36,6 +36,7 @@ //! ``` use chrono::Timelike; +use std::ops::{Div, Mul}; use std::str; use std::sync::Arc; @@ -50,10 +51,11 @@ use crate::temporal_conversions::{ EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS, SECONDS_IN_DAY, }; +use crate::util::decimal::BasicDecimal; use crate::{array::*, compute::take}; use crate::{buffer::Buffer, util::serialization::lexical_to_string}; use num::cast::AsPrimitive; -use num::{NumCast, ToPrimitive}; +use num::{BigInt, NumCast, ToPrimitive}; /// CastOptions provides a way to override the default cast behaviors #[derive(Debug)] @@ -78,6 +80,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { // TODO UTF8/unsigned numeric to decimal // cast one decimal type to another decimal type (Decimal128(_, _), Decimal128(_, _)) => true, + (Decimal256(_, _), Decimal256(_, _)) => true, + (Decimal128(_, _), Decimal256(_, _)) => true, + (Decimal256(_, _), Decimal128(_, _)) => true, // signed numeric to decimal (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) | // decimal to signed numeric @@ -431,9 +436,30 @@ pub fn cast_with_options( return Ok(array.clone()); } match (from_type, to_type) { - (Decimal128(_, s1), Decimal128(p2, s2)) => { - cast_decimal_to_decimal(array, s1, p2, s2) - } + (Decimal128(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::< + crate::util::decimal::Decimal128, + crate::util::decimal::Decimal128, + Decimal128Array, + Decimal128Array, + >(array, s1, p2, s2), + (Decimal256(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::< + crate::util::decimal::Decimal256, + crate::util::decimal::Decimal256, + Decimal256Array, + Decimal256Array, + >(array, s1, p2, s2), + (Decimal128(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::< + crate::util::decimal::Decimal128, + crate::util::decimal::Decimal256, + Decimal128Array, + Decimal256Array, + >(array, s1, p2, s2), + (Decimal256(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::< + crate::util::decimal::Decimal256, + crate::util::decimal::Decimal128, + Decimal256Array, + Decimal128Array, + >(array, s1, p2, s2), (Decimal128(_, scale), _) => { // cast decimal to other type match to_type { @@ -1252,34 +1278,124 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 { } /// Cast one type of decimal array to another type of decimal array -fn cast_decimal_to_decimal( +fn cast_decimal_to_decimal< + T1: BasicDecimal, + T2: BasicDecimal, + D1: BasicDecimalArray + From, + D2: BasicDecimalArray + From, +>( array: &ArrayRef, input_scale: &usize, output_precision: &usize, output_scale: &usize, ) -> Result { - let array = array.as_any().downcast_ref::().unwrap(); - - let output_array = if input_scale > output_scale { + if input_scale > output_scale { // For example, input_scale is 4 and output_scale is 3; // Original value is 11234_i128, and will be cast to 1123_i128. let div = 10_i128.pow((input_scale - output_scale) as u32); - array - .iter() - .map(|v| v.map(|v| v.as_i128() / div)) - .collect::() + if D1::VALUE_LENGTH == 16 { + let array = array.as_any().downcast_ref::().unwrap(); + let iter = array.iter().map(|v| v.map(|v| v.as_i128() / div)); + if D2::VALUE_LENGTH == 16 { + let output_array = iter + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } else { + let output_array = iter + .map(|v| v.map(|v| BigInt::from(v))) + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } + } else { + let array = array.as_any().downcast_ref::().unwrap(); + let iter = array.iter().map(|v| v.map(|v| v.to_big_int().div(div))); + if D2::VALUE_LENGTH == 16 { + let values = iter + .map(|v| { + if v.is_none() { + Ok(None) + } else { + v.and_then(|v| v.to_i128()) + .ok_or(ArrowError::InvalidArgumentError(format!( + "Cannot be casted to 128-bit integer for Decimal128", + ))) + .map(|v| Some(v)) + } + }) + .collect::>>()?; + + let output_array = values + .into_iter() + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } else { + let output_array = iter + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } + } } else { // For example, input_scale is 3 and output_scale is 4; // Original value is 1123_i128, and will be cast to 11230_i128. let mul = 10_i128.pow((output_scale - input_scale) as u32); - array - .iter() - .map(|v| v.map(|v| v.as_i128() * mul)) - .collect::() - } - .with_precision_and_scale(*output_precision, *output_scale)?; + if D1::VALUE_LENGTH == 16 { + let array = array.as_any().downcast_ref::().unwrap(); + let iter = array.iter().map(|v| v.map(|v| v.as_i128() * mul)); + if D2::VALUE_LENGTH == 16 { + let output_array = iter + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } else { + let output_array = iter + .map(|v| v.map(|v| BigInt::from(v))) + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; - Ok(Arc::new(output_array)) + Ok(Arc::new(output_array)) + } + } else { + let array = array.as_any().downcast_ref::().unwrap(); + let iter = array.iter().map(|v| v.map(|v| v.to_big_int().mul(mul))); + if D2::VALUE_LENGTH == 16 { + let values = iter + .map(|v| { + if v.is_none() { + Ok(None) + } else { + v.and_then(|v| v.to_i128()) + .ok_or(ArrowError::InvalidArgumentError(format!( + "Cannot be casted to 128-bit integer for Decimal128", + ))) + .map(|v| Some(v)) + } + }) + .collect::>>()?; + + let output_array = values + .into_iter() + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } else { + let output_array = iter + .collect::() + .with_precision_and_scale(*output_precision, *output_scale)?; + + Ok(Arc::new(output_array)) + } + } + } } /// Cast an array by changing its array_data type to the desired type @@ -2422,7 +2538,7 @@ mod tests { use super::*; use crate::array::BasicDecimalArray; use crate::datatypes::TimeUnit; - use crate::util::decimal::Decimal128; + use crate::util::decimal::{Decimal128, Decimal256}; use crate::{buffer::Buffer, util::display::array_value_to_string}; macro_rules! generate_cast_test_case { @@ -2461,8 +2577,19 @@ mod tests { .with_precision_and_scale(precision, scale) } + fn create_decimal256_array( + array: Vec>, + precision: usize, + scale: usize, + ) -> Result { + array + .into_iter() + .collect::() + .with_precision_and_scale(precision, scale) + } + #[test] - fn test_cast_decimal_to_decimal() { + fn test_cast_decimal128_to_decimal128() { let input_type = DataType::Decimal128(20, 3); let output_type = DataType::Decimal128(20, 4); assert!(can_cast_types(&input_type, &output_type)); @@ -2490,6 +2617,97 @@ mod tests { result.unwrap_err().to_string()); } + #[test] + fn test_cast_decimal128_to_decimal256() { + let input_type = DataType::Decimal128(20, 3); + let output_type = DataType::Decimal256(20, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![Some(1123456), Some(2123456), Some(3123456), None]; + let input_decimal_array = create_decimal_array(&array, 20, 3).unwrap(); + let array = Arc::new(input_decimal_array) as ArrayRef; + generate_cast_test_case!( + &array, + Decimal256Array, + &output_type, + vec![ + Some( + Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4) + .unwrap() + ), + Some( + Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4) + .unwrap() + ), + Some( + Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4) + .unwrap() + ), + None + ] + ); + } + + #[test] + fn test_cast_decimal256_to_decimal128() { + let input_type = DataType::Decimal256(20, 3); + let output_type = DataType::Decimal128(20, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![ + Some(BigInt::from(1123456)), + Some(BigInt::from(2123456)), + Some(BigInt::from(3123456)), + None, + ]; + let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap(); + let array = Arc::new(input_decimal_array) as ArrayRef; + generate_cast_test_case!( + &array, + Decimal128Array, + &output_type, + vec![ + Some(Decimal128::new_from_i128(20, 4, 11234560_i128)), + Some(Decimal128::new_from_i128(20, 4, 21234560_i128)), + Some(Decimal128::new_from_i128(20, 4, 31234560_i128)), + None + ] + ); + } + + #[test] + fn test_cast_decimal256_to_decimal256() { + let input_type = DataType::Decimal256(20, 3); + let output_type = DataType::Decimal256(20, 4); + assert!(can_cast_types(&input_type, &output_type)); + let array = vec![ + Some(BigInt::from(1123456)), + Some(BigInt::from(2123456)), + Some(BigInt::from(3123456)), + None, + ]; + let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap(); + let array = Arc::new(input_decimal_array) as ArrayRef; + generate_cast_test_case!( + &array, + Decimal256Array, + &output_type, + vec![ + Some( + Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4) + .unwrap() + ), + Some( + Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4) + .unwrap() + ), + Some( + Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4) + .unwrap() + ), + None + ] + ); + } + #[test] fn test_cast_decimal_to_numeric() { let decimal_type = DataType::Decimal128(38, 2); diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index 034920d3753..bf91dbf4cb4 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -483,9 +483,9 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul /// interpreted as a Decimal256 number with precision `precision` #[inline] pub(crate) fn validate_decimal256_precision( - value: &str, + value: &BigInt, precision: usize, -) -> Result { +) -> Result<&BigInt> { if precision > 38 { let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1]; let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1]; @@ -493,13 +493,12 @@ pub(crate) fn validate_decimal256_precision( let max = BigInt::from_str_radix(max_str, 10).unwrap(); let min = BigInt::from_str_radix(min_str, 10).unwrap(); - let value = BigInt::from_str_radix(value, 10).unwrap(); - if value > max { + if value > &max { Err(ArrowError::InvalidArgumentError(format!( "{} is too large to store in a Decimal256 of precision {}. Max is {}", value, precision, max ))) - } else if value < min { + } else if value < &min { Err(ArrowError::InvalidArgumentError(format!( "{} is too small to store in a Decimal256 of precision {}. Min is {}", value, precision, min @@ -510,7 +509,6 @@ pub(crate) fn validate_decimal256_precision( } else { let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1]; let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1]; - let value = BigInt::from_str_radix(value, 10).unwrap(); if value.to_i128().unwrap() > max { Err(ArrowError::InvalidArgumentError(format!( diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 62a95079537..8a64076cc54 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -178,6 +178,11 @@ impl Decimal256 { bytes[0..num_bytes.len()].clone_from_slice(num_bytes); Decimal256::try_new_from_bytes(precision, scale, &bytes) } + + /// Constructs a `BigInt` from this `Decimal256` value. + pub(crate) fn to_big_int(&self) -> BigInt { + BigInt::from_signed_bytes_le(&self.value) + } } macro_rules! def_decimal {