From 86e1ededd6ec1e9da00ec87861a002b3f7ed54cf Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Mon, 8 Aug 2022 10:39:33 -0700
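Subject: [PATCH] Cast between decimal128 and decimal256

Add cast support between the two decimal array types:

- `can_cast_types` and `cast_with_options` now accept
  Decimal128 -> Decimal256, Decimal256 -> Decimal128 and
  Decimal256 -> Decimal256.
- `cast_decimal_to_decimal` is made generic over the source and target
  decimal types and selects the 128-bit or 256-bit code path from
  `VALUE_LENGTH`. Casting a Decimal256 value to Decimal128 returns
  `InvalidArgumentError` if the rescaled value does not fit in an i128.
- `validate_decimal256_precision` takes a `&BigInt` instead of a decimal
  string, so callers no longer format the value with `to_string()` only
  for it to be parsed again.
- Add the crate-private `Decimal256::to_big_int`.
- `ArrayData::with_data_type` accepts Decimal256 in addition to
  Decimal128.
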
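---
Review notes (not part of the commit message):

* The rescale factor is computed as `10_i128.pow(..)` on every path,
  including the Decimal256 ones. `10_i128.pow(n)` overflows for n >= 39
  (panic when overflow checks are on, wrap-around otherwise), so a scale
  difference that large is not handled. Building the factor as a `BigInt`
  on the 256-bit paths would avoid this. Presumably Decimal256 scales can
  differ by that much; confirm against the supported precision/scale
  limits.
* Decimal128 -> Decimal256 multiplies in i128 (`v.as_i128() * mul`)
  before widening to `BigInt`, so the product can overflow even when the
  target type could represent it.
* Nits: `format!` is applied to a literal with no arguments, and
  `.map(|v| Some(v))` can be written `.map(Some)`.
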
 arrow/src/array/array_decimal.rs           |   2 +-
 arrow/src/array/builder/decimal_builder.rs |   3 +-
 arrow/src/array/data.rs                    |  13 +-
 arrow/src/compute/kernels/cast.rs          | 260 +++++++++++++++++++--
 arrow/src/datatypes/datatype.rs            |  10 +-
 arrow/src/util/decimal.rs                  |   5 +
 6 files changed, 259 insertions(+), 34 deletions(-)

diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
index 9d7644befd6..7134f2767b2 100644
--- a/arrow/src/array/array_decimal.rs
+++ b/arrow/src/array/array_decimal.rs
@@ -360,7 +360,7 @@ impl BasicDecimalArray<Decimal256, Decimal256Array> for Decimal256Array {
     fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
         if precision < self.precision {
             for v in self.iter().flatten() {
-                validate_decimal256_precision(&v.to_string(), precision)?;
+                validate_decimal256_precision(&v.to_big_int(), precision)?;
             }
         }
         Ok(())
diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
index 22c1490e86f..a1f4b0ed398 100644
--- a/arrow/src/array/builder/decimal_builder.rs
+++ b/arrow/src/array/builder/decimal_builder.rs
@@ -202,8 +202,7 @@ impl Decimal256Builder {
         let value = if self.value_validation {
             let raw_bytes = value.raw_value();
             let integer = BigInt::from_signed_bytes_le(raw_bytes);
-            let value_str = integer.to_string();
-            validate_decimal256_precision(&value_str, self.precision)?;
+            validate_decimal256_precision(&integer, self.precision)?;
             value
         } else {
             value
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 43c43b04a51..3d2057bbc45 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -401,11 +401,17 @@ impl ArrayData {
     #[inline]
     pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self {
         assert!(
-            matches!(self.data_type, DataType::Decimal128(_, _)),
+            matches!(
+                self.data_type,
+                DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
+            ),
             "only DecimalType is supported for existing type"
         );
         assert!(
-            matches!(new_data_type, DataType::Decimal128(_, _)),
+            matches!(
+                new_data_type,
+                DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
+            ),
             "only DecimalType is supported for new datatype"
         );
         self.data_type = new_data_type;
@@ -1044,8 +1050,7 @@ impl ArrayData {
                     let offset = pos * 32;
                     let raw_bytes = &values[offset..offset + 32];
                     let integer = BigInt::from_signed_bytes_le(raw_bytes);
-                    let value_str = integer.to_string();
-                    validate_decimal256_precision(&value_str, *p)?;
+                    validate_decimal256_precision(&integer, *p)?;
                 }
                 Ok(())
             }
diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
index c6b8f477986..dcbe313949d 100644
--- a/arrow/src/compute/kernels/cast.rs
+++ b/arrow/src/compute/kernels/cast.rs
@@ -36,6 +36,7 @@
 //! ```
 
 use chrono::Timelike;
+use std::ops::{Div, Mul};
 use std::str;
 use std::sync::Arc;
 
@@ -50,10 +51,11 @@ use crate::temporal_conversions::{
     EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS,
     SECONDS_IN_DAY,
 };
+use crate::util::decimal::BasicDecimal;
 use crate::{array::*, compute::take};
 use crate::{buffer::Buffer, util::serialization::lexical_to_string};
 use num::cast::AsPrimitive;
-use num::{NumCast, ToPrimitive};
+use num::{BigInt, NumCast, ToPrimitive};
 
 /// CastOptions provides a way to override the default cast behaviors
 #[derive(Debug)]
@@ -78,6 +80,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
         // TODO UTF8/unsigned numeric to decimal
         // cast one decimal type to another decimal type
         (Decimal128(_, _), Decimal128(_, _)) => true,
+        (Decimal256(_, _), Decimal256(_, _)) => true,
+        (Decimal128(_, _), Decimal256(_, _)) => true,
+        (Decimal256(_, _), Decimal128(_, _)) => true,
         // signed numeric to decimal
         (Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) |
         // decimal to signed numeric
@@ -431,9 +436,30 @@ pub fn cast_with_options(
         return Ok(array.clone());
     }
     match (from_type, to_type) {
-        (Decimal128(_, s1), Decimal128(p2, s2)) => {
-            cast_decimal_to_decimal(array, s1, p2, s2)
-        }
+        (Decimal128(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::<
+            crate::util::decimal::Decimal128,
+            crate::util::decimal::Decimal128,
+            Decimal128Array,
+            Decimal128Array,
+        >(array, s1, p2, s2),
+        (Decimal256(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::<
+            crate::util::decimal::Decimal256,
+            crate::util::decimal::Decimal256,
+            Decimal256Array,
+            Decimal256Array,
+        >(array, s1, p2, s2),
+        (Decimal128(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::<
+            crate::util::decimal::Decimal128,
+            crate::util::decimal::Decimal256,
+            Decimal128Array,
+            Decimal256Array,
+        >(array, s1, p2, s2),
+        (Decimal256(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::<
+            crate::util::decimal::Decimal256,
+            crate::util::decimal::Decimal128,
+            Decimal256Array,
+            Decimal128Array,
+        >(array, s1, p2, s2),
         (Decimal128(_, scale), _) => {
             // cast decimal to other type
             match to_type {
@@ -1252,34 +1278,124 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
 }
 
 /// Cast one type of decimal array to another type of decimal array
-fn cast_decimal_to_decimal(
+fn cast_decimal_to_decimal<
+    T1: BasicDecimal,
+    T2: BasicDecimal,
+    D1: BasicDecimalArray<T1, D1> + From<ArrayData>,
+    D2: BasicDecimalArray<T2, D2> + From<ArrayData>,
+>(
     array: &ArrayRef,
     input_scale: &usize,
     output_precision: &usize,
     output_scale: &usize,
 ) -> Result<ArrayRef> {
-    let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
-
-    let output_array = if input_scale > output_scale {
+    if input_scale > output_scale {
         // For example, input_scale is 4 and output_scale is 3;
         // Original value is 11234_i128, and will be cast to 1123_i128.
         let div = 10_i128.pow((input_scale - output_scale) as u32);
-        array
-            .iter()
-            .map(|v| v.map(|v| v.as_i128() / div))
-            .collect::<Decimal128Array>()
+        if D1::VALUE_LENGTH == 16 {
+            let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            let iter = array.iter().map(|v| v.map(|v| v.as_i128() / div));
+            if D2::VALUE_LENGTH == 16 {
+                let output_array = iter
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            } else {
+                let output_array = iter
+                    .map(|v| v.map(|v| BigInt::from(v)))
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            }
+        } else {
+            let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
+            let iter = array.iter().map(|v| v.map(|v| v.to_big_int().div(div)));
+            if D2::VALUE_LENGTH == 16 {
+                let values = iter
+                    .map(|v| {
+                        if v.is_none() {
+                            Ok(None)
+                        } else {
+                            v.and_then(|v| v.to_i128())
+                                .ok_or(ArrowError::InvalidArgumentError(format!(
+                                    "Cannot be casted to 128-bit integer for Decimal128",
+                                )))
+                                .map(|v| Some(v))
+                        }
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+
+                let output_array = values
+                    .into_iter()
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            } else {
+                let output_array = iter
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            }
+        }
     } else {
         // For example, input_scale is 3 and output_scale is 4;
         // Original value is 1123_i128, and will be cast to 11230_i128.
         let mul = 10_i128.pow((output_scale - input_scale) as u32);
-        array
-            .iter()
-            .map(|v| v.map(|v| v.as_i128() * mul))
-            .collect::<Decimal128Array>()
-    }
-    .with_precision_and_scale(*output_precision, *output_scale)?;
+        if D1::VALUE_LENGTH == 16 {
+            let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
+            let iter = array.iter().map(|v| v.map(|v| v.as_i128() * mul));
+            if D2::VALUE_LENGTH == 16 {
+                let output_array = iter
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            } else {
+                let output_array = iter
+                    .map(|v| v.map(|v| BigInt::from(v)))
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
 
-    Ok(Arc::new(output_array))
+                Ok(Arc::new(output_array))
+            }
+        } else {
+            let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
+            let iter = array.iter().map(|v| v.map(|v| v.to_big_int().mul(mul)));
+            if D2::VALUE_LENGTH == 16 {
+                let values = iter
+                    .map(|v| {
+                        if v.is_none() {
+                            Ok(None)
+                        } else {
+                            v.and_then(|v| v.to_i128())
+                                .ok_or(ArrowError::InvalidArgumentError(format!(
+                                    "Cannot be casted to 128-bit integer for Decimal128",
+                                )))
+                                .map(|v| Some(v))
+                        }
+                    })
+                    .collect::<Result<Vec<_>>>()?;
+
+                let output_array = values
+                    .into_iter()
+                    .collect::<Decimal128Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            } else {
+                let output_array = iter
+                    .collect::<Decimal256Array>()
+                    .with_precision_and_scale(*output_precision, *output_scale)?;
+
+                Ok(Arc::new(output_array))
+            }
+        }
+    }
 }
 
 /// Cast an array by changing its array_data type to the desired type
@@ -2422,7 +2538,7 @@ mod tests {
     use super::*;
     use crate::array::BasicDecimalArray;
     use crate::datatypes::TimeUnit;
-    use crate::util::decimal::Decimal128;
+    use crate::util::decimal::{Decimal128, Decimal256};
     use crate::{buffer::Buffer, util::display::array_value_to_string};
 
     macro_rules! generate_cast_test_case {
@@ -2461,8 +2577,19 @@ mod tests {
             .with_precision_and_scale(precision, scale)
     }
 
+    fn create_decimal256_array(
+        array: Vec<Option<BigInt>>,
+        precision: usize,
+        scale: usize,
+    ) -> Result<Decimal256Array> {
+        array
+            .into_iter()
+            .collect::<Decimal256Array>()
+            .with_precision_and_scale(precision, scale)
+    }
+
     #[test]
-    fn test_cast_decimal_to_decimal() {
+    fn test_cast_decimal128_to_decimal128() {
         let input_type = DataType::Decimal128(20, 3);
         let output_type = DataType::Decimal128(20, 4);
         assert!(can_cast_types(&input_type, &output_type));
@@ -2490,6 +2617,97 @@ mod tests {
                    result.unwrap_err().to_string());
     }
 
+    #[test]
+    fn test_cast_decimal128_to_decimal256() {
+        let input_type = DataType::Decimal128(20, 3);
+        let output_type = DataType::Decimal256(20, 4);
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![Some(1123456), Some(2123456), Some(3123456), None];
+        let input_decimal_array = create_decimal_array(&array, 20, 3).unwrap();
+        let array = Arc::new(input_decimal_array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &output_type,
+            vec![
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                None
+            ]
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal256_to_decimal128() {
+        let input_type = DataType::Decimal256(20, 3);
+        let output_type = DataType::Decimal128(20, 4);
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![
+            Some(BigInt::from(1123456)),
+            Some(BigInt::from(2123456)),
+            Some(BigInt::from(3123456)),
+            None,
+        ];
+        let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap();
+        let array = Arc::new(input_decimal_array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            Decimal128Array,
+            &output_type,
+            vec![
+                Some(Decimal128::new_from_i128(20, 4, 11234560_i128)),
+                Some(Decimal128::new_from_i128(20, 4, 21234560_i128)),
+                Some(Decimal128::new_from_i128(20, 4, 31234560_i128)),
+                None
+            ]
+        );
+    }
+
+    #[test]
+    fn test_cast_decimal256_to_decimal256() {
+        let input_type = DataType::Decimal256(20, 3);
+        let output_type = DataType::Decimal256(20, 4);
+        assert!(can_cast_types(&input_type, &output_type));
+        let array = vec![
+            Some(BigInt::from(1123456)),
+            Some(BigInt::from(2123456)),
+            Some(BigInt::from(3123456)),
+            None,
+        ];
+        let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap();
+        let array = Arc::new(input_decimal_array) as ArrayRef;
+        generate_cast_test_case!(
+            &array,
+            Decimal256Array,
+            &output_type,
+            vec![
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                Some(
+                    Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4)
+                        .unwrap()
+                ),
+                None
+            ]
+        );
+    }
+
     #[test]
     fn test_cast_decimal_to_numeric() {
         let decimal_type = DataType::Decimal128(38, 2);
diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
index 034920d3753..bf91dbf4cb4 100644
--- a/arrow/src/datatypes/datatype.rs
+++ b/arrow/src/datatypes/datatype.rs
@@ -483,9 +483,9 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul
 /// interpreted as a Decimal256 number with precision `precision`
 #[inline]
 pub(crate) fn validate_decimal256_precision(
-    value: &str,
+    value: &BigInt,
     precision: usize,
-) -> Result<BigInt> {
+) -> Result<&BigInt> {
     if precision > 38 {
         let max_str = MAX_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
         let min_str = MIN_DECIMAL_FOR_LARGER_PRECISION[precision - 38 - 1];
@@ -493,13 +493,12 @@ pub(crate) fn validate_decimal256_precision(
         let max = BigInt::from_str_radix(max_str, 10).unwrap();
         let min = BigInt::from_str_radix(min_str, 10).unwrap();
 
-        let value = BigInt::from_str_radix(value, 10).unwrap();
-        if value > max {
+        if value > &max {
             Err(ArrowError::InvalidArgumentError(format!(
                 "{} is too large to store in a Decimal256 of precision {}. Max is {}",
                 value, precision, max
             )))
-        } else if value < min {
+        } else if value < &min {
             Err(ArrowError::InvalidArgumentError(format!(
                 "{} is too small to store in a Decimal256 of precision {}. Min is {}",
                 value, precision, min
@@ -510,7 +509,6 @@ pub(crate) fn validate_decimal256_precision(
     } else {
         let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1];
         let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1];
-        let value = BigInt::from_str_radix(value, 10).unwrap();
 
         if value.to_i128().unwrap() > max {
             Err(ArrowError::InvalidArgumentError(format!(
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
index 62a95079537..8a64076cc54 100644
--- a/arrow/src/util/decimal.rs
+++ b/arrow/src/util/decimal.rs
@@ -178,6 +178,11 @@ impl Decimal256 {
         bytes[0..num_bytes.len()].clone_from_slice(num_bytes);
         Decimal256::try_new_from_bytes(precision, scale, &bytes)
     }
+
+    /// Constructs a `BigInt` from this `Decimal256` value.
+    pub(crate) fn to_big_int(&self) -> BigInt {
+        BigInt::from_signed_bytes_le(&self.value)
+    }
 }
 
 macro_rules! def_decimal {