From e4887220f2ba4bb90e1ea065662e4ba6d1be4d38 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh <viirya@gmail.com>
Date: Sun, 19 Jun 2022 21:47:52 -0700
Subject: [PATCH] Add Decimal256

---
 arrow/src/array/array_binary.rs |   2 +-
 arrow/src/util/decimal.rs       | 269 ++++++++++++++++++++++++--------
 2 files changed, 205 insertions(+), 66 deletions(-)
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 3efb25888be..ba3fa13ff32 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -33,7 +33,7 @@ use crate::datatypes::{
 };
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;
-use crate::util::decimal::Decimal128;
+use crate::util::decimal::{BasicDecimal, Decimal128};
 use crate::{buffer::MutableBuffer, datatypes::DataType};
 
 /// See [`BinaryArray`] and [`LargeBinaryArray`] for storing
diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs
index b78af3acc6c..b135ba33977 100644
--- a/arrow/src/util/decimal.rs
+++ b/arrow/src/util/decimal.rs
@@ -17,109 +17,211 @@
 
 //! Decimal related utils
 
+use crate::error::{ArrowError, Result};
+use num::bigint::BigInt;
 use std::cmp::Ordering;
 
+/// Common interface of fixed-width decimals backed by little-endian integer bytes.
+pub trait BasicDecimal: PartialOrd + Ord + PartialEq + Eq {
+    /// The bit-width of the internal representation.
+    const BIT_WIDTH: usize;
+
+    /// Tries to create a decimal value from precision, scale and bytes.
+    /// Returns `ArrowError::InvalidArgumentError` unless `bytes` is exactly
+    /// `BIT_WIDTH / 8` long. The bytes should be stored in little-endian order.
+    ///
+    /// Note: this does not validate that the value represented by the bytes
+    /// fits into the specified precision.
+    fn try_new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        if bytes.len() == Self::BIT_WIDTH / 8 {
+            Ok(Self::new(precision, scale, bytes))
+        } else {
+            Err(ArrowError::InvalidArgumentError(format!(
+                "Input to Decimal{} must be {} bytes",
+                Self::BIT_WIDTH,
+                Self::BIT_WIDTH / 8
+            )))
+        }
+    }
+
+    /// Creates a decimal value from precision, scale, and bytes.
+    ///
+    /// Unlike `try_new_from_bytes`, no error is returned for a wrong byte length;
+    /// the implementations in this file panic instead (`try_into().unwrap()`).
+    /// Use `try_new_from_bytes` for a checked constructor.
+    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self;
+
+    /// Returns the raw little-endian bytes of the decimal's integer representation.
+    fn raw_value(&self) -> &[u8];
+
+    /// Returns the precision of the decimal.
+    fn precision(&self) -> usize;
+
+    /// Returns the scale of the decimal.
+    fn scale(&self) -> usize;
+
+    /// Returns the string representation of the decimal, with `scale` fractional digits.
+    fn as_string(&self) -> String {
+        let raw_bytes = self.raw_value();
+        let integer = BigInt::from_signed_bytes_le(raw_bytes);
+        let value_str = integer.to_string();
+
+        if self.scale() == 0 {
+            value_str
+        } else {
+            let (sign, rest) =
+                value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 });
+
+            if rest.len() > self.scale() {
+                // More digits than `scale`: the separator falls inside the digit string
+                let (whole, decimal) = value_str.split_at(value_str.len() - self.scale());
+                format!("{}.{}", whole, decimal)
+            } else {
+                // At most `scale` digits: emit "0." and left-pad the fraction with zeros
+                format!("{}0.{:0>width$}", sign, rest, width = self.scale())
+            }
+        }
+    }
+}
+
 /// Represents a decimal value with precision and scale.
-/// The decimal value is represented by a signed 128-bit integer.
+/// The decimal value could be represented by a signed 128-bit integer.
 #[derive(Debug)]
 pub struct Decimal128 {
     #[allow(dead_code)]
     precision: usize,
     scale: usize,
-    value: i128,
+    value: [u8; 16],
 }
 
-impl PartialOrd for Decimal128 {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.partial_cmp(&other.value)
+impl BasicDecimal for Decimal128 {
+    const BIT_WIDTH: usize = 128;
+
+    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
+        Decimal128 {
+            precision,
+            scale,
+            value: bytes.try_into().unwrap(),
+        }
     }
-}
 
-impl Ord for Decimal128 {
-    fn cmp(&self, other: &Self) -> Ordering {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.cmp(&other.value)
+    fn raw_value(&self) -> &[u8] {
+        &self.value
     }
-}
 
-impl PartialEq<Self> for Decimal128 {
-    fn eq(&self, other: &Self) -> bool {
-        assert_eq!(
-            self.scale, other.scale,
-            "Cannot compare two Decimal128 with different scale: {}, {}",
-            self.scale, other.scale
-        );
-        self.value.eq(&other.value)
+    fn precision(&self) -> usize {
+        self.precision
     }
-}
 
-impl Eq for Decimal128 {}
+    fn scale(&self) -> usize {
+        self.scale
+    }
+}
 
 impl Decimal128 {
-    pub fn new_from_bytes(precision: usize, scale: usize, bytes: &[u8]) -> Self {
-        let as_array = bytes.try_into();
-        let value = match as_array {
-            Ok(v) if bytes.len() == 16 => i128::from_le_bytes(v),
-            _ => panic!("Input to Decimal128 is not 128bit integer."),
-        };
-
+    /// Creates `Decimal128` from an `i128` value.
+    pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
         Decimal128 {
             precision,
             scale,
-            value,
+            value: value.to_le_bytes(),
         }
     }
 
-    pub fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self {
-        Decimal128 {
+    /// Returns `i128` representation of the decimal.
+    pub fn as_i128(&self) -> i128 {
+        i128::from_le_bytes(self.value)
+    }
+}
+
+impl From<Decimal128> for i128 {
+    fn from(decimal: Decimal128) -> Self {
+        decimal.as_i128()
+    }
+}
+
+/// Represents a decimal value with precision and scale.
+/// The value is held as 32 raw bytes, read as a little-endian signed 256-bit integer.
+#[derive(Debug)]
+pub struct Decimal256 {
+    #[allow(dead_code)]
+    precision: usize,
+    scale: usize,
+    value: [u8; 32],
+}
+
+impl BasicDecimal for Decimal256 {
+    const BIT_WIDTH: usize = 256;
+
+    fn new(precision: usize, scale: usize, bytes: &[u8]) -> Self {
+        Decimal256 {
             precision,
             scale,
-            value,
+            value: bytes.try_into().unwrap(),
         }
     }
 
-    pub fn as_i128(&self) -> i128 {
-        self.value
+    fn raw_value(&self) -> &[u8] {
+        &self.value
     }
 
-    pub fn as_string(&self) -> String {
-        let value_str = self.value.to_string();
+    fn precision(&self) -> usize {
+        self.precision
+    }
 
-        if self.scale == 0 {
-            value_str
-        } else {
-            let (sign, rest) = value_str.split_at(if self.value >= 0 { 0 } else { 1 });
+    fn scale(&self) -> usize {
+        self.scale
+    }
+}
 
-            if rest.len() > self.scale {
-                // Decimal separator is in the middle of the string
-                let (whole, decimal) = value_str.split_at(value_str.len() - self.scale);
-                format!("{}.{}", whole, decimal)
-            } else {
-                // String has to be padded
-                format!("{}0.{:0>width$}", sign, rest, width = self.scale)
+// Implements `PartialOrd`/`Ord`/`PartialEq`/`Eq` for a decimal with little-endian `value` bytes.
+macro_rules! def_eq_ord_for_decimal {
+    ($ty:ident) => {
+        impl PartialOrd for $ty {
+            fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+                Some(self.cmp(other))
             }
         }
-    }
-}
 
-impl From<Decimal128> for i128 {
-    fn from(decimal: Decimal128) -> Self {
-        decimal.as_i128()
-    }
+        impl Ord for $ty {
+            fn cmp(&self, other: &Self) -> Ordering {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                // `value` is little-endian two's complement: the last byte carries the
+                // sign (compare as `i8`); ties fall to the other bytes, unsigned, high to low.
+                let (lhs, rhs) = (&self.value, &other.value);
+                let sign_order = (lhs[lhs.len() - 1] as i8).cmp(&(rhs[rhs.len() - 1] as i8));
+                sign_order.then_with(|| lhs.iter().rev().cmp(rhs.iter().rev()))
+            }
+        }
+
+        impl PartialEq<Self> for $ty {
+            fn eq(&self, other: &Self) -> bool {
+                assert_eq!(
+                    self.scale, other.scale,
+                    "Cannot compare two Decimals with different scale: {}, {}",
+                    self.scale, other.scale
+                );
+                self.value.eq(&other.value)
+            }
+        }
+
+        impl Eq for $ty {}
+    };
 }
 
+def_eq_ord_for_decimal!(Decimal128);
+def_eq_ord_for_decimal!(Decimal256);
+
 #[cfg(test)]
 mod tests {
-    use crate::util::decimal::Decimal128;
+    use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
 
     #[test]
     fn decimal_128_to_string() {
@@ -132,9 +234,46 @@ mod tests {
 
     #[test]
     fn decimal_128_from_bytes() {
-        let bytes = 100_i128.to_le_bytes();
-        let value = Decimal128::new_from_bytes(5, 2, &bytes);
+        let mut bytes = 100_i128.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.as_string(), "1.00");
+
+        bytes = (-1_i128).to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.as_string(), "-0.01");
+
+        bytes = i128::MAX.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(
+            value.as_string(),
+            "1701411834604692317316873037158841057.27"
+        );
+
+        bytes = i128::MIN.to_le_bytes();
+        let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(
+            value.as_string(),
+            "-1701411834604692317316873037158841057.28"
+        );
+    }
+
+    #[test]
+    fn decimal_256_from_bytes() {
+        let mut bytes = vec![0; 32];
+        bytes[0..16].clone_from_slice(&100_i128.to_le_bytes());
+        let value = Decimal256::try_new_from_bytes(5, 2, bytes.as_slice()).unwrap();
         assert_eq!(value.as_string(), "1.00");
+
+        bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes());
+        let value = Decimal256::try_new_from_bytes(5, 4, &bytes).unwrap();
+        assert_eq!(
+            value.as_string(),
+            "17014118346046923173168730371588410.5727"
+        );
+
+        bytes = vec![255; 32];
+        let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap();
+        assert_eq!(value.as_string(), "-0.01");
     }
 
     fn i128_func(value: impl Into<i128>) -> i128 {