Add precision/scale check. Add decimal128 and decimal256 iters.

apache · Jul 22, 2022 · 0e7a5cd · 0e7a5cd
1 parent 91b2d7e
commit 0e7a5cd
Show file tree

Hide file tree

Showing 6 changed files with 182 additions and 44 deletions.
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
@@ -24,7 +24,6 @@ use super::{
     FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
 };
 use crate::array::array::ArrayAccessor;
-pub use crate::array::DecimalIter;
 use crate::buffer::Buffer;
 use crate::error::{ArrowError, Result};
 use crate::util::bit_util;

diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use crate::array::{ArrayAccessor, Decimal128Iter, Decimal256Iter};
 use std::borrow::Borrow;
 use std::convert::From;
 use std::fmt;
@@ -24,13 +25,11 @@ use super::{
     array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
 };
 use super::{BooleanBufferBuilder, FixedSizeBinaryArray};
+#[allow(deprecated)]
 pub use crate::array::DecimalIter;
 use crate::buffer::Buffer;
-use crate::datatypes::DataType;
-use crate::datatypes::{
-    validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
-    DECIMAL_MAX_SCALE,
-};
+use crate::datatypes::{validate_decimal_precision, DECIMAL_DEFAULT_SCALE};
+use crate::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE};
 use crate::error::{ArrowError, Result};
 use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
 
@@ -103,11 +102,18 @@ pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
 
     /// Returns the element at index `i`.
     fn value(&self, i: usize) -> T {
-        let data = self.data();
-        assert!(i < data.len(), "Out of bounds access");
+        assert!(i < self.data().len(), "Out of bounds access");
 
+        unsafe { self.value_unchecked(i) }
+    }
+
+    /// Returns the element at index `i`.
+    /// # Safety
+    /// Caller is responsible for ensuring that the index is within the bounds of the array
+    unsafe fn value_unchecked(&self, i: usize) -> T {
+        let data = self.data();
         let offset = i + data.offset();
-        let raw_val = unsafe {
+        let raw_val = {
             let pos = self.value_offset_at(offset);
             std::slice::from_raw_parts(
                 self.raw_value_data_ptr().offset(pos as isize),
@@ -270,24 +276,24 @@ impl Decimal128Array {
     /// specified precision.
     ///
     /// Returns an Error if:
-    /// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
-    /// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`];
+    /// 1. `precision` is larger than [`DECIMAL128_MAX_PRECISION`]
+    /// 2. `scale` is larger than [`DECIMAL128_MAX_SCALE`];
     /// 3. `scale` is > `precision`
     pub fn with_precision_and_scale(
         mut self,
         precision: usize,
         scale: usize,
     ) -> Result<Self> {
-        if precision > DECIMAL_MAX_PRECISION {
+        if precision > DECIMAL128_MAX_PRECISION {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "precision {} is greater than max {}",
-                precision, DECIMAL_MAX_PRECISION
+                precision, DECIMAL128_MAX_PRECISION
             )));
         }
-        if scale > DECIMAL_MAX_SCALE {
+        if scale > DECIMAL128_MAX_SCALE {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "scale {} is greater than max {}",
-                scale, DECIMAL_MAX_SCALE
+                scale, DECIMAL128_MAX_SCALE
             )));
         }
         if scale > precision {
@@ -302,7 +308,7 @@ impl Decimal128Array {
         // decreased
         if precision < self.precision {
             for v in self.iter().flatten() {
-                validate_decimal_precision(v, precision)?;
+                validate_decimal_precision(v.as_i128(), precision)?;
             }
         }
 
@@ -322,7 +328,7 @@ impl Decimal128Array {
     /// The default precision and scale used when not specified.
     pub fn default_type() -> DataType {
         // Keep maximum precision
-        DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
+        DataType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
     }
 }
 
@@ -368,19 +374,13 @@ impl From<ArrayData> for Decimal256Array {
     }
 }
 
-impl<'a> IntoIterator for &'a Decimal128Array {
-    type Item = Option<i128>;
-    type IntoIter = DecimalIter<'a>;
-
-    fn into_iter(self) -> Self::IntoIter {
-        DecimalIter::<'a>::new(self)
-    }
-}
-
 impl<'a> Decimal128Array {
-    /// constructs a new iterator
-    pub fn iter(&'a self) -> DecimalIter<'a> {
-        DecimalIter::new(self)
+    /// Constructs a new iterator that iterates `Decimal128` values as i128 values.
+    /// This is kept mostly for back-compatibility purpose.
+    /// Suggests to use `iter()` that returns `Decimal128Iter`.
+    #[allow(deprecated)]
+    pub fn i128_iter(&'a self) -> DecimalIter<'a> {
+        DecimalIter::<'a>::new(self)
     }
 }
 
@@ -421,7 +421,7 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for Decimal128Array {
 }
 
 macro_rules! def_decimal_array {
-    ($ty:ident, $array_name:expr) => {
+    ($ty:ident, $array_name:expr, $decimal_ty:ident, $iter_ty:ident) => {
         impl private_decimal::DecimalArrayPrivate for $ty {
             fn raw_value_data_ptr(&self) -> *const u8 {
                 self.value_data.as_ptr()
@@ -463,15 +463,55 @@ macro_rules! def_decimal_array {
                 write!(f, "]")
             }
         }
+
+        impl<'a> ArrayAccessor for &'a $ty {
+            type Item = $decimal_ty;
+
+            fn value(&self, index: usize) -> Self::Item {
+                $ty::value(self, index)
+            }
+
+            unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
+                $ty::value_unchecked(self, index)
+            }
+        }
+
+        impl<'a> IntoIterator for &'a $ty {
+            type Item = Option<$decimal_ty>;
+            type IntoIter = $iter_ty<'a>;
+
+            fn into_iter(self) -> Self::IntoIter {
+                $iter_ty::<'a>::new(self)
+            }
+        }
+
+        impl<'a> $ty {
+            /// constructs a new iterator
+            pub fn iter(&'a self) -> $iter_ty<'a> {
+                $iter_ty::<'a>::new(self)
+            }
+        }
     };
 }
 
-def_decimal_array!(Decimal128Array, "Decimal128Array");
-def_decimal_array!(Decimal256Array, "Decimal256Array");
+def_decimal_array!(
+    Decimal128Array,
+    "Decimal128Array",
+    Decimal128,
+    Decimal128Iter
+);
+def_decimal_array!(
+    Decimal256Array,
+    "Decimal256Array",
+    Decimal256,
+    Decimal256Iter
+);
 
 #[cfg(test)]
 mod tests {
+    use crate::array::Decimal256Builder;
     use crate::{array::Decimal128Builder, datatypes::Field};
+    use num::{BigInt, Num};
 
     use super::*;
 
@@ -567,7 +607,7 @@ mod tests {
         let data = vec![Some(-100), None, Some(101)];
         let array: Decimal128Array = data.clone().into_iter().collect();
 
-        let collected: Vec<_> = array.iter().collect();
+        let collected: Vec<_> = array.iter().map(|d| d.map(|v| v.as_i128())).collect();
         assert_eq!(data, collected);
     }
 
@@ -576,7 +616,8 @@ mod tests {
         let data = vec![Some(-100), None, Some(101)];
         let array: Decimal128Array = data.clone().into_iter().collect();
 
-        let collected: Vec<_> = array.into_iter().collect();
+        let collected: Vec<_> =
+            array.into_iter().map(|d| d.map(|v| v.as_i128())).collect();
         assert_eq!(data, collected);
     }
 
@@ -750,4 +791,24 @@ mod tests {
         assert!(decimal.is_null(0));
         assert_eq!(decimal.value_as_string(1), "56".to_string());
     }
+
+    #[test]
+    fn test_decimal256_iter() {
+        // TODO: Impl FromIterator for Decimal256Array
+        let mut builder = Decimal256Builder::new(30, 76, 6);
+        let value = BigInt::from_str_radix("12345", 10).unwrap();
+        let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
+        builder.append_value(&decimal1).unwrap();
+
+        builder.append_null();
+
+        let value = BigInt::from_str_radix("56789", 10).unwrap();
+        let decimal2 = Decimal256::from_big_int(&value, 76, 6).unwrap();
+        builder.append_value(&decimal2).unwrap();
+
+        let array: Decimal256Array = builder.finish();
+
+        let collected: Vec<_> = array.iter().collect();
+        assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
+    }
 }
diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::array::array::ArrayAccessor;
-use crate::array::BasicDecimalArray;
+use crate::array::{BasicDecimalArray, Decimal256Array};
 
 use super::{
     Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
@@ -104,15 +104,25 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
 pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
 pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
 
+/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
+/// [`Decimal128Array`]
+pub type Decimal128Iter<'a> = ArrayIter<&'a Decimal128Array>;
+
+/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
+/// [`Decimal256Array`]
+pub type Decimal256Iter<'a> = ArrayIter<&'a Decimal256Array>;
+
 /// an iterator that returns `Some(i128)` or `None`, that can be used on a
 /// [`Decimal128Array`]
 #[derive(Debug)]
+#[deprecated(note = "Please use `Decimal128Iter` instead")]
 pub struct DecimalIter<'a> {
     array: &'a Decimal128Array,
     current: usize,
     current_end: usize,
 }
 
+#[allow(deprecated)]
 impl<'a> DecimalIter<'a> {
     pub fn new(array: &'a Decimal128Array) -> Self {
         Self {
@@ -123,6 +133,7 @@ impl<'a> DecimalIter<'a> {
     }
 }
 
+#[allow(deprecated)]
 impl<'a> std::iter::Iterator for DecimalIter<'a> {
     type Item = Option<i128>;
 
@@ -150,6 +161,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
 }
 
 /// iterator has known size.
+#[allow(deprecated)]
 impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}
 
 #[cfg(test)]

diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
@@ -1205,15 +1205,15 @@ fn cast_decimal_to_decimal(
         let div = 10_i128.pow((input_scale - output_scale) as u32);
         array
             .iter()
-            .map(|v| v.map(|v| v / div))
+            .map(|v| v.map(|v| v.as_i128() / div))
             .collect::<Decimal128Array>()
     } else {
         // For example, input_scale is 3 and output_scale is 4;
         // Original value is 1123_i128, and will be cast to 11230_i128.
         let mul = 10_i128.pow((output_scale - input_scale) as u32);
         array
             .iter()
-            .map(|v| v.map(|v| v * mul))
+            .map(|v| v.map(|v| v.as_i128() * mul))
             .collect::<Decimal128Array>()
     }
     .with_precision_and_scale(*output_precision, *output_scale)?;

diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs
@@ -431,10 +431,10 @@ pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
 ];
 
 /// The maximum precision for [DataType::Decimal] values
-pub const DECIMAL_MAX_PRECISION: usize = 38;
+pub const DECIMAL128_MAX_PRECISION: usize = 38;
 
 /// The maximum scale for [DataType::Decimal] values
-pub const DECIMAL_MAX_SCALE: usize = 38;
+pub const DECIMAL128_MAX_SCALE: usize = 38;
 
 /// The maximum precision for [DataType::Decimal256] values
 pub const DECIMAL256_MAX_PRECISION: usize = 76;