RFC: Simplify decimal (#2440) (#2477)

* Simplify decimal (#2440) * Format * Fix doc * Review feedback * Add docs * Fix logical merge conflict
apache · Aug 18, 2022 · 15f42b2 · 15f42b2
1 parent e60eef3
commit 15f42b2
Show file tree

Hide file tree

Showing 6 changed files with 195 additions and 180 deletions.
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
@@ -18,27 +18,30 @@
 use crate::array::ArrayAccessor;
 use std::convert::From;
 use std::fmt;
+use std::marker::PhantomData;
 use std::{any::Any, iter::FromIterator};
 
 use super::{
     array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
 };
-use super::{BasicDecimalIter, BooleanBufferBuilder, FixedSizeBinaryArray};
+use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray};
 #[allow(deprecated)]
-pub use crate::array::DecimalIter;
 use crate::buffer::{Buffer, MutableBuffer};
 use crate::datatypes::validate_decimal_precision;
-use crate::datatypes::{validate_decimal256_precision_with_lt_bytes, DataType};
+use crate::datatypes::{
+    validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type,
+    Decimal256Type, DecimalType, NativeDecimalType,
+};
 use crate::error::{ArrowError, Result};
-use crate::util::decimal::{BasicDecimal, Decimal256};
+use crate::util::decimal::{Decimal, Decimal256};
 
 /// `Decimal128Array` stores fixed width decimal numbers,
 /// with a fixed precision and scale.
 ///
 /// # Examples
 ///
 /// ```
-///    use arrow::array::{Array, BasicDecimalArray, Decimal128Array};
+///    use arrow::array::{Array, DecimalArray, Decimal128Array};
 ///    use arrow::datatypes::DataType;
 ///
 ///    // Create a DecimalArray with the default precision and scale
@@ -66,24 +69,29 @@ use crate::util::decimal::{BasicDecimal, Decimal256};
 ///    assert_eq!(6, decimal_array.scale());
 /// ```
 ///
-pub type Decimal128Array = BasicDecimalArray<16>;
+pub type Decimal128Array = DecimalArray<Decimal128Type>;
 
-pub type Decimal256Array = BasicDecimalArray<32>;
+/// `Decimal256Array` stores fixed width decimal numbers,
+/// with a fixed precision and scale
+pub type Decimal256Array = DecimalArray<Decimal256Type>;
 
-pub struct BasicDecimalArray<const BYTE_WIDTH: usize> {
+/// A generic [`Array`] for fixed width decimal numbers
+///
+/// See [`Decimal128Array`] and [`Decimal256Array`]
+pub struct DecimalArray<T: DecimalType> {
     data: ArrayData,
     value_data: RawPtrBox<u8>,
     precision: usize,
     scale: usize,
+    _phantom: PhantomData<T>,
 }
 
-impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
-    pub const VALUE_LENGTH: i32 = BYTE_WIDTH as i32;
-    const DEFAULT_TYPE: DataType = BasicDecimal::<BYTE_WIDTH>::DEFAULT_TYPE;
-    pub const MAX_PRECISION: usize = BasicDecimal::<BYTE_WIDTH>::MAX_PRECISION;
-    pub const MAX_SCALE: usize = BasicDecimal::<BYTE_WIDTH>::MAX_SCALE;
-    const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType =
-        BasicDecimal::<BYTE_WIDTH>::TYPE_CONSTRUCTOR;
+impl<T: DecimalType> DecimalArray<T> {
+    pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32;
+    const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE;
+    pub const MAX_PRECISION: usize = T::MAX_PRECISION;
+    pub const MAX_SCALE: usize = T::MAX_SCALE;
+    const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR;
 
     pub fn data(&self) -> &ArrayData {
         &self.data
@@ -100,7 +108,7 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
     }
 
     /// Returns the element at index `i`.
-    pub fn value(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
+    pub fn value(&self, i: usize) -> Decimal<T> {
         assert!(i < self.data().len(), "Out of bounds access");
 
         unsafe { self.value_unchecked(i) }
@@ -109,19 +117,17 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
     /// Returns the element at index `i`.
     /// # Safety
     /// Caller is responsible for ensuring that the index is within the bounds of the array
-    pub unsafe fn value_unchecked(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
+    pub unsafe fn value_unchecked(&self, i: usize) -> Decimal<T> {
         let data = self.data();
         let offset = i + data.offset();
         let raw_val = {
             let pos = self.value_offset_at(offset);
-            std::slice::from_raw_parts(
+            T::Native::from_slice(std::slice::from_raw_parts(
                 self.raw_value_data_ptr().offset(pos as isize),
                 Self::VALUE_LENGTH as usize,
-            )
-            .try_into()
-            .unwrap()
+            ))
         };
-        BasicDecimal::<BYTE_WIDTH>::new(self.precision(), self.scale(), raw_val)
+        Decimal::new(self.precision(), self.scale(), &raw_val)
     }
 
     /// Returns the offset for the element at index `i`.
@@ -304,7 +310,8 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
 
     // validate all the data in the array are valid within the new precision or not
     fn validate_data(&self, precision: usize) -> Result<()> {
-        match BYTE_WIDTH {
+        // TODO: Move into DecimalType
+        match Self::VALUE_LENGTH {
             16 => self
                 .as_any()
                 .downcast_ref::<Decimal128Array>()
@@ -376,15 +383,15 @@ impl Decimal256Array {
     }
 }
 
-impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH> {
+impl<T: DecimalType> From<ArrayData> for DecimalArray<T> {
     fn from(data: ArrayData) -> Self {
         assert_eq!(
             data.buffers().len(),
             1,
             "DecimalArray data should contain 1 buffer only (values)"
         );
         let values = data.buffers()[0].as_ptr();
-        let (precision, scale) = match (data.data_type(), BYTE_WIDTH) {
+        let (precision, scale) = match (data.data_type(), Self::VALUE_LENGTH) {
             (DataType::Decimal128(precision, scale), 16)
             | (DataType::Decimal256(precision, scale), 32) => (*precision, *scale),
             _ => panic!("Expected data type to be Decimal"),
@@ -394,27 +401,18 @@ impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH>
             value_data: unsafe { RawPtrBox::new(values) },
             precision,
             scale,
+            _phantom: Default::default(),
         }
     }
 }
 
-impl<'a> Decimal128Array {
-    /// Constructs a new iterator that iterates `Decimal128` values as i128 values.
-    /// This is kept mostly for back-compatibility purpose.
-    /// Suggests to use `iter()` that returns `Decimal128Iter`.
-    #[allow(deprecated)]
-    pub fn i128_iter(&'a self) -> DecimalIter<'a> {
-        DecimalIter::<'a>::new(self)
-    }
-}
-
-fn build_decimal_array_from<const BYTE_WIDTH: usize>(
+fn build_decimal_array_from<T: DecimalType>(
     null_buf: BooleanBufferBuilder,
     buffer: Buffer,
-) -> BasicDecimalArray<BYTE_WIDTH> {
+) -> DecimalArray<T> {
     let data = unsafe {
         ArrayData::new_unchecked(
-            BasicDecimalArray::<BYTE_WIDTH>::default_type(),
+            DecimalArray::<T>::default_type(),
             null_buf.len(),
             None,
             Some(null_buf.into()),
@@ -423,7 +421,7 @@ fn build_decimal_array_from<const BYTE_WIDTH: usize>(
             vec![],
         )
     };
-    BasicDecimalArray::<BYTE_WIDTH>::from(data)
+    DecimalArray::from(data)
 }
 
 impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
@@ -446,7 +444,7 @@ impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
             }
         });
 
-        build_decimal_array_from::<32>(null_buf, buffer.into())
+        build_decimal_array_from(null_buf, buffer.into())
     }
 }
 
@@ -471,11 +469,11 @@ impl<Ptr: Into<i128>> FromIterator<Option<Ptr>> for Decimal128Array {
             })
             .collect();
 
-        build_decimal_array_from::<16>(null_buf, buffer)
+        build_decimal_array_from(null_buf, buffer)
     }
 }
 
-impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
+impl<T: DecimalType> Array for DecimalArray<T> {
     fn as_any(&self) -> &dyn Any {
         self
     }
@@ -489,18 +487,18 @@ impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
     }
 }
 
-impl<const BYTE_WIDTH: usize> From<BasicDecimalArray<BYTE_WIDTH>> for ArrayData {
-    fn from(array: BasicDecimalArray<BYTE_WIDTH>) -> Self {
+impl<T: DecimalType> From<DecimalArray<T>> for ArrayData {
+    fn from(array: DecimalArray<T>) -> Self {
         array.data
     }
 }
 
-impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
+impl<T: DecimalType> fmt::Debug for DecimalArray<T> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(
             f,
             "Decimal{}Array<{}, {}>\n[\n",
-            BYTE_WIDTH * 8,
+            T::BYTE_LENGTH * 8,
             self.precision,
             self.scale
         )?;
@@ -513,31 +511,31 @@ impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
     }
 }
 
-impl<'a, const BYTE_WIDTH: usize> ArrayAccessor for &'a BasicDecimalArray<BYTE_WIDTH> {
-    type Item = BasicDecimal<BYTE_WIDTH>;
+impl<'a, T: DecimalType> ArrayAccessor for &'a DecimalArray<T> {
+    type Item = Decimal<T>;
 
     fn value(&self, index: usize) -> Self::Item {
-        BasicDecimalArray::<BYTE_WIDTH>::value(self, index)
+        DecimalArray::<T>::value(self, index)
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
-        BasicDecimalArray::<BYTE_WIDTH>::value_unchecked(self, index)
+        DecimalArray::<T>::value_unchecked(self, index)
     }
 }
 
-impl<'a, const BYTE_WIDTH: usize> IntoIterator for &'a BasicDecimalArray<BYTE_WIDTH> {
-    type Item = Option<BasicDecimal<BYTE_WIDTH>>;
-    type IntoIter = BasicDecimalIter<'a, BYTE_WIDTH>;
+impl<'a, T: DecimalType> IntoIterator for &'a DecimalArray<T> {
+    type Item = Option<Decimal<T>>;
+    type IntoIter = DecimalIter<'a, T>;
 
     fn into_iter(self) -> Self::IntoIter {
-        BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
+        DecimalIter::<'a, T>::new(self)
     }
 }
 
-impl<'a, const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
+impl<'a, T: DecimalType> DecimalArray<T> {
     /// constructs a new iterator
-    pub fn iter(&'a self) -> BasicDecimalIter<'a, BYTE_WIDTH> {
-        BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
+    pub fn iter(&'a self) -> DecimalIter<'a, T> {
+        DecimalIter::<'a, T>::new(self)
     }
 }
 

diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
@@ -365,7 +365,7 @@ mod tests {
             .expect("should not validate invalid value at builder");
 
         let array = builder.finish();
-        let array_data = array_decimal::BasicDecimalArray::data(&array);
+        let array_data = array_decimal::DecimalArray::data(&array);
         array_data.validate_values().unwrap();
     }
 }
diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs
@@ -16,11 +16,12 @@
 // under the License.
 
 use crate::array::array::ArrayAccessor;
-use crate::array::BasicDecimalArray;
+use crate::array::DecimalArray;
+use crate::datatypes::{Decimal128Type, Decimal256Type};
 
 use super::{
-    Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
-    GenericStringArray, PrimitiveArray,
+    BooleanArray, GenericBinaryArray, GenericListArray, GenericStringArray,
+    PrimitiveArray,
 };
 
 /// an iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
@@ -104,69 +105,14 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
 pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
 pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;
 
-pub type BasicDecimalIter<'a, const BYTE_WIDTH: usize> =
-    ArrayIter<&'a BasicDecimalArray<BYTE_WIDTH>>;
+pub type DecimalIter<'a, T> = ArrayIter<&'a DecimalArray<T>>;
 /// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
-/// [`Decimal128Array`]
-pub type Decimal128Iter<'a> = BasicDecimalIter<'a, 16>;
+/// [`super::Decimal128Array`]
+pub type Decimal128Iter<'a> = DecimalIter<'a, Decimal128Type>;
 
 /// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
 /// [`super::Decimal256Array`]
-pub type Decimal256Iter<'a> = BasicDecimalIter<'a, 32>;
-/// an iterator that returns `Some(i128)` or `None`, that can be used on a
-/// [`Decimal128Array`]
-#[derive(Debug)]
-#[deprecated(note = "Please use `Decimal128Iter` instead. \
-    `DecimalIter` iterates `Decimal128` values as i128 values. \
-    This is kept mostly for back-compatibility purpose. Suggests to use `Decimal128Array.iter()` \
-    that returns `Decimal128Iter`.")]
-pub struct DecimalIter<'a> {
-    array: &'a Decimal128Array,
-    current: usize,
-    current_end: usize,
-}
-
-#[allow(deprecated)]
-impl<'a> DecimalIter<'a> {
-    pub fn new(array: &'a Decimal128Array) -> Self {
-        Self {
-            array,
-            current: 0,
-            current_end: array.len(),
-        }
-    }
-}
-
-#[allow(deprecated)]
-impl<'a> std::iter::Iterator for DecimalIter<'a> {
-    type Item = Option<i128>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.current == self.current_end {
-            None
-        } else {
-            let old = self.current;
-            self.current += 1;
-            // TODO: Improve performance by avoiding bounds check here
-            // (by using adding a `value_unchecked, for example)
-            if self.array.is_null(old) {
-                Some(None)
-            } else {
-                Some(Some(self.array.value(old).as_i128()))
-            }
-        }
-    }
-
-    #[inline]
-    fn size_hint(&self) -> (usize, Option<usize>) {
-        let remain = self.current_end - self.current;
-        (remain, Some(remain))
-    }
-}
-
-/// iterator has known size.
-#[allow(deprecated)]
-impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}
+pub type Decimal256Iter<'a> = DecimalIter<'a, Decimal256Type>;
 
 #[cfg(test)]
 mod tests {

diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs
@@ -199,15 +199,12 @@ pub(crate) use self::data::BufferSpec;
 pub use self::array_binary::BinaryArray;
 pub use self::array_binary::LargeBinaryArray;
 pub use self::array_boolean::BooleanArray;
-pub use self::array_decimal::BasicDecimalArray;
 pub use self::array_decimal::Decimal128Array;
 pub use self::array_decimal::Decimal256Array;
+pub use self::array_decimal::DecimalArray;
 pub use self::array_fixed_size_binary::FixedSizeBinaryArray;
 pub use self::array_fixed_size_list::FixedSizeListArray;
 
-#[deprecated(note = "Please use `Decimal128Array` instead")]
-pub type DecimalArray = Decimal128Array;
-
 pub use self::array_dictionary::{DictionaryArray, TypedDictionaryArray};
 pub use self::array_list::LargeListArray;
 pub use self::array_list::ListArray;