Decimal precision scale datatype change (#2532)

* Change decimal array precison and scale to u8 * bug fixes
apache · Aug 20, 2022 · 7764334 · 7764334
1 parent 1d5656e
commit 7764334
Show file tree

Hide file tree

Showing 16 changed files with 124 additions and 121 deletions.
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
@@ -82,7 +82,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
     Arc::new(builder.finish())
 }
 
-fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
+fn build_decimal128_array(size: usize, precision: u8, scale: u8) -> ArrayRef {
     let mut rng = seedable_rng();
     let mut builder = Decimal128Builder::with_capacity(size, precision, scale);
 
@@ -92,7 +92,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR
     Arc::new(builder.finish())
 }
 
-fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
+fn build_decimal256_array(size: usize, precision: u8, scale: u8) -> ArrayRef {
     let mut rng = seedable_rng();
     let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
     let mut bytes = [0; 32];

diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
@@ -81,29 +81,29 @@ pub type Decimal256Array = DecimalArray<Decimal256Type>;
 pub struct DecimalArray<T: DecimalType> {
     data: ArrayData,
     value_data: RawPtrBox<u8>,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
     _phantom: PhantomData<T>,
 }
 
 impl<T: DecimalType> DecimalArray<T> {
     pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32;
     const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE;
-    pub const MAX_PRECISION: usize = T::MAX_PRECISION;
-    pub const MAX_SCALE: usize = T::MAX_SCALE;
-    const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR;
+    pub const MAX_PRECISION: u8 = T::MAX_PRECISION;
+    pub const MAX_SCALE: u8 = T::MAX_SCALE;
+    const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR;
 
     pub fn data(&self) -> &ArrayData {
         &self.data
     }
 
     /// Return the precision (total digits) that can be stored by this array
-    pub fn precision(&self) -> usize {
+    pub fn precision(&self) -> u8 {
         self.precision
     }
 
     /// Return the scale (digits after the decimal) that can be stored by this array
-    pub fn scale(&self) -> usize {
+    pub fn scale(&self) -> u8 {
         self.scale
     }
 
@@ -167,8 +167,8 @@ impl<T: DecimalType> DecimalArray<T> {
     /// range for a decimal
     pub fn from_fixed_size_binary_array(
         v: FixedSizeBinaryArray,
-        precision: usize,
-        scale: usize,
+        precision: u8,
+        scale: u8,
     ) -> Self {
         assert!(
             v.value_length() == Self::VALUE_LENGTH,
@@ -194,8 +194,8 @@ impl<T: DecimalType> DecimalArray<T> {
     #[deprecated(note = "please use `from_fixed_size_binary_array` instead")]
     pub fn from_fixed_size_list_array(
         v: FixedSizeListArray,
-        precision: usize,
-        scale: usize,
+        precision: u8,
+        scale: u8,
     ) -> Self {
         assert_eq!(
             v.data_ref().child_data().len(),
@@ -261,7 +261,7 @@ impl<T: DecimalType> DecimalArray<T> {
     /// 1. `precision` is larger than [`Self::MAX_PRECISION`]
     /// 2. `scale` is larger than [`Self::MAX_SCALE`];
     /// 3. `scale` is > `precision`
-    pub fn with_precision_and_scale(self, precision: usize, scale: usize) -> Result<Self>
+    pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result<Self>
     where
         Self: Sized,
     {
@@ -281,7 +281,7 @@ impl<T: DecimalType> DecimalArray<T> {
     }
 
     // validate that the new precision and scale are valid or not
-    fn validate_precision_scale(&self, precision: usize, scale: usize) -> Result<()> {
+    fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> {
         if precision > Self::MAX_PRECISION {
             return Err(ArrowError::InvalidArgumentError(format!(
                 "precision {} is greater than max {}",
@@ -309,7 +309,7 @@ impl<T: DecimalType> DecimalArray<T> {
     }
 
     // validate all the data in the array are valid within the new precision or not
-    fn validate_data(&self, precision: usize) -> Result<()> {
+    fn validate_data(&self, precision: u8) -> Result<()> {
         // TODO: Move into DecimalType
         match Self::VALUE_LENGTH {
             16 => self
@@ -350,7 +350,7 @@ impl Decimal128Array {
 
     // Validates decimal128 values in this array can be properly interpreted
     // with the specified precision.
-    fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
+    fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
         (0..self.len()).try_for_each(|idx| {
             if self.is_valid(idx) {
                 let decimal = unsafe { self.value_unchecked(idx) };
@@ -365,7 +365,7 @@ impl Decimal128Array {
 impl Decimal256Array {
     // Validates decimal256 values in this array can be properly interpreted
     // with the specified precision.
-    fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
+    fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
         (0..self.len()).try_for_each(|idx| {
             if self.is_valid(idx) {
                 let raw_val = unsafe {

diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs
@@ -37,8 +37,8 @@ use crate::util::decimal::Decimal256;
 #[derive(Debug)]
 pub struct Decimal128Builder {
     builder: FixedSizeBinaryBuilder,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 
     /// Should i128 values be validated for compatibility with scale and precision?
     /// defaults to true
@@ -51,8 +51,8 @@ pub struct Decimal128Builder {
 #[derive(Debug)]
 pub struct Decimal256Builder {
     builder: FixedSizeBinaryBuilder,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 
     /// Should decimal values be validated for compatibility with scale and precision?
     /// defaults to true
@@ -61,14 +61,15 @@ pub struct Decimal256Builder {
 
 impl Decimal128Builder {
     const BYTE_LENGTH: i32 = 16;
+
     /// Creates a new [`Decimal128Builder`]
-    pub fn new(precision: usize, scale: usize) -> Self {
+    pub fn new(precision: u8, scale: u8) -> Self {
         Self::with_capacity(1024, precision, scale)
     }
 
     /// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
     /// that can be appended without reallocating
-    pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
+    pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self {
         Self {
             builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
             precision,
@@ -160,14 +161,15 @@ impl ArrayBuilder for Decimal128Builder {
 
 impl Decimal256Builder {
     const BYTE_LENGTH: i32 = 32;
+
     /// Creates a new [`Decimal256Builder`]
-    pub fn new(precision: usize, scale: usize) -> Self {
+    pub fn new(precision: u8, scale: u8) -> Self {
         Self::with_capacity(1024, precision, scale)
     }
 
     /// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
     /// that can be appended without reallocating
-    pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
+    pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self {
         Self {
             builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
             precision,

diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
@@ -688,8 +688,8 @@ mod tests {
 
     fn create_decimal_array(
         array: Vec<Option<i128>>,
-        precision: usize,
-        scale: usize,
+        precision: u8,
+        scale: u8,
     ) -> Decimal128Array {
         array
             .into_iter()

diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
@@ -301,8 +301,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
 fn cast_primitive_to_decimal<T: ArrayAccessor, F>(
     array: T,
     op: F,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 ) -> Result<Arc<dyn Array>>
 where
     F: Fn(T::Item) -> i128,
@@ -318,8 +318,8 @@ where
 
 fn cast_integer_to_decimal<T: ArrowNumericType>(
     array: &PrimitiveArray<T>,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 ) -> Result<Arc<dyn Array>>
 where
     <T as ArrowPrimitiveType>::Native: AsPrimitive<i128>,
@@ -333,8 +333,8 @@ where
 
 fn cast_floating_point_to_decimal<T: ArrowNumericType>(
     array: &PrimitiveArray<T>,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 ) -> Result<Arc<dyn Array>>
 where
     <T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
@@ -1306,9 +1306,9 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
 /// Cast one type of decimal array to another type of decimal array
 fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
     array: &ArrayRef,
-    input_scale: &usize,
-    output_precision: &usize,
-    output_scale: &usize,
+    input_scale: &u8,
+    output_precision: &u8,
+    output_scale: &u8,
 ) -> Result<ArrayRef> {
     if input_scale > output_scale {
         // For example, input_scale is 4 and output_scale is 3;
@@ -2592,8 +2592,8 @@ mod tests {
 
     fn create_decimal_array(
         array: Vec<Option<i128>>,
-        precision: usize,
-        scale: usize,
+        precision: u8,
+        scale: u8,
     ) -> Result<Decimal128Array> {
         array
             .into_iter()
@@ -2603,8 +2603,8 @@ mod tests {
 
     fn create_decimal256_array(
         array: Vec<Option<BigInt>>,
-        precision: usize,
-        scale: usize,
+        precision: u8,
+        scale: u8,
     ) -> Result<Decimal256Array> {
         array
             .into_iter()

diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs
@@ -987,8 +987,8 @@ mod tests {
         index: &UInt32Array,
         options: Option<TakeOptions>,
         expected_data: Vec<Option<i128>>,
-        precision: &usize,
-        scale: &usize,
+        precision: &u8,
+        scale: &u8,
     ) -> Result<()> {
         let output = data
             .into_iter()
@@ -1105,8 +1105,8 @@ mod tests {
     #[test]
     fn test_take_decimal128_non_null_indices() {
         let index = UInt32Array::from(vec![0, 5, 3, 1, 4, 2]);
-        let precision: usize = 10;
-        let scale: usize = 5;
+        let precision: u8 = 10;
+        let scale: u8 = 5;
         test_take_decimal_arrays(
             vec![None, Some(3), Some(5), Some(2), Some(3), None],
             &index,
@@ -1121,8 +1121,8 @@ mod tests {
     #[test]
     fn test_take_decimal128() {
         let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
-        let precision: usize = 10;
-        let scale: usize = 5;
+        let precision: u8 = 10;
+        let scale: u8 = 5;
         test_take_decimal_arrays(
             vec![Some(0), Some(1), Some(2), Some(3), Some(4)],
             &index,

diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs
@@ -696,8 +696,8 @@ fn build_decimal_array(
     _line_number: usize,
     rows: &[StringRecord],
     col_idx: usize,
-    precision: usize,
-    scale: usize,
+    precision: u8,
+    scale: u8,
 ) -> Result<ArrayRef> {
     let mut decimal_builder =
         Decimal128Builder::with_capacity(rows.len(), precision, scale);
@@ -732,11 +732,12 @@ fn build_decimal_array(
 
 // Parse the string format decimal value to i128 format and checking the precision and scale.
 // The result i128 value can't be out of bounds.
-fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Result<i128> {
+fn parse_decimal_with_parameter(s: &str, precision: u8, scale: u8) -> Result<i128> {
     if PARSE_DECIMAL_RE.is_match(s) {
         let mut offset = s.len();
         let len = s.len();
         let mut base = 1;
+        let scale_usize = usize::from(scale);
 
         // handle the value after the '.' and meet the scale
         let delimiter_position = s.find('.');
@@ -747,12 +748,12 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
             }
             Some(mid) => {
                 // there is the '.'
-                if len - mid >= scale + 1 {
+                if len - mid >= scale_usize + 1 {
                     // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
-                    offset -= len - mid - 1 - scale;
+                    offset -= len - mid - 1 - scale_usize;
                 } else {
                     // If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
-                    base = 10_i128.pow((scale + 1 + mid - len) as u32);
+                    base = 10_i128.pow((scale_usize + 1 + mid - len) as u32);
                 }
             }
         };