apache · liukun4515 · Aug 8, 2022 · HaoYang670 · Aug 8, 2022 · liukun4515
diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs
@@ -57,7 +57,7 @@ use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};
 ///    // set precision and scale so values are interpreted
 ///    // as `8887.000000`, `Null`, and `-8887.000000`
 ///    let decimal_array = decimal_array
-///     .with_precision_and_scale(23, 6)
+///     .with_precision_and_scale(23, 6, true)
 ///     .unwrap();
 ///
 ///    assert_eq!(&DataType::Decimal128(23, 6), decimal_array.data_type());
@@ -253,11 +253,15 @@ pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
     /// Returns a Decimal array with the same data as self, with the
     /// specified precision.
     ///
+    /// If all values in this array are already known to be within the bounds of the specified precision,
+    /// set `need_validation` to `false`; otherwise set it to `true`.
+    ///
     /// Returns an Error if:
     /// 1. `precision` is larger than [`Self::MAX_PRECISION`]
     /// 2. `scale` is larger than [`Self::MAX_SCALE`];
     /// 3. `scale` is > `precision`
-    fn with_precision_and_scale(self, precision: usize, scale: usize) -> Result<U>
+    /// 4. `need_validation` is `true`, but some values are out of range for the specified `precision`
+    fn with_precision_and_scale(self, precision: usize, scale: usize, need_validation: bool) -> Result<U>
     where
         Self: Sized,
     {
@@ -282,10 +286,9 @@ pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
             )));
         }
 
-        // Ensure that all values are within the requested
-        // precision. For performance, only check if the precision is
-        // decreased
-        self.validate_decimal_precision(precision)?;
+        if need_validation {
+            self.validate_decimal_precision(precision)?;
+        }
 
         let data_type = if Self::VALUE_LENGTH == 16 {
             DataType::Decimal128(self.precision(), self.scale())

diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs
@@ -431,8 +431,8 @@ pub fn cast_with_options(
         return Ok(array.clone());
     }
     match (from_type, to_type) {
-        (Decimal128(_, s1), Decimal128(p2, s2)) => {
-            cast_decimal_to_decimal(array, s1, p2, s2)
+        (Decimal128(p1, s1), Decimal128(p2, s2)) => {
+            cast_decimal_to_decimal(array, p1,s1, p2, s2)
         }
         (Decimal128(_, scale), _) => {
             // cast decimal to other type
@@ -1254,6 +1254,7 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
 /// Cast one type of decimal array to another type of decimal array
 fn cast_decimal_to_decimal(
     array: &ArrayRef,
+    input_precision: &usize,
     input_scale: &usize,
     output_precision: &usize,
     output_scale: &usize,
@@ -1276,8 +1277,17 @@ fn cast_decimal_to_decimal(
             .iter()
             .map(|v| v.map(|v| v.as_i128() * mul))
             .collect::<Decimal128Array>()
-    }
-    .with_precision_and_scale(*output_precision, *output_scale)?;
+    };
+    // When casting decimal to decimal, validation can be skipped if the output's
+    // integer-digit range (precision - scale) is greater than or equal to the input's.
+    let output_array  = match output_precision-output_scale>=input_precision - input_scale {
+        true => {
+            output_array.with_precision_and_scale(*output_precision, *output_scale, false)
 ArrowType::Decimal128(p, s) => { 
 fn cast_decimal_to_decimal( 
 ArrowType::Decimal128(p, s) => { 
 fn cast_decimal_to_decimal( 
+        }
+        false => {
+            output_array.with_precision_and_scale(*output_precision, *output_scale, true)
+        }
+    }?;
-    let output_array  = match output_precision-output_scale>=input_precision - input_scale {
-        true => {
-            output_array.with_precision_and_scale(*output_precision, *output_scale, false)
-        }
-        false => {
-            output_array.with_precision_and_scale(*output_precision, *output_scale, true)
-        }
-    }?;
+    let output_array  = output_array.with_precision_and_scale(
+        *output_precision, 
+        *output_scale, 
+        output_precision - output_scale < input_precision - input_scale)?;
-    let output_array  = match output_precision-output_scale>=input_precision - input_scale {
-        true => {
-            output_array.with_precision_and_scale(*output_precision, *output_scale, false)
-        }
-        false => {
-            output_array.with_precision_and_scale(*output_precision, *output_scale, true)
-        }
-    }?;
+    let output_array  = output_array.with_precision_and_scale(
+        *output_precision, 
+        *output_scale, 
+        output_precision - output_scale < input_precision - input_scale)?;
 
     Ok(Arc::new(output_array))
 }

diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs
@@ -184,6 +184,8 @@ where
                 let a = arrow::compute::cast(&array, &ArrowType::Date32)?;
                 arrow::compute::cast(&a, &target_type)?
             }
+            // In a parquet file, if the logical/converted type is decimal and the physical type
+            // is INT32 or INT64, validation is not needed.
             ArrowType::Decimal128(p, s) => {
                 let array = match array.data_type() {
                     ArrowType::Int32 => array
@@ -208,7 +210,7 @@ where
                         ))
                     }
                 }
-                .with_precision_and_scale(p, s)?;
+                .with_precision_and_scale(p, s, false)?;
 
                 Arc::new(array) as ArrayRef
             }

diff --git a/parquet/src/arrow/buffer/converter.rs b/parquet/src/arrow/buffer/converter.rs
@@ -82,23 +82,25 @@ impl Converter<Vec<Option<FixedLenByteArray>>, Decimal128Array>
     for DecimalArrayConverter
 {
     fn convert(&self, source: Vec<Option<FixedLenByteArray>>) -> Result<Decimal128Array> {
+        // In a parquet file, if the logical/converted type is decimal, validation is not needed.
         let array = source
             .into_iter()
             .map(|array| array.map(|array| from_bytes_to_i128(array.data())))
             .collect::<Decimal128Array>()
-            .with_precision_and_scale(self.precision as usize, self.scale as usize)?;
+            .with_precision_and_scale(self.precision as usize, self.scale as usize, false)?;
 
         Ok(array)
     }
 }
 
 impl Converter<Vec<Option<ByteArray>>, Decimal128Array> for DecimalArrayConverter {
     fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<Decimal128Array> {
+        // In a parquet file, if the logical/converted type is decimal, validation is not needed.
         let array = source
             .into_iter()
             .map(|array| array.map(|array| from_bytes_to_i128(array.data())))
             .collect::<Decimal128Array>()
-            .with_precision_and_scale(self.precision as usize, self.scale as usize)?;
+            .with_precision_and_scale(self.precision as usize, self.scale as usize, false)?;
 
         Ok(array)
     }