From 225b6c071f462801331a84681484d994efdb47a9 Mon Sep 17 00:00:00 2001 From: psvri Date: Fri, 19 Aug 2022 17:50:24 +0000 Subject: [PATCH 1/2] Change decimal array precison and scale to u8 --- arrow/src/array/array_decimal.rs | 32 +++++++++--------- arrow/src/array/builder/decimal_builder.rs | 12 +++---- arrow/src/array/transform/mod.rs | 4 +-- arrow/src/compute/kernels/cast.rs | 26 +++++++-------- arrow/src/compute/kernels/take.rs | 12 +++---- arrow/src/csv/reader.rs | 13 ++++---- arrow/src/datatypes/datatype.rs | 30 ++++++++--------- arrow/src/datatypes/ffi.rs | 8 ++--- arrow/src/datatypes/types.rs | 18 +++++----- arrow/src/ipc/convert.rs | 10 ++++-- arrow/src/util/decimal.rs | 39 +++++++++------------- parquet/src/arrow/array_reader/builder.rs | 5 ++- parquet/src/arrow/buffer/converter.rs | 10 +++--- parquet/src/arrow/schema.rs | 2 +- parquet/src/arrow/schema/primitive.rs | 14 ++++---- 15 files changed, 118 insertions(+), 117 deletions(-) diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 540024a923c..5a0ce289273 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -81,29 +81,29 @@ pub type Decimal256Array = DecimalArray; pub struct DecimalArray { data: ArrayData, value_data: RawPtrBox, - precision: usize, - scale: usize, + precision: u8, + scale: u8, _phantom: PhantomData, } impl DecimalArray { pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32; const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE; - pub const MAX_PRECISION: usize = T::MAX_PRECISION; - pub const MAX_SCALE: usize = T::MAX_SCALE; - const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR; + pub const MAX_PRECISION: u8 = T::MAX_PRECISION; + pub const MAX_SCALE: u8 = T::MAX_SCALE; + const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR; pub fn data(&self) -> &ArrayData { &self.data } /// Return the precision (total digits) that can be stored by this array - pub fn precision(&self) -> usize { + pub fn precision(&self) -> u8 { self.precision } /// Return the scale (digits after the decimal) that can be stored by this array - pub fn scale(&self) -> usize { + pub fn scale(&self) -> u8 { self.scale } @@ -167,8 +167,8 @@ impl DecimalArray { /// range for a decimal pub fn from_fixed_size_binary_array( v: FixedSizeBinaryArray, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Self { assert!( v.value_length() == Self::VALUE_LENGTH, @@ -194,8 +194,8 @@ impl DecimalArray { #[deprecated(note = "please use `from_fixed_size_binary_array` instead")] pub fn from_fixed_size_list_array( v: FixedSizeListArray, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Self { assert_eq!( v.data_ref().child_data().len(), @@ -261,7 +261,7 @@ impl DecimalArray { /// 1. `precision` is larger than [`Self::MAX_PRECISION`] /// 2. `scale` is larger than [`Self::MAX_SCALE`]; /// 3. `scale` is > `precision` - pub fn with_precision_and_scale(self, precision: usize, scale: usize) -> Result + pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result where Self: Sized, { @@ -281,7 +281,7 @@ impl DecimalArray { } // validate that the new precision and scale are valid or not - fn validate_precision_scale(&self, precision: usize, scale: usize) -> Result<()> { + fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> { if precision > Self::MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "precision {} is greater than max {}", @@ -309,7 +309,7 @@ impl DecimalArray { } // validate all the data in the array are valid within the new precision or not - fn validate_data(&self, precision: usize) -> Result<()> { + fn validate_data(&self, precision: u8) -> Result<()> { // TODO: Move into DecimalType match Self::VALUE_LENGTH { 16 => self @@ -350,7 +350,7 @@ impl Decimal128Array { // Validates decimal128 values in this array can be properly interpreted // with the specified precision. - fn validate_decimal_precision(&self, precision: usize) -> Result<()> { + fn validate_decimal_precision(&self, precision: u8) -> Result<()> { (0..self.len()).try_for_each(|idx| { if self.is_valid(idx) { let decimal = unsafe { self.value_unchecked(idx) }; @@ -365,7 +365,7 @@ impl Decimal128Array { impl Decimal256Array { // Validates decimal256 values in this array can be properly interpreted // with the specified precision. - fn validate_decimal_precision(&self, precision: usize) -> Result<()> { + fn validate_decimal_precision(&self, precision: u8) -> Result<()> { (0..self.len()).try_for_each(|idx| { if self.is_valid(idx) { let raw_val = unsafe { diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index 7021dce372e..a9351a06f03 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -37,8 +37,8 @@ use crate::util::decimal::Decimal256; #[derive(Debug)] pub struct Decimal128Builder { builder: FixedSizeBinaryBuilder, - precision: usize, - scale: usize, + precision: u8, + scale: u8, /// Should i128 values be validated for compatibility with scale and precision? /// defaults to true @@ -51,8 +51,8 @@ pub struct Decimal128Builder { #[derive(Debug)] pub struct Decimal256Builder { builder: FixedSizeBinaryBuilder, - precision: usize, - scale: usize, + precision: u8, + scale: u8, /// Should decimal values be validated for compatibility with scale and precision? /// defaults to true @@ -63,7 +63,7 @@ impl Decimal128Builder { const BYTE_LENGTH: i32 = 16; /// Creates a new [`Decimal128Builder`], `capacity` is the number of bytes in the values /// array - pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + pub fn new(capacity: usize, precision: u8, scale: u8) -> Self { Self { builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH), precision, @@ -157,7 +157,7 @@ impl Decimal256Builder { const BYTE_LENGTH: i32 = 32; /// Creates a new [`Decimal256Builder`], `capacity` is the number of bytes in the values /// array - pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + pub fn new(capacity: usize, precision: u8, scale: u8) -> Self { Self { builder: FixedSizeBinaryBuilder::new(capacity, Self::BYTE_LENGTH), precision, diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs index dafbc05c67a..ad4c356a93d 100644 --- a/arrow/src/array/transform/mod.rs +++ b/arrow/src/array/transform/mod.rs @@ -688,8 +688,8 @@ mod tests { fn create_decimal_array( array: Vec>, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Decimal128Array { array .into_iter() diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 4f59c00fcb5..1d765b04ded 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -301,8 +301,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result { fn cast_primitive_to_decimal( array: T, op: F, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result> where F: Fn(T::Item) -> i128, @@ -318,8 +318,8 @@ where fn cast_integer_to_decimal( array: &PrimitiveArray, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result> where ::Native: AsPrimitive, @@ -333,8 +333,8 @@ where fn cast_floating_point_to_decimal( array: &PrimitiveArray, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result> where ::Native: AsPrimitive, @@ -1306,9 +1306,9 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 { /// Cast one type of decimal array to another type of decimal array fn cast_decimal_to_decimal( array: &ArrayRef, - input_scale: &usize, - output_precision: &usize, - output_scale: &usize, + input_scale: &u8, + output_precision: &u8, + output_scale: &u8, ) -> Result { if input_scale > output_scale { // For example, input_scale is 4 and output_scale is 3; @@ -2592,8 +2592,8 @@ mod tests { fn create_decimal_array( array: Vec>, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result { array .into_iter() @@ -2603,8 +2603,8 @@ mod tests { fn create_decimal256_array( array: Vec>, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result { array .into_iter() diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index fb8f7565188..3718bcfb013 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -987,8 +987,8 @@ mod tests { index: &UInt32Array, options: Option, expected_data: Vec>, - precision: &usize, - scale: &usize, + precision: &u8, + scale: &u8, ) -> Result<()> { let output = data .into_iter() @@ -1105,8 +1105,8 @@ mod tests { #[test] fn test_take_decimal128_non_null_indices() { let index = UInt32Array::from(vec![0, 5, 3, 1, 4, 2]); - let precision: usize = 10; - let scale: usize = 5; + let precision: u8 = 10; + let scale: u8 = 5; test_take_decimal_arrays( vec![None, Some(3), Some(5), Some(2), Some(3), None], &index, @@ -1121,8 +1121,8 @@ mod tests { #[test] fn test_take_decimal128() { let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]); - let precision: usize = 10; - let scale: usize = 5; + let precision: u8 = 10; + let scale: u8 = 5; test_take_decimal_arrays( vec![Some(0), Some(1), Some(2), Some(3), Some(4)], &index, diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index ac26b377ffe..b4e00b7e03b 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -696,8 +696,8 @@ fn build_decimal_array( _line_number: usize, rows: &[StringRecord], col_idx: usize, - precision: usize, - scale: usize, + precision: u8, + scale: u8, ) -> Result { let mut decimal_builder = Decimal128Builder::new(rows.len(), precision, scale); for row in rows { @@ -731,11 +731,12 @@ fn build_decimal_array( // Parse the string format decimal value to i128 format and checking the precision and scale. // The result i128 value can't be out of bounds. -fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Result { +fn parse_decimal_with_parameter(s: &str, precision: u8, scale: u8) -> Result { if PARSE_DECIMAL_RE.is_match(s) { let mut offset = s.len(); let len = s.len(); let mut base = 1; + let scale_usize = usize::from(scale); // handle the value after the '.' and meet the scale let delimiter_position = s.find('.'); @@ -746,12 +747,12 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu } Some(mid) => { // there is the '.' - if len - mid >= scale + 1 { + if len - mid >= scale_usize + 1 { // If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value. - offset -= len - mid - 1 - scale; + offset -= len - mid - 1 - scale_usize; } else { // If the string value is "123.12" and the scale is 4, we should append '00' to the tail. - base = 10_i128.pow((scale + 1 + mid - len) as u32); + base = 10_i128.pow((scale_usize + 1 + mid - len) as u32); } } }; diff --git a/arrow/src/datatypes/datatype.rs b/arrow/src/datatypes/datatype.rs index 4f65afd2d20..51f70a6cba3 100644 --- a/arrow/src/datatypes/datatype.rs +++ b/arrow/src/datatypes/datatype.rs @@ -197,14 +197,14 @@ pub enum DataType { /// * scale is the number of digits past the decimal /// /// For example the number 123.45 has precision 5 and scale 2. - Decimal128(usize, usize), + Decimal128(u8, u8), /// Exact 256-bit width decimal value with precision and scale /// /// * precision is the total number of digits /// * scale is the number of digits past the decimal /// /// For example the number 123.45 has precision 5 and scale 2. - Decimal256(usize, usize), + Decimal256(u8, u8), /// A Map is a logical nested type that is represented as /// /// `List>` @@ -972,24 +972,24 @@ pub const MIN_DECIMAL_FOR_EACH_PRECISION: [i128; 38] = [ ]; /// The maximum precision for [DataType::Decimal128] values -pub const DECIMAL128_MAX_PRECISION: usize = 38; +pub const DECIMAL128_MAX_PRECISION: u8 = 38; /// The maximum scale for [DataType::Decimal128] values -pub const DECIMAL128_MAX_SCALE: usize = 38; +pub const DECIMAL128_MAX_SCALE: u8 = 38; /// The maximum precision for [DataType::Decimal256] values -pub const DECIMAL256_MAX_PRECISION: usize = 76; +pub const DECIMAL256_MAX_PRECISION: u8 = 76; /// The maximum scale for [DataType::Decimal256] values -pub const DECIMAL256_MAX_SCALE: usize = 76; +pub const DECIMAL256_MAX_SCALE: u8 = 76; /// The default scale for [DataType::Decimal128] and [DataType::Decimal256] values -pub const DECIMAL_DEFAULT_SCALE: usize = 10; +pub const DECIMAL_DEFAULT_SCALE: u8 = 10; /// Validates that the specified `i128` value can be properly /// interpreted as a Decimal number with precision `precision` #[inline] -pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Result<()> { +pub(crate) fn validate_decimal_precision(value: i128, precision: u8) -> Result<()> { if precision > DECIMAL128_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( "Max precision of a Decimal128 is {}, but got {}", @@ -997,8 +997,8 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul ))); } - let max = MAX_DECIMAL_FOR_EACH_PRECISION[precision - 1]; - let min = MIN_DECIMAL_FOR_EACH_PRECISION[precision - 1]; + let max = MAX_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_FOR_EACH_PRECISION[usize::from(precision) - 1]; if value > max { Err(ArrowError::InvalidArgumentError(format!( @@ -1020,7 +1020,7 @@ pub(crate) fn validate_decimal_precision(value: i128, precision: usize) -> Resul #[inline] pub(crate) fn validate_decimal256_precision_with_lt_bytes( lt_value: &[u8], - precision: usize, + precision: u8, ) -> Result<()> { if precision > DECIMAL256_MAX_PRECISION { return Err(ArrowError::InvalidArgumentError(format!( @@ -1028,8 +1028,8 @@ pub(crate) fn validate_decimal256_precision_with_lt_bytes( DECIMAL256_MAX_PRECISION, precision, ))); } - let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision - 1]; - let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[precision - 1]; + let max = MAX_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; + let min = MIN_DECIMAL_BYTES_FOR_LARGER_EACH_PRECISION[usize::from(precision) - 1]; if singed_cmp_le_bytes(lt_value, &max) == Ordering::Greater { Err(ArrowError::InvalidArgumentError(format!( @@ -1075,13 +1075,13 @@ impl DataType { Some(s) if s == "decimal" => { // return a list with any type as its child isn't defined in the map let precision = match map.get("precision") { - Some(p) => Ok(p.as_u64().unwrap() as usize), + Some(p) => Ok(p.as_u64().unwrap().try_into().unwrap()), None => Err(ArrowError::ParseError( "Expecting a precision for decimal".to_string(), )), }?; let scale = match map.get("scale") { - Some(s) => Ok(s.as_u64().unwrap() as usize), + Some(s) => Ok(s.as_u64().unwrap().try_into().unwrap()), _ => Err(ArrowError::ParseError( "Expecting a scale for decimal".to_string(), )), diff --git a/arrow/src/datatypes/ffi.rs b/arrow/src/datatypes/ffi.rs index 60d285315c0..ef303dfdd1f 100644 --- a/arrow/src/datatypes/ffi.rs +++ b/arrow/src/datatypes/ffi.rs @@ -98,12 +98,12 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { ["d", extra] => { match extra.splitn(3, ',').collect::>().as_slice() { [precision, scale] => { - let parsed_precision = precision.parse::().map_err(|_| { + let parsed_precision = precision.parse::().map_err(|_| { ArrowError::CDataInterface( "The decimal type requires an integer precision".to_string(), ) })?; - let parsed_scale = scale.parse::().map_err(|_| { + let parsed_scale = scale.parse::().map_err(|_| { ArrowError::CDataInterface( "The decimal type requires an integer scale".to_string(), ) @@ -114,12 +114,12 @@ impl TryFrom<&FFI_ArrowSchema> for DataType { if *bits != "128" { return Err(ArrowError::CDataInterface("Only 128 bit wide decimal is supported in the Rust implementation".to_string())); } - let parsed_precision = precision.parse::().map_err(|_| { + let parsed_precision = precision.parse::().map_err(|_| { ArrowError::CDataInterface( "The decimal type requires an integer precision".to_string(), ) })?; - let parsed_scale = scale.parse::().map_err(|_| { + let parsed_scale = scale.parse::().map_err(|_| { ArrowError::CDataInterface( "The decimal type requires an integer scale".to_string(), ) diff --git a/arrow/src/datatypes/types.rs b/arrow/src/datatypes/types.rs index 60b9f31d290..1b7d0675bb4 100644 --- a/arrow/src/datatypes/types.rs +++ b/arrow/src/datatypes/types.rs @@ -491,9 +491,9 @@ pub trait DecimalType: 'static + Send + Sync + private::DecimalTypeSealed { type Native: NativeDecimalType; const BYTE_LENGTH: usize; - const MAX_PRECISION: usize; - const MAX_SCALE: usize; - const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType; + const MAX_PRECISION: u8; + const MAX_SCALE: u8; + const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType; const DEFAULT_TYPE: DataType; } @@ -505,9 +505,9 @@ impl DecimalType for Decimal128Type { type Native = [u8; 16]; const BYTE_LENGTH: usize = 16; - const MAX_PRECISION: usize = DECIMAL128_MAX_PRECISION; - const MAX_SCALE: usize = DECIMAL128_MAX_SCALE; - const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = DataType::Decimal128; + const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION; + const MAX_SCALE: u8 = DECIMAL128_MAX_SCALE; + const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = DataType::Decimal128; const DEFAULT_TYPE: DataType = DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); } @@ -520,9 +520,9 @@ impl DecimalType for Decimal256Type { type Native = [u8; 32]; const BYTE_LENGTH: usize = 32; - const MAX_PRECISION: usize = DECIMAL256_MAX_PRECISION; - const MAX_SCALE: usize = DECIMAL256_MAX_SCALE; - const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = DataType::Decimal256; + const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION; + const MAX_SCALE: u8 = DECIMAL256_MAX_SCALE; + const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = DataType::Decimal256; const DEFAULT_TYPE: DataType = DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); } diff --git a/arrow/src/ipc/convert.rs b/arrow/src/ipc/convert.rs index 705bd5cb301..00503d50e33 100644 --- a/arrow/src/ipc/convert.rs +++ b/arrow/src/ipc/convert.rs @@ -322,9 +322,15 @@ pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataT let fsb = field.type_as_decimal().unwrap(); let bit_width = fsb.bitWidth(); if bit_width == 128 { - DataType::Decimal128(fsb.precision() as usize, fsb.scale() as usize) + DataType::Decimal128( + fsb.precision().try_into().unwrap(), + fsb.scale().try_into().unwrap(), + ) } else if bit_width == 256 { - DataType::Decimal256(fsb.precision() as usize, fsb.scale() as usize) + DataType::Decimal256( + fsb.precision().try_into().unwrap(), + fsb.scale().try_into().unwrap(), + ) } else { panic!("Unexpected decimal bit width {}", bit_width) } diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index f4430a2dc89..421942df5c1 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -34,8 +34,8 @@ use std::cmp::{min, Ordering}; /// [`Decimal128Array`]: [crate::array::Decimal128Array] /// [`Decimal256Array`]: [crate::array::Decimal256Array] pub struct Decimal { - precision: usize, - scale: usize, + precision: u8, + scale: u8, value: T::Native, } @@ -65,9 +65,9 @@ impl Clone for Decimal { impl Copy for Decimal {} impl Decimal { - pub const MAX_PRECISION: usize = T::MAX_PRECISION; - pub const MAX_SCALE: usize = T::MAX_SCALE; - pub const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR; + pub const MAX_PRECISION: u8 = T::MAX_PRECISION; + pub const MAX_SCALE: u8 = T::MAX_SCALE; + pub const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR; pub const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE; /// Tries to create a decimal value from precision, scale and bytes. @@ -76,11 +76,7 @@ impl Decimal { /// Safety: /// This method doesn't validate if the decimal value represented by the bytes /// can be fitted into the specified precision. - pub fn try_new_from_bytes( - precision: usize, - scale: usize, - bytes: &T::Native, - ) -> Result + pub fn try_new_from_bytes(precision: u8, scale: u8, bytes: &T::Native) -> Result where Self: Sized, { @@ -114,7 +110,7 @@ impl Decimal { /// Safety: /// This method doesn't check if the precision and scale are valid. /// Use `try_new_from_bytes` for safe constructor. - pub fn new(precision: usize, scale: usize, bytes: &T::Native) -> Self { + pub fn new(precision: u8, scale: u8, bytes: &T::Native) -> Self { Self { precision, scale, @@ -127,12 +123,12 @@ impl Decimal { } /// Returns the precision of the decimal. - pub fn precision(&self) -> usize { + pub fn precision(&self) -> u8 { self.precision } /// Returns the scale of the decimal. - pub fn scale(&self) -> usize { + pub fn scale(&self) -> u8 { self.scale } @@ -146,18 +142,19 @@ impl Decimal { let value_str = integer.to_string(); let (sign, rest) = value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); - let bound = min(self.precision(), rest.len()) + sign.len(); + let bound = min(usize::from(self.precision()), rest.len()) + sign.len(); let value_str = &value_str[0..bound]; + let scale_usize = usize::from(self.scale()); if self.scale() == 0 { value_str.to_string() - } else if rest.len() > self.scale() { + } else if rest.len() > scale_usize { // Decimal separator is in the middle of the string - let (whole, decimal) = value_str.split_at(value_str.len() - self.scale()); + let (whole, decimal) = value_str.split_at(value_str.len() - scale_usize); format!("{}.{}", whole, decimal) } else { // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = self.scale()) + format!("{}0.{:0>width$}", sign, rest, width = scale_usize) } } } @@ -207,7 +204,7 @@ pub type Decimal128 = Decimal; impl Decimal128 { /// Creates `Decimal128` from an `i128` value. #[allow(dead_code)] - pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { + pub(crate) fn new_from_i128(precision: u8, scale: u8, value: i128) -> Self { Decimal128 { precision, scale, @@ -233,11 +230,7 @@ pub type Decimal256 = Decimal; impl Decimal256 { /// Constructs a `Decimal256` value from a `BigInt`. - pub fn from_big_int( - num: &BigInt, - precision: usize, - scale: usize, - ) -> Result { + pub fn from_big_int(num: &BigInt, precision: u8, scale: u8) -> Result { let mut bytes = if num.is_negative() { [255_u8; 32] } else { diff --git a/parquet/src/arrow/array_reader/builder.rs b/parquet/src/arrow/array_reader/builder.rs index 84e833ac45e..f844a00d63c 100644 --- a/parquet/src/arrow/array_reader/builder.rs +++ b/parquet/src/arrow/array_reader/builder.rs @@ -220,8 +220,7 @@ fn build_primitive_reader( Some(DataType::Decimal128(precision, scale)) => { // read decimal data from parquet binary physical type let convert = DecimalByteArrayConvert::new(DecimalArrayConverter::new( - precision as i32, - scale as i32, + precision, scale, )); Ok(Box::new(ComplexObjectArrayReader::< ByteArrayType, @@ -235,7 +234,7 @@ fn build_primitive_reader( PhysicalType::FIXED_LEN_BYTE_ARRAY => match field.arrow_type { DataType::Decimal128(precision, scale) => { let converter = DecimalFixedLengthByteArrayConverter::new( - DecimalArrayConverter::new(precision as i32, scale as i32), + DecimalArrayConverter::new(precision, scale), ); Ok(Box::new(ComplexObjectArrayReader::< FixedLenByteArrayType, diff --git a/parquet/src/arrow/buffer/converter.rs b/parquet/src/arrow/buffer/converter.rs index aeca548bde7..8ab98c54605 100644 --- a/parquet/src/arrow/buffer/converter.rs +++ b/parquet/src/arrow/buffer/converter.rs @@ -68,12 +68,12 @@ impl Converter>, FixedSizeBinaryArray> } pub struct DecimalArrayConverter { - precision: i32, - scale: i32, + precision: u8, + scale: u8, } impl DecimalArrayConverter { - pub fn new(precision: i32, scale: i32) -> Self { + pub fn new(precision: u8, scale: u8) -> Self { Self { precision, scale } } } @@ -86,7 +86,7 @@ impl Converter>, Decimal128Array> .into_iter() .map(|array| array.map(|array| from_bytes_to_i128(array.data()))) .collect::() - .with_precision_and_scale(self.precision as usize, self.scale as usize)?; + .with_precision_and_scale(self.precision, self.scale)?; Ok(array) } @@ -98,7 +98,7 @@ impl Converter>, Decimal128Array> for DecimalArrayConverte .into_iter() .map(|array| array.map(|array| from_bytes_to_i128(array.data()))) .collect::() - .with_precision_and_scale(self.precision as usize, self.scale as usize)?; + .with_precision_and_scale(self.precision, self.scale)?; Ok(array) } diff --git a/parquet/src/arrow/schema.rs b/parquet/src/arrow/schema.rs index f39d7b617a4..ad5b6b1f5f8 100644 --- a/parquet/src/arrow/schema.rs +++ b/parquet/src/arrow/schema.rs @@ -220,7 +220,7 @@ pub fn parquet_to_arrow_field(parquet_column: &ColumnDescriptor) -> Result usize { +pub fn decimal_length_from_precision(precision: u8) -> usize { (10.0_f64.powi(precision as i32).log2() / 8.0).ceil() as usize } diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index c05a13565b1..87edd75b0b8 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -235,12 +235,14 @@ fn from_byte_array(info: &BasicTypeInfo, precision: i32, scale: i32) -> Result Ok(DataType::Binary), (None, ConvertedType::ENUM) => Ok(DataType::Binary), (None, ConvertedType::UTF8) => Ok(DataType::Utf8), - (Some(LogicalType::Decimal { precision, scale }), _) => { - Ok(DataType::Decimal128(precision as usize, scale as usize)) - } - (None, ConvertedType::DECIMAL) => { - Ok(DataType::Decimal128(precision as usize, scale as usize)) - } + (Some(LogicalType::Decimal { precision, scale }), _) => Ok(DataType::Decimal128( + precision.try_into().unwrap(), + scale.try_into().unwrap(), + )), + (None, ConvertedType::DECIMAL) => Ok(DataType::Decimal128( + precision.try_into().unwrap(), + scale.try_into().unwrap(), + )), (logical, converted) => Err(arrow_err!( "Unable to convert parquet BYTE_ARRAY logical type {:?} or converted type {}", logical, From 06a1a775811e40711c58fc379f2831f31ff3047b Mon Sep 17 00:00:00 2001 From: psvri Date: Fri, 19 Aug 2022 18:07:01 +0000 Subject: [PATCH 2/2] bug fixes --- arrow/benches/cast_kernels.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index a7bae35e712..cb22b6d627b 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -82,7 +82,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { Arc::new(builder.finish()) } -fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef { +fn build_decimal128_array(size: usize, precision: u8, scale: u8) -> ArrayRef { let mut rng = seedable_rng(); let mut builder = Decimal128Builder::new(size, precision, scale); @@ -92,7 +92,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR Arc::new(builder.finish()) } -fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef { +fn build_decimal256_array(size: usize, precision: u8, scale: u8) -> ArrayRef { let mut rng = seedable_rng(); let mut builder = Decimal256Builder::new(size, precision, scale); let mut bytes = [0; 32];