diff --git a/arrow-array/src/array/decimal_array.rs b/arrow-array/src/array/decimal_array.rs deleted file mode 100644 index 5ca9b0715cf..00000000000 --- a/arrow-array/src/array/decimal_array.rs +++ /dev/null @@ -1,997 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use crate::builder::BooleanBufferBuilder; -use crate::decimal::{Decimal, Decimal256}; -use crate::iterator::DecimalIter; -use crate::raw_pointer::RawPtrBox; -use crate::types::{Decimal128Type, Decimal256Type, DecimalType, NativeDecimalType}; -use crate::{ - print_long_array, Array, ArrayAccessor, FixedSizeBinaryArray, FixedSizeListArray, -}; -use arrow_buffer::{Buffer, MutableBuffer}; -use arrow_data::decimal::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, -}; -use arrow_data::ArrayData; -use arrow_schema::{ArrowError, DataType}; -use std::any::Any; -use std::marker::PhantomData; - -/// `Decimal128Array` stores fixed width decimal numbers, -/// with a fixed precision and scale. 
-/// -/// # Examples -/// -/// ``` -/// use arrow_array::{Array, DecimalArray, Decimal128Array}; -/// use arrow_schema::DataType; -/// -/// // Create a DecimalArray with the default precision and scale -/// let decimal_array: Decimal128Array = vec![ -/// Some(8_887_000_000_i128), -/// None, -/// Some(-8_887_000_000_i128), -/// ] -/// .into_iter().collect(); -/// -/// // set precision and scale so values are interpreted -/// // as `8887.000000`, `Null`, and `-8887.000000` -/// let decimal_array = decimal_array -/// .with_precision_and_scale(23, 6) -/// .unwrap(); -/// -/// assert_eq!(&DataType::Decimal128(23, 6), decimal_array.data_type()); -/// assert_eq!(8_887_000_000_i128, decimal_array.value(0).as_i128()); -/// assert_eq!("8887.000000", decimal_array.value_as_string(0)); -/// assert_eq!(3, decimal_array.len()); -/// assert_eq!(1, decimal_array.null_count()); -/// assert_eq!(32, decimal_array.value_offset(2)); -/// assert_eq!(16, decimal_array.value_length()); -/// assert_eq!(23, decimal_array.precision()); -/// assert_eq!(6, decimal_array.scale()); -/// ``` -/// -pub type Decimal128Array = DecimalArray; - -/// `Decimal256Array` stores fixed width decimal numbers, -/// with a fixed precision and scale -pub type Decimal256Array = DecimalArray; - -/// A generic [`Array`] for fixed width decimal numbers -/// -/// See [`Decimal128Array`] and [`Decimal256Array`] -pub struct DecimalArray { - data: ArrayData, - value_data: RawPtrBox, - precision: u8, - scale: u8, - _phantom: PhantomData, -} - -impl DecimalArray { - pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32; - const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE; - pub const MAX_PRECISION: u8 = T::MAX_PRECISION; - pub const MAX_SCALE: u8 = T::MAX_SCALE; - const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR; - - pub fn data(&self) -> &ArrayData { - &self.data - } - - /// Return the precision (total digits) that can be stored by this array - pub fn precision(&self) -> u8 { - self.precision - } - - /// 
Return the scale (digits after the decimal) that can be stored by this array - pub fn scale(&self) -> u8 { - self.scale - } - - /// Returns the element at index `i`. - /// # Panics - /// Panics if index `i` is out of bounds. - pub fn value(&self, i: usize) -> Decimal { - assert!( - i < self.data().len(), - "Trying to access an element at index {} from a DecimalArray of length {}", - i, - self.len() - ); - - unsafe { self.value_unchecked(i) } - } - - /// Returns the element at index `i`. - /// # Safety - /// Caller is responsible for ensuring that the index is within the bounds of the array - pub unsafe fn value_unchecked(&self, i: usize) -> Decimal { - let data = self.data(); - let offset = i + data.offset(); - let raw_val = { - let pos = self.value_offset_at(offset); - T::Native::from_slice(std::slice::from_raw_parts( - self.raw_value_data_ptr().offset(pos as isize), - Self::VALUE_LENGTH as usize, - )) - }; - Decimal::new(self.precision(), self.scale(), &raw_val) - } - - /// Returns the offset for the element at index `i`. - /// - /// Note this doesn't do any bound checking, for performance reason. - #[inline] - pub fn value_offset(&self, i: usize) -> i32 { - self.value_offset_at(self.data().offset() + i) - } - - /// Returns the length for an element. - /// - /// All elements have the same length as the array is a fixed size. - #[inline] - pub fn value_length(&self) -> i32 { - Self::VALUE_LENGTH - } - - /// Returns a clone of the value data buffer - pub fn value_data(&self) -> Buffer { - self.data().buffers()[0].clone() - } - - #[inline] - pub fn value_offset_at(&self, i: usize) -> i32 { - Self::VALUE_LENGTH * i as i32 - } - - #[inline] - pub fn value_as_string(&self, row: usize) -> String { - self.value(row).to_string() - } - - /// Build a decimal array from [`FixedSizeBinaryArray`]. 
- /// - /// NB: This function does not validate that each value is in the permissible - /// range for a decimal - pub fn from_fixed_size_binary_array( - v: FixedSizeBinaryArray, - precision: u8, - scale: u8, - ) -> Self { - assert!( - v.value_length() == Self::VALUE_LENGTH, - "Value length of the array ({}) must equal to the byte width of the decimal ({})", - v.value_length(), - Self::VALUE_LENGTH, - ); - let data_type = if Self::VALUE_LENGTH == 16 { - DataType::Decimal128(precision, scale) - } else { - DataType::Decimal256(precision, scale) - }; - let builder = v.into_data().into_builder().data_type(data_type); - - let array_data = unsafe { builder.build_unchecked() }; - Self::from(array_data) - } - - /// Build a decimal array from [`FixedSizeListArray`]. - /// - /// NB: This function does not validate that each value is in the permissible - /// range for a decimal. - #[deprecated(note = "please use `from_fixed_size_binary_array` instead")] - pub fn from_fixed_size_list_array( - v: FixedSizeListArray, - precision: u8, - scale: u8, - ) -> Self { - assert_eq!( - v.data_ref().child_data().len(), - 1, - "DecimalArray can only be created from list array of u8 values \ - (i.e. FixedSizeList>)." - ); - let child_data = &v.data_ref().child_data()[0]; - - assert_eq!( - child_data.child_data().len(), - 0, - "DecimalArray can only be created from list array of u8 values \ - (i.e. FixedSizeList>)." - ); - assert_eq!( - child_data.data_type(), - &DataType::UInt8, - "DecimalArray can only be created from FixedSizeList arrays, mismatched data types." - ); - assert!( - v.value_length() == Self::VALUE_LENGTH, - "Value length of the array ({}) must equal to the byte width of the decimal ({})", - v.value_length(), - Self::VALUE_LENGTH, - ); - assert_eq!( - v.data_ref().child_data()[0].null_count(), - 0, - "The child array cannot contain null values." 
- ); - - let list_offset = v.offset(); - let child_offset = child_data.offset(); - let data_type = if Self::VALUE_LENGTH == 16 { - DataType::Decimal128(precision, scale) - } else { - DataType::Decimal256(precision, scale) - }; - let builder = ArrayData::builder(data_type) - .len(v.len()) - .add_buffer(child_data.buffers()[0].slice(child_offset)) - .null_bit_buffer(v.data_ref().null_buffer().cloned()) - .offset(list_offset); - - let array_data = unsafe { builder.build_unchecked() }; - Self::from(array_data) - } - - /// The default precision and scale used when not specified. - pub const fn default_type() -> DataType { - Self::DEFAULT_TYPE - } - - fn raw_value_data_ptr(&self) -> *const u8 { - self.value_data.as_ptr() - } - - /// Returns a Decimal array with the same data as self, with the - /// specified precision. - /// - /// Returns an Error if: - /// 1. `precision` is larger than [`Self::MAX_PRECISION`] - /// 2. `scale` is larger than [`Self::MAX_SCALE`]; - /// 3. `scale` is > `precision` - pub fn with_precision_and_scale( - self, - precision: u8, - scale: u8, - ) -> Result - where - Self: Sized, - { - // validate precision and scale - self.validate_precision_scale(precision, scale)?; - - // Ensure that all values are within the requested - // precision. 
For performance, only check if the precision is - // decreased - if precision < self.precision { - self.validate_data(precision)?; - } - - // safety: self.data is valid DataType::Decimal as checked above - let new_data_type = Self::TYPE_CONSTRUCTOR(precision, scale); - let data = self.data().clone().into_builder().data_type(new_data_type); - - // SAFETY - // Validated data above - Ok(unsafe { data.build_unchecked().into() }) - } - - // validate that the new precision and scale are valid or not - fn validate_precision_scale( - &self, - precision: u8, - scale: u8, - ) -> Result<(), ArrowError> { - if precision > Self::MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "precision {} is greater than max {}", - precision, - Self::MAX_PRECISION - ))); - } - if scale > Self::MAX_SCALE { - return Err(ArrowError::InvalidArgumentError(format!( - "scale {} is greater than max {}", - scale, - Self::MAX_SCALE - ))); - } - if scale > precision { - return Err(ArrowError::InvalidArgumentError(format!( - "scale {} is greater than precision {}", - scale, precision - ))); - } - let data_type = Self::TYPE_CONSTRUCTOR(self.precision, self.scale); - assert_eq!(self.data().data_type(), &data_type); - - Ok(()) - } - - // validate all the data in the array are valid within the new precision or not - fn validate_data(&self, precision: u8) -> Result<(), ArrowError> { - // TODO: Move into DecimalType - match Self::VALUE_LENGTH { - 16 => self - .as_any() - .downcast_ref::() - .unwrap() - .validate_decimal_precision(precision), - 32 => self - .as_any() - .downcast_ref::() - .unwrap() - .validate_decimal_precision(precision), - other_width => { - panic!("invalid byte width {}", other_width); - } - } - } -} - -impl Decimal128Array { - /// Creates a [Decimal128Array] with default precision and scale, - /// based on an iterator of `i128` values without nulls - pub fn from_iter_values>(iter: I) -> Self { - let val_buf: Buffer = iter.into_iter().collect(); - let data = unsafe { 
- ArrayData::new_unchecked( - Self::default_type(), - val_buf.len() / std::mem::size_of::(), - None, - None, - 0, - vec![val_buf], - vec![], - ) - }; - Decimal128Array::from(data) - } - - // Validates decimal128 values in this array can be properly interpreted - // with the specified precision. - fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { - (0..self.len()).try_for_each(|idx| { - if self.is_valid(idx) { - let decimal = unsafe { self.value_unchecked(idx) }; - validate_decimal_precision(decimal.as_i128(), precision) - } else { - Ok(()) - } - }) - } -} - -impl Decimal256Array { - // Validates decimal256 values in this array can be properly interpreted - // with the specified precision. - fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { - (0..self.len()).try_for_each(|idx| { - if self.is_valid(idx) { - let raw_val = unsafe { - let pos = self.value_offset(idx); - std::slice::from_raw_parts( - self.raw_value_data_ptr().offset(pos as isize), - Self::VALUE_LENGTH as usize, - ) - }; - validate_decimal256_precision_with_lt_bytes(raw_val, precision) - } else { - Ok(()) - } - }) - } -} - -impl From for DecimalArray { - fn from(data: ArrayData) -> Self { - assert_eq!( - data.buffers().len(), - 1, - "DecimalArray data should contain 1 buffer only (values)" - ); - let values = data.buffers()[0].as_ptr(); - let (precision, scale) = match (data.data_type(), Self::DEFAULT_TYPE) { - (DataType::Decimal128(precision, scale), DataType::Decimal128(_, _)) - | (DataType::Decimal256(precision, scale), DataType::Decimal256(_, _)) => { - (*precision, *scale) - } - _ => panic!( - "Expected data type to match {} got {}", - Self::DEFAULT_TYPE, - data.data_type() - ), - }; - Self { - data, - // SAFETY: - // ArrayData must be valid, and verified data type above - value_data: unsafe { RawPtrBox::new(values) }, - precision, - scale, - _phantom: Default::default(), - } - } -} - -fn build_decimal_array_from( - null_buf: 
BooleanBufferBuilder, - buffer: Buffer, -) -> DecimalArray { - let data = unsafe { - ArrayData::new_unchecked( - DecimalArray::::default_type(), - null_buf.len(), - None, - Some(null_buf.into()), - 0, - vec![buffer], - vec![], - ) - }; - DecimalArray::from(data) -} - -impl> FromIterator> for Decimal256Array { - fn from_iter>>(iter: I) -> Self { - let iter = iter.into_iter(); - let (lower, upper) = iter.size_hint(); - let size_hint = upper.unwrap_or(lower); - - let mut null_buf = BooleanBufferBuilder::new(size_hint); - - let mut buffer = MutableBuffer::with_capacity(size_hint); - - iter.for_each(|item| { - if let Some(a) = item { - null_buf.append(true); - buffer.extend_from_slice(Into::into(a).raw_value()); - } else { - null_buf.append(false); - buffer.extend_zeros(32); - } - }); - - build_decimal_array_from(null_buf, buffer.into()) - } -} - -impl> FromIterator> for Decimal128Array { - fn from_iter>>(iter: I) -> Self { - let iter = iter.into_iter(); - let (lower, upper) = iter.size_hint(); - let size_hint = upper.unwrap_or(lower); - - let mut null_buf = BooleanBufferBuilder::new(size_hint); - - let buffer: Buffer = iter - .map(|item| { - if let Some(a) = item { - null_buf.append(true); - a.into() - } else { - null_buf.append(false); - // arbitrary value for NULL - 0 - } - }) - .collect(); - - build_decimal_array_from(null_buf, buffer) - } -} - -impl Array for DecimalArray { - fn as_any(&self) -> &dyn Any { - self - } - - fn data(&self) -> &ArrayData { - &self.data - } - - fn into_data(self) -> ArrayData { - self.into() - } -} - -impl From> for ArrayData { - fn from(array: DecimalArray) -> Self { - array.data - } -} - -impl std::fmt::Debug for DecimalArray { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!( - f, - "Decimal{}Array<{}, {}>\n[\n", - T::BYTE_LENGTH * 8, - self.precision, - self.scale - )?; - print_long_array(self, f, |array, index, f| { - let formatted_decimal = array.value_as_string(index); - - write!(f, "{}", 
formatted_decimal) - })?; - write!(f, "]") - } -} - -impl<'a, T: DecimalType> ArrayAccessor for &'a DecimalArray { - type Item = Decimal; - - fn value(&self, index: usize) -> Self::Item { - DecimalArray::::value(self, index) - } - - unsafe fn value_unchecked(&self, index: usize) -> Self::Item { - DecimalArray::::value_unchecked(self, index) - } -} - -impl<'a, T: DecimalType> IntoIterator for &'a DecimalArray { - type Item = Option>; - type IntoIter = DecimalIter<'a, T>; - - fn into_iter(self) -> Self::IntoIter { - DecimalIter::<'a, T>::new(self) - } -} - -impl<'a, T: DecimalType> DecimalArray { - /// constructs a new iterator - pub fn iter(&'a self) -> DecimalIter<'a, T> { - DecimalIter::<'a, T>::new(self) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::builder::{Decimal128Builder, Decimal256Builder}; - use crate::decimal::Decimal128; - use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; - use arrow_schema::Field; - use num::{BigInt, Num}; - - #[test] - fn test_decimal_array() { - // let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; - // let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]; - let values: [u8; 32] = [ - 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - ]; - let array_data = ArrayData::builder(DataType::Decimal128(38, 6)) - .len(2) - .add_buffer(Buffer::from(&values[..])) - .build() - .unwrap(); - let decimal_array = Decimal128Array::from(array_data); - assert_eq!(8_887_000_000_i128, decimal_array.value(0).into()); - assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into()); - assert_eq!(16, decimal_array.value_length()); - } - - #[test] - #[cfg(not(feature = "force_validate"))] - fn test_decimal_append_error_value() { - let mut decimal_builder = Decimal128Builder::with_capacity(10, 5, 3); - let mut result = 
decimal_builder.append_value(123456); - let mut error = result.unwrap_err(); - assert_eq!( - "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999", - error.to_string() - ); - - unsafe { - decimal_builder.disable_value_validation(); - } - result = decimal_builder.append_value(123456); - assert!(result.is_ok()); - decimal_builder.append_value(12345).unwrap(); - let arr = decimal_builder.finish(); - assert_eq!("12.345", arr.value_as_string(1)); - - decimal_builder = Decimal128Builder::new(2, 1); - result = decimal_builder.append_value(100); - error = result.unwrap_err(); - assert_eq!( - "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99", - error.to_string() - ); - - unsafe { - decimal_builder.disable_value_validation(); - } - result = decimal_builder.append_value(100); - assert!(result.is_ok()); - decimal_builder.append_value(99).unwrap(); - result = decimal_builder.append_value(-100); - assert!(result.is_ok()); - decimal_builder.append_value(-99).unwrap(); - let arr = decimal_builder.finish(); - assert_eq!("9.9", arr.value_as_string(1)); - assert_eq!("-9.9", arr.value_as_string(3)); - } - - #[test] - fn test_decimal_from_iter_values() { - let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); - assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); - assert!(!array.is_null(0)); - assert_eq!(0_i128, array.value(1).into()); - assert!(!array.is_null(1)); - assert_eq!(101_i128, array.value(2).into()); - assert!(!array.is_null(2)); - } - - #[test] - fn test_decimal_from_iter() { - let array: Decimal128Array = - vec![Some(-100), None, Some(101)].into_iter().collect(); - assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); - assert!(!array.is_null(0)); - assert!(array.is_null(1)); - 
assert_eq!(101_i128, array.value(2).into()); - assert!(!array.is_null(2)); - } - - #[test] - fn test_decimal_iter() { - let data = vec![Some(-100), None, Some(101)]; - let array: Decimal128Array = data.clone().into_iter().collect(); - - let collected: Vec<_> = array.iter().map(|d| d.map(|v| v.as_i128())).collect(); - assert_eq!(data, collected); - } - - #[test] - fn test_decimal_into_iter() { - let data = vec![Some(-100), None, Some(101)]; - let array: Decimal128Array = data.clone().into_iter().collect(); - - let collected: Vec<_> = - array.into_iter().map(|d| d.map(|v| v.as_i128())).collect(); - assert_eq!(data, collected); - } - - #[test] - fn test_decimal_iter_sized() { - let data = vec![Some(-100), None, Some(101)]; - let array: Decimal128Array = data.into_iter().collect(); - let mut iter = array.into_iter(); - - // is exact sized - assert_eq!(array.len(), 3); - - // size_hint is reported correctly - assert_eq!(iter.size_hint(), (3, Some(3))); - iter.next().unwrap(); - assert_eq!(iter.size_hint(), (2, Some(2))); - iter.next().unwrap(); - iter.next().unwrap(); - assert_eq!(iter.size_hint(), (0, Some(0))); - assert!(iter.next().is_none()); - assert_eq!(iter.size_hint(), (0, Some(0))); - } - - #[test] - fn test_decimal_array_value_as_string() { - let arr = [123450, -123450, 100, -100, 10, -10, 0] - .into_iter() - .map(Some) - .collect::() - .with_precision_and_scale(6, 3) - .unwrap(); - - assert_eq!("123.450", arr.value_as_string(0)); - assert_eq!("-123.450", arr.value_as_string(1)); - assert_eq!("0.100", arr.value_as_string(2)); - assert_eq!("-0.100", arr.value_as_string(3)); - assert_eq!("0.010", arr.value_as_string(4)); - assert_eq!("-0.010", arr.value_as_string(5)); - assert_eq!("0.000", arr.value_as_string(6)); - } - - #[test] - fn test_decimal_array_with_precision_and_scale() { - let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]) - .with_precision_and_scale(20, 2) - .unwrap(); - - assert_eq!(arr.data_type(), 
&DataType::Decimal128(20, 2)); - assert_eq!(arr.precision(), 20); - assert_eq!(arr.scale(), 2); - - let actual: Vec<_> = (0..arr.len()).map(|i| arr.value_as_string(i)).collect(); - let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"]; - - assert_eq!(actual, expected); - } - - #[test] - #[should_panic( - expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. Min is -99999" - )] - fn test_decimal_array_with_precision_and_scale_out_of_range() { - Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]) - // precision is too small to hold value - .with_precision_and_scale(5, 2) - .unwrap(); - } - - #[test] - #[should_panic(expected = "precision 40 is greater than max 38")] - fn test_decimal_array_with_precision_and_scale_invalid_precision() { - Decimal128Array::from_iter_values([12345, 456]) - .with_precision_and_scale(40, 2) - .unwrap(); - } - - #[test] - #[should_panic(expected = "scale 40 is greater than max 38")] - fn test_decimal_array_with_precision_and_scale_invalid_scale() { - Decimal128Array::from_iter_values([12345, 456]) - .with_precision_and_scale(20, 40) - .unwrap(); - } - - #[test] - #[should_panic(expected = "scale 10 is greater than precision 4")] - fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() { - Decimal128Array::from_iter_values([12345, 456]) - .with_precision_and_scale(4, 10) - .unwrap(); - } - - #[test] - fn test_decimal_array_fmt_debug() { - let arr = [Some(8887000000_i128), Some(-8887000000_i128), None] - .into_iter() - .collect::() - .with_precision_and_scale(23, 6) - .unwrap(); - - assert_eq!( - "Decimal128Array<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]", - format!("{:?}", arr) - ); - } - - #[test] - fn test_decimal_array_from_fixed_size_binary() { - let value_data = ArrayData::builder(DataType::FixedSizeBinary(16)) - .offset(1) - .len(3) - .add_buffer(Buffer::from_slice_ref(&[99999_i128, 2, 34, 560])) - 
.null_bit_buffer(Some(Buffer::from_slice_ref(&[0b1010]))) - .build() - .unwrap(); - - let binary_array = FixedSizeBinaryArray::from(value_data); - let decimal = Decimal128Array::from_fixed_size_binary_array(binary_array, 38, 1); - - assert_eq!(decimal.len(), 3); - assert_eq!(decimal.value_as_string(0), "0.2".to_string()); - assert!(decimal.is_null(1)); - assert_eq!(decimal.value_as_string(2), "56.0".to_string()); - } - - #[test] - #[should_panic( - expected = "Value length of the array (8) must equal to the byte width of the decimal (16)" - )] - fn test_decimal_array_from_fixed_size_binary_wrong_length() { - let value_data = ArrayData::builder(DataType::FixedSizeBinary(8)) - .offset(1) - .len(3) - .add_buffer(Buffer::from_slice_ref(&[99999_i64, 2, 34, 560])) - .null_bit_buffer(Some(Buffer::from_slice_ref(&[0b1010]))) - .build() - .unwrap(); - - let binary_array = FixedSizeBinaryArray::from(value_data); - let _ = Decimal128Array::from_fixed_size_binary_array(binary_array, 38, 1); - } - - #[test] - #[allow(deprecated)] - fn test_decimal_array_from_fixed_size_list() { - let value_data = ArrayData::builder(DataType::UInt8) - .offset(16) - .len(48) - .add_buffer(Buffer::from_slice_ref(&[99999_i128, 12, 34, 56])) - .build() - .unwrap(); - - let null_buffer = Buffer::from_slice_ref(&[0b101]); - - // Construct a list array from the above two - let list_data_type = DataType::FixedSizeList( - Box::new(Field::new("item", DataType::UInt8, false)), - 16, - ); - let list_data = ArrayData::builder(list_data_type) - .len(2) - .null_bit_buffer(Some(null_buffer)) - .offset(1) - .add_child_data(value_data) - .build() - .unwrap(); - let list_array = FixedSizeListArray::from(list_data); - let decimal = Decimal128Array::from_fixed_size_list_array(list_array, 38, 0); - - assert_eq!(decimal.len(), 2); - assert!(decimal.is_null(0)); - assert_eq!(decimal.value_as_string(1), "56".to_string()); - } - - #[test] - #[allow(deprecated)] - #[should_panic(expected = "The child array cannot contain 
null values.")] - fn test_decimal_array_from_fixed_size_list_with_child_nulls_failed() { - let value_data = ArrayData::builder(DataType::UInt8) - .len(16) - .add_buffer(Buffer::from_slice_ref(&[12_i128])) - .null_bit_buffer(Some(Buffer::from_slice_ref(&[0b1010101010101010]))) - .build() - .unwrap(); - - // Construct a list array from the above two - let list_data_type = DataType::FixedSizeList( - Box::new(Field::new("item", DataType::UInt8, false)), - 16, - ); - let list_data = ArrayData::builder(list_data_type) - .len(1) - .add_child_data(value_data) - .build() - .unwrap(); - let list_array = FixedSizeListArray::from(list_data); - drop(Decimal128Array::from_fixed_size_list_array( - list_array, 38, 0, - )); - } - - #[test] - #[allow(deprecated)] - #[should_panic( - expected = "Value length of the array (8) must equal to the byte width of the decimal (16)" - )] - fn test_decimal_array_from_fixed_size_list_with_wrong_length() { - let value_data = ArrayData::builder(DataType::UInt8) - .len(16) - .add_buffer(Buffer::from_slice_ref(&[12_i128])) - .null_bit_buffer(Some(Buffer::from_slice_ref(&[0b1010101010101010]))) - .build() - .unwrap(); - - // Construct a list array from the above two - let list_data_type = DataType::FixedSizeList( - Box::new(Field::new("item", DataType::UInt8, false)), - 8, - ); - let list_data = ArrayData::builder(list_data_type) - .len(2) - .add_child_data(value_data) - .build() - .unwrap(); - let list_array = FixedSizeListArray::from(list_data); - drop(Decimal128Array::from_fixed_size_list_array( - list_array, 38, 0, - )); - } - - #[test] - fn test_decimal256_iter() { - let mut builder = Decimal256Builder::with_capacity(30, 76, 6); - let value = BigInt::from_str_radix("12345", 10).unwrap(); - let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap(); - builder.append_value(&decimal1).unwrap(); - - builder.append_null(); - - let value = BigInt::from_str_radix("56789", 10).unwrap(); - let decimal2 = Decimal256::from_big_int(&value, 76, 
6).unwrap(); - builder.append_value(&decimal2).unwrap(); - - let array: Decimal256Array = builder.finish(); - - let collected: Vec<_> = array.iter().collect(); - assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected); - } - - #[test] - fn test_from_iter_decimal256array() { - let value1 = BigInt::from_str_radix("12345", 10).unwrap(); - let value2 = BigInt::from_str_radix("56789", 10).unwrap(); - - let array: Decimal256Array = - vec![Some(value1.clone()), None, Some(value2.clone())] - .into_iter() - .collect(); - assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal256(76, 10)); - assert_eq!( - Decimal256::from_big_int( - &value1, - DECIMAL256_MAX_PRECISION, - DECIMAL_DEFAULT_SCALE, - ) - .unwrap(), - array.value(0) - ); - assert!(!array.is_null(0)); - assert!(array.is_null(1)); - assert_eq!( - Decimal256::from_big_int( - &value2, - DECIMAL256_MAX_PRECISION, - DECIMAL_DEFAULT_SCALE, - ) - .unwrap(), - array.value(2) - ); - assert!(!array.is_null(2)); - } - - #[test] - fn test_from_iter_decimal128array() { - let array: Decimal128Array = vec![ - Some(Decimal128::new_from_i128(38, 10, -100)), - None, - Some(Decimal128::new_from_i128(38, 10, 101)), - ] - .into_iter() - .collect(); - assert_eq!(array.len(), 3); - assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); - assert!(!array.is_null(0)); - assert!(array.is_null(1)); - assert_eq!(101_i128, array.value(2).into()); - assert!(!array.is_null(2)); - } - - #[test] - #[should_panic( - expected = "Trying to access an element at index 4 from a DecimalArray of length 3" - )] - fn test_fixed_size_binary_array_get_value_index_out_of_bound() { - let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); - - array.value(4); - } - - #[test] - #[should_panic( - expected = "Expected data type to match Decimal256(76, 10) got Decimal128(38, 10)" - )] - fn test_from_array_data_validation() { - let array = 
Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); - let _ = Decimal256Array::from(array.into_data()); - } -} diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index e2ea6154912..1613e4a69b8 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -31,9 +31,6 @@ pub use binary_array::*; mod boolean_array; pub use boolean_array::*; -mod decimal_array; -pub use decimal_array::*; - mod dictionary_array; pub use dictionary_array::*; @@ -449,12 +446,6 @@ impl PartialEq for FixedSizeBinaryArray { } } -impl PartialEq for Decimal128Array { - fn eq(&self, other: &Self) -> bool { - self.data().eq(other.data()) - } -} - impl PartialEq for GenericListArray { fn eq(&self, other: &Self) -> bool { self.data().eq(other.data()) diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 895c80b0753..377523267a4 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -16,6 +16,7 @@ // under the License. 
use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; +use crate::decimal::Decimal; use crate::iterator::PrimitiveIter; use crate::raw_pointer::RawPtrBox; use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time}; @@ -25,7 +26,7 @@ use crate::{print_long_array, Array, ArrayAccessor}; use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer}; use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::ArrayData; -use arrow_schema::DataType; +use arrow_schema::{ArrowError, DataType}; use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime}; use half::f16; use std::any::Any; @@ -176,6 +177,9 @@ pub type DurationMillisecondArray = PrimitiveArray; pub type DurationMicrosecondArray = PrimitiveArray; pub type DurationNanosecondArray = PrimitiveArray; +pub type Decimal128Array = PrimitiveArray; +pub type Decimal256Array = PrimitiveArray; + /// Trait bridging the dynamic-typed nature of Arrow (via [`DataType`]) with the /// static-typed nature of rust types ([`ArrowNativeType`]) for all types that implement [`ArrowNativeType`]. pub trait ArrowPrimitiveType: 'static { @@ -842,9 +846,160 @@ impl From for PrimitiveArray { } } +impl PrimitiveArray { + /// Returns a Decimal array with the same data as self, with the + /// specified precision and scale. + /// + /// Returns an Error if: + /// 1. `precision` is larger than `T::MAX_PRECISION` + /// 2. `scale` is larger than `T::MAX_SCALE` + /// 3. 
`scale` is > `precision` + /// + /// Values are not checked against the new precision; call + /// `validate_decimal_precision` to verify them. + pub fn with_precision_and_scale( + self, + precision: u8, + scale: u8, + ) -> Result + where + Self: Sized, + { + // validate precision and scale + self.validate_precision_scale(precision, scale)?; + + // clone the existing ArrayData and replace only its data type + let new_data_type = T::TYPE_CONSTRUCTOR(precision, scale); + let data = self.data().clone().into_builder().data_type(new_data_type); + + // SAFETY: + // precision and scale were validated above; nothing but the data type changes + Ok(unsafe { data.build_unchecked().into() }) + } + + // validate that the new precision and scale are within the limits of `T` + fn validate_precision_scale( + &self, + precision: u8, + scale: u8, + ) -> Result<(), ArrowError> { + if precision > T::MAX_PRECISION { + return Err(ArrowError::InvalidArgumentError(format!( + "precision {} is greater than max {}", + precision, + T::MAX_PRECISION + ))); + } + if scale > T::MAX_SCALE { + return Err(ArrowError::InvalidArgumentError(format!( + "scale {} is greater than max {}", + scale, + T::MAX_SCALE + ))); + } + if scale > precision { + return Err(ArrowError::InvalidArgumentError(format!( + "scale {} is greater than precision {}", + scale, precision + ))); + } + + Ok(()) + } + + /// Validates values in this array can be properly interpreted + /// with the specified precision. 
+ pub fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { + (0..self.len()).try_for_each(|idx| { + if self.is_valid(idx) { + // SAFETY: `idx` comes from `0..self.len()` + let decimal = unsafe { self.value_unchecked(idx) }; + T::validate_decimal_precision(decimal, precision) + } else { + Ok(()) + } + }) + } + + /// Formats the value at index `row` as a string, using the precision and + /// scale read from this array's data type. + /// + /// Returns an error if the precision or scale cannot be read. + pub fn value_as_string(&self, row: usize) -> Result { + let p = self.precision()?; + let s = self.scale()?; + Ok(Decimal::::new(p, s, &T::to_native(self.value(row))).to_string()) + } + + /// Returns the precision recorded in this array's decimal data type. + /// + /// Returns an error if the data type is not the decimal type selected by `T::BYTE_LENGTH`. + pub fn precision(&self) -> Result { + match T::BYTE_LENGTH { + 16 => { + if let DataType::Decimal128(p, _) = self.data().data_type() { + Ok(*p) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "Decimal128Array datatype is not DataType::Decimal128 but {}", + self.data_type() + ))) + } + } + 32 => { + if let DataType::Decimal256(p, _) = self.data().data_type() { + Ok(*p) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "Decimal256Array datatype is not DataType::Decimal256 but {}", + self.data_type() + ))) + } + } + other => Err(ArrowError::InvalidArgumentError(format!( + "Unsupported byte length for decimal array {}", + other + ))), + } + } + + /// Returns the scale recorded in this array's decimal data type. + /// + /// Returns an error if the data type is not the decimal type selected by `T::BYTE_LENGTH`. 
+ pub fn scale(&self) -> Result { + match T::BYTE_LENGTH { + 16 => { + if let DataType::Decimal128(_, s) = self.data().data_type() { + Ok(*s) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "Decimal128Array datatype is not DataType::Decimal128 but {}", + self.data_type() + ))) + } + } + 32 => { + if let DataType::Decimal256(_, s) = self.data().data_type() { + Ok(*s) + } else { + Err(ArrowError::InvalidArgumentError(format!( + "Decimal256Array datatype is not DataType::Decimal256 but {}", + self.data_type() + ))) + } + } + other => Err(ArrowError::InvalidArgumentError(format!( + "Unsupported byte length for decimal array {}", + other + ))), + } + } +} + #[cfg(test)] mod tests { use super::*; + use crate::builder::{Decimal128Builder, Decimal256Builder}; use crate::BooleanArray; #[test] @@ -1408,4 +1563,239 @@ mod tests { 
let array = PrimitiveArray::::from(array.data().clone()); assert_eq!(array.values(), &values); } + + #[test] + fn test_decimal_array() { + // let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + // let val_neg_8887: [u8; 16] = [64, 36, 75, 238, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255]; + let values: [u8; 32] = [ + 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ]; + let array_data = ArrayData::builder(DataType::Decimal128(38, 6)) + .len(2) + .add_buffer(Buffer::from(&values[..])) + .build() + .unwrap(); + let decimal_array = Decimal128Array::from(array_data); + assert_eq!(8_887_000_000_i128, decimal_array.value(0).into()); + assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into()); + } + + #[test] + fn test_decimal_append_error_value() { + let mut decimal_builder = Decimal128Builder::with_capacity(10); + decimal_builder.append_value(123456); + decimal_builder.append_value(12345); + let result = decimal_builder.finish().with_precision_and_scale(5, 3); + assert!(result.is_ok()); + let arr = result.unwrap(); + assert_eq!("12.345", arr.value_as_string(1).unwrap()); + + // Validate it explicitly + let result = arr.validate_decimal_precision(5); + let error = result.unwrap_err(); + assert_eq!( + "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. 
Max is 99999", + error.to_string() + ); + + decimal_builder = Decimal128Builder::new(); + decimal_builder.append_value(100); + decimal_builder.append_value(99); + decimal_builder.append_value(-100); + decimal_builder.append_value(-99); + let result = decimal_builder.finish().with_precision_and_scale(2, 1); + assert!(result.is_ok()); + let arr = result.unwrap(); + assert_eq!("9.9", arr.value_as_string(1).unwrap()); + assert_eq!("-9.9", arr.value_as_string(3).unwrap()); + + // Validate it explicitly + let result = arr.validate_decimal_precision(2); + let error = result.unwrap_err(); + assert_eq!( + "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99", + error.to_string() + ); + } + + #[test] + fn test_decimal_from_iter_values() { + let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); + assert_eq!(-100_i128, array.value(0).into()); + assert!(!array.is_null(0)); + assert_eq!(0_i128, array.value(1).into()); + assert!(!array.is_null(1)); + assert_eq!(101_i128, array.value(2).into()); + assert!(!array.is_null(2)); + } + + #[test] + fn test_decimal_from_iter() { + let array: Decimal128Array = + vec![Some(-100), None, Some(101)].into_iter().collect(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); + assert_eq!(-100_i128, array.value(0).into()); + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert_eq!(101_i128, array.value(2).into()); + assert!(!array.is_null(2)); + } + + #[test] + fn test_decimal_iter_sized() { + let data = vec![Some(-100), None, Some(101)]; + let array: Decimal128Array = data.into_iter().collect(); + let mut iter = array.into_iter(); + + // is exact sized + assert_eq!(array.len(), 3); + + // size_hint is reported correctly + assert_eq!(iter.size_hint(), (3, Some(3))); + iter.next().unwrap(); + assert_eq!(iter.size_hint(), (2, Some(2))); 
+ iter.next().unwrap(); + iter.next().unwrap(); + assert_eq!(iter.size_hint(), (0, Some(0))); + assert!(iter.next().is_none()); + assert_eq!(iter.size_hint(), (0, Some(0))); + } + + #[test] + fn test_decimal_array_value_as_string() { + let arr = [123450, -123450, 100, -100, 10, -10, 0] + .into_iter() + .map(Some) + .collect::() + .with_precision_and_scale(6, 3) + .unwrap(); + + assert_eq!("123.450", arr.value_as_string(0).unwrap()); + assert_eq!("-123.450", arr.value_as_string(1).unwrap()); + assert_eq!("0.100", arr.value_as_string(2).unwrap()); + assert_eq!("-0.100", arr.value_as_string(3).unwrap()); + assert_eq!("0.010", arr.value_as_string(4).unwrap()); + assert_eq!("-0.010", arr.value_as_string(5).unwrap()); + assert_eq!("0.000", arr.value_as_string(6).unwrap()); + } + + #[test] + fn test_decimal_array_with_precision_and_scale() { + let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]) + .with_precision_and_scale(20, 2) + .unwrap(); + + assert_eq!(arr.data_type(), &DataType::Decimal128(20, 2)); + assert_eq!(arr.precision().unwrap(), 20); + assert_eq!(arr.scale().unwrap(), 2); + + let actual: Vec<_> = (0..arr.len()) + .map(|i| arr.value_as_string(i).unwrap()) + .collect(); + let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"]; + + assert_eq!(actual, expected); + } + + #[test] + #[should_panic( + expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. 
Min is -99999" + )] + fn test_decimal_array_with_precision_and_scale_out_of_range() { + let arr = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432]) + // precision is too small to hold value + .with_precision_and_scale(5, 2) + .unwrap(); + arr.validate_decimal_precision(5).unwrap(); + } + + #[test] + #[should_panic(expected = "precision 40 is greater than max 38")] + fn test_decimal_array_with_precision_and_scale_invalid_precision() { + Decimal128Array::from_iter_values([12345, 456]) + .with_precision_and_scale(40, 2) + .unwrap(); + } + + #[test] + #[should_panic(expected = "scale 40 is greater than max 38")] + fn test_decimal_array_with_precision_and_scale_invalid_scale() { + Decimal128Array::from_iter_values([12345, 456]) + .with_precision_and_scale(20, 40) + .unwrap(); + } + + #[test] + #[should_panic(expected = "scale 10 is greater than precision 4")] + fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() { + Decimal128Array::from_iter_values([12345, 456]) + .with_precision_and_scale(4, 10) + .unwrap(); + } + + #[test] + fn test_decimal256_iter() { + let mut builder = Decimal256Builder::with_capacity(30); + let decimal1 = i256::from_i128(12345); + builder.append_value(decimal1); + + builder.append_null(); + + let decimal2 = i256::from_i128(56789); + builder.append_value(decimal2); + + let array: Decimal256Array = + builder.finish().with_precision_and_scale(76, 6).unwrap(); + + let collected: Vec<_> = array.iter().collect(); + assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected); + } + + #[test] + fn test_from_iter_decimal256array() { + let value1 = i256::from_i128(12345); + let value2 = i256::from_i128(56789); + + let mut array: Decimal256Array = + vec![Some(value1.clone()), None, Some(value2.clone())] + .into_iter() + .collect(); + array = array.with_precision_and_scale(76, 10).unwrap(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal256(76, 10)); + assert_eq!(value1, 
array.value(0)); + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert_eq!(value2, array.value(2)); + assert!(!array.is_null(2)); + } + + #[test] + fn test_from_iter_decimal128array() { + let mut array: Decimal128Array = + vec![Some(-100), None, Some(101)].into_iter().collect(); + array = array.with_precision_and_scale(38, 10).unwrap(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); + assert_eq!(-100_i128, array.value(0).into()); + assert!(!array.is_null(0)); + assert!(array.is_null(1)); + assert_eq!(101_i128, array.value(2).into()); + assert!(!array.is_null(2)); + } + + #[test] + #[should_panic( + expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3" + )] + fn test_fixed_size_binary_array_get_value_index_out_of_bound() { + let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); + + array.value(4); + } } diff --git a/arrow-array/src/builder/decimal_builder.rs b/arrow-array/src/builder/decimal_builder.rs deleted file mode 100644 index 096cbec3a6c..00000000000 --- a/arrow-array/src/builder/decimal_builder.rs +++ /dev/null @@ -1,382 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -use crate::builder::{ArrayBuilder, FixedSizeBinaryBuilder}; -use crate::decimal::Decimal256; -use crate::{ArrayRef, Decimal128Array, Decimal256Array}; -use arrow_data::decimal::{ - validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, -}; -use arrow_schema::ArrowError; -use std::any::Any; -use std::sync::Arc; - -/// Array Builder for [`Decimal128Array`] -/// -/// See [`Decimal128Array`] for example. -/// -#[derive(Debug)] -pub struct Decimal128Builder { - builder: FixedSizeBinaryBuilder, - precision: u8, - scale: u8, - - /// Should i128 values be validated for compatibility with scale and precision? - /// defaults to true - value_validation: bool, -} - -/// Array Builder for [`Decimal256Array`] -/// -/// See [`Decimal256Array`] for example. -#[derive(Debug)] -pub struct Decimal256Builder { - builder: FixedSizeBinaryBuilder, - precision: u8, - scale: u8, - - /// Should decimal values be validated for compatibility with scale and precision? - /// defaults to true - value_validation: bool, -} - -impl Decimal128Builder { - const BYTE_LENGTH: i32 = 16; - - /// Creates a new [`Decimal128Builder`] - pub fn new(precision: u8, scale: u8) -> Self { - Self::with_capacity(1024, precision, scale) - } - - /// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values - /// that can be appended without reallocating - pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self { - Self { - builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), - precision, - scale, - value_validation: true, - } - } - - /// Disable validation - /// - /// # Safety - /// - /// After disabling validation, caller must ensure that appended values are compatible - /// for the specified precision and scale. - pub unsafe fn disable_value_validation(&mut self) { - self.value_validation = false; - } - - /// Appends a decimal value into the builder. 
- #[inline] - pub fn append_value(&mut self, value: impl Into) -> Result<(), ArrowError> { - let value = value.into(); - if self.value_validation { - validate_decimal_precision(value, self.precision)? - } - let value_as_bytes: [u8; 16] = value.to_le_bytes(); - self.builder.append_value(value_as_bytes.as_slice()) - } - - /// Append a null value to the array. - #[inline] - pub fn append_null(&mut self) { - self.builder.append_null() - } - - /// Appends an `Option>` into the builder. - #[inline] - pub fn append_option( - &mut self, - value: Option>, - ) -> Result<(), ArrowError> { - match value { - None => { - self.append_null(); - Ok(()) - } - Some(value) => self.append_value(value), - } - } - - /// Builds the `Decimal128Array` and reset this builder. - pub fn finish(&mut self) -> Decimal128Array { - Decimal128Array::from_fixed_size_binary_array( - self.builder.finish(), - self.precision, - self.scale, - ) - } -} - -impl ArrayBuilder for Decimal128Builder { - /// Returns the builder as a non-mutable `Any` reference. - fn as_any(&self) -> &dyn Any { - self - } - - /// Returns the builder as a mutable `Any` reference. - fn as_any_mut(&mut self) -> &mut dyn Any { - self - } - - /// Returns the boxed builder as a box of `Any`. - fn into_box_any(self: Box) -> Box { - self - } - - /// Returns the number of array slots in the builder - fn len(&self) -> usize { - self.builder.len() - } - - /// Returns whether the number of array slots is zero - fn is_empty(&self) -> bool { - self.builder.is_empty() - } - - /// Builds the array and reset this builder. 
- fn finish(&mut self) -> ArrayRef { - Arc::new(self.finish()) - } -} - -impl Decimal256Builder { - const BYTE_LENGTH: i32 = 32; - - /// Creates a new [`Decimal256Builder`] - pub fn new(precision: u8, scale: u8) -> Self { - Self::with_capacity(1024, precision, scale) - } - - /// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values - /// that can be appended without reallocating - pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self { - Self { - builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH), - precision, - scale, - value_validation: true, - } - } - - /// Disable validation - /// - /// # Safety - /// - /// After disabling validation, caller must ensure that appended values are compatible - /// for the specified precision and scale. - pub unsafe fn disable_value_validation(&mut self) { - self.value_validation = false; - } - - /// Appends a [`Decimal256`] number into the builder. - /// - /// Returns an error if `value` has different precision, scale or length in bytes than this builder - #[inline] - pub fn append_value(&mut self, value: &Decimal256) -> Result<(), ArrowError> { - let value = if self.value_validation { - let raw_bytes = value.raw_value(); - validate_decimal256_precision_with_lt_bytes(raw_bytes, self.precision)?; - value - } else { - value - }; - - if self.precision != value.precision() || self.scale != value.scale() { - return Err(ArrowError::InvalidArgumentError( - "Decimal value does not have the same precision or scale as Decimal256Builder".to_string() - )); - } - - let value_as_bytes = value.raw_value(); - - if Self::BYTE_LENGTH != value_as_bytes.len() as i32 { - return Err(ArrowError::InvalidArgumentError( - "Byte slice does not have the same length as Decimal256Builder value lengths".to_string() - )); - } - self.builder.append_value(value_as_bytes) - } - - /// Append a null value to the array. 
- #[inline] - pub fn append_null(&mut self) { - self.builder.append_null() - } - - /// Appends an `Option<&Decimal256>` into the builder. - #[inline] - pub fn append_option( - &mut self, - value: Option<&Decimal256>, - ) -> Result<(), ArrowError> { - match value { - None => { - self.append_null(); - Ok(()) - } - Some(value) => self.append_value(value), - } - } - - /// Builds the [`Decimal256Array`] and reset this builder. - pub fn finish(&mut self) -> Decimal256Array { - Decimal256Array::from_fixed_size_binary_array( - self.builder.finish(), - self.precision, - self.scale, - ) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::decimal::Decimal128; - use crate::Array; - use arrow_schema::DataType; - use num::{BigInt, Num}; - - #[test] - fn test_decimal_builder() { - let mut builder = Decimal128Builder::new(38, 6); - - builder.append_value(8_887_000_000_i128).unwrap(); - builder.append_null(); - builder.append_value(-8_887_000_000_i128).unwrap(); - builder.append_option(None::).unwrap(); - builder.append_option(Some(8_887_000_000_i128)).unwrap(); - let decimal_array: Decimal128Array = builder.finish(); - - assert_eq!(&DataType::Decimal128(38, 6), decimal_array.data_type()); - assert_eq!(5, decimal_array.len()); - assert_eq!(2, decimal_array.null_count()); - assert_eq!(32, decimal_array.value_offset(2)); - assert_eq!(16, decimal_array.value_length()); - } - - #[test] - fn test_decimal_builder_with_decimal128() { - let mut builder = Decimal128Builder::new(38, 6); - - builder - .append_value(Decimal128::new_from_i128(30, 38, 8_887_000_000_i128)) - .unwrap(); - builder.append_null(); - builder - .append_value(Decimal128::new_from_i128(30, 38, -8_887_000_000_i128)) - .unwrap(); - let decimal_array: Decimal128Array = builder.finish(); - - assert_eq!(&DataType::Decimal128(38, 6), decimal_array.data_type()); - assert_eq!(3, decimal_array.len()); - assert_eq!(1, decimal_array.null_count()); - assert_eq!(32, decimal_array.value_offset(2)); - assert_eq!(16, 
decimal_array.value_length()); - } - - #[test] - fn test_decimal256_builder() { - let mut builder = Decimal256Builder::new(40, 6); - - let mut bytes = [0_u8; 32]; - bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes()); - let value = Decimal256::try_new_from_bytes(40, 6, &bytes).unwrap(); - builder.append_value(&value).unwrap(); - - builder.append_null(); - - bytes = [255; 32]; - let value = Decimal256::try_new_from_bytes(40, 6, &bytes).unwrap(); - builder.append_value(&value).unwrap(); - - bytes = [0; 32]; - bytes[0..16].clone_from_slice(&0_i128.to_le_bytes()); - bytes[15] = 128; - let value = Decimal256::try_new_from_bytes(40, 6, &bytes).unwrap(); - builder.append_value(&value).unwrap(); - - builder.append_option(None::<&Decimal256>).unwrap(); - builder.append_option(Some(&value)).unwrap(); - - let decimal_array: Decimal256Array = builder.finish(); - - assert_eq!(&DataType::Decimal256(40, 6), decimal_array.data_type()); - assert_eq!(6, decimal_array.len()); - assert_eq!(2, decimal_array.null_count()); - assert_eq!(64, decimal_array.value_offset(2)); - assert_eq!(32, decimal_array.value_length()); - - assert_eq!(decimal_array.value(0).to_string(), "8887.000000"); - assert!(decimal_array.is_null(1)); - assert_eq!(decimal_array.value(2).to_string(), "-0.000001"); - assert_eq!( - decimal_array.value(3).to_string(), - "170141183460469231731687303715884.105728" - ); - } - - #[test] - #[should_panic( - expected = "Decimal value does not have the same precision or scale as Decimal256Builder" - )] - fn test_decimal256_builder_unmatched_precision_scale() { - let mut builder = Decimal256Builder::with_capacity(30, 10, 6); - - let mut bytes = [0_u8; 32]; - bytes[0..16].clone_from_slice(&8_887_000_000_i128.to_le_bytes()); - let value = Decimal256::try_new_from_bytes(40, 6, &bytes).unwrap(); - builder.append_value(&value).unwrap(); - } - - #[test] - #[should_panic( - expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large 
to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999" - )] - fn test_decimal256_builder_out_of_range_precision_scale() { - let mut builder = Decimal256Builder::new(75, 6); - - let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap(); - let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap(); - builder.append_value(&value).unwrap(); - } - - #[test] - #[should_panic( - expected = "9999999999999999999999999999999999999999999999999999999999999999999999999999 is too large to store in a Decimal256 of precision 75. Max is 999999999999999999999999999999999999999999999999999999999999999999999999999" - )] - fn test_decimal256_data_validation() { - let mut builder = Decimal256Builder::new(75, 6); - // Disable validation at builder - unsafe { - builder.disable_value_validation(); - } - - let big_value = BigInt::from_str_radix("9999999999999999999999999999999999999999999999999999999999999999999999999999", 10).unwrap(); - let value = Decimal256::from_big_int(&big_value, 75, 6).unwrap(); - builder - .append_value(&value) - .expect("should not validate invalid value at builder"); - - let array = builder.finish(); - let array_data = array.data(); - array_data.validate_values().unwrap(); - } -} diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index cd4a82890a2..5edf011d7bf 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -24,8 +24,6 @@ mod boolean_builder; pub use boolean_builder::*; mod buffer_builder; pub use buffer_builder::*; -mod decimal_builder; -pub use decimal_builder::*; mod fixed_size_binary_builder; pub use fixed_size_binary_builder::*; mod fixed_size_list_builder; diff --git a/arrow-array/src/builder/primitive_builder.rs b/arrow-array/src/builder/primitive_builder.rs index c5b8c955707..ed3594c60df 100644 --- a/arrow-array/src/builder/primitive_builder.rs 
+++ b/arrow-array/src/builder/primitive_builder.rs @@ -52,6 +52,9 @@ pub type DurationMillisecondBuilder = PrimitiveBuilder; pub type DurationMicrosecondBuilder = PrimitiveBuilder; pub type DurationNanosecondBuilder = PrimitiveBuilder; +pub type Decimal128Builder = PrimitiveBuilder; +pub type Decimal256Builder = PrimitiveBuilder; + /// Array builder for fixed-width primitive types #[derive(Debug)] pub struct PrimitiveBuilder { diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 61993140394..69c092c0368 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -109,9 +109,9 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len)) } - DataType::Decimal128(precision, scale) => Box::new( - Decimal128Builder::with_capacity(capacity, *precision, *scale), - ), + DataType::Decimal128(_precision, _scale) => { + Box::new(Decimal128Builder::with_capacity(capacity)) + } DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)), DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)), DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)), diff --git a/arrow-array/src/decimal.rs b/arrow-array/src/decimal.rs index 323281d9233..34305333064 100644 --- a/arrow-array/src/decimal.rs +++ b/arrow-array/src/decimal.rs @@ -18,6 +18,7 @@ //! 
Decimal related utilities, types and functions use crate::types::{Decimal128Type, Decimal256Type, DecimalType}; +use arrow_buffer::i256; use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; use arrow_schema::{ArrowError, DataType}; use num::{BigInt, Signed}; @@ -33,7 +34,7 @@ use std::cmp::{min, Ordering}; pub struct Decimal { precision: u8, scale: u8, - value: T::Native, + value: T::DecimalNative, } /// Manually implement to avoid `T: Debug` bound @@ -76,7 +77,7 @@ impl Decimal { pub fn try_new_from_bytes( precision: u8, scale: u8, - bytes: &T::Native, + bytes: &T::DecimalNative, ) -> Result where Self: Sized, @@ -111,15 +112,16 @@ impl Decimal { /// Safety: /// This method doesn't check if the precision and scale are valid. /// Use `try_new_from_bytes` for safe constructor. - pub fn new(precision: u8, scale: u8, bytes: &T::Native) -> Self { + pub fn new(precision: u8, scale: u8, bytes: &T::DecimalNative) -> Self { Self { precision, scale, value: *bytes, } } + /// Returns the raw bytes of the integer representation of the decimal. - pub fn raw_value(&self) -> &T::Native { + pub fn raw_value(&self) -> &T::DecimalNative { &self.value } @@ -245,6 +247,10 @@ impl Decimal256 { Decimal256::try_new_from_bytes(precision, scale, &bytes) } + pub fn from_i256(precision: u8, scale: u8, value: i256) -> Self { + Decimal256::new(precision, scale, &value.to_le_bytes()) + } + /// Constructs a `BigInt` from this `Decimal256` value. pub fn to_big_int(self) -> BigInt { BigInt::from_signed_bytes_le(&self.value) diff --git a/arrow-array/src/iterator.rs b/arrow-array/src/iterator.rs index 25727e0d75f..351f90bacfc 100644 --- a/arrow-array/src/iterator.rs +++ b/arrow-array/src/iterator.rs @@ -18,10 +18,9 @@ //! 
Idiomatic iterators for [`Array`](crate::Array) use crate::array::{ - ArrayAccessor, BooleanArray, DecimalArray, FixedSizeBinaryArray, GenericBinaryArray, + ArrayAccessor, BooleanArray, FixedSizeBinaryArray, GenericBinaryArray, GenericListArray, GenericStringArray, PrimitiveArray, }; -use crate::types::{Decimal128Type, Decimal256Type}; /// An iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`] /// @@ -123,15 +122,6 @@ pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray>; pub type FixedSizeBinaryIter<'a> = ArrayIter<&'a FixedSizeBinaryArray>; pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray>; -pub type DecimalIter<'a, T> = ArrayIter<&'a DecimalArray>; -/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a -/// [`super::Decimal128Array`] -pub type Decimal128Iter<'a> = DecimalIter<'a, Decimal128Type>; - -/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a -/// [`super::Decimal256Array`] -pub type Decimal256Iter<'a> = DecimalIter<'a, Decimal256Type>; - #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 9bd43369258..2e161813dbc 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -21,10 +21,11 @@ use crate::array::ArrowPrimitiveType; use crate::delta::shift_months; use arrow_buffer::i256; use arrow_data::decimal::{ + validate_decimal256_precision_with_lt_bytes, validate_decimal_precision, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL_DEFAULT_SCALE, }; -use arrow_schema::{DataType, IntervalUnit, TimeUnit}; +use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit}; use chrono::{Duration, NaiveDate}; use half::f16; use std::ops::{Add, Sub}; @@ -491,14 +492,23 @@ impl NativeDecimalType for [u8; N] { /// [`DecimalArray`]: [crate::array::DecimalArray] /// [`Decimal128Array`]: [crate::array::Decimal128Array] 
/// [`Decimal256Array`]: [crate::array::Decimal256Array] -pub trait DecimalType: 'static + Send + Sync + private::DecimalTypeSealed { - type Native: NativeDecimalType; +pub trait DecimalType: + 'static + Send + Sync + ArrowPrimitiveType + private::DecimalTypeSealed +{ + type DecimalNative: NativeDecimalType; const BYTE_LENGTH: usize; const MAX_PRECISION: u8; const MAX_SCALE: u8; const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType; const DEFAULT_TYPE: DataType; + + fn to_native(num: ::Native) -> Self::DecimalNative; + + fn validate_decimal_precision( + num: ::Native, + precision: u8, + ) -> Result<(), ArrowError>; } /// The decimal type for a Decimal128Array @@ -506,7 +516,7 @@ pub trait DecimalType: 'static + Send + Sync + private::DecimalTypeSealed { pub struct Decimal128Type {} impl DecimalType for Decimal128Type { - type Native = [u8; 16]; + type DecimalNative = [u8; 16]; const BYTE_LENGTH: usize = 16; const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION; @@ -514,6 +524,14 @@ impl DecimalType for Decimal128Type { const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = DataType::Decimal128; const DEFAULT_TYPE: DataType = DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); + + fn to_native(num: i128) -> [u8; 16] { + num.to_le_bytes() + } + + fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> { + validate_decimal_precision(num, precision) + } } impl ArrowPrimitiveType for Decimal128Type { @@ -527,7 +545,7 @@ impl ArrowPrimitiveType for Decimal128Type { pub struct Decimal256Type {} impl DecimalType for Decimal256Type { - type Native = [u8; 32]; + type DecimalNative = [u8; 32]; const BYTE_LENGTH: usize = 32; const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION; @@ -535,6 +553,14 @@ impl DecimalType for Decimal256Type { const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = DataType::Decimal256; const DEFAULT_TYPE: DataType = DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); + + fn to_native(num: i256) -> [u8; 32] { + 
num.to_le_bytes() + } + + fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> { + validate_decimal256_precision_with_lt_bytes(&num.to_le_bytes(), precision) + } } impl ArrowPrimitiveType for Decimal256Type { diff --git a/arrow-buffer/src/bigint.rs b/arrow-buffer/src/bigint.rs index df4cfd8ea59..7873064b45f 100644 --- a/arrow-buffer/src/bigint.rs +++ b/arrow-buffer/src/bigint.rs @@ -86,6 +86,16 @@ impl i256 { } } + pub fn from_i128(v: i128) -> Self { + let mut bytes = if num::Signed::is_negative(&v) { + [255_u8; 32] + } else { + [0; 32] + }; + bytes[0..16].copy_from_slice(&v.to_le_bytes()); + Self::from_le_bytes(bytes) + } + /// Create an i256 from the provided low u128 and high i128 #[inline] pub fn from_parts(low: u128, high: i128) -> Self { diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from_vec.rs index 59bef65a18c..229ac0b87d4 100644 --- a/arrow/benches/array_from_vec.rs +++ b/arrow/benches/array_from_vec.rs @@ -23,8 +23,7 @@ use criterion::Criterion; extern crate arrow; use arrow::array::*; -use arrow::util::decimal::Decimal256; -use num::BigInt; +use arrow_buffer::i256; use rand::Rng; use std::{convert::TryFrom, sync::Arc}; @@ -87,7 +86,7 @@ fn decimal128_array_from_vec(array: &[Option]) { ); } -fn decimal256_array_from_vec(array: &[Option]) { +fn decimal256_array_from_vec(array: &[Option]) { criterion::black_box( array .iter() @@ -117,8 +116,7 @@ fn decimal_benchmark(c: &mut Criterion) { let mut array = vec![]; let mut rng = rand::thread_rng(); for _ in 0..size { - let decimal = - Decimal256::from(BigInt::from(rng.gen_range::(0..9999999999999))); + let decimal = i256::from_i128(rng.gen_range::(0..9999999999999)); array.push(Some(decimal)); } diff --git a/arrow/benches/builder.rs b/arrow/benches/builder.rs index c2ebcb3daa5..8cb226e8905 100644 --- a/arrow/benches/builder.rs +++ b/arrow/benches/builder.rs @@ -22,12 +22,11 @@ extern crate rand; use std::mem::size_of; use criterion::*; -use num::BigInt; use 
rand::distributions::Standard; use arrow::array::*; -use arrow::util::decimal::Decimal256; use arrow::util::test_util::seedable_rng; +use arrow_buffer::i256; use rand::Rng; // Build arrays with 512k elements. @@ -112,13 +111,16 @@ fn bench_decimal128(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { let mut rng = rand::thread_rng(); - let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE, 38, 0); + let mut decimal_builder = Decimal128Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { - decimal_builder - .append_value(rng.gen_range::(0..9999999999)) - .unwrap(); + decimal_builder.append_value(rng.gen_range::(0..9999999999)); } - black_box(decimal_builder.finish()); + black_box( + decimal_builder + .finish() + .with_precision_and_scale(38, 0) + .unwrap(), + ); }) }); } @@ -127,16 +129,18 @@ fn bench_decimal256(c: &mut Criterion) { c.bench_function("bench_decimal128_builder", |b| { b.iter(|| { let mut rng = rand::thread_rng(); - let mut decimal_builder = - Decimal256Builder::with_capacity(BATCH_SIZE, 76, 10); + let mut decimal_builder = Decimal256Builder::with_capacity(BATCH_SIZE); for _ in 0..BATCH_SIZE { - decimal_builder - .append_value(&Decimal256::from(BigInt::from( - rng.gen_range::(0..99999999999), - ))) - .unwrap() + decimal_builder.append_value(i256::from_i128( + rng.gen_range::(0..99999999999), + )); } - black_box(decimal_builder.finish()); + black_box( + decimal_builder + .finish() + .with_precision_and_scale(76, 10) + .unwrap(), + ); }) }); } diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs index ac8fc08d921..2c3d8cd1678 100644 --- a/arrow/benches/cast_kernels.rs +++ b/arrow/benches/cast_kernels.rs @@ -29,8 +29,8 @@ use arrow::array::*; use arrow::compute::cast; use arrow::datatypes::*; use arrow::util::bench_util::*; -use arrow::util::decimal::Decimal256; use arrow::util::test_util::seedable_rng; +use arrow_buffer::i256; fn build_array(size: usize) -> ArrayRef where @@ 
-84,24 +84,34 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef { fn build_decimal128_array(size: usize, precision: u8, scale: u8) -> ArrayRef { let mut rng = seedable_rng(); - let mut builder = Decimal128Builder::with_capacity(size, precision, scale); + let mut builder = Decimal128Builder::with_capacity(size); for _ in 0..size { - let _ = builder.append_value(rng.gen_range::(0..1000000000)); + builder.append_value(rng.gen_range::(0..1000000000)); } - Arc::new(builder.finish()) + Arc::new( + builder + .finish() + .with_precision_and_scale(precision, scale) + .unwrap(), + ) } fn build_decimal256_array(size: usize, precision: u8, scale: u8) -> ArrayRef { let mut rng = seedable_rng(); - let mut builder = Decimal256Builder::with_capacity(size, precision, scale); + let mut builder = Decimal256Builder::with_capacity(size); let mut bytes = [0; 32]; for _ in 0..size { let num = rng.gen_range::(0..1000000000); bytes[0..16].clone_from_slice(&num.to_le_bytes()); - let _ = builder.append_value(&Decimal256::new(precision, scale, &bytes)); + builder.append_value(i256::from_le_bytes(bytes)); } - Arc::new(builder.finish()) + Arc::new( + builder + .finish() + .with_precision_and_scale(precision, scale) + .unwrap(), + ) } // cast array from specified primitive array type to desired data type diff --git a/arrow/benches/decimal_validate.rs b/arrow/benches/decimal_validate.rs index 555373e4a63..a70da1d2cfb 100644 --- a/arrow/benches/decimal_validate.rs +++ b/arrow/benches/decimal_validate.rs @@ -22,12 +22,11 @@ use arrow::array::{ Array, Decimal128Array, Decimal128Builder, Decimal256Array, Decimal256Builder, }; use criterion::Criterion; -use num::BigInt; use rand::Rng; extern crate arrow; -use arrow::util::decimal::Decimal256; +use arrow_buffer::i256; fn validate_decimal128_array(array: Decimal128Array) { array.with_precision_and_scale(35, 0).unwrap(); @@ -40,13 +39,14 @@ fn validate_decimal256_array(array: Decimal256Array) { fn 
validate_decimal128_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let size: i128 = 20000; - let mut decimal_builder = Decimal128Builder::with_capacity(size as usize, 38, 0); + let mut decimal_builder = Decimal128Builder::with_capacity(size as usize); for _ in 0..size { - decimal_builder - .append_value(rng.gen_range::(0..999999999999)) - .unwrap(); + decimal_builder.append_value(rng.gen_range::(0..999999999999)); } - let decimal_array = decimal_builder.finish(); + let decimal_array = decimal_builder + .finish() + .with_precision_and_scale(38, 0) + .unwrap(); let data = decimal_array.into_data(); c.bench_function("validate_decimal128_array 20000", |b| { b.iter(|| { @@ -59,13 +59,16 @@ fn validate_decimal128_benchmark(c: &mut Criterion) { fn validate_decimal256_benchmark(c: &mut Criterion) { let mut rng = rand::thread_rng(); let size: i128 = 20000; - let mut decimal_builder = Decimal256Builder::with_capacity(size as usize, 76, 0); + let mut decimal_builder = Decimal256Builder::with_capacity(size as usize); for _ in 0..size { let v = rng.gen_range::(0..999999999999999); - let decimal = Decimal256::from_big_int(&BigInt::from(v), 76, 0).unwrap(); - decimal_builder.append_value(&decimal).unwrap(); + let decimal = i256::from_i128(v); + decimal_builder.append_value(decimal); } - let decimal_array256_data = decimal_builder.finish(); + let decimal_array256_data = decimal_builder + .finish() + .with_precision_and_scale(76, 0) + .unwrap(); let data = decimal_array256_data.into_data(); c.bench_function("validate_decimal256_array 20000", |b| { b.iter(|| { diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 49a9b18d85f..b05e4c4ba7f 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -38,7 +38,6 @@ use chrono::format::strftime::StrftimeItems; use chrono::format::{parse, Parsed}; use chrono::{NaiveDateTime, Timelike}; -use std::ops::{Div, Mul}; use std::str; use std::sync::Arc; @@ -58,8 
+57,9 @@ use crate::{ buffer::Buffer, util::display::array_value_to_string, util::serialization::lexical_to_string, }; +use arrow_buffer::i256; use num::cast::AsPrimitive; -use num::{BigInt, NumCast, ToPrimitive}; +use num::{NumCast, ToPrimitive}; /// CastOptions provides a way to override the default cast behaviors #[derive(Debug)] @@ -386,7 +386,7 @@ macro_rules! cast_decimal_to_integer { if array.is_null(i) { value_builder.append_null(); } else { - let v = array.value(i).as_i128() / div; + let v = array.value(i) / div; // check the overflow // For example: Decimal(128,10,0) as i8 // 128 is out of range i8 @@ -416,7 +416,7 @@ macro_rules! cast_decimal_to_float { } else { // The range of f32 or f64 is larger than i128, we don't need to check overflow. // cast the i128 to f64 will lose precision, for example the `112345678901234568` will be as `112345678901234560`. - let v = (array.value(i).as_i128() as f64 / div) as $NATIVE_TYPE; + let v = (array.value(i) as f64 / div) as $NATIVE_TYPE; value_builder.append_value(v); } } @@ -1466,7 +1466,7 @@ fn cast_decimal_to_decimal( let div = 10_i128.pow((input_scale - output_scale) as u32); if BYTE_WIDTH1 == 16 { let array = array.as_any().downcast_ref::().unwrap(); - let iter = array.iter().map(|v| v.map(|v| v.as_i128() / div)); + let iter = array.iter().map(|v| v.map(|v| v.wrapping_div(div))); if BYTE_WIDTH2 == 16 { let output_array = iter .collect::() @@ -1475,7 +1475,7 @@ fn cast_decimal_to_decimal( Ok(Arc::new(output_array)) } else { let output_array = iter - .map(|v| v.map(BigInt::from)) + .map(|v| v.map(i256::from_i128)) .collect::() .with_precision_and_scale(*output_precision, *output_scale)?; @@ -1483,7 +1483,8 @@ fn cast_decimal_to_decimal( } } else { let array = array.as_any().downcast_ref::().unwrap(); - let iter = array.iter().map(|v| v.map(|v| v.to_big_int().div(div))); + let div = i256::from_i128(div); + let iter = array.iter().map(|v| v.map(|v| v.wrapping_div(div))); if BYTE_WIDTH2 == 16 { let values = iter 
.map(|v| { @@ -1521,7 +1522,7 @@ fn cast_decimal_to_decimal( let mul = 10_i128.pow((output_scale - input_scale) as u32); if BYTE_WIDTH1 == 16 { let array = array.as_any().downcast_ref::().unwrap(); - let iter = array.iter().map(|v| v.map(|v| v.as_i128() * mul)); + let iter = array.iter().map(|v| v.map(|v| v.wrapping_mul(mul))); if BYTE_WIDTH2 == 16 { let output_array = iter .collect::() @@ -1530,7 +1531,7 @@ fn cast_decimal_to_decimal( Ok(Arc::new(output_array)) } else { let output_array = iter - .map(|v| v.map(BigInt::from)) + .map(|v| v.map(i256::from_i128)) .collect::() .with_precision_and_scale(*output_precision, *output_scale)?; @@ -1538,7 +1539,8 @@ fn cast_decimal_to_decimal( } } else { let array = array.as_any().downcast_ref::().unwrap(); - let iter = array.iter().map(|v| v.map(|v| v.to_big_int().mul(mul))); + let mul = i256::from_i128(mul); + let iter = array.iter().map(|v| v.map(|v| v.wrapping_mul(mul))); if BYTE_WIDTH2 == 16 { let values = iter .map(|v| { @@ -2825,7 +2827,6 @@ where mod tests { use super::*; use crate::datatypes::TimeUnit; - use crate::util::decimal::{Decimal128, Decimal256}; use crate::{buffer::Buffer, util::display::array_value_to_string}; macro_rules! 
generate_cast_test_case { @@ -2865,7 +2866,7 @@ mod tests { } fn create_decimal256_array( - array: Vec>, + array: Vec>, precision: u8, scale: u8, ) -> Result { @@ -2876,6 +2877,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_cast_decimal128_to_decimal128() { let input_type = DataType::Decimal128(20, 3); let output_type = DataType::Decimal128(20, 4); @@ -2888,9 +2890,9 @@ mod tests { Decimal128Array, &output_type, vec![ - Some(Decimal128::new_from_i128(20, 4, 11234560_i128)), - Some(Decimal128::new_from_i128(20, 4, 21234560_i128)), - Some(Decimal128::new_from_i128(20, 4, 31234560_i128)), + Some(11234560_i128), + Some(21234560_i128), + Some(31234560_i128), None ] ); @@ -2899,9 +2901,12 @@ mod tests { let input_decimal_array = create_decimal_array(array, 10, 0).unwrap(); let array = Arc::new(input_decimal_array) as ArrayRef; let result = cast(&array, &DataType::Decimal128(2, 2)); - assert!(result.is_err()); + assert!(result.is_ok()); + let array = result.unwrap(); + let array: &Decimal128Array = as_primitive_array(&array); + let err = array.validate_decimal_precision(2); assert_eq!("Invalid argument error: 12345600 is too large to store in a Decimal128 of precision 2. 
Max is 99", - result.unwrap_err().to_string()); + err.unwrap_err().to_string()); } #[test] @@ -2917,18 +2922,9 @@ mod tests { Decimal256Array, &output_type, vec![ - Some( - Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4) - .unwrap() - ), - Some( - Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4) - .unwrap() - ), - Some( - Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4) - .unwrap() - ), + Some(i256::from_i128(11234560_i128)), + Some(i256::from_i128(21234560_i128)), + Some(i256::from_i128(31234560_i128)), None ] ); @@ -2940,9 +2936,9 @@ mod tests { let output_type = DataType::Decimal128(20, 4); assert!(can_cast_types(&input_type, &output_type)); let array = vec![ - Some(BigInt::from(1123456)), - Some(BigInt::from(2123456)), - Some(BigInt::from(3123456)), + Some(i256::from_i128(1123456)), + Some(i256::from_i128(2123456)), + Some(i256::from_i128(3123456)), None, ]; let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap(); @@ -2952,9 +2948,9 @@ mod tests { Decimal128Array, &output_type, vec![ - Some(Decimal128::new_from_i128(20, 4, 11234560_i128)), - Some(Decimal128::new_from_i128(20, 4, 21234560_i128)), - Some(Decimal128::new_from_i128(20, 4, 31234560_i128)), + Some(11234560_i128), + Some(21234560_i128), + Some(31234560_i128), None ] ); @@ -2966,9 +2962,9 @@ mod tests { let output_type = DataType::Decimal256(20, 4); assert!(can_cast_types(&input_type, &output_type)); let array = vec![ - Some(BigInt::from(1123456)), - Some(BigInt::from(2123456)), - Some(BigInt::from(3123456)), + Some(i256::from_i128(1123456)), + Some(i256::from_i128(2123456)), + Some(i256::from_i128(3123456)), None, ]; let input_decimal_array = create_decimal256_array(array, 20, 3).unwrap(); @@ -2978,18 +2974,9 @@ mod tests { Decimal256Array, &output_type, vec![ - Some( - Decimal256::from_big_int(&BigInt::from(11234560_i128), 20, 4) - .unwrap() - ), - Some( - Decimal256::from_big_int(&BigInt::from(21234560_i128), 20, 4) - .unwrap() - ), - Some( - 
Decimal256::from_big_int(&BigInt::from(31234560_i128), 20, 4) - .unwrap() - ), + Some(i256::from_i128(11234560_i128)), + Some(i256::from_i128(21234560_i128)), + Some(i256::from_i128(31234560_i128)), None ] ); @@ -3116,6 +3103,7 @@ mod tests { } #[test] + #[cfg(not(feature = "force_validate"))] fn test_cast_numeric_to_decimal() { // test negative cast type let decimal_type = DataType::Decimal128(38, 6); @@ -3158,11 +3146,11 @@ mod tests { Decimal128Array, &decimal_type, vec![ - Some(Decimal128::new_from_i128(38, 6, 1000000_i128)), - Some(Decimal128::new_from_i128(38, 6, 2000000_i128)), - Some(Decimal128::new_from_i128(38, 6, 3000000_i128)), + Some(1000000_i128), + Some(2000000_i128), + Some(3000000_i128), None, - Some(Decimal128::new_from_i128(38, 6, 5000000_i128)) + Some(5000000_i128) ] ); } @@ -3172,8 +3160,11 @@ mod tests { let array = Int8Array::from(vec![1, 2, 3, 4, 100]); let array = Arc::new(array) as ArrayRef; let casted_array = cast(&array, &DataType::Decimal128(3, 1)); - assert!(casted_array.is_err()); - assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. Max is 999", casted_array.unwrap_err().to_string()); + assert!(casted_array.is_ok()); + let array = casted_array.unwrap(); + let array: &Decimal128Array = as_primitive_array(&array); + let err = array.validate_decimal_precision(3); + assert_eq!("Invalid argument error: 1000 is too large to store in a Decimal128 of precision 3. 
Max is 999", err.unwrap_err().to_string()); // test f32 to decimal type let array = Float32Array::from(vec![ @@ -3190,12 +3181,12 @@ mod tests { Decimal128Array, &decimal_type, vec![ - Some(Decimal128::new_from_i128(38, 6, 1100000_i128)), - Some(Decimal128::new_from_i128(38, 6, 2200000_i128)), - Some(Decimal128::new_from_i128(38, 6, 4400000_i128)), + Some(1100000_i128), + Some(2200000_i128), + Some(4400000_i128), None, - Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), - Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), + Some(1123456_i128), + Some(1123456_i128), ] ); @@ -3215,13 +3206,13 @@ mod tests { Decimal128Array, &decimal_type, vec![ - Some(Decimal128::new_from_i128(38, 6, 1100000_i128)), - Some(Decimal128::new_from_i128(38, 6, 2200000_i128)), - Some(Decimal128::new_from_i128(38, 6, 4400000_i128)), + Some(1100000_i128), + Some(2200000_i128), + Some(4400000_i128), None, - Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), - Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), - Some(Decimal128::new_from_i128(38, 6, 1123456_i128)), + Some(1123456_i128), + Some(1123456_i128), + Some(1123456_i128), ] ); } diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index 5eaed4bc62b..ef423fcbf42 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -493,7 +493,7 @@ where .expect("Unable to downcast to decimal array"); let valids = value_indices .into_iter() - .map(|index| (index, decimal_array.value(index as usize).as_i128())) + .map(|index| (index, decimal_array.value(index as usize))) .collect::>(); sort_primitive_inner( decimal_values.len(), diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 714c29772a5..a399f060200 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -451,7 +451,7 @@ where if decimal_values.is_null(index) { Ok(None) } else { - Ok(Some(decimal_values.value(index).as_i128())) + 
Ok(Some(decimal_values.value(index))) } }); let t: Result>> = t.transpose(); @@ -461,7 +461,7 @@ where .collect::>()? // PERF: we could avoid re-validating that the data in // Decimal128Array was in range as we know it came from a valid Decimal128Array - .with_precision_and_scale(decimal_values.precision(), decimal_values.scale()) + .with_precision_and_scale(decimal_values.precision()?, decimal_values.scale()?) } /// `take` implementation for all primitive arrays diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 2f4ec1a1ca3..ab5947b4ece 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -707,8 +707,7 @@ fn build_decimal_array( precision: u8, scale: u8, ) -> Result { - let mut decimal_builder = - Decimal128Builder::with_capacity(rows.len(), precision, scale); + let mut decimal_builder = Decimal128Builder::with_capacity(rows.len()); for row in rows { let col_s = row.get(col_idx); match col_s { @@ -725,7 +724,7 @@ fn build_decimal_array( parse_decimal_with_parameter(s, precision, scale); match decimal_value { Ok(v) => { - decimal_builder.append_value(v)?; + decimal_builder.append_value(v); } Err(e) => { return Err(e); @@ -735,7 +734,11 @@ fn build_decimal_array( } } } - Ok(Arc::new(decimal_builder.finish())) + Ok(Arc::new( + decimal_builder + .finish() + .with_precision_and_scale(precision, scale)?, + )) } // Parse the string format decimal value to i128 format and checking the precision and scale. 
@@ -1237,16 +1240,16 @@ mod tests { .downcast_ref::() .unwrap(); - assert_eq!("57.653484", lat.value_as_string(0)); - assert_eq!("53.002666", lat.value_as_string(1)); - assert_eq!("52.412811", lat.value_as_string(2)); - assert_eq!("51.481583", lat.value_as_string(3)); - assert_eq!("12.123456", lat.value_as_string(4)); - assert_eq!("50.760000", lat.value_as_string(5)); - assert_eq!("0.123000", lat.value_as_string(6)); - assert_eq!("123.000000", lat.value_as_string(7)); - assert_eq!("123.000000", lat.value_as_string(8)); - assert_eq!("-50.760000", lat.value_as_string(9)); + assert_eq!("57.653484", lat.value_as_string(0).unwrap()); + assert_eq!("53.002666", lat.value_as_string(1).unwrap()); + assert_eq!("52.412811", lat.value_as_string(2).unwrap()); + assert_eq!("51.481583", lat.value_as_string(3).unwrap()); + assert_eq!("12.123456", lat.value_as_string(4).unwrap()); + assert_eq!("50.760000", lat.value_as_string(5).unwrap()); + assert_eq!("0.123000", lat.value_as_string(6).unwrap()); + assert_eq!("123.000000", lat.value_as_string(7).unwrap()); + assert_eq!("123.000000", lat.value_as_string(8).unwrap()); + assert_eq!("-50.760000", lat.value_as_string(9).unwrap()); } #[test] diff --git a/arrow/src/row/fixed.rs b/arrow/src/row/fixed.rs index 9952ee094bf..ec7afd8e30a 100644 --- a/arrow/src/row/fixed.rs +++ b/arrow/src/row/fixed.rs @@ -20,7 +20,7 @@ use crate::compute::SortOptions; use crate::datatypes::ArrowPrimitiveType; use crate::row::{null_sentinel, Rows}; use arrow_array::types::DecimalType; -use arrow_array::{BooleanArray, DecimalArray}; +use arrow_array::BooleanArray; use arrow_buffer::{bit_util, MutableBuffer, ToByteSlice}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::DataType; @@ -355,12 +355,12 @@ fn decode_fixed( } /// Decodes a `DecimalArray` from rows -pub fn decode_decimal( +pub fn decode_decimal( rows: &mut [&[u8]], options: SortOptions, precision: u8, scale: u8, -) -> DecimalArray { +) -> PrimitiveArray { decode_fixed::>(rows, 
T::TYPE_CONSTRUCTOR(precision, scale), options) .into() } diff --git a/arrow/src/row/mod.rs b/arrow/src/row/mod.rs index f5ac570320b..f604f65706d 100644 --- a/arrow/src/row/mod.rs +++ b/arrow/src/row/mod.rs @@ -529,7 +529,7 @@ fn encode_column( .downcast_ref::() .unwrap() .into_iter() - .map(|x| x.map(|x| RawDecimal(*x.raw_value()))); + .map(|x| x.map(|x| RawDecimal(x.to_le_bytes()))); fixed::encode(out, iter, opts) }, @@ -539,7 +539,7 @@ fn encode_column( .downcast_ref::() .unwrap() .into_iter() - .map(|x| x.map(|x| RawDecimal(*x.raw_value()))); + .map(|x| x.map(|x| RawDecimal(x.to_le_bytes()))); fixed::encode(out, iter, opts) }, diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index cf8394efaa6..8b8db1be575 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -274,8 +274,7 @@ pub fn make_string_from_decimal(column: &Arc, row: usize) -> Result() .unwrap(); - let formatted_decimal = array.value_as_string(row); - Ok(formatted_decimal) + array.value_as_string(row) } fn append_struct_field_string( diff --git a/arrow/tests/array_validation.rs b/arrow/tests/array_validation.rs index 93b936e7c2f..f4dcda2e8de 100644 --- a/arrow/tests/array_validation.rs +++ b/arrow/tests/array_validation.rs @@ -1065,9 +1065,9 @@ fn test_decimal_full_validation() { #[test] fn test_decimal_validation() { - let mut builder = Decimal128Builder::with_capacity(4, 10, 4); - builder.append_value(10000).unwrap(); - builder.append_value(20000).unwrap(); + let mut builder = Decimal128Builder::with_capacity(4); + builder.append_value(10000); + builder.append_value(20000); let array = builder.finish(); array.data().validate_full().unwrap(); diff --git a/integration-testing/Cargo.toml b/integration-testing/Cargo.toml index f4f6a336498..7b28f399f24 100644 --- a/integration-testing/Cargo.toml +++ b/integration-testing/Cargo.toml @@ -33,6 +33,7 @@ logging = ["tracing-subscriber"] [dependencies] arrow = { path = "../arrow", default-features = false, features = 
["test_utils", "ipc", "ipc_compression", "json"] } arrow-flight = { path = "../arrow-flight", default-features = false } +arrow-buffer = { path = "../arrow-buffer", default-features = false } async-trait = { version = "0.1.41", default-features = false } clap = { version = "4", default-features = false, features = ["std", "derive", "help", "error-context", "usage"] } futures = { version = "0.3", default-features = false } diff --git a/integration-testing/src/util/mod.rs b/integration-testing/src/util/mod.rs index c0eb80a3571..72ecfaa00f0 100644 --- a/integration-testing/src/util/mod.rs +++ b/integration-testing/src/util/mod.rs @@ -34,7 +34,7 @@ use arrow::datatypes::*; use arrow::error::{ArrowError, Result}; use arrow::record_batch::{RecordBatch, RecordBatchReader}; use arrow::util::bit_util; -use arrow::util::decimal::Decimal256; +use arrow_buffer::i256; mod datatype; mod field; @@ -787,12 +787,7 @@ pub fn array_from_json( } } DataType::Decimal128(precision, scale) => { - let mut b = - Decimal128Builder::with_capacity(json_col.count, *precision, *scale); - // C++ interop tests involve incompatible decimal values - unsafe { - b.disable_value_validation(); - } + let mut b = Decimal128Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -801,21 +796,16 @@ pub fn array_from_json( .zip(json_col.data.unwrap()) { match is_valid { - 1 => { - b.append_value(value.as_str().unwrap().parse::().unwrap())? 
- } + 1 => b.append_value(value.as_str().unwrap().parse::().unwrap()), _ => b.append_null(), }; } - Ok(Arc::new(b.finish())) + Ok(Arc::new( + b.finish().with_precision_and_scale(*precision, *scale)?, + )) } DataType::Decimal256(precision, scale) => { - let mut b = - Decimal256Builder::with_capacity(json_col.count, *precision, *scale); - // C++ interop tests involve incompatible decimal values - unsafe { - b.disable_value_validation(); - } + let mut b = Decimal256Builder::with_capacity(json_col.count); for (is_valid, value) in json_col .validity .as_ref() @@ -835,15 +825,14 @@ pub fn array_from_json( }; bytes[0..integer_bytes.len()] .copy_from_slice(integer_bytes.as_slice()); - let decimal = - Decimal256::try_new_from_bytes(*precision, *scale, &bytes) - .unwrap(); - b.append_value(&decimal)?; + b.append_value(i256::from_le_bytes(bytes)); } _ => b.append_null(), } } - Ok(Arc::new(b.finish())) + Ok(Arc::new( + b.finish().with_precision_and_scale(*precision, *scale)?, + )) } DataType::Map(child_field, _) => { let null_buf = create_null_buf(&json_col); diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 5ee963916da..0390b43aaa9 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1176,11 +1176,11 @@ mod tests { let expected = 1..25; - assert_eq!(col.precision(), target_precision); - assert_eq!(col.scale(), 2); + assert_eq!(col.precision().unwrap(), target_precision); + assert_eq!(col.scale().unwrap(), 2); for (i, v) in expected.enumerate() { - assert_eq!(col.value(i).as_i128(), v * 100_i128); + assert_eq!(col.value(i), v * 100_i128); } } } diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index b5c0b50127d..79d9d28095c 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -583,9 +583,9 @@ fn get_decimal_array_slice( indices: &[usize], ) -> Vec { let mut values = Vec::with_capacity(indices.len()); 
- let size = decimal_length_from_precision(array.precision()); + let size = decimal_length_from_precision(array.precision().unwrap()); for i in indices { - let as_be_bytes = array.value(*i).as_i128().to_be_bytes(); + let as_be_bytes = array.value(*i).to_be_bytes(); let resized_value = as_be_bytes[(16 - size)..].to_vec(); values.push(FixedLenByteArray::from(ByteArray::from(resized_value))); }