From 5bf214a45b8a34a2faa88f1f09b8532ee9acd910 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 14 Oct 2022 10:32:19 +1300 Subject: [PATCH 1/2] Decimal cleanup (#2637) --- .github/workflows/arrow.yml | 10 +- arrow-array/src/array/primitive_array.rs | 96 +++-- arrow-array/src/decimal.rs | 484 ----------------------- arrow-array/src/lib.rs | 1 - arrow-array/src/types.rs | 40 +- arrow/src/compute/kernels/take.rs | 4 +- arrow/src/csv/reader.rs | 20 +- arrow/src/util/display.rs | 2 +- arrow/src/util/mod.rs | 1 - parquet/src/arrow/arrow_reader/mod.rs | 4 +- parquet/src/arrow/arrow_writer/mod.rs | 2 +- 11 files changed, 96 insertions(+), 568 deletions(-) delete mode 100644 arrow-array/src/decimal.rs diff --git a/.github/workflows/arrow.yml b/.github/workflows/arrow.yml index 466f0b12ec3..c4b97551733 100644 --- a/.github/workflows/arrow.yml +++ b/.github/workflows/arrow.yml @@ -186,6 +186,14 @@ jobs: - name: Setup Clippy run: | rustup component add clippy - - name: Run clippy + - name: Clippy arrow-buffer with all features + run: cargo clippy -p arrow-buffer --all-features + - name: Clippy arrow-data with all features + run: cargo clippy -p arrow-data --all-features + - name: Clippy arrow-schema with all features + run: cargo clippy -p arrow-schema --all-features + - name: Clippy arrow-array with all features + run: cargo clippy -p arrow-array --all-features + - name: Clippy arrow run: | cargo clippy -p arrow --features=prettyprint,csv,ipc,test_utils,ffi,ipc_compression,dyn_cmp_dict,dyn_arith_dict --all-targets -- -D warnings diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 377523267a4..1da453a4baa 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -16,7 +16,6 @@ // under the License. use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; -use crate::decimal::Decimal; use crate::iterator::PrimitiveIter; use crate::raw_pointer::RawPtrBox; use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time}; @@ -917,67 +916,62 @@ impl PrimitiveArray { }) } - pub fn value_as_string(&self, row: usize) -> Result { - let p = self.precision()?; - let s = self.scale()?; - Ok(Decimal::::new(p, s, &T::to_native(self.value(row))).to_string()) + /// Returns [`Self::value`] formatted as a string + pub fn value_as_string(&self, row: usize) -> String { + T::format_decimal(self.value(row), self.precision(), self.scale()) } - pub fn precision(&self) -> Result { + /// Returns the decimal precision of this array + pub fn precision(&self) -> u8 { match T::BYTE_LENGTH { 16 => { if let DataType::Decimal128(p, _) = self.data().data_type() { - Ok(*p) + *p } else { - Err(ArrowError::InvalidArgumentError(format!( + unreachable!( "Decimal128Array datatype is not DataType::Decimal128 but {}", self.data_type() - ))) + ) } } 32 => { if let DataType::Decimal256(p, _) = self.data().data_type() { - Ok(*p) + *p } else { - Err(ArrowError::InvalidArgumentError(format!( + unreachable!( "Decimal256Array datatype is not DataType::Decimal256 but {}", self.data_type() - ))) + ) } } - other => Err(ArrowError::InvalidArgumentError(format!( - "Unsupported byte length for decimal array {}", - other - ))), + other => unreachable!("Unsupported byte length for decimal array {}", other), } } - pub fn scale(&self) -> Result { + /// Returns the decimal scale of this array + pub fn scale(&self) -> u8 { match T::BYTE_LENGTH { 16 => { if let DataType::Decimal128(_, s) = self.data().data_type() { - Ok(*s) + *s } else { - Err(ArrowError::InvalidArgumentError(format!( + unreachable!( "Decimal128Array datatype is not DataType::Decimal128 but {}", self.data_type() - ))) + ) } } 32 => { if let DataType::Decimal256(_, s) = self.data().data_type() { - Ok(*s) + *s } else { - Err(ArrowError::InvalidArgumentError(format!( + unreachable!( "Decimal256Array datatype is not DataType::Decimal256 but {}", self.data_type() - ))) + ) } } - other => Err(ArrowError::InvalidArgumentError(format!( - "Unsupported byte length for decimal array {}", - other - ))), + other => unreachable!("Unsupported byte length for decimal array {}", other), } } } @@ -1564,8 +1558,8 @@ mod tests { .build() .unwrap(); let decimal_array = Decimal128Array::from(array_data); - assert_eq!(8_887_000_000_i128, decimal_array.value(0).into()); - assert_eq!(-8_887_000_000_i128, decimal_array.value(1).into()); + assert_eq!(8_887_000_000_i128, decimal_array.value(0)); + assert_eq!(-8_887_000_000_i128, decimal_array.value(1)); } #[test] @@ -1576,7 +1570,7 @@ mod tests { let result = decimal_builder.finish().with_precision_and_scale(5, 3); assert!(result.is_ok()); let arr = result.unwrap(); - assert_eq!("12.345", arr.value_as_string(1).unwrap()); + assert_eq!("12.345", arr.value_as_string(1)); // Validate it explicitly let result = arr.validate_decimal_precision(5); @@ -1594,8 +1588,8 @@ mod tests { let result = decimal_builder.finish().with_precision_and_scale(2, 1); assert!(result.is_ok()); let arr = result.unwrap(); - assert_eq!("9.9", arr.value_as_string(1).unwrap()); - assert_eq!("-9.9", arr.value_as_string(3).unwrap()); + assert_eq!("9.9", arr.value_as_string(1)); + assert_eq!("-9.9", arr.value_as_string(3)); // Validate it explicitly let result = arr.validate_decimal_precision(2); @@ -1611,11 +1605,11 @@ mod tests { let array = Decimal128Array::from_iter_values(vec![-100, 0, 101].into_iter()); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); + assert_eq!(-100_i128, array.value(0)); assert!(!array.is_null(0)); - assert_eq!(0_i128, array.value(1).into()); + assert_eq!(0_i128, array.value(1)); assert!(!array.is_null(1)); - assert_eq!(101_i128, array.value(2).into()); + assert_eq!(101_i128, array.value(2)); assert!(!array.is_null(2)); } @@ -1625,10 +1619,10 @@ mod tests { vec![Some(-100), None, Some(101)].into_iter().collect(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); + assert_eq!(-100_i128, array.value(0)); assert!(!array.is_null(0)); assert!(array.is_null(1)); - assert_eq!(101_i128, array.value(2).into()); + assert_eq!(101_i128, array.value(2)); assert!(!array.is_null(2)); } @@ -1661,13 +1655,13 @@ mod tests { .with_precision_and_scale(6, 3) .unwrap(); - assert_eq!("123.450", arr.value_as_string(0).unwrap()); - assert_eq!("-123.450", arr.value_as_string(1).unwrap()); - assert_eq!("0.100", arr.value_as_string(2).unwrap()); - assert_eq!("-0.100", arr.value_as_string(3).unwrap()); - assert_eq!("0.010", arr.value_as_string(4).unwrap()); - assert_eq!("-0.010", arr.value_as_string(5).unwrap()); - assert_eq!("0.000", arr.value_as_string(6).unwrap()); + assert_eq!("123.450", arr.value_as_string(0)); + assert_eq!("-123.450", arr.value_as_string(1)); + assert_eq!("0.100", arr.value_as_string(2)); + assert_eq!("-0.100", arr.value_as_string(3)); + assert_eq!("0.010", arr.value_as_string(4)); + assert_eq!("-0.010", arr.value_as_string(5)); + assert_eq!("0.000", arr.value_as_string(6)); } #[test] @@ -1677,11 +1671,11 @@ mod tests { .unwrap(); assert_eq!(arr.data_type(), &DataType::Decimal128(20, 2)); - assert_eq!(arr.precision().unwrap(), 20); - assert_eq!(arr.scale().unwrap(), 2); + assert_eq!(arr.precision(), 20); + assert_eq!(arr.scale(), 2); let actual: Vec<_> = (0..arr.len()) - .map(|i| arr.value_as_string(i).unwrap()) + .map(|i| arr.value_as_string(i)) .collect(); let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"]; @@ -1748,9 +1742,7 @@ mod tests { let value2 = i256::from_i128(56789); let mut array: Decimal256Array = - vec![Some(value1.clone()), None, Some(value2.clone())] - .into_iter() - .collect(); + vec![Some(value1), None, Some(value2)].into_iter().collect(); array = array.with_precision_and_scale(76, 10).unwrap(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal256(76, 10)); @@ -1768,10 +1760,10 @@ mod tests { array = array.with_precision_and_scale(38, 10).unwrap(); assert_eq!(array.len(), 3); assert_eq!(array.data_type(), &DataType::Decimal128(38, 10)); - assert_eq!(-100_i128, array.value(0).into()); + assert_eq!(-100_i128, array.value(0)); assert!(!array.is_null(0)); assert!(array.is_null(1)); - assert_eq!(101_i128, array.value(2).into()); + assert_eq!(101_i128, array.value(2)); assert!(!array.is_null(2)); } diff --git a/arrow-array/src/decimal.rs b/arrow-array/src/decimal.rs deleted file mode 100644 index 34305333064..00000000000 --- a/arrow-array/src/decimal.rs +++ /dev/null @@ -1,484 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Decimal related utilities, types and functions - -use crate::types::{Decimal128Type, Decimal256Type, DecimalType}; -use arrow_buffer::i256; -use arrow_data::decimal::{DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE}; -use arrow_schema::{ArrowError, DataType}; -use num::{BigInt, Signed}; -use std::cmp::{min, Ordering}; - -/// [`Decimal`] is the generic representation of a single decimal value -/// -/// See [`Decimal128`] and [`Decimal256`] for the value types of [`Decimal128Array`] -/// and [`Decimal256Array`] respectively -/// -/// [`Decimal128Array`]: [crate::array::Decimal128Array] -/// [`Decimal256Array`]: [crate::array::Decimal256Array] -pub struct Decimal { - precision: u8, - scale: u8, - value: T::DecimalNative, -} - -/// Manually implement to avoid `T: Debug` bound -impl std::fmt::Debug for Decimal { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("Decimal") - .field("scale", &self.precision) - .field("precision", &self.precision) - // TODO: Could format this better - .field("value", &self.value.as_ref()) - .finish() - } -} - -/// Manually implement to avoid `T: Debug` bound -impl Clone for Decimal { - fn clone(&self) -> Self { - Self { - precision: self.precision, - scale: self.scale, - value: self.value, - } - } -} - -impl Copy for Decimal {} - -impl Decimal { - pub const MAX_PRECISION: u8 = T::MAX_PRECISION; - pub const MAX_SCALE: u8 = T::MAX_SCALE; - pub const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR; - pub const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE; - - /// Tries to create a decimal value from precision, scale and bytes. - /// The bytes should be stored in little-endian order. - /// - /// Safety: - /// This method doesn't validate if the decimal value represented by the bytes - /// can be fitted into the specified precision. - pub fn try_new_from_bytes( - precision: u8, - scale: u8, - bytes: &T::DecimalNative, - ) -> Result - where - Self: Sized, - { - if precision > Self::MAX_PRECISION { - return Err(ArrowError::InvalidArgumentError(format!( - "precision {} is greater than max {}", - precision, - Self::MAX_PRECISION - ))); - } - if scale > Self::MAX_SCALE { - return Err(ArrowError::InvalidArgumentError(format!( - "scale {} is greater than max {}", - scale, - Self::MAX_SCALE - ))); - } - - if precision < scale { - return Err(ArrowError::InvalidArgumentError(format!( - "Precision {} is less than scale {}", - precision, scale - ))); - } - - Ok(Self::new(precision, scale, bytes)) - } - - /// Creates a decimal value from precision, scale, and bytes. - /// - /// Safety: - /// This method doesn't check if the precision and scale are valid. - /// Use `try_new_from_bytes` for safe constructor. - pub fn new(precision: u8, scale: u8, bytes: &T::DecimalNative) -> Self { - Self { - precision, - scale, - value: *bytes, - } - } - - /// Returns the raw bytes of the integer representation of the decimal. - pub fn raw_value(&self) -> &T::DecimalNative { - &self.value - } - - /// Returns the precision of the decimal. - pub fn precision(&self) -> u8 { - self.precision - } - - /// Returns the scale of the decimal. - pub fn scale(&self) -> u8 { - self.scale - } - - /// Returns the string representation of the decimal. - /// If the string representation cannot be fitted with the precision of the decimal, - /// the string will be truncated. - #[allow(clippy::inherent_to_string)] - pub fn to_string(&self) -> String { - let raw_bytes = self.raw_value(); - let integer = BigInt::from_signed_bytes_le(raw_bytes.as_ref()); - let value_str = integer.to_string(); - let (sign, rest) = - value_str.split_at(if integer >= BigInt::from(0) { 0 } else { 1 }); - let bound = min(usize::from(self.precision()), rest.len()) + sign.len(); - let value_str = &value_str[0..bound]; - let scale_usize = usize::from(self.scale()); - - if self.scale() == 0 { - value_str.to_string() - } else if rest.len() > scale_usize { - // Decimal separator is in the middle of the string - let (whole, decimal) = value_str.split_at(value_str.len() - scale_usize); - format!("{}.{}", whole, decimal) - } else { - // String has to be padded - format!("{}0.{:0>width$}", sign, rest, width = scale_usize) - } - } -} - -impl PartialOrd for Decimal { - fn partial_cmp(&self, other: &Self) -> Option { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimals with different scale: {}, {}", - self.scale, other.scale - ); - Some(singed_cmp_le_bytes( - self.value.as_ref(), - other.value.as_ref(), - )) - } -} - -impl Ord for Decimal { - fn cmp(&self, other: &Self) -> Ordering { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimals with different scale: {}, {}", - self.scale, other.scale - ); - singed_cmp_le_bytes(self.value.as_ref(), other.value.as_ref()) - } -} - -impl PartialEq for Decimal { - fn eq(&self, other: &Self) -> bool { - assert_eq!( - self.scale, other.scale, - "Cannot compare two Decimals with different scale: {}, {}", - self.scale, other.scale - ); - self.value.as_ref().eq(other.value.as_ref()) - } -} - -impl Eq for Decimal {} - -/// Represents a decimal value with precision and scale. -/// The decimal value could represented by a signed 128-bit integer. -pub type Decimal128 = Decimal; - -impl Decimal128 { - /// Creates `Decimal128` from an `i128` value. - pub fn new_from_i128(precision: u8, scale: u8, value: i128) -> Self { - Decimal128 { - precision, - scale, - value: value.to_le_bytes(), - } - } - - /// Returns `i128` representation of the decimal. - pub fn as_i128(&self) -> i128 { - i128::from_le_bytes(self.value) - } -} - -impl From for i128 { - fn from(decimal: Decimal128) -> Self { - decimal.as_i128() - } -} - -/// Represents a decimal value with precision and scale. -/// The decimal value could be represented by a signed 256-bit integer. -pub type Decimal256 = Decimal; - -impl Decimal256 { - /// Constructs a `Decimal256` value from a `BigInt`. - pub fn from_big_int( - num: &BigInt, - precision: u8, - scale: u8, - ) -> Result { - let mut bytes = if num.is_negative() { - [255_u8; 32] - } else { - [0; 32] - }; - let num_bytes = &num.to_signed_bytes_le(); - bytes[0..num_bytes.len()].clone_from_slice(num_bytes); - Decimal256::try_new_from_bytes(precision, scale, &bytes) - } - - pub fn from_i256(precision: u8, scale: u8, value: i256) -> Self { - Decimal256::new(precision, scale, &value.to_le_bytes()) - } - - /// Constructs a `BigInt` from this `Decimal256` value. - pub fn to_big_int(self) -> BigInt { - BigInt::from_signed_bytes_le(&self.value) - } -} - -impl From for Decimal256 { - fn from(bigint: BigInt) -> Self { - Decimal256::from_big_int(&bigint, DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE) - .unwrap() - } -} - -// compare two signed integer which are encoded with little endian. -// left bytes and right bytes must have the same length. -#[inline] -pub(crate) fn singed_cmp_le_bytes(left: &[u8], right: &[u8]) -> Ordering { - assert_eq!( - left.len(), - right.len(), - "Can't compare bytes array with different len: {}, {}", - left.len(), - right.len() - ); - assert_ne!(left.len(), 0, "Can't compare bytes array of length 0"); - let len = left.len(); - // the sign bit is 1, the value is negative - let left_negative = left[len - 1] >= 0x80_u8; - let right_negative = right[len - 1] >= 0x80_u8; - if left_negative != right_negative { - return match left_negative { - true => { - // left is negative value - // right is positive value - Ordering::Less - } - false => Ordering::Greater, - }; - } - for i in 0..len { - let l_byte = left[len - 1 - i]; - let r_byte = right[len - 1 - i]; - match l_byte.cmp(&r_byte) { - Ordering::Less => { - return Ordering::Less; - } - Ordering::Greater => { - return Ordering::Greater; - } - Ordering::Equal => {} - } - } - Ordering::Equal -} - -#[cfg(test)] -mod tests { - use super::*; - use num::{BigInt, Num}; - use rand::random; - - #[test] - fn decimal_128_to_string() { - let mut value = Decimal128::new_from_i128(5, 2, 100); - assert_eq!(value.to_string(), "1.00"); - - value = Decimal128::new_from_i128(5, 3, 100); - assert_eq!(value.to_string(), "0.100"); - } - - #[test] - fn decimal_invalid_precision_scale() { - let bytes = 100_i128.to_le_bytes(); - let err = Decimal128::try_new_from_bytes(5, 6, &bytes); - assert!(err.is_err()); - } - - #[test] - fn decimal_128_from_bytes() { - let mut bytes = 100_i128.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "1.00"); - - bytes = (-1_i128).to_le_bytes(); - let value = Decimal128::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "-0.01"); - - bytes = i128::MAX.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "170141183460469231731687303715884105.72"); - - bytes = i128::MIN.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(38, 2, &bytes).unwrap(); - assert_eq!( - value.to_string(), - "-170141183460469231731687303715884105.72" - ); - - // Truncated - bytes = 12345_i128.to_le_bytes(); - let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "1.23"); - - bytes = (-12345_i128).to_le_bytes(); - let value = Decimal128::try_new_from_bytes(3, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "-1.23"); - } - - #[test] - fn decimal_256_from_bytes() { - let mut bytes = [0_u8; 32]; - bytes[0..16].clone_from_slice(&100_i128.to_le_bytes()); - let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "1.00"); - - bytes[0..16].clone_from_slice(&i128::MAX.to_le_bytes()); - let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap(); - assert_eq!( - value.to_string(), - "17014118346046923173168730371588410.5727" - ); - - // i128 maximum + 1 - bytes[0..16].clone_from_slice(&0_i128.to_le_bytes()); - bytes[15] = 128; - let value = Decimal256::try_new_from_bytes(40, 4, &bytes).unwrap(); - assert_eq!( - value.to_string(), - "17014118346046923173168730371588410.5728" - ); - - // smaller than i128 minimum - bytes = [255; 32]; - bytes[31] = 128; - let value = Decimal256::try_new_from_bytes(76, 4, &bytes).unwrap(); - assert_eq!( - value.to_string(), - "-574437317700748313234121683441537667865831564552201235664496608164256541.5731" - ); - - bytes = [255; 32]; - let value = Decimal256::try_new_from_bytes(5, 2, &bytes).unwrap(); - assert_eq!(value.to_string(), "-0.01"); - } - - fn i128_func(value: impl Into) -> i128 { - value.into() - } - - #[test] - fn decimal_128_to_i128() { - let value = Decimal128::new_from_i128(5, 2, 100); - let integer = i128_func(value); - assert_eq!(integer, 100); - } - - #[test] - fn bigint_to_decimal256() { - let num = BigInt::from_str_radix("123456789", 10).unwrap(); - let value = Decimal256::from_big_int(&num, 30, 2).unwrap(); - assert_eq!(value.to_string(), "1234567.89"); - - let num = BigInt::from_str_radix("-5744373177007483132341216834415376678658315645522012356644966081642565415731", 10).unwrap(); - let value = Decimal256::from_big_int(&num, 76, 4).unwrap(); - assert_eq!(value.to_string(), "-574437317700748313234121683441537667865831564552201235664496608164256541.5731"); - } - - #[test] - fn test_lt_cmp_byte() { - for _i in 0..100 { - let left = random::(); - let right = random::(); - let result = singed_cmp_le_bytes( - left.to_le_bytes().as_slice(), - right.to_le_bytes().as_slice(), - ); - assert_eq!(left.cmp(&right), result); - } - for _i in 0..100 { - let left = random::(); - let right = random::(); - let result = singed_cmp_le_bytes( - left.to_le_bytes().as_slice(), - right.to_le_bytes().as_slice(), - ); - assert_eq!(left.cmp(&right), result); - } - } - - #[test] - fn compare_decimal128() { - let v1 = -100_i128; - let v2 = 10000_i128; - let right = Decimal128::new_from_i128(20, 3, v2); - for v in v1..v2 { - let left = Decimal128::new_from_i128(20, 3, v); - assert!(left < right); - } - - for _i in 0..100 { - let left = random::(); - let right = random::(); - let left_decimal = Decimal128::new_from_i128(38, 2, left); - let right_decimal = Decimal128::new_from_i128(38, 2, right); - assert_eq!(left < right, left_decimal < right_decimal); - assert_eq!(left == right, left_decimal == right_decimal) - } - } - - #[test] - fn compare_decimal256() { - let v1 = -100_i128; - let v2 = 10000_i128; - let right = Decimal256::from_big_int(&BigInt::from(v2), 75, 2).unwrap(); - for v in v1..v2 { - let left = Decimal256::from_big_int(&BigInt::from(v), 75, 2).unwrap(); - assert!(left < right); - } - - for _i in 0..100 { - let left = random::(); - let right = random::(); - let left_decimal = - Decimal256::from_big_int(&BigInt::from(left), 75, 2).unwrap(); - let right_decimal = - Decimal256::from_big_int(&BigInt::from(right), 75, 2).unwrap(); - assert_eq!(left < right, left_decimal < right_decimal); - assert_eq!(left == right, left_decimal == right_decimal) - } - } -} diff --git a/arrow-array/src/lib.rs b/arrow-array/src/lib.rs index 16e46f68ba0..cc963925d65 100644 --- a/arrow-array/src/lib.rs +++ b/arrow-array/src/lib.rs @@ -166,7 +166,6 @@ pub use record_batch::{RecordBatch, RecordBatchOptions}; pub mod builder; pub mod cast; -pub mod decimal; mod delta; pub mod iterator; mod raw_pointer; diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index 2e161813dbc..70c43a2a494 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -495,18 +495,18 @@ impl NativeDecimalType for [u8; N] { pub trait DecimalType: 'static + Send + Sync + ArrowPrimitiveType + private::DecimalTypeSealed { - type DecimalNative: NativeDecimalType; - const BYTE_LENGTH: usize; const MAX_PRECISION: u8; const MAX_SCALE: u8; const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType; const DEFAULT_TYPE: DataType; - fn to_native(num: ::Native) -> Self::DecimalNative; + /// Formats the decimal value with the provided precision and scale + fn format_decimal(value: Self::Native, precision: u8, scale: u8) -> String; + /// Validates that `value` contains no more than `precision` decimal digits fn validate_decimal_precision( - num: ::Native, + value: Self::Native, precision: u8, ) -> Result<(), ArrowError>; } @@ -516,8 +516,6 @@ pub trait DecimalType: pub struct Decimal128Type {} impl DecimalType for Decimal128Type { - type DecimalNative = [u8; 16]; - const BYTE_LENGTH: usize = 16; const MAX_PRECISION: u8 = DECIMAL128_MAX_PRECISION; const MAX_SCALE: u8 = DECIMAL128_MAX_SCALE; @@ -525,8 +523,8 @@ impl DecimalType for Decimal128Type { const DEFAULT_TYPE: DataType = DataType::Decimal128(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); - fn to_native(num: i128) -> [u8; 16] { - num.to_le_bytes() + fn format_decimal(value: Self::Native, precision: u8, scale: u8) -> String { + format_decimal_str(&value.to_string(), precision as usize, scale as usize) } fn validate_decimal_precision(num: i128, precision: u8) -> Result<(), ArrowError> { @@ -545,8 +543,6 @@ impl ArrowPrimitiveType for Decimal128Type { pub struct Decimal256Type {} impl DecimalType for Decimal256Type { - type DecimalNative = [u8; 32]; - const BYTE_LENGTH: usize = 32; const MAX_PRECISION: u8 = DECIMAL256_MAX_PRECISION; const MAX_SCALE: u8 = DECIMAL256_MAX_SCALE; @@ -554,8 +550,8 @@ impl DecimalType for Decimal256Type { const DEFAULT_TYPE: DataType = DataType::Decimal256(DECIMAL256_MAX_PRECISION, DECIMAL_DEFAULT_SCALE); - fn to_native(num: i256) -> [u8; 32] { - num.to_le_bytes() + fn format_decimal(value: Self::Native, precision: u8, scale: u8) -> String { + format_decimal_str(&value.to_string(), precision as usize, scale as usize) } fn validate_decimal_precision(num: i256, precision: u8) -> Result<(), ArrowError> { @@ -569,6 +565,26 @@ impl ArrowPrimitiveType for Decimal256Type { const DATA_TYPE: DataType = ::DEFAULT_TYPE; } +fn format_decimal_str(value_str: &str, precision: usize, scale: usize) -> String { + let (sign, rest) = match value_str.strip_prefix('-') { + Some(stripped) => ("-", stripped), + None => ("", value_str), + }; + let bound = precision.min(rest.len()) + sign.len(); + let value_str = &value_str[0..bound]; + + if scale == 0 { + value_str.to_string() + } else if rest.len() > scale { + // Decimal separator is in the middle of the string + let (whole, decimal) = value_str.split_at(value_str.len() - scale); + format!("{}.{}", whole, decimal) + } else { + // String has to be padded + format!("{}0.{:0>width$}", sign, rest, width = scale) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index a399f060200..b6a92d9b5f8 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -459,9 +459,7 @@ where t }) .collect::>()? - // PERF: we could avoid re-validating that the data in - // Decimal128Array was in range as we know it came from a valid Decimal128Array - .with_precision_and_scale(decimal_values.precision()?, decimal_values.scale()?) + .with_precision_and_scale(decimal_values.precision(), decimal_values.scale()) } /// `take` implementation for all primitive arrays diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index ab5947b4ece..123c5e1c671 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -1240,16 +1240,16 @@ mod tests { .downcast_ref::() .unwrap(); - assert_eq!("57.653484", lat.value_as_string(0).unwrap()); - assert_eq!("53.002666", lat.value_as_string(1).unwrap()); - assert_eq!("52.412811", lat.value_as_string(2).unwrap()); - assert_eq!("51.481583", lat.value_as_string(3).unwrap()); - assert_eq!("12.123456", lat.value_as_string(4).unwrap()); - assert_eq!("50.760000", lat.value_as_string(5).unwrap()); - assert_eq!("0.123000", lat.value_as_string(6).unwrap()); - assert_eq!("123.000000", lat.value_as_string(7).unwrap()); - assert_eq!("123.000000", lat.value_as_string(8).unwrap()); - assert_eq!("-50.760000", lat.value_as_string(9).unwrap()); + assert_eq!("57.653484", lat.value_as_string(0)); + assert_eq!("53.002666", lat.value_as_string(1)); + assert_eq!("52.412811", lat.value_as_string(2)); + assert_eq!("51.481583", lat.value_as_string(3)); + assert_eq!("12.123456", lat.value_as_string(4)); + assert_eq!("50.760000", lat.value_as_string(5)); + assert_eq!("0.123000", lat.value_as_string(6)); + assert_eq!("123.000000", lat.value_as_string(7)); + assert_eq!("123.000000", lat.value_as_string(8)); + assert_eq!("-50.760000", lat.value_as_string(9)); } #[test] diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index 8b8db1be575..7c0b5a28f89 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -274,7 +274,7 @@ pub fn make_string_from_decimal(column: &Arc, row: usize) -> Result() .unwrap(); - array.value_as_string(row) + Ok(array.value_as_string(row)) } fn append_struct_field_string( diff --git a/arrow/src/util/mod.rs b/arrow/src/util/mod.rs index a20657b5822..f0b9e0076ba 100644 --- a/arrow/src/util/mod.rs +++ b/arrow/src/util/mod.rs @@ -32,5 +32,4 @@ pub mod string_writer; #[cfg(any(test, feature = "test_utils"))] pub mod test_util; -pub use arrow_array::decimal; pub(crate) mod reader_parser; diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 0390b43aaa9..51b09302cdf 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1176,8 +1176,8 @@ mod tests { let expected = 1..25; - assert_eq!(col.precision().unwrap(), target_precision); - assert_eq!(col.scale().unwrap(), 2); + assert_eq!(col.precision(), target_precision); + assert_eq!(col.scale(), 2); for (i, v) in expected.enumerate() { assert_eq!(col.value(i), v * 100_i128); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 79d9d28095c..bc68874ebab 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -583,7 +583,7 @@ fn get_decimal_array_slice( indices: &[usize], ) -> Vec { let mut values = Vec::with_capacity(indices.len()); - let size = decimal_length_from_precision(array.precision().unwrap()); + let size = decimal_length_from_precision(array.precision()); for i in indices { let as_be_bytes = array.value(*i).to_be_bytes(); let resized_value = as_be_bytes[(16 - size)..].to_vec(); From 1d3f434c3132f26a841e3bebaf5df16b871cf034 Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Fri, 14 Oct 2022 16:26:31 +1300 Subject: [PATCH 2/2] Format --- arrow-array/src/array/primitive_array.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 1da453a4baa..4722cec67c6 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -1674,9 +1674,7 @@ mod tests { assert_eq!(arr.precision(), 20); assert_eq!(arr.scale(), 2); - let actual: Vec<_> = (0..arr.len()) - .map(|i| arr.value_as_string(i)) - .collect(); + let actual: Vec<_> = (0..arr.len()).map(|i| arr.value_as_string(i)).collect(); let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"]; assert_eq!(actual, expected);