diff --git a/arrow/src/array/array_decimal.rs b/arrow/src/array/array_decimal.rs index 67c1328fe45..9fb724a7e0f 100644 --- a/arrow/src/array/array_decimal.rs +++ b/arrow/src/array/array_decimal.rs @@ -32,7 +32,7 @@ use crate::datatypes::{ DECIMAL_MAX_SCALE, }; use crate::error::{ArrowError, Result}; -use crate::util::decimal::{BasicDecimal, Decimal128}; +use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256}; /// `DecimalArray` stores fixed width decimal numbers, /// with a fixed precision and scale. @@ -75,47 +75,68 @@ pub struct DecimalArray { scale: usize, } -impl DecimalArray { - const VALUE_LENGTH: i32 = 16; +pub struct Decimal256Array { + data: ArrayData, + value_data: RawPtrBox, + precision: usize, + scale: usize, +} + +mod private_decimal { + pub trait DecimalArrayPrivate { + fn raw_value_data_ptr(&self) -> *const u8; + } +} + +pub trait BasicDecimalArray>: + private_decimal::DecimalArrayPrivate +{ + const VALUE_LENGTH: i32; + + fn data(&self) -> &ArrayData; + + /// Return the precision (total digits) that can be stored by this array + fn precision(&self) -> usize; + + /// Return the scale (digits after the decimal) that can be stored by this array + fn scale(&self) -> usize; /// Returns the element at index `i`. - pub fn value(&self, i: usize) -> Decimal128 { - assert!(i < self.data.len(), "DecimalArray out of bounds access"); - let offset = i + self.data.offset(); + fn value(&self, i: usize) -> T { + let data = self.data(); + assert!(i < data.len(), "Out of bounds access"); + + let offset = i + data.offset(); let raw_val = unsafe { let pos = self.value_offset_at(offset); std::slice::from_raw_parts( - self.value_data.as_ptr().offset(pos as isize), + self.raw_value_data_ptr().offset(pos as isize), Self::VALUE_LENGTH as usize, ) }; let as_array = raw_val.try_into().unwrap(); - Decimal128::new_from_i128( - self.precision, - self.scale, - i128::from_le_bytes(as_array), - ) + T::new(self.precision(), self.scale(), as_array) } /// Returns the offset for the element at index `i`. /// /// Note this doesn't do any bound checking, for performance reason. #[inline] - pub fn value_offset(&self, i: usize) -> i32 { - self.value_offset_at(self.data.offset() + i) + fn value_offset(&self, i: usize) -> i32 { + self.value_offset_at(self.data().offset() + i) } /// Returns the length for an element. /// /// All elements have the same length as the array is a fixed size. #[inline] - pub const fn value_length(&self) -> i32 { + fn value_length(&self) -> i32 { Self::VALUE_LENGTH } /// Returns a clone of the value data buffer - pub fn value_data(&self) -> Buffer { - self.data.buffers()[0].clone() + fn value_data(&self) -> Buffer { + self.data().buffers()[0].clone() } #[inline] @@ -124,15 +145,15 @@ impl DecimalArray { } #[inline] - pub fn value_as_string(&self, row: usize) -> String { + fn value_as_string(&self, row: usize) -> String { self.value(row).to_string() } - pub fn from_fixed_size_list_array( + fn from_fixed_size_list_array( v: FixedSizeListArray, precision: usize, scale: usize, - ) -> Self { + ) -> U { let child_data = &v.data_ref().child_data()[0]; assert_eq!( child_data.child_data().len(), @@ -155,9 +176,55 @@ impl DecimalArray { .offset(list_offset); let array_data = unsafe { builder.build_unchecked() }; - Self::from(array_data) + U::from(array_data) + } +} + +impl private_decimal::DecimalArrayPrivate for DecimalArray { + fn raw_value_data_ptr(&self) -> *const u8 { + self.value_data.as_ptr() + } +} + +impl private_decimal::DecimalArrayPrivate for Decimal256Array { + fn raw_value_data_ptr(&self) -> *const u8 { + self.value_data.as_ptr() } +} + +impl BasicDecimalArray for DecimalArray { + const VALUE_LENGTH: i32 = 16; + + fn data(&self) -> &ArrayData { + &self.data + } + + fn precision(&self) -> usize { + self.precision + } + + fn scale(&self) -> usize { + self.scale + } +} + +impl BasicDecimalArray for Decimal256Array { + const VALUE_LENGTH: i32 = 32; + + fn data(&self) -> &ArrayData { + &self.data + } + + fn precision(&self) -> usize { + self.precision + } + + fn scale(&self) -> usize { + self.scale + } +} +impl DecimalArray { /// Creates a [DecimalArray] with default precision and scale, /// based on an iterator of `i128` values without nulls pub fn from_iter_values>(iter: I) -> Self { @@ -176,16 +243,6 @@ impl DecimalArray { DecimalArray::from(data) } - /// Return the precision (total digits) that can be stored by this array - pub fn precision(&self) -> usize { - self.precision - } - - /// Return the scale (digits after the decimal) that can be stored by this array - pub fn scale(&self) -> usize { - self.scale - } - /// Returns a DecimalArray with the same data as self, with the /// specified precision. /// @@ -267,6 +324,27 @@ impl From for DecimalArray { } } +impl From for Decimal256Array { + fn from(data: ArrayData) -> Self { + assert_eq!( + data.buffers().len(), + 1, + "DecimalArray data should contain 1 buffer only (values)" + ); + let values = data.buffers()[0].as_ptr(); + let (precision, scale) = match data.data_type() { + DataType::Decimal(precision, scale) => (*precision, *scale), + _ => panic!("Expected data type to be Decimal"), + }; + Self { + data, + value_data: unsafe { RawPtrBox::new(values) }, + precision, + scale, + } + } +} + impl From for ArrayData { fn from(array: DecimalArray) -> Self { array.data @@ -337,6 +415,22 @@ impl fmt::Debug for DecimalArray { } } +impl fmt::Debug for Decimal256Array { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "Decimal256Array<{}, {}>\n[\n", + self.precision, self.scale + )?; + print_long_array(self, f, |array, index, f| { + let formatted_decimal = array.value_as_string(index); + + write!(f, "{}", formatted_decimal) + })?; + write!(f, "]") + } +} + impl Array for DecimalArray { fn as_any(&self) -> &dyn Any { self @@ -347,6 +441,16 @@ impl Array for DecimalArray { } } +impl Array for Decimal256Array { + fn as_any(&self) -> &dyn Any { + self + } + + fn data(&self) -> &ArrayData { + &self.data + } +} + #[cfg(test)] mod tests { use crate::{array::DecimalBuilder, datatypes::Field}; diff --git a/arrow/src/array/builder/decimal_builder.rs b/arrow/src/array/builder/decimal_builder.rs index e7e9ec6a58f..b8edeb9354a 100644 --- a/arrow/src/array/builder/decimal_builder.rs +++ b/arrow/src/array/builder/decimal_builder.rs @@ -18,6 +18,7 @@ use std::any::Any; use std::sync::Arc; +use crate::array::array_decimal::{BasicDecimalArray, Decimal256Array}; use crate::array::ArrayRef; use crate::array::DecimalArray; use crate::array::UInt8Builder; @@ -26,6 +27,7 @@ use crate::array::{ArrayBuilder, FixedSizeListBuilder}; use crate::error::{ArrowError, Result}; use crate::datatypes::validate_decimal_precision; +use crate::util::decimal::{BasicDecimal, Decimal256}; /// Array Builder for [`DecimalArray`] /// @@ -42,8 +44,18 @@ pub struct DecimalBuilder { value_validation: bool, } +/// Array Builder for [`Decimal256Array`] +/// +/// See [`Decimal256Array`] for example. +#[derive(Debug)] +pub struct Decimal256Builder { + builder: FixedSizeListBuilder, + precision: usize, + scale: usize, +} + impl DecimalBuilder { - /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values + /// Creates a new `DecimalBuilder`, `capacity` is the number of bytes in the values /// array pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { let values_builder = UInt8Builder::new(capacity); @@ -154,10 +166,59 @@ impl ArrayBuilder for DecimalBuilder { } } +impl Decimal256Builder { + /// Creates a new `Decimal256Builder`, `capacity` is the number of bytes in the values + /// array + pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { + let values_builder = UInt8Builder::new(capacity); + let byte_width = 32; + Self { + builder: FixedSizeListBuilder::new(values_builder, byte_width), + precision, + scale, + } + } + + /// Appends a byte slice into the builder. + /// + /// Automatically calls the `append` method to delimit the slice appended in as a + /// distinct array element. + #[inline] + pub fn append_value(&mut self, value: &Decimal256) -> Result<()> { + let value_as_bytes = value.raw_value(); + + if self.builder.value_length() != value_as_bytes.len() as i32 { + return Err(ArrowError::InvalidArgumentError( + "Byte slice does not have the same length as Decimal256Builder value lengths".to_string() + )); + } + self.builder.values().append_slice(value_as_bytes)?; + self.builder.append(true) + } + + /// Append a null value to the array. + #[inline] + pub fn append_null(&mut self) -> Result<()> { + let length: usize = self.builder.value_length() as usize; + self.builder.values().append_slice(&vec![0u8; length][..])?; + self.builder.append(false) + } + + /// Builds the `Decimal256Array` and reset this builder. + pub fn finish(&mut self) -> Decimal256Array { + Decimal256Array::from_fixed_size_list_array( + self.builder.finish(), + self.precision, + self.scale, + ) + } +} + #[cfg(test)] mod tests { use super::*; + use crate::array::array_decimal::BasicDecimalArray; use crate::array::Array; use crate::datatypes::DataType; use crate::util::decimal::Decimal128; diff --git a/arrow/src/array/builder/mod.rs b/arrow/src/array/builder/mod.rs index 634ef772f3c..045a11648d5 100644 --- a/arrow/src/array/builder/mod.rs +++ b/arrow/src/array/builder/mod.rs @@ -45,6 +45,7 @@ use super::ArrayRef; pub use boolean_buffer_builder::BooleanBufferBuilder; pub use boolean_builder::BooleanBuilder; pub use buffer_builder::BufferBuilder; +pub use decimal_builder::Decimal256Builder; pub use decimal_builder::DecimalBuilder; pub use fixed_size_binary_builder::FixedSizeBinaryBuilder; pub use fixed_size_list_builder::FixedSizeListBuilder; diff --git a/arrow/src/array/equal_json.rs b/arrow/src/array/equal_json.rs index 9db1a4397cb..3fc84a7e3ab 100644 --- a/arrow/src/array/equal_json.rs +++ b/arrow/src/array/equal_json.rs @@ -16,7 +16,9 @@ // under the License. use super::*; +use crate::array::BasicDecimalArray; use crate::datatypes::*; +use crate::util::decimal::BasicDecimal; use array::Array; use hex::FromHex; use serde_json::value::Value::{Null as JNull, Object, String as JString}; @@ -378,6 +380,20 @@ impl JsonEqual for DecimalArray { } } +impl JsonEqual for Decimal256Array { + fn equals_json(&self, json: &[&Value]) -> bool { + if self.len() != json.len() { + return false; + } + + (0..self.len()).all(|i| match json[i] { + JString(s) => self.is_valid(i) && (s == &self.value(i).to_string()), + JNull => self.is_null(i), + _ => false, + }) + } +} + impl PartialEq for DecimalArray { fn eq(&self, json: &Value) -> bool { match json { diff --git a/arrow/src/array/iterator.rs b/arrow/src/array/iterator.rs index bc70d1a2a8e..9ac2d0642d4 100644 --- a/arrow/src/array/iterator.rs +++ b/arrow/src/array/iterator.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use crate::array::BasicDecimalArray; use crate::datatypes::ArrowPrimitiveType; use super::{ diff --git a/arrow/src/array/mod.rs b/arrow/src/array/mod.rs index 90c3b21fbd2..60703c53b5b 100644 --- a/arrow/src/array/mod.rs +++ b/arrow/src/array/mod.rs @@ -181,6 +181,8 @@ pub use self::array_binary::BinaryArray; pub use self::array_binary::FixedSizeBinaryArray; pub use self::array_binary::LargeBinaryArray; pub use self::array_boolean::BooleanArray; +pub use self::array_decimal::BasicDecimalArray; +pub use self::array_decimal::Decimal256Array; pub use self::array_decimal::DecimalArray; pub use self::array_dictionary::DictionaryArray; pub use self::array_list::FixedSizeListArray; @@ -468,6 +470,7 @@ pub use self::builder::BinaryBuilder; pub use self::builder::BooleanBufferBuilder; pub use self::builder::BooleanBuilder; pub use self::builder::BufferBuilder; +pub use self::builder::Decimal256Builder; pub use self::builder::DecimalBuilder; pub use self::builder::FixedSizeBinaryBuilder; pub use self::builder::FixedSizeListBuilder; diff --git a/arrow/src/array/ord.rs b/arrow/src/array/ord.rs index be910f96bd5..019b1163b50 100644 --- a/arrow/src/array/ord.rs +++ b/arrow/src/array/ord.rs @@ -19,6 +19,7 @@ use std::cmp::Ordering; +use crate::array::BasicDecimalArray; use crate::array::*; use crate::datatypes::TimeUnit; use crate::datatypes::*; diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index 4ffb1ea5d10..08236680600 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -38,6 +38,7 @@ use std::str; use std::sync::Arc; +use crate::array::BasicDecimalArray; use crate::buffer::MutableBuffer; use crate::compute::kernels::arithmetic::{divide, multiply}; use crate::compute::kernels::arity::unary; @@ -2134,6 +2135,7 @@ where #[cfg(test)] mod tests { use super::*; + use crate::array::BasicDecimalArray; use crate::util::decimal::Decimal128; use crate::{buffer::Buffer, util::display::array_value_to_string}; diff --git a/arrow/src/compute/kernels/sort.rs b/arrow/src/compute/kernels/sort.rs index e399cf9f0c1..8e0831c6140 100644 --- a/arrow/src/compute/kernels/sort.rs +++ b/arrow/src/compute/kernels/sort.rs @@ -17,6 +17,7 @@ //! Defines sort kernel for `ArrayRef` +use crate::array::BasicDecimalArray; use crate::array::*; use crate::buffer::MutableBuffer; use crate::compute::take; diff --git a/arrow/src/compute/kernels/take.rs b/arrow/src/compute/kernels/take.rs index 624e9ddcdb5..5b08388f576 100644 --- a/arrow/src/compute/kernels/take.rs +++ b/arrow/src/compute/kernels/take.rs @@ -19,6 +19,8 @@ use std::{ops::AddAssign, sync::Arc}; +use crate::array::BasicDecimalArray; + use crate::buffer::{Buffer, MutableBuffer}; use crate::compute::util::{ take_value_indices_from_fixed_size_list, take_value_indices_from_list, diff --git a/arrow/src/csv/reader.rs b/arrow/src/csv/reader.rs index 639c0b42afe..7250f943e48 100644 --- a/arrow/src/csv/reader.rs +++ b/arrow/src/csv/reader.rs @@ -1115,6 +1115,7 @@ mod tests { use std::io::{Cursor, Write}; use tempfile::NamedTempFile; + use crate::array::BasicDecimalArray; use crate::array::*; use crate::compute::cast; use crate::datatypes::Field; diff --git a/arrow/src/util/decimal.rs b/arrow/src/util/decimal.rs index 7649b6b417f..4d67245647d 100644 --- a/arrow/src/util/decimal.rs +++ b/arrow/src/util/decimal.rs @@ -107,6 +107,7 @@ pub struct Decimal128 { impl Decimal128 { /// Creates `Decimal128` from an `i128` value. + #[allow(dead_code)] pub(crate) fn new_from_i128(precision: usize, scale: usize, value: i128) -> Self { Decimal128 { precision, diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs index 220aa59ad40..7a7da8ccb0f 100644 --- a/arrow/src/util/display.rs +++ b/arrow/src/util/display.rs @@ -23,6 +23,7 @@ use std::fmt::Write; use std::sync::Arc; use crate::array::Array; +use crate::array::BasicDecimalArray; use crate::datatypes::{ ArrowNativeType, ArrowPrimitiveType, DataType, Field, Int16Type, Int32Type, Int64Type, Int8Type, TimeUnit, UInt16Type, UInt32Type, UInt64Type, UInt8Type, diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index a18098ff156..4effa99ba64 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use arrow::array as arrow_array; use arrow::array::ArrayRef; +use arrow::array::BasicDecimalArray; use arrow::datatypes::{DataType as ArrowDataType, IntervalUnit, SchemaRef}; use arrow::record_batch::RecordBatch; use arrow_array::Array;