Skip to content

Commit

Permalink
Add Decimal256Builder and Decimal256Array (#2000)
Browse files Browse the repository at this point in the history
* Add Decimal256Builder and Decimal256Array

* Add decimal builder test

* Rebased with master

* Trigger Build

* Disable value validation for precision > 38

* Deduplicate by using macro

* Trigger Build

* Add precision and scale check
  • Loading branch information
viirya committed Jul 8, 2022
1 parent d70c2ea commit ca1bfb8
Show file tree
Hide file tree
Showing 15 changed files with 294 additions and 58 deletions.
196 changes: 139 additions & 57 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::borrow::Borrow;
use std::convert::{From, TryInto};
use std::convert::From;
use std::fmt;
use std::{any::Any, iter::FromIterator};

Expand All @@ -32,15 +32,15 @@ use crate::datatypes::{
DECIMAL_MAX_SCALE,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

/// `DecimalArray` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, DecimalArray};
/// use arrow::array::{Array, BasicDecimalArray, DecimalArray};
/// use arrow::datatypes::DataType;
///
/// // Create a DecimalArray with the default precision and scale
Expand Down Expand Up @@ -75,47 +75,67 @@ pub struct DecimalArray {
scale: usize,
}

impl DecimalArray {
const VALUE_LENGTH: i32 = 16;
/// Array of fixed-width decimal values that share one precision and scale.
///
/// Its `BasicDecimalArray` implementation sets `VALUE_LENGTH` to 32, so each
/// element is read as 32 consecutive bytes and handed to `Decimal256::new`
/// by `BasicDecimalArray::value`.
pub struct Decimal256Array {
/// Underlying array data; returned by the `data()` accessors.
data: ArrayData,
/// Raw pointer to the start of `data`'s first buffer (set in `From<ArrayData>`).
value_data: RawPtrBox<u8>,
/// Precision (total digits) that can be stored by this array.
precision: usize,
/// Scale (digits after the decimal) that can be stored by this array.
scale: usize,
}

// Private helper module: `BasicDecimalArray` lists `DecimalArrayPrivate` as a
// supertrait, so only types that implement this trait can implement it.
// NOTE(review): presumably this is meant to stop code outside this module's
// parent from implementing `BasicDecimalArray` — confirm the intent.
mod private_decimal {
/// Gives the default methods of `BasicDecimalArray` access to the raw values pointer.
pub trait DecimalArrayPrivate {
/// Returns the pointer to the start of the array's value bytes.
fn raw_value_data_ptr(&self) -> *const u8;
}
}

pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
private_decimal::DecimalArrayPrivate
{
const VALUE_LENGTH: i32;

fn data(&self) -> &ArrayData;

/// Return the precision (total digits) that can be stored by this array
fn precision(&self) -> usize;

/// Return the scale (digits after the decimal) that can be stored by this array
fn scale(&self) -> usize;

/// Returns the element at index `i`.
pub fn value(&self, i: usize) -> Decimal128 {
assert!(i < self.data.len(), "DecimalArray out of bounds access");
let offset = i + self.data.offset();
fn value(&self, i: usize) -> T {
let data = self.data();
assert!(i < data.len(), "Out of bounds access");

let offset = i + data.offset();
let raw_val = unsafe {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.value_data.as_ptr().offset(pos as isize),
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
};
let as_array = raw_val.try_into().unwrap();
Decimal128::new_from_i128(
self.precision,
self.scale,
i128::from_le_bytes(as_array),
)
T::new(self.precision(), self.scale(), raw_val)
}

/// Returns the offset for the element at index `i`.
///
/// Note this doesn't do any bound checking, for performance reason.
#[inline]
pub fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data.offset() + i)
fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data().offset() + i)
}

/// Returns the length for an element.
///
/// All elements have the same length as the array is a fixed size.
#[inline]
pub const fn value_length(&self) -> i32 {
fn value_length(&self) -> i32 {
Self::VALUE_LENGTH
}

/// Returns a clone of the value data buffer
pub fn value_data(&self) -> Buffer {
self.data.buffers()[0].clone()
fn value_data(&self) -> Buffer {
self.data().buffers()[0].clone()
}

#[inline]
Expand All @@ -124,15 +144,15 @@ impl DecimalArray {
}

#[inline]
pub fn value_as_string(&self, row: usize) -> String {
fn value_as_string(&self, row: usize) -> String {
self.value(row).to_string()
}

pub fn from_fixed_size_list_array(
fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
scale: usize,
) -> Self {
) -> U {
let child_data = &v.data_ref().child_data()[0];
assert_eq!(
child_data.child_data().len(),
Expand All @@ -155,9 +175,43 @@ impl DecimalArray {
.offset(list_offset);

let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
U::from(array_data)
}
}

/// `BasicDecimalArray` implementation for `DecimalArray`.
///
/// Elements are produced as `Decimal128` values, and
/// `from_fixed_size_list_array` returns a `DecimalArray`.
impl BasicDecimalArray<Decimal128, DecimalArray> for DecimalArray {
/// Number of bytes read for each element by `value`.
const VALUE_LENGTH: i32 = 16;

/// Returns a reference to the underlying `ArrayData`.
fn data(&self) -> &ArrayData {
&self.data
}

/// Returns the precision (total digits) that can be stored by this array.
fn precision(&self) -> usize {
self.precision
}

/// Returns the scale (digits after the decimal) that can be stored by this array.
fn scale(&self) -> usize {
self.scale
}
}

/// `BasicDecimalArray` implementation for `Decimal256Array`.
///
/// Elements are produced as `Decimal256` values, and
/// `from_fixed_size_list_array` returns a `Decimal256Array`.
impl BasicDecimalArray<Decimal256, Decimal256Array> for Decimal256Array {
/// Number of bytes read for each element by `value`.
const VALUE_LENGTH: i32 = 32;

/// Returns a reference to the underlying `ArrayData`.
fn data(&self) -> &ArrayData {
&self.data
}

/// Returns the precision (total digits) that can be stored by this array.
fn precision(&self) -> usize {
self.precision
}

/// Returns the scale (digits after the decimal) that can be stored by this array.
fn scale(&self) -> usize {
self.scale
}
}

impl DecimalArray {
/// Creates a [DecimalArray] with default precision and scale,
/// based on an iterator of `i128` values without nulls
pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
Expand All @@ -176,16 +230,6 @@ impl DecimalArray {
DecimalArray::from(data)
}

/// Return the precision (total digits) that can be stored by this array
pub fn precision(&self) -> usize {
self.precision
}

/// Return the scale (digits after the decimal) that can be stored by this array
pub fn scale(&self) -> usize {
self.scale
}

/// Returns a DecimalArray with the same data as self, with the
/// specified precision.
///
Expand Down Expand Up @@ -267,9 +311,24 @@ impl From<ArrayData> for DecimalArray {
}
}

impl From<DecimalArray> for ArrayData {
fn from(array: DecimalArray) -> Self {
array.data
/// Builds a `Decimal256Array` from raw `ArrayData`.
///
/// The precision and scale are taken from the `DataType::Decimal` carried by
/// `data`, and the values pointer is taken from `data`'s first buffer.
///
/// # Panics
///
/// Panics if `data` does not contain exactly one buffer, or if its data type
/// is not `DataType::Decimal`.
impl From<ArrayData> for Decimal256Array {
    fn from(data: ArrayData) -> Self {
        // The message names this type so a failure is not mistaken for one
        // coming from `DecimalArray`'s own conversion.
        assert_eq!(
            data.buffers().len(),
            1,
            "Decimal256Array data should contain 1 buffer only (values)"
        );
        let values = data.buffers()[0].as_ptr();
        let (precision, scale) = match data.data_type() {
            DataType::Decimal(precision, scale) => (*precision, *scale),
            _ => panic!("Expected data type to be Decimal"),
        };
        Self {
            data,
            // SAFETY: `values` points at the start of `data`'s first buffer, and
            // `data` is stored in the same struct as the pointer.
            // NOTE(review): `RawPtrBox::new`'s exact requirements are not visible
            // here — confirm they are met for every accepted `ArrayData`.
            value_data: unsafe { RawPtrBox::new(values) },
            precision,
            scale,
        }
    }
}

Expand Down Expand Up @@ -325,32 +384,55 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray {
}
}

impl fmt::Debug for DecimalArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
print_long_array(self, f, |array, index, f| {
let formatted_decimal = array.value_as_string(index);
macro_rules! def_decimal_array {
($ty:ident, $array_name:expr) => {
impl private_decimal::DecimalArrayPrivate for $ty {
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
}
}

write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
}
impl Array for $ty {
fn as_any(&self) -> &dyn Any {
self
}

impl Array for DecimalArray {
fn as_any(&self) -> &dyn Any {
self
}
fn data(&self) -> &ArrayData {
&self.data
}

fn data(&self) -> &ArrayData {
&self.data
}
fn into_data(self) -> ArrayData {
self.into()
}
}

fn into_data(self) -> ArrayData {
self.into()
}
impl From<$ty> for ArrayData {
fn from(array: $ty) -> Self {
array.data
}
}

impl fmt::Debug for $ty {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{}<{}, {}>\n[\n",
$array_name, self.precision, self.scale
)?;
print_long_array(self, f, |array, index, f| {
let formatted_decimal = array.value_as_string(index);

write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
}
};
}

def_decimal_array!(DecimalArray, "DecimalArray");
def_decimal_array!(Decimal256Array, "Decimal256Array");

#[cfg(test)]
mod tests {
use crate::{array::DecimalBuilder, datatypes::Field};
Expand Down

0 comments on commit ca1bfb8

Please sign in to comment.