Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Decimal256Builder and Decimal256Array; Decimal arrays now implement BasicDecimalArray trait #2000

Merged
merged 8 commits into from Jul 8, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
196 changes: 139 additions & 57 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use std::borrow::Borrow;
use std::convert::{From, TryInto};
use std::convert::From;
use std::fmt;
use std::{any::Any, iter::FromIterator};

Expand All @@ -32,15 +32,15 @@ use crate::datatypes::{
DECIMAL_MAX_SCALE,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

/// `DecimalArray` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, DecimalArray};
/// use arrow::array::{Array, BasicDecimalArray, DecimalArray};
/// use arrow::datatypes::DataType;
///
/// // Create a DecimalArray with the default precision and scale
Expand Down Expand Up @@ -75,47 +75,67 @@ pub struct DecimalArray {
scale: usize,
}

impl DecimalArray {
const VALUE_LENGTH: i32 = 16;
/// Array of fixed-width decimal values that share one precision and scale.
///
/// Its `BasicDecimalArray` implementation declares a `VALUE_LENGTH` of 32
/// bytes per element and yields elements as `Decimal256`.
pub struct Decimal256Array {
// Underlying array data; its first buffer holds the values.
data: ArrayData,
// Pointer to the start of the first buffer of `data`.
value_data: RawPtrBox<u8>,
// Total number of digits that can be stored by this array.
precision: usize,
// Number of digits after the decimal point.
scale: usize,
}

// Non-`pub` module that holds the supertrait required by `BasicDecimalArray`.
// NOTE(review): this looks like the sealed-trait arrangement (code outside this
// module tree cannot name `DecimalArrayPrivate`, so it cannot implement
// `BasicDecimalArray`) — confirm that this is the intent.
mod private_decimal {
/// Lets the provided methods of `BasicDecimalArray` reach the raw value bytes
/// of the implementing array.
pub trait DecimalArrayPrivate {
/// Returns a raw pointer to the array's value data.
fn raw_value_data_ptr(&self) -> *const u8;
}
}

pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
private_decimal::DecimalArrayPrivate
{
const VALUE_LENGTH: i32;

fn data(&self) -> &ArrayData;

/// Return the precision (total digits) that can be stored by this array
fn precision(&self) -> usize;

/// Return the scale (digits after the decimal) that can be stored by this array
fn scale(&self) -> usize;

/// Returns the element at index `i`.
pub fn value(&self, i: usize) -> Decimal128 {
assert!(i < self.data.len(), "DecimalArray out of bounds access");
let offset = i + self.data.offset();
fn value(&self, i: usize) -> T {
let data = self.data();
assert!(i < data.len(), "Out of bounds access");

let offset = i + data.offset();
let raw_val = unsafe {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.value_data.as_ptr().offset(pos as isize),
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
};
let as_array = raw_val.try_into().unwrap();
Decimal128::new_from_i128(
self.precision,
self.scale,
i128::from_le_bytes(as_array),
)
T::new(self.precision(), self.scale(), raw_val)
}

/// Returns the offset for the element at index `i`.
///
/// Note this doesn't do any bound checking, for performance reason.
#[inline]
pub fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data.offset() + i)
fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data().offset() + i)
}

/// Returns the length for an element.
///
/// All elements have the same length as the array is a fixed size.
#[inline]
pub const fn value_length(&self) -> i32 {
fn value_length(&self) -> i32 {
Self::VALUE_LENGTH
}

/// Returns a clone of the value data buffer
pub fn value_data(&self) -> Buffer {
self.data.buffers()[0].clone()
fn value_data(&self) -> Buffer {
self.data().buffers()[0].clone()
}

#[inline]
Expand All @@ -124,15 +144,15 @@ impl DecimalArray {
}

#[inline]
pub fn value_as_string(&self, row: usize) -> String {
fn value_as_string(&self, row: usize) -> String {
self.value(row).to_string()
}

pub fn from_fixed_size_list_array(
fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
scale: usize,
) -> Self {
) -> U {
let child_data = &v.data_ref().child_data()[0];
assert_eq!(
child_data.child_data().len(),
Expand All @@ -155,9 +175,43 @@ impl DecimalArray {
.offset(list_offset);

let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
U::from(array_data)
}
}

/// `BasicDecimalArray` accessors for [`DecimalArray`], whose elements are
/// surfaced as [`Decimal128`].
impl BasicDecimalArray<Decimal128, DecimalArray> for DecimalArray {
    /// Width, in bytes, of a single element.
    const VALUE_LENGTH: i32 = 16;

    /// Number of digits after the decimal point.
    fn scale(&self) -> usize {
        self.scale
    }

    /// Total number of digits that can be stored.
    fn precision(&self) -> usize {
        self.precision
    }

    /// Borrows the underlying [`ArrayData`].
    fn data(&self) -> &ArrayData {
        &self.data
    }
}

/// `BasicDecimalArray` accessors for [`Decimal256Array`], whose elements are
/// surfaced as [`Decimal256`].
impl BasicDecimalArray<Decimal256, Decimal256Array> for Decimal256Array {
    /// Width, in bytes, of a single element.
    const VALUE_LENGTH: i32 = 32;

    /// Number of digits after the decimal point.
    fn scale(&self) -> usize {
        self.scale
    }

    /// Total number of digits that can be stored.
    fn precision(&self) -> usize {
        self.precision
    }

    /// Borrows the underlying [`ArrayData`].
    fn data(&self) -> &ArrayData {
        &self.data
    }
}

impl DecimalArray {
/// Creates a [DecimalArray] with default precision and scale,
/// based on an iterator of `i128` values without nulls
pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
Expand All @@ -176,16 +230,6 @@ impl DecimalArray {
DecimalArray::from(data)
}

/// Return the precision (total digits) that can be stored by this array
pub fn precision(&self) -> usize {
self.precision
}

/// Return the scale (digits after the decimal) that can be stored by this array
pub fn scale(&self) -> usize {
self.scale
}

/// Returns a DecimalArray with the same data as self, with the
/// specified precision.
///
Expand Down Expand Up @@ -267,9 +311,24 @@ impl From<ArrayData> for DecimalArray {
}
}

impl From<DecimalArray> for ArrayData {
fn from(array: DecimalArray) -> Self {
array.data
/// Builds a [`Decimal256Array`] on top of existing [`ArrayData`].
///
/// The precision and scale are taken from the `DataType::Decimal` data type
/// of `data`, and the value pointer is taken from its first buffer.
///
/// # Panics
///
/// Panics if `data` does not contain exactly one buffer, or if its data type
/// is not `DataType::Decimal`.
impl From<ArrayData> for Decimal256Array {
    fn from(data: ArrayData) -> Self {
        // The message names this type (it previously said "DecimalArray", a
        // copy-paste from the 128-bit implementation).
        assert_eq!(
            data.buffers().len(),
            1,
            "Decimal256Array data should contain 1 buffer only (values)"
        );
        let values = data.buffers()[0].as_ptr();
        let (precision, scale) = match data.data_type() {
            DataType::Decimal(precision, scale) => (*precision, *scale),
            _ => panic!("Expected data type to be Decimal"),
        };
        Self {
            data,
            // NOTE(review): `values` points into the first buffer of `data`, which is
            // stored in the same struct; presumably that is what keeps the pointer
            // valid — confirm against the requirements of `RawPtrBox::new`.
            value_data: unsafe { RawPtrBox::new(values) },
            precision,
            scale,
        }
    }
}

Expand Down Expand Up @@ -325,32 +384,55 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for DecimalArray {
}
}

impl fmt::Debug for DecimalArray {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
print_long_array(self, f, |array, index, f| {
let formatted_decimal = array.value_as_string(index);
macro_rules! def_decimal_array {
($ty:ident, $array_name:expr) => {
impl private_decimal::DecimalArrayPrivate for $ty {
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
}
}

write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
}
impl Array for $ty {
fn as_any(&self) -> &dyn Any {
self
}

impl Array for DecimalArray {
fn as_any(&self) -> &dyn Any {
self
}
fn data(&self) -> &ArrayData {
&self.data
}

fn data(&self) -> &ArrayData {
&self.data
}
fn into_data(self) -> ArrayData {
self.into()
}
}

fn into_data(self) -> ArrayData {
self.into()
}
impl From<$ty> for ArrayData {
fn from(array: $ty) -> Self {
array.data
}
}

impl fmt::Debug for $ty {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{}<{}, {}>\n[\n",
$array_name, self.precision, self.scale
)?;
print_long_array(self, f, |array, index, f| {
let formatted_decimal = array.value_as_string(index);

write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
}
};
}

// Generate the `DecimalArrayPrivate`, `Array`, `From<_> for ArrayData` and
// `fmt::Debug` impls for both decimal array types. The string argument is the
// type name printed at the start of the `Debug` output.
def_decimal_array!(DecimalArray, "DecimalArray");
def_decimal_array!(Decimal256Array, "Decimal256Array");

#[cfg(test)]
mod tests {
use crate::{array::DecimalBuilder, datatypes::Field};
Expand Down