Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RFC: Simplify decimal (#2440) #2477

Merged
merged 7 commits into from Aug 18, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
104 changes: 48 additions & 56 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -20,27 +20,30 @@ use crate::array::ArrayAccessor;
use std::borrow::Borrow;
use std::convert::From;
use std::fmt;
use std::marker::PhantomData;
use std::{any::Any, iter::FromIterator};

use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
};
use super::{BasicDecimalIter, BooleanBufferBuilder, FixedSizeBinaryArray};
use super::{BooleanBufferBuilder, DecimalIter, FixedSizeBinaryArray};
#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::validate_decimal_precision;
use crate::datatypes::{validate_decimal256_precision_with_lt_bytes, DataType};
use crate::datatypes::{
validate_decimal256_precision_with_lt_bytes, DataType, Decimal128Type,
Decimal256Type, DecimalType, NativeDecimalType,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal256};
use crate::util::decimal::{Decimal, Decimal256};

/// `Decimal128Array` stores fixed width decimal numbers,
/// with a fixed precision and scale.
///
/// # Examples
///
/// ```
/// use arrow::array::{Array, BasicDecimalArray, Decimal128Array};
/// use arrow::array::{Array, DecimalArray, Decimal128Array};
/// use arrow::datatypes::DataType;
///
/// // Create a DecimalArray with the default precision and scale
Expand Down Expand Up @@ -68,24 +71,24 @@ use crate::util::decimal::{BasicDecimal, Decimal256};
/// assert_eq!(6, decimal_array.scale());
/// ```
///
pub type Decimal128Array = BasicDecimalArray<16>;
pub type Decimal128Array = DecimalArray<Decimal128Type>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is certainly a more consistent with the other Array types


pub type Decimal256Array = BasicDecimalArray<32>;
pub type Decimal256Array = DecimalArray<Decimal256Type>;

pub struct BasicDecimalArray<const BYTE_WIDTH: usize> {
pub struct DecimalArray<T: DecimalType> {
data: ArrayData,
value_data: RawPtrBox<u8>,
precision: usize,
scale: usize,
_phantom: PhantomData<T>,
}

impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
pub const VALUE_LENGTH: i32 = BYTE_WIDTH as i32;
const DEFAULT_TYPE: DataType = BasicDecimal::<BYTE_WIDTH>::DEFAULT_TYPE;
pub const MAX_PRECISION: usize = BasicDecimal::<BYTE_WIDTH>::MAX_PRECISION;
pub const MAX_SCALE: usize = BasicDecimal::<BYTE_WIDTH>::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType =
BasicDecimal::<BYTE_WIDTH>::TYPE_CONSTRUCTOR;
impl<T: DecimalType> DecimalArray<T> {
pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32;
const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE;
pub const MAX_PRECISION: usize = T::MAX_PRECISION;
pub const MAX_SCALE: usize = T::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR;

pub fn data(&self) -> &ArrayData {
&self.data
Expand All @@ -102,7 +105,7 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
}

/// Returns the element at index `i`.
pub fn value(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
pub fn value(&self, i: usize) -> Decimal<T> {
assert!(i < self.data().len(), "Out of bounds access");

unsafe { self.value_unchecked(i) }
Expand All @@ -111,19 +114,17 @@ impl<const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
/// Returns the element at index `i`.
/// # Safety
/// Caller is responsible for ensuring that the index is within the bounds of the array
pub unsafe fn value_unchecked(&self, i: usize) -> BasicDecimal<BYTE_WIDTH> {
pub unsafe fn value_unchecked(&self, i: usize) -> Decimal<T> {
let data = self.data();
let offset = i + data.offset();
let raw_val = {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
T::Native::from_slice(std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
.try_into()
.unwrap()
))
};
BasicDecimal::<BYTE_WIDTH>::new(self.precision(), self.scale(), raw_val)
Decimal::new(self.precision(), self.scale(), &raw_val)
}

/// Returns the offset for the element at index `i`.
Expand Down Expand Up @@ -401,15 +402,15 @@ impl Decimal256Array {
}
}

impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> From<ArrayData> for DecimalArray<T> {
fn from(data: ArrayData) -> Self {
assert_eq!(
data.buffers().len(),
1,
"DecimalArray data should contain 1 buffer only (values)"
);
let values = data.buffers()[0].as_ptr();
let (precision, scale) = match (data.data_type(), BYTE_WIDTH) {
let (precision, scale) = match (data.data_type(), T::BYTE_LENGTH) {
tustvold marked this conversation as resolved.
Show resolved Hide resolved
(DataType::Decimal128(precision, scale), 16)
| (DataType::Decimal256(precision, scale), 32) => (*precision, *scale),
_ => panic!("Expected data type to be Decimal"),
Expand All @@ -419,27 +420,18 @@ impl<const BYTE_WIDTH: usize> From<ArrayData> for BasicDecimalArray<BYTE_WIDTH>
value_data: unsafe { RawPtrBox::new(values) },
precision,
scale,
_phantom: Default::default(),
}
}
}

impl<'a> Decimal128Array {
/// Constructs a new iterator that iterates `Decimal128` values as i128 values.
/// This is kept mostly for back-compatibility purpose.
/// Suggests to use `iter()` that returns `Decimal128Iter`.
#[allow(deprecated)]
pub fn i128_iter(&'a self) -> DecimalIter<'a> {
DecimalIter::<'a>::new(self)
}
}

fn build_decimal_array_from<const BYTE_WIDTH: usize>(
fn build_decimal_array_from<T: DecimalType>(
null_buf: BooleanBufferBuilder,
buffer: Buffer,
) -> BasicDecimalArray<BYTE_WIDTH> {
) -> DecimalArray<T> {
let data = unsafe {
ArrayData::new_unchecked(
BasicDecimalArray::<BYTE_WIDTH>::default_type(),
DecimalArray::<T>::default_type(),
null_buf.len(),
None,
Some(null_buf.into()),
Expand All @@ -448,7 +440,7 @@ fn build_decimal_array_from<const BYTE_WIDTH: usize>(
vec![],
)
};
BasicDecimalArray::<BYTE_WIDTH>::from(data)
DecimalArray::from(data)
}

impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
Expand All @@ -471,7 +463,7 @@ impl<Ptr: Into<Decimal256>> FromIterator<Option<Ptr>> for Decimal256Array {
}
});

build_decimal_array_from::<32>(null_buf, buffer.into())
build_decimal_array_from(null_buf, buffer.into())
}
}

Expand All @@ -496,11 +488,11 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for Decimal128Array {
})
.collect();

build_decimal_array_from::<16>(null_buf, buffer)
build_decimal_array_from(null_buf, buffer)
}
}

impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> Array for DecimalArray<T> {
fn as_any(&self) -> &dyn Any {
self
}
Expand All @@ -514,18 +506,18 @@ impl<const BYTE_WIDTH: usize> Array for BasicDecimalArray<BYTE_WIDTH> {
}
}

impl<const BYTE_WIDTH: usize> From<BasicDecimalArray<BYTE_WIDTH>> for ArrayData {
fn from(array: BasicDecimalArray<BYTE_WIDTH>) -> Self {
impl<T: DecimalType> From<DecimalArray<T>> for ArrayData {
fn from(array: DecimalArray<T>) -> Self {
array.data
}
}

impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
impl<T: DecimalType> fmt::Debug for DecimalArray<T> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"Decimal{}Array<{}, {}>\n[\n",
BYTE_WIDTH * 8,
T::BYTE_LENGTH * 8,
self.precision,
self.scale
)?;
Expand All @@ -538,31 +530,31 @@ impl<const BYTE_WIDTH: usize> fmt::Debug for BasicDecimalArray<BYTE_WIDTH> {
}
}

impl<'a, const BYTE_WIDTH: usize> ArrayAccessor for &'a BasicDecimalArray<BYTE_WIDTH> {
type Item = BasicDecimal<BYTE_WIDTH>;
impl<'a, T: DecimalType> ArrayAccessor for &'a DecimalArray<T> {
type Item = Decimal<T>;

fn value(&self, index: usize) -> Self::Item {
BasicDecimalArray::<BYTE_WIDTH>::value(self, index)
DecimalArray::<T>::value(self, index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
BasicDecimalArray::<BYTE_WIDTH>::value_unchecked(self, index)
DecimalArray::<T>::value_unchecked(self, index)
}
}

impl<'a, const BYTE_WIDTH: usize> IntoIterator for &'a BasicDecimalArray<BYTE_WIDTH> {
type Item = Option<BasicDecimal<BYTE_WIDTH>>;
type IntoIter = BasicDecimalIter<'a, BYTE_WIDTH>;
impl<'a, T: DecimalType> IntoIterator for &'a DecimalArray<T> {
type Item = Option<Decimal<T>>;
type IntoIter = DecimalIter<'a, T>;

fn into_iter(self) -> Self::IntoIter {
BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
DecimalIter::<'a, T>::new(self)
}
}

impl<'a, const BYTE_WIDTH: usize> BasicDecimalArray<BYTE_WIDTH> {
impl<'a, T: DecimalType> DecimalArray<T> {
/// constructs a new iterator
pub fn iter(&'a self) -> BasicDecimalIter<'a, BYTE_WIDTH> {
BasicDecimalIter::<'a, BYTE_WIDTH>::new(self)
pub fn iter(&'a self) -> DecimalIter<'a, T> {
DecimalIter::<'a, T>::new(self)
}
}

Expand Down
2 changes: 1 addition & 1 deletion arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -365,7 +365,7 @@ mod tests {
.expect("should not validate invalid value at builder");

let array = builder.finish();
let array_data = array_decimal::BasicDecimalArray::data(&array);
let array_data = array_decimal::DecimalArray::data(&array);
array_data.validate_values().unwrap();
}
}
68 changes: 7 additions & 61 deletions arrow/src/array/iterator.rs
Expand Up @@ -16,11 +16,12 @@
// under the License.

use crate::array::array::ArrayAccessor;
use crate::array::BasicDecimalArray;
use crate::array::DecimalArray;
use crate::datatypes::{Decimal128Type, Decimal256Type};

use super::{
Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
GenericStringArray, PrimitiveArray,
BooleanArray, GenericBinaryArray, GenericListArray, GenericStringArray,
PrimitiveArray,
};

/// an iterator that returns Some(T) or None, that can be used on any [`ArrayAccessor`]
Expand Down Expand Up @@ -104,69 +105,14 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;

pub type BasicDecimalIter<'a, const BYTE_WIDTH: usize> =
ArrayIter<&'a BasicDecimalArray<BYTE_WIDTH>>;
pub type DecimalIter<'a, T> = ArrayIter<&'a DecimalArray<T>>;
/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
/// [`Decimal128Array`]
pub type Decimal128Iter<'a> = BasicDecimalIter<'a, 16>;
pub type Decimal128Iter<'a> = DecimalIter<'a, Decimal128Type>;

/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
/// [`super::Decimal256Array`]
pub type Decimal256Iter<'a> = BasicDecimalIter<'a, 32>;
/// an iterator that returns `Some(i128)` or `None`, that can be used on a
/// [`Decimal128Array`]
#[derive(Debug)]
#[deprecated(note = "Please use `Decimal128Iter` instead. \
`DecimalIter` iterates `Decimal128` values as i128 values. \
This is kept mostly for back-compatibility purpose. Suggests to use `Decimal128Array.iter()` \
that returns `Decimal128Iter`.")]
pub struct DecimalIter<'a> {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wanted to reuse this name, as BasicDecimal is confusing. It isn't vital we remove it, but I think it is better to not be stuck on a stale name

array: &'a Decimal128Array,
current: usize,
current_end: usize,
}

#[allow(deprecated)]
impl<'a> DecimalIter<'a> {
pub fn new(array: &'a Decimal128Array) -> Self {
Self {
array,
current: 0,
current_end: array.len(),
}
}
}

#[allow(deprecated)]
impl<'a> std::iter::Iterator for DecimalIter<'a> {
type Item = Option<i128>;

fn next(&mut self) -> Option<Self::Item> {
if self.current == self.current_end {
None
} else {
let old = self.current;
self.current += 1;
// TODO: Improve performance by avoiding bounds check here
// (by using adding a `value_unchecked, for example)
if self.array.is_null(old) {
Some(None)
} else {
Some(Some(self.array.value(old).as_i128()))
}
}
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let remain = self.current_end - self.current;
(remain, Some(remain))
}
}

/// iterator has known size.
#[allow(deprecated)]
impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}
pub type Decimal256Iter<'a> = DecimalIter<'a, Decimal256Type>;

#[cfg(test)]
mod tests {
Expand Down
5 changes: 1 addition & 4 deletions arrow/src/array/mod.rs
Expand Up @@ -199,15 +199,12 @@ pub(crate) use self::data::BufferSpec;
pub use self::array_binary::BinaryArray;
pub use self::array_binary::LargeBinaryArray;
pub use self::array_boolean::BooleanArray;
pub use self::array_decimal::BasicDecimalArray;
pub use self::array_decimal::Decimal128Array;
pub use self::array_decimal::Decimal256Array;
pub use self::array_decimal::DecimalArray;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this is the generic representation of the array, akin to PrimitiveArray

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

pub use self::array_fixed_size_binary::FixedSizeBinaryArray;
pub use self::array_fixed_size_list::FixedSizeListArray;

#[deprecated(note = "Please use `Decimal128Array` instead")]
pub type DecimalArray = Decimal128Array;

pub use self::array_dictionary::{DictionaryArray, TypedDictionaryArray};
pub use self::array_list::LargeListArray;
pub use self::array_list::ListArray;
Expand Down