Skip to content

Commit

Permalink
Add precision/scale check. Add decimal128 and decimal256 iters.
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Jul 22, 2022
1 parent 91b2d7e commit 0e7a5cd
Show file tree
Hide file tree
Showing 6 changed files with 182 additions and 44 deletions.
1 change: 0 additions & 1 deletion arrow/src/array/array_binary.rs
Expand Up @@ -24,7 +24,6 @@ use super::{
FixedSizeListArray, GenericBinaryIter, GenericListArray, OffsetSizeTrait,
};
use crate::array::array::ArrayAccessor;
pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
Expand Down
127 changes: 94 additions & 33 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::{ArrayAccessor, Decimal128Iter, Decimal256Iter};
use std::borrow::Borrow;
use std::convert::From;
use std::fmt;
Expand All @@ -24,13 +25,11 @@ use super::{
array::print_long_array, raw_pointer::RawPtrBox, Array, ArrayData, FixedSizeListArray,
};
use super::{BooleanBufferBuilder, FixedSizeBinaryArray};
#[allow(deprecated)]
pub use crate::array::DecimalIter;
use crate::buffer::Buffer;
use crate::datatypes::DataType;
use crate::datatypes::{
validate_decimal_precision, DECIMAL_DEFAULT_SCALE, DECIMAL_MAX_PRECISION,
DECIMAL_MAX_SCALE,
};
use crate::datatypes::{validate_decimal_precision, DECIMAL_DEFAULT_SCALE};
use crate::datatypes::{DataType, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

Expand Down Expand Up @@ -103,11 +102,18 @@ pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:

/// Returns the element at index `i`.
fn value(&self, i: usize) -> T {
let data = self.data();
assert!(i < data.len(), "Out of bounds access");
assert!(i < self.data().len(), "Out of bounds access");

unsafe { self.value_unchecked(i) }
}

/// Returns the element at index `i`.
/// # Safety
/// Caller is responsible for ensuring that the index is within the bounds of the array
unsafe fn value_unchecked(&self, i: usize) -> T {
let data = self.data();
let offset = i + data.offset();
let raw_val = unsafe {
let raw_val = {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.raw_value_data_ptr().offset(pos as isize),
Expand Down Expand Up @@ -270,24 +276,24 @@ impl Decimal128Array {
/// specified precision.
///
/// Returns an Error if:
/// 1. `precision` is larger than [`DECIMAL_MAX_PRECISION`]
/// 2. `scale` is larger than [`DECIMAL_MAX_SCALE`];
/// 1. `precision` is larger than [`DECIMAL128_MAX_PRECISION`]
/// 2. `scale` is larger than [`DECIMAL128_MAX_SCALE`];
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(
mut self,
precision: usize,
scale: usize,
) -> Result<Self> {
if precision > DECIMAL_MAX_PRECISION {
if precision > DECIMAL128_MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
precision, DECIMAL_MAX_PRECISION
precision, DECIMAL128_MAX_PRECISION
)));
}
if scale > DECIMAL_MAX_SCALE {
if scale > DECIMAL128_MAX_SCALE {
return Err(ArrowError::InvalidArgumentError(format!(
"scale {} is greater than max {}",
scale, DECIMAL_MAX_SCALE
scale, DECIMAL128_MAX_SCALE
)));
}
if scale > precision {
Expand All @@ -302,7 +308,7 @@ impl Decimal128Array {
// decreased
if precision < self.precision {
for v in self.iter().flatten() {
validate_decimal_precision(v, precision)?;
validate_decimal_precision(v.as_i128(), precision)?;
}
}

Expand All @@ -322,7 +328,7 @@ impl Decimal128Array {
/// The default precision and scale used when not specified.
pub fn default_type() -> DataType {
// Keep maximum precision
DataType::Decimal(DECIMAL_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
DataType::Decimal(DECIMAL128_MAX_PRECISION, DECIMAL_DEFAULT_SCALE)
}
}

Expand Down Expand Up @@ -368,19 +374,13 @@ impl From<ArrayData> for Decimal256Array {
}
}

impl<'a> IntoIterator for &'a Decimal128Array {
type Item = Option<i128>;
type IntoIter = DecimalIter<'a>;

fn into_iter(self) -> Self::IntoIter {
DecimalIter::<'a>::new(self)
}
}

impl<'a> Decimal128Array {
/// constructs a new iterator
pub fn iter(&'a self) -> DecimalIter<'a> {
DecimalIter::new(self)
/// Constructs a new iterator that iterates `Decimal128` values as i128 values.
/// This is kept mostly for back-compatibility purpose.
/// Suggests to use `iter()` that returns `Decimal128Iter`.
#[allow(deprecated)]
pub fn i128_iter(&'a self) -> DecimalIter<'a> {
DecimalIter::<'a>::new(self)
}
}

Expand Down Expand Up @@ -421,7 +421,7 @@ impl<Ptr: Borrow<Option<i128>>> FromIterator<Ptr> for Decimal128Array {
}

macro_rules! def_decimal_array {
($ty:ident, $array_name:expr) => {
($ty:ident, $array_name:expr, $decimal_ty:ident, $iter_ty:ident) => {
impl private_decimal::DecimalArrayPrivate for $ty {
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
Expand Down Expand Up @@ -463,15 +463,55 @@ macro_rules! def_decimal_array {
write!(f, "]")
}
}

impl<'a> ArrayAccessor for &'a $ty {
type Item = $decimal_ty;

fn value(&self, index: usize) -> Self::Item {
$ty::value(self, index)
}

unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
$ty::value_unchecked(self, index)
}
}

impl<'a> IntoIterator for &'a $ty {
type Item = Option<$decimal_ty>;
type IntoIter = $iter_ty<'a>;

fn into_iter(self) -> Self::IntoIter {
$iter_ty::<'a>::new(self)
}
}

impl<'a> $ty {
/// constructs a new iterator
pub fn iter(&'a self) -> $iter_ty<'a> {
$iter_ty::<'a>::new(self)
}
}
};
}

def_decimal_array!(Decimal128Array, "Decimal128Array");
def_decimal_array!(Decimal256Array, "Decimal256Array");
def_decimal_array!(
Decimal128Array,
"Decimal128Array",
Decimal128,
Decimal128Iter
);
def_decimal_array!(
Decimal256Array,
"Decimal256Array",
Decimal256,
Decimal256Iter
);

#[cfg(test)]
mod tests {
use crate::array::Decimal256Builder;
use crate::{array::Decimal128Builder, datatypes::Field};
use num::{BigInt, Num};

use super::*;

Expand Down Expand Up @@ -567,7 +607,7 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();

let collected: Vec<_> = array.iter().collect();
let collected: Vec<_> = array.iter().map(|d| d.map(|v| v.as_i128())).collect();
assert_eq!(data, collected);
}

Expand All @@ -576,7 +616,8 @@ mod tests {
let data = vec![Some(-100), None, Some(101)];
let array: Decimal128Array = data.clone().into_iter().collect();

let collected: Vec<_> = array.into_iter().collect();
let collected: Vec<_> =
array.into_iter().map(|d| d.map(|v| v.as_i128())).collect();
assert_eq!(data, collected);
}

Expand Down Expand Up @@ -750,4 +791,24 @@ mod tests {
assert!(decimal.is_null(0));
assert_eq!(decimal.value_as_string(1), "56".to_string());
}

#[test]
fn test_decimal256_iter() {
// TODO: Impl FromIterator for Decimal256Array
let mut builder = Decimal256Builder::new(30, 76, 6);
let value = BigInt::from_str_radix("12345", 10).unwrap();
let decimal1 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal1).unwrap();

builder.append_null();

let value = BigInt::from_str_radix("56789", 10).unwrap();
let decimal2 = Decimal256::from_big_int(&value, 76, 6).unwrap();
builder.append_value(&decimal2).unwrap();

let array: Decimal256Array = builder.finish();

let collected: Vec<_> = array.iter().collect();
assert_eq!(vec![Some(decimal1), None, Some(decimal2)], collected);
}
}
14 changes: 13 additions & 1 deletion arrow/src/array/iterator.rs
Expand Up @@ -16,7 +16,7 @@
// under the License.

use crate::array::array::ArrayAccessor;
use crate::array::BasicDecimalArray;
use crate::array::{BasicDecimalArray, Decimal256Array};

use super::{
Array, BooleanArray, Decimal128Array, GenericBinaryArray, GenericListArray,
Expand Down Expand Up @@ -104,15 +104,25 @@ pub type GenericStringIter<'a, T> = ArrayIter<&'a GenericStringArray<T>>;
pub type GenericBinaryIter<'a, T> = ArrayIter<&'a GenericBinaryArray<T>>;
pub type GenericListArrayIter<'a, O> = ArrayIter<&'a GenericListArray<O>>;

/// an iterator that returns `Some(Decimal128)` or `None`, that can be used on a
/// [`Decimal128Array`]
pub type Decimal128Iter<'a> = ArrayIter<&'a Decimal128Array>;

/// an iterator that returns `Some(Decimal256)` or `None`, that can be used on a
/// [`Decimal256Array`]
pub type Decimal256Iter<'a> = ArrayIter<&'a Decimal256Array>;

/// an iterator that returns `Some(i128)` or `None`, that can be used on a
/// [`Decimal128Array`]
#[derive(Debug)]
#[deprecated(note = "Please use `Decimal128Iter` instead")]
pub struct DecimalIter<'a> {
array: &'a Decimal128Array,
current: usize,
current_end: usize,
}

#[allow(deprecated)]
impl<'a> DecimalIter<'a> {
pub fn new(array: &'a Decimal128Array) -> Self {
Self {
Expand All @@ -123,6 +133,7 @@ impl<'a> DecimalIter<'a> {
}
}

#[allow(deprecated)]
impl<'a> std::iter::Iterator for DecimalIter<'a> {
type Item = Option<i128>;

Expand Down Expand Up @@ -150,6 +161,7 @@ impl<'a> std::iter::Iterator for DecimalIter<'a> {
}

/// iterator has known size.
#[allow(deprecated)]
impl<'a> std::iter::ExactSizeIterator for DecimalIter<'a> {}

#[cfg(test)]
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -1205,15 +1205,15 @@ fn cast_decimal_to_decimal(
let div = 10_i128.pow((input_scale - output_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v / div))
.map(|v| v.map(|v| v.as_i128() / div))
.collect::<Decimal128Array>()
} else {
// For example, input_scale is 3 and output_scale is 4;
// Original value is 1123_i128, and will be cast to 11230_i128.
let mul = 10_i128.pow((output_scale - input_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v * mul))
.map(|v| v.map(|v| v.as_i128() * mul))
.collect::<Decimal128Array>()
}
.with_precision_and_scale(*output_precision, *output_scale)?;
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/datatypes/datatype.rs
Expand Up @@ -431,10 +431,10 @@ pub const MIN_DECIMAL_FOR_LARGER_PRECISION: [&str; 38] = [
];

/// The maximum precision for [DataType::Decimal] values
pub const DECIMAL_MAX_PRECISION: usize = 38;
pub const DECIMAL128_MAX_PRECISION: usize = 38;

/// The maximum scale for [DataType::Decimal] values
pub const DECIMAL_MAX_SCALE: usize = 38;
pub const DECIMAL128_MAX_SCALE: usize = 38;

/// The maximum precision for [DataType::Decimal256] values
pub const DECIMAL256_MAX_PRECISION: usize = 76;
Expand Down

0 comments on commit 0e7a5cd

Please sign in to comment.