Skip to content

Commit

Permalink
Cast between decimal128 and decimal256
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Aug 8, 2022
1 parent 5fae299 commit 86e1ede
Show file tree
Hide file tree
Showing 6 changed files with 259 additions and 34 deletions.
2 changes: 1 addition & 1 deletion arrow/src/array/array_decimal.rs
Expand Up @@ -360,7 +360,7 @@ impl BasicDecimalArray<Decimal256, Decimal256Array> for Decimal256Array {
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
if precision < self.precision {
for v in self.iter().flatten() {
validate_decimal256_precision(&v.to_string(), precision)?;
validate_decimal256_precision(&v.to_big_int(), precision)?;
}
}
Ok(())
Expand Down
3 changes: 1 addition & 2 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -202,8 +202,7 @@ impl Decimal256Builder {
let value = if self.value_validation {
let raw_bytes = value.raw_value();
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, self.precision)?;
validate_decimal256_precision(&integer, self.precision)?;
value
} else {
value
Expand Down
13 changes: 9 additions & 4 deletions arrow/src/array/data.rs
Expand Up @@ -401,11 +401,17 @@ impl ArrayData {
#[inline]
pub(crate) fn with_data_type(mut self, new_data_type: DataType) -> Self {
assert!(
matches!(self.data_type, DataType::Decimal128(_, _)),
matches!(
self.data_type,
DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
),
"only DecimalType is supported for existing type"
);
assert!(
matches!(new_data_type, DataType::Decimal128(_, _)),
matches!(
new_data_type,
DataType::Decimal128(_, _) | DataType::Decimal256(_, _)
),
"only DecimalType is supported for new datatype"
);
self.data_type = new_data_type;
Expand Down Expand Up @@ -1044,8 +1050,7 @@ impl ArrayData {
let offset = pos * 32;
let raw_bytes = &values[offset..offset + 32];
let integer = BigInt::from_signed_bytes_le(raw_bytes);
let value_str = integer.to_string();
validate_decimal256_precision(&value_str, *p)?;
validate_decimal256_precision(&integer, *p)?;
}
Ok(())
}
Expand Down
260 changes: 239 additions & 21 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -36,6 +36,7 @@
//! ```

use chrono::Timelike;
use std::ops::{Div, Mul};
use std::str;
use std::sync::Arc;

Expand All @@ -50,10 +51,11 @@ use crate::temporal_conversions::{
EPOCH_DAYS_FROM_CE, MICROSECONDS, MILLISECONDS, MILLISECONDS_IN_DAY, NANOSECONDS,
SECONDS_IN_DAY,
};
use crate::util::decimal::BasicDecimal;
use crate::{array::*, compute::take};
use crate::{buffer::Buffer, util::serialization::lexical_to_string};
use num::cast::AsPrimitive;
use num::{NumCast, ToPrimitive};
use num::{BigInt, NumCast, ToPrimitive};

/// CastOptions provides a way to override the default cast behaviors
#[derive(Debug)]
Expand All @@ -78,6 +80,9 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
// TODO UTF8/unsigned numeric to decimal
// cast one decimal type to another decimal type
(Decimal128(_, _), Decimal128(_, _)) => true,
(Decimal256(_, _), Decimal256(_, _)) => true,
(Decimal128(_, _), Decimal256(_, _)) => true,
(Decimal256(_, _), Decimal128(_, _)) => true,
// signed numeric to decimal
(Null | Int8 | Int16 | Int32 | Int64 | Float32 | Float64, Decimal128(_, _)) |
// decimal to signed numeric
Expand Down Expand Up @@ -431,9 +436,30 @@ pub fn cast_with_options(
return Ok(array.clone());
}
match (from_type, to_type) {
(Decimal128(_, s1), Decimal128(p2, s2)) => {
cast_decimal_to_decimal(array, s1, p2, s2)
}
(Decimal128(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::<
crate::util::decimal::Decimal128,
crate::util::decimal::Decimal128,
Decimal128Array,
Decimal128Array,
>(array, s1, p2, s2),
(Decimal256(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::<
crate::util::decimal::Decimal256,
crate::util::decimal::Decimal256,
Decimal256Array,
Decimal256Array,
>(array, s1, p2, s2),
(Decimal128(_, s1), Decimal256(p2, s2)) => cast_decimal_to_decimal::<
crate::util::decimal::Decimal128,
crate::util::decimal::Decimal256,
Decimal128Array,
Decimal256Array,
>(array, s1, p2, s2),
(Decimal256(_, s1), Decimal128(p2, s2)) => cast_decimal_to_decimal::<
crate::util::decimal::Decimal256,
crate::util::decimal::Decimal128,
Decimal256Array,
Decimal128Array,
>(array, s1, p2, s2),
(Decimal128(_, scale), _) => {
// cast decimal to other type
match to_type {
Expand Down Expand Up @@ -1252,34 +1278,124 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
}

/// Cast one type of decimal array to another type of decimal array
fn cast_decimal_to_decimal(
fn cast_decimal_to_decimal<
T1: BasicDecimal,
T2: BasicDecimal,
D1: BasicDecimalArray<T1, D1> + From<ArrayData>,
D2: BasicDecimalArray<T2, D2> + From<ArrayData>,
>(
array: &ArrayRef,
input_scale: &usize,
output_precision: &usize,
output_scale: &usize,
) -> Result<ArrayRef> {
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();

let output_array = if input_scale > output_scale {
if input_scale > output_scale {
// For example, input_scale is 4 and output_scale is 3;
// Original value is 11234_i128, and will be cast to 1123_i128.
let div = 10_i128.pow((input_scale - output_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v.as_i128() / div))
.collect::<Decimal128Array>()
if D1::VALUE_LENGTH == 16 {
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.as_i128() / div));
if D2::VALUE_LENGTH == 16 {
let output_array = iter
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
} else {
let output_array = iter
.map(|v| v.map(|v| BigInt::from(v)))
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
}
} else {
let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.to_big_int().div(div)));
if D2::VALUE_LENGTH == 16 {
let values = iter
.map(|v| {
if v.is_none() {
Ok(None)
} else {
v.and_then(|v| v.to_i128())
.ok_or(ArrowError::InvalidArgumentError(format!(
"Cannot be casted to 128-bit integer for Decimal128",
)))
.map(|v| Some(v))
}
})
.collect::<Result<Vec<_>>>()?;

let output_array = values
.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
} else {
let output_array = iter
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
}
}
} else {
// For example, input_scale is 3 and output_scale is 4;
// Original value is 1123_i128, and will be cast to 11230_i128.
let mul = 10_i128.pow((output_scale - input_scale) as u32);
array
.iter()
.map(|v| v.map(|v| v.as_i128() * mul))
.collect::<Decimal128Array>()
}
.with_precision_and_scale(*output_precision, *output_scale)?;
if D1::VALUE_LENGTH == 16 {
let array = array.as_any().downcast_ref::<Decimal128Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.as_i128() * mul));
if D2::VALUE_LENGTH == 16 {
let output_array = iter
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
} else {
let output_array = iter
.map(|v| v.map(|v| BigInt::from(v)))
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
Ok(Arc::new(output_array))
}
} else {
let array = array.as_any().downcast_ref::<Decimal256Array>().unwrap();
let iter = array.iter().map(|v| v.map(|v| v.to_big_int().mul(mul)));
if D2::VALUE_LENGTH == 16 {
let values = iter
.map(|v| {
if v.is_none() {
Ok(None)
} else {
v.and_then(|v| v.to_i128())
.ok_or(ArrowError::InvalidArgumentError(format!(
"Cannot be casted to 128-bit integer for Decimal128",
)))
.map(|v| Some(v))
}
})
.collect::<Result<Vec<_>>>()?;

let output_array = values
.into_iter()
.collect::<Decimal128Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
} else {
let output_array = iter
.collect::<Decimal256Array>()
.with_precision_and_scale(*output_precision, *output_scale)?;

Ok(Arc::new(output_array))
}
}
}
}

/// Cast an array by changing its array_data type to the desired type
Expand Down Expand Up @@ -2422,7 +2538,7 @@ mod tests {
use super::*;
use crate::array::BasicDecimalArray;
use crate::datatypes::TimeUnit;
use crate::util::decimal::Decimal128;
use crate::util::decimal::{Decimal128, Decimal256};
use crate::{buffer::Buffer, util::display::array_value_to_string};

macro_rules! generate_cast_test_case {
Expand Down Expand Up @@ -2461,8 +2577,19 @@ mod tests {
.with_precision_and_scale(precision, scale)
}

/// Test helper: collects optional `BigInt` values into a `Decimal256Array`
/// and applies the requested precision and scale to it.
fn create_decimal256_array(
    array: Vec<Option<BigInt>>,
    precision: usize,
    scale: usize,
) -> Result<Decimal256Array> {
    let collected: Decimal256Array = array.into_iter().collect();
    collected.with_precision_and_scale(precision, scale)
}

#[test]
fn test_cast_decimal_to_decimal() {
fn test_cast_decimal128_to_decimal128() {
let input_type = DataType::Decimal128(20, 3);
let output_type = DataType::Decimal128(20, 4);
assert!(can_cast_types(&input_type, &output_type));
Expand Down Expand Up @@ -2490,6 +2617,97 @@ mod tests {
result.unwrap_err().to_string());
}

/// Casting Decimal128(20, 3) to Decimal256(20, 4) multiplies every value by
/// ten and keeps nulls as nulls.
#[test]
fn test_cast_decimal128_to_decimal256() {
    let input_type = DataType::Decimal128(20, 3);
    let output_type = DataType::Decimal256(20, 4);
    assert!(can_cast_types(&input_type, &output_type));

    let input_values = vec![Some(1123456), Some(2123456), Some(3123456), None];
    let array: ArrayRef =
        Arc::new(create_decimal_array(&input_values, 20, 3).unwrap());

    // Expected output value for a given unscaled integer.
    let expected = |unscaled: i128| {
        Decimal256::from_big_int(&BigInt::from(unscaled), 20, 4).unwrap()
    };
    generate_cast_test_case!(
        &array,
        Decimal256Array,
        &output_type,
        vec![
            Some(expected(11234560_i128)),
            Some(expected(21234560_i128)),
            Some(expected(31234560_i128)),
            None
        ]
    );
}

/// Casting Decimal256(20, 3) to Decimal128(20, 4) multiplies every value by
/// ten and keeps nulls as nulls.
#[test]
fn test_cast_decimal256_to_decimal128() {
    let input_type = DataType::Decimal256(20, 3);
    let output_type = DataType::Decimal128(20, 4);
    assert!(can_cast_types(&input_type, &output_type));

    let mut input_values: Vec<Option<BigInt>> = [1123456, 2123456, 3123456]
        .iter()
        .map(|&unscaled| Some(BigInt::from(unscaled)))
        .collect();
    input_values.push(None);
    let array: ArrayRef =
        Arc::new(create_decimal256_array(input_values, 20, 3).unwrap());

    // Expected output value for a given unscaled integer.
    let expected = |unscaled: i128| Decimal128::new_from_i128(20, 4, unscaled);
    generate_cast_test_case!(
        &array,
        Decimal128Array,
        &output_type,
        vec![
            Some(expected(11234560_i128)),
            Some(expected(21234560_i128)),
            Some(expected(31234560_i128)),
            None
        ]
    );
}

/// Casting Decimal256(20, 3) to Decimal256(20, 4) multiplies every value by
/// ten and keeps nulls as nulls.
#[test]
fn test_cast_decimal256_to_decimal256() {
    let input_type = DataType::Decimal256(20, 3);
    let output_type = DataType::Decimal256(20, 4);
    assert!(can_cast_types(&input_type, &output_type));

    let mut input_values: Vec<Option<BigInt>> = [1123456, 2123456, 3123456]
        .iter()
        .map(|&unscaled| Some(BigInt::from(unscaled)))
        .collect();
    input_values.push(None);
    let array: ArrayRef =
        Arc::new(create_decimal256_array(input_values, 20, 3).unwrap());

    // Expected output value for a given unscaled integer.
    let expected = |unscaled: i128| {
        Decimal256::from_big_int(&BigInt::from(unscaled), 20, 4).unwrap()
    };
    generate_cast_test_case!(
        &array,
        Decimal256Array,
        &output_type,
        vec![
            Some(expected(11234560_i128)),
            Some(expected(21234560_i128)),
            Some(expected(31234560_i128)),
            None
        ]
    );
}

#[test]
fn test_cast_decimal_to_numeric() {
let decimal_type = DataType::Decimal128(38, 2);
Expand Down

0 comments on commit 86e1ede

Please sign in to comment.