Skip to content

Commit

Permalink
Decimal precision scale datatype change (#2532)
Browse files Browse the repository at this point in the history
* Change decimal array precison and scale to u8

* bug fixes
  • Loading branch information
psvri committed Aug 20, 2022
1 parent 1d5656e commit 7764334
Show file tree
Hide file tree
Showing 16 changed files with 124 additions and 121 deletions.
4 changes: 2 additions & 2 deletions arrow/benches/cast_kernels.rs
Expand Up @@ -82,7 +82,7 @@ fn build_utf8_date_time_array(size: usize, with_nulls: bool) -> ArrayRef {
Arc::new(builder.finish())
}

fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
fn build_decimal128_array(size: usize, precision: u8, scale: u8) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal128Builder::with_capacity(size, precision, scale);

Expand All @@ -92,7 +92,7 @@ fn build_decimal128_array(size: usize, precision: usize, scale: usize) -> ArrayR
Arc::new(builder.finish())
}

fn build_decimal256_array(size: usize, precision: usize, scale: usize) -> ArrayRef {
fn build_decimal256_array(size: usize, precision: u8, scale: u8) -> ArrayRef {
let mut rng = seedable_rng();
let mut builder = Decimal256Builder::with_capacity(size, precision, scale);
let mut bytes = [0; 32];
Expand Down
32 changes: 16 additions & 16 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -81,29 +81,29 @@ pub type Decimal256Array = DecimalArray<Decimal256Type>;
pub struct DecimalArray<T: DecimalType> {
data: ArrayData,
value_data: RawPtrBox<u8>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
_phantom: PhantomData<T>,
}

impl<T: DecimalType> DecimalArray<T> {
pub const VALUE_LENGTH: i32 = T::BYTE_LENGTH as i32;
const DEFAULT_TYPE: DataType = T::DEFAULT_TYPE;
pub const MAX_PRECISION: usize = T::MAX_PRECISION;
pub const MAX_SCALE: usize = T::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(usize, usize) -> DataType = T::TYPE_CONSTRUCTOR;
pub const MAX_PRECISION: u8 = T::MAX_PRECISION;
pub const MAX_SCALE: u8 = T::MAX_SCALE;
const TYPE_CONSTRUCTOR: fn(u8, u8) -> DataType = T::TYPE_CONSTRUCTOR;

pub fn data(&self) -> &ArrayData {
&self.data
}

/// Return the precision (total digits) that can be stored by this array
pub fn precision(&self) -> usize {
pub fn precision(&self) -> u8 {
self.precision
}

/// Return the scale (digits after the decimal) that can be stored by this array
pub fn scale(&self) -> usize {
pub fn scale(&self) -> u8 {
self.scale
}

Expand Down Expand Up @@ -167,8 +167,8 @@ impl<T: DecimalType> DecimalArray<T> {
/// range for a decimal
pub fn from_fixed_size_binary_array(
v: FixedSizeBinaryArray,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Self {
assert!(
v.value_length() == Self::VALUE_LENGTH,
Expand All @@ -194,8 +194,8 @@ impl<T: DecimalType> DecimalArray<T> {
#[deprecated(note = "please use `from_fixed_size_binary_array` instead")]
pub fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Self {
assert_eq!(
v.data_ref().child_data().len(),
Expand Down Expand Up @@ -261,7 +261,7 @@ impl<T: DecimalType> DecimalArray<T> {
/// 1. `precision` is larger than [`Self::MAX_PRECISION`]
/// 2. `scale` is larger than [`Self::MAX_SCALE`];
/// 3. `scale` is > `precision`
pub fn with_precision_and_scale(self, precision: usize, scale: usize) -> Result<Self>
pub fn with_precision_and_scale(self, precision: u8, scale: u8) -> Result<Self>
where
Self: Sized,
{
Expand All @@ -281,7 +281,7 @@ impl<T: DecimalType> DecimalArray<T> {
}

// validate that the new precision and scale are valid or not
fn validate_precision_scale(&self, precision: usize, scale: usize) -> Result<()> {
fn validate_precision_scale(&self, precision: u8, scale: u8) -> Result<()> {
if precision > Self::MAX_PRECISION {
return Err(ArrowError::InvalidArgumentError(format!(
"precision {} is greater than max {}",
Expand Down Expand Up @@ -309,7 +309,7 @@ impl<T: DecimalType> DecimalArray<T> {
}

// validate all the data in the array are valid within the new precision or not
fn validate_data(&self, precision: usize) -> Result<()> {
fn validate_data(&self, precision: u8) -> Result<()> {
// TODO: Move into DecimalType
match Self::VALUE_LENGTH {
16 => self
Expand Down Expand Up @@ -350,7 +350,7 @@ impl Decimal128Array {

// Validates decimal128 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let decimal = unsafe { self.value_unchecked(idx) };
Expand All @@ -365,7 +365,7 @@ impl Decimal128Array {
impl Decimal256Array {
// Validates decimal256 values in this array can be properly interpreted
// with the specified precision.
fn validate_decimal_precision(&self, precision: usize) -> Result<()> {
fn validate_decimal_precision(&self, precision: u8) -> Result<()> {
(0..self.len()).try_for_each(|idx| {
if self.is_valid(idx) {
let raw_val = unsafe {
Expand Down
18 changes: 10 additions & 8 deletions arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -37,8 +37,8 @@ use crate::util::decimal::Decimal256;
#[derive(Debug)]
pub struct Decimal128Builder {
builder: FixedSizeBinaryBuilder,
precision: usize,
scale: usize,
precision: u8,
scale: u8,

/// Should i128 values be validated for compatibility with scale and precision?
/// defaults to true
Expand All @@ -51,8 +51,8 @@ pub struct Decimal128Builder {
#[derive(Debug)]
pub struct Decimal256Builder {
builder: FixedSizeBinaryBuilder,
precision: usize,
scale: usize,
precision: u8,
scale: u8,

/// Should decimal values be validated for compatibility with scale and precision?
/// defaults to true
Expand All @@ -61,14 +61,15 @@ pub struct Decimal256Builder {

impl Decimal128Builder {
const BYTE_LENGTH: i32 = 16;

/// Creates a new [`Decimal128Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
pub fn new(precision: u8, scale: u8) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal128Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down Expand Up @@ -160,14 +161,15 @@ impl ArrayBuilder for Decimal128Builder {

impl Decimal256Builder {
const BYTE_LENGTH: i32 = 32;

/// Creates a new [`Decimal256Builder`]
pub fn new(precision: usize, scale: usize) -> Self {
pub fn new(precision: u8, scale: u8) -> Self {
Self::with_capacity(1024, precision, scale)
}

/// Creates a new [`Decimal256Builder`], `capacity` is the number of decimal values
/// that can be appended without reallocating
pub fn with_capacity(capacity: usize, precision: usize, scale: usize) -> Self {
pub fn with_capacity(capacity: usize, precision: u8, scale: u8) -> Self {
Self {
builder: FixedSizeBinaryBuilder::with_capacity(capacity, Self::BYTE_LENGTH),
precision,
Expand Down
4 changes: 2 additions & 2 deletions arrow/src/array/transform/mod.rs
Expand Up @@ -688,8 +688,8 @@ mod tests {

fn create_decimal_array(
array: Vec<Option<i128>>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Decimal128Array {
array
.into_iter()
Expand Down
26 changes: 13 additions & 13 deletions arrow/src/compute/kernels/cast.rs
Expand Up @@ -301,8 +301,8 @@ pub fn cast(array: &ArrayRef, to_type: &DataType) -> Result<ArrayRef> {
fn cast_primitive_to_decimal<T: ArrayAccessor, F>(
array: T,
op: F,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<Arc<dyn Array>>
where
F: Fn(T::Item) -> i128,
Expand All @@ -318,8 +318,8 @@ where

fn cast_integer_to_decimal<T: ArrowNumericType>(
array: &PrimitiveArray<T>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<Arc<dyn Array>>
where
<T as ArrowPrimitiveType>::Native: AsPrimitive<i128>,
Expand All @@ -333,8 +333,8 @@ where

fn cast_floating_point_to_decimal<T: ArrowNumericType>(
array: &PrimitiveArray<T>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<Arc<dyn Array>>
where
<T as ArrowPrimitiveType>::Native: AsPrimitive<f64>,
Expand Down Expand Up @@ -1306,9 +1306,9 @@ const fn time_unit_multiple(unit: &TimeUnit) -> i64 {
/// Cast one type of decimal array to another type of decimal array
fn cast_decimal_to_decimal<const BYTE_WIDTH1: usize, const BYTE_WIDTH2: usize>(
array: &ArrayRef,
input_scale: &usize,
output_precision: &usize,
output_scale: &usize,
input_scale: &u8,
output_precision: &u8,
output_scale: &u8,
) -> Result<ArrayRef> {
if input_scale > output_scale {
// For example, input_scale is 4 and output_scale is 3;
Expand Down Expand Up @@ -2592,8 +2592,8 @@ mod tests {

fn create_decimal_array(
array: Vec<Option<i128>>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<Decimal128Array> {
array
.into_iter()
Expand All @@ -2603,8 +2603,8 @@ mod tests {

fn create_decimal256_array(
array: Vec<Option<BigInt>>,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<Decimal256Array> {
array
.into_iter()
Expand Down
12 changes: 6 additions & 6 deletions arrow/src/compute/kernels/take.rs
Expand Up @@ -987,8 +987,8 @@ mod tests {
index: &UInt32Array,
options: Option<TakeOptions>,
expected_data: Vec<Option<i128>>,
precision: &usize,
scale: &usize,
precision: &u8,
scale: &u8,
) -> Result<()> {
let output = data
.into_iter()
Expand Down Expand Up @@ -1105,8 +1105,8 @@ mod tests {
#[test]
fn test_take_decimal128_non_null_indices() {
let index = UInt32Array::from(vec![0, 5, 3, 1, 4, 2]);
let precision: usize = 10;
let scale: usize = 5;
let precision: u8 = 10;
let scale: u8 = 5;
test_take_decimal_arrays(
vec![None, Some(3), Some(5), Some(2), Some(3), None],
&index,
Expand All @@ -1121,8 +1121,8 @@ mod tests {
#[test]
fn test_take_decimal128() {
let index = UInt32Array::from(vec![Some(3), None, Some(1), Some(3), Some(2)]);
let precision: usize = 10;
let scale: usize = 5;
let precision: u8 = 10;
let scale: u8 = 5;
test_take_decimal_arrays(
vec![Some(0), Some(1), Some(2), Some(3), Some(4)],
&index,
Expand Down
13 changes: 7 additions & 6 deletions arrow/src/csv/reader.rs
Expand Up @@ -696,8 +696,8 @@ fn build_decimal_array(
_line_number: usize,
rows: &[StringRecord],
col_idx: usize,
precision: usize,
scale: usize,
precision: u8,
scale: u8,
) -> Result<ArrayRef> {
let mut decimal_builder =
Decimal128Builder::with_capacity(rows.len(), precision, scale);
Expand Down Expand Up @@ -732,11 +732,12 @@ fn build_decimal_array(

// Parse the string format decimal value to i128 format and checking the precision and scale.
// The result i128 value can't be out of bounds.
fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Result<i128> {
fn parse_decimal_with_parameter(s: &str, precision: u8, scale: u8) -> Result<i128> {
if PARSE_DECIMAL_RE.is_match(s) {
let mut offset = s.len();
let len = s.len();
let mut base = 1;
let scale_usize = usize::from(scale);

// handle the value after the '.' and meet the scale
let delimiter_position = s.find('.');
Expand All @@ -747,12 +748,12 @@ fn parse_decimal_with_parameter(s: &str, precision: usize, scale: usize) -> Resu
}
Some(mid) => {
// there is the '.'
if len - mid >= scale + 1 {
if len - mid >= scale_usize + 1 {
// If the string value is "123.12345" and the scale is 2, we should just remain '.12' and drop the '345' value.
offset -= len - mid - 1 - scale;
offset -= len - mid - 1 - scale_usize;
} else {
// If the string value is "123.12" and the scale is 4, we should append '00' to the tail.
base = 10_i128.pow((scale + 1 + mid - len) as u32);
base = 10_i128.pow((scale_usize + 1 + mid - len) as u32);
}
}
};
Expand Down

0 comments on commit 7764334

Please sign in to comment.