Skip to content

Commit

Permalink
Add Decimal256Builder and Decimal256Array
Browse files Browse the repository at this point in the history
  • Loading branch information
viirya committed Jul 4, 2022
1 parent 7ae97c9 commit 3fd533e
Show file tree
Hide file tree
Showing 14 changed files with 228 additions and 32 deletions.
166 changes: 135 additions & 31 deletions arrow/src/array/array_decimal.rs
Expand Up @@ -32,7 +32,7 @@ use crate::datatypes::{
DECIMAL_MAX_SCALE,
};
use crate::error::{ArrowError, Result};
use crate::util::decimal::{BasicDecimal, Decimal128};
use crate::util::decimal::{BasicDecimal, Decimal128, Decimal256};

/// `DecimalArray` stores fixed width decimal numbers,
/// with a fixed precision and scale.
Expand Down Expand Up @@ -75,47 +75,68 @@ pub struct DecimalArray {
scale: usize,
}

impl DecimalArray {
const VALUE_LENGTH: i32 = 16;
/// Array of fixed-width decimal values with a fixed precision and scale,
/// whose elements are read back as `Decimal256` values.
pub struct Decimal256Array {
/// Underlying array data (buffers, offset, length, data type).
data: ArrayData,
/// Raw pointer to the start of the values buffer (`data.buffers()[0]`).
value_data: RawPtrBox<u8>,
/// Precision (total digits) that can be stored by this array.
precision: usize,
/// Scale (digits after the decimal) that can be stored by this array.
scale: usize,
}

// Private helper module. `DecimalArrayPrivate` is a supertrait of
// `BasicDecimalArray`; because this module is not `pub`, code outside this
// module cannot name the trait and therefore cannot implement it.
mod private_decimal {
/// Gives the shared decimal-array methods access to the raw values buffer.
pub trait DecimalArrayPrivate {
/// Returns a raw pointer to the first byte of the values buffer.
fn raw_value_data_ptr(&self) -> *const u8;
}
}

pub trait BasicDecimalArray<T: BasicDecimal, U: From<ArrayData>>:
private_decimal::DecimalArrayPrivate
{
const VALUE_LENGTH: i32;

fn data(&self) -> &ArrayData;

/// Return the precision (total digits) that can be stored by this array
fn precision(&self) -> usize;

/// Return the scale (digits after the decimal) that can be stored by this array
fn scale(&self) -> usize;

/// Returns the element at index `i`.
pub fn value(&self, i: usize) -> Decimal128 {
assert!(i < self.data.len(), "DecimalArray out of bounds access");
let offset = i + self.data.offset();
fn value(&self, i: usize) -> T {
let data = self.data();
assert!(i < data.len(), "Out of bounds access");

let offset = i + data.offset();
let raw_val = unsafe {
let pos = self.value_offset_at(offset);
std::slice::from_raw_parts(
self.value_data.as_ptr().offset(pos as isize),
self.raw_value_data_ptr().offset(pos as isize),
Self::VALUE_LENGTH as usize,
)
};
let as_array = raw_val.try_into().unwrap();
Decimal128::new_from_i128(
self.precision,
self.scale,
i128::from_le_bytes(as_array),
)
T::new(self.precision(), self.scale(), as_array)
}

/// Returns the offset for the element at index `i`.
///
/// Note: this does not perform any bounds checking, for performance reasons.
#[inline]
pub fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data.offset() + i)
fn value_offset(&self, i: usize) -> i32 {
self.value_offset_at(self.data().offset() + i)
}

/// Returns the length for an element.
///
/// All elements have the same length as the array is a fixed size.
#[inline]
pub const fn value_length(&self) -> i32 {
fn value_length(&self) -> i32 {
Self::VALUE_LENGTH
}

/// Returns a clone of the value data buffer
pub fn value_data(&self) -> Buffer {
self.data.buffers()[0].clone()
fn value_data(&self) -> Buffer {
self.data().buffers()[0].clone()
}

#[inline]
Expand All @@ -124,15 +145,15 @@ impl DecimalArray {
}

#[inline]
pub fn value_as_string(&self, row: usize) -> String {
fn value_as_string(&self, row: usize) -> String {
self.value(row).to_string()
}

pub fn from_fixed_size_list_array(
fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
scale: usize,
) -> Self {
) -> U {
let child_data = &v.data_ref().child_data()[0];
assert_eq!(
child_data.child_data().len(),
Expand All @@ -155,9 +176,55 @@ impl DecimalArray {
.offset(list_offset);

let array_data = unsafe { builder.build_unchecked() };
Self::from(array_data)
U::from(array_data)
}
}

// Exposes the values-buffer pointer of `DecimalArray` so the shared
// `BasicDecimalArray::value` implementation can read elements from it.
impl private_decimal::DecimalArrayPrivate for DecimalArray {
/// Returns the raw pointer held in `value_data`.
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
}
}

// Exposes the values-buffer pointer of `Decimal256Array` so the shared
// `BasicDecimalArray::value` implementation can read elements from it.
impl private_decimal::DecimalArrayPrivate for Decimal256Array {
/// Returns the raw pointer held in `value_data`.
fn raw_value_data_ptr(&self) -> *const u8 {
self.value_data.as_ptr()
}
}

// `DecimalArray` yields `Decimal128` values; `from_fixed_size_list_array`
// on this impl produces a `DecimalArray`.
impl BasicDecimalArray<Decimal128, DecimalArray> for DecimalArray {
/// Width of a single element, in bytes.
const VALUE_LENGTH: i32 = 16;

/// Returns a reference to the underlying array data.
fn data(&self) -> &ArrayData {
&self.data
}

/// Returns the precision (total digits) stored on this array.
fn precision(&self) -> usize {
self.precision
}

/// Returns the scale (digits after the decimal) stored on this array.
fn scale(&self) -> usize {
self.scale
}
}

// `Decimal256Array` yields `Decimal256` values; `from_fixed_size_list_array`
// on this impl produces a `Decimal256Array`.
impl BasicDecimalArray<Decimal256, Decimal256Array> for Decimal256Array {
/// Width of a single element, in bytes.
const VALUE_LENGTH: i32 = 32;

/// Returns a reference to the underlying array data.
fn data(&self) -> &ArrayData {
&self.data
}

/// Returns the precision (total digits) stored on this array.
fn precision(&self) -> usize {
self.precision
}

/// Returns the scale (digits after the decimal) stored on this array.
fn scale(&self) -> usize {
self.scale
}
}

impl DecimalArray {
/// Creates a [DecimalArray] with default precision and scale,
/// based on an iterator of `i128` values without nulls
pub fn from_iter_values<I: IntoIterator<Item = i128>>(iter: I) -> Self {
Expand All @@ -176,16 +243,6 @@ impl DecimalArray {
DecimalArray::from(data)
}

/// Return the precision (total digits) that can be stored by this array
pub fn precision(&self) -> usize {
self.precision
}

/// Return the scale (digits after the decimal) that can be stored by this array
pub fn scale(&self) -> usize {
self.scale
}

/// Returns a DecimalArray with the same data as self, with the
/// specified precision.
///
Expand Down Expand Up @@ -267,6 +324,27 @@ impl From<ArrayData> for DecimalArray {
}
}

impl From<ArrayData> for Decimal256Array {
    /// Wraps `data` in a [`Decimal256Array`], taking the precision and scale
    /// from the array's `DataType::Decimal` data type.
    ///
    /// # Panics
    ///
    /// Panics if `data` does not contain exactly one buffer, or if its data
    /// type is not `DataType::Decimal`.
    fn from(data: ArrayData) -> Self {
        // The message names this type (not `DecimalArray`) so that a failure
        // points at the array actually being constructed.
        assert_eq!(
            data.buffers().len(),
            1,
            "Decimal256Array data should contain 1 buffer only (values)"
        );
        let values = data.buffers()[0].as_ptr();
        let (precision, scale) = match data.data_type() {
            DataType::Decimal(precision, scale) => (*precision, *scale),
            _ => panic!("Expected data type to be Decimal"),
        };
        Self {
            data,
            // `values` points into `data.buffers()[0]`, and `data` is stored
            // alongside it in the same struct.
            // NOTE(review): presumably `RawPtrBox::new` requires a non-null
            // pointer that stays valid while `data` is alive — confirm.
            value_data: unsafe { RawPtrBox::new(values) },
            precision,
            scale,
        }
    }
}

impl From<DecimalArray> for ArrayData {
fn from(array: DecimalArray) -> Self {
array.data
Expand Down Expand Up @@ -337,6 +415,22 @@ impl fmt::Debug for DecimalArray {
}
}

impl fmt::Debug for Decimal256Array {
    /// Writes a `Decimal256Array<precision, scale>` header line, then the
    /// elements (each rendered via `value_as_string`) through
    /// `print_long_array`, then a closing `]`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let (precision, scale) = (self.precision, self.scale);
        write!(f, "Decimal256Array<{}, {}>\n[\n", precision, scale)?;

        print_long_array(self, f, |arr, idx, out| {
            write!(out, "{}", arr.value_as_string(idx))
        })?;

        f.write_str("]")
    }
}

impl Array for DecimalArray {
fn as_any(&self) -> &dyn Any {
self
Expand All @@ -347,6 +441,16 @@ impl Array for DecimalArray {
}
}

// `Array` implementation for `Decimal256Array`.
impl Array for Decimal256Array {
/// Returns `self` as `&dyn Any`, which allows callers to downcast.
fn as_any(&self) -> &dyn Any {
self
}

/// Returns a reference to the underlying array data.
fn data(&self) -> &ArrayData {
&self.data
}
}

#[cfg(test)]
mod tests {
use crate::{array::DecimalBuilder, datatypes::Field};
Expand Down
63 changes: 62 additions & 1 deletion arrow/src/array/builder/decimal_builder.rs
Expand Up @@ -18,6 +18,7 @@
use std::any::Any;
use std::sync::Arc;

use crate::array::array_decimal::{BasicDecimalArray, Decimal256Array};
use crate::array::ArrayRef;
use crate::array::DecimalArray;
use crate::array::UInt8Builder;
Expand All @@ -26,6 +27,7 @@ use crate::array::{ArrayBuilder, FixedSizeListBuilder};
use crate::error::{ArrowError, Result};

use crate::datatypes::validate_decimal_precision;
use crate::util::decimal::{BasicDecimal, Decimal256};

/// Array Builder for [`DecimalArray`]
///
Expand All @@ -42,8 +44,18 @@ pub struct DecimalBuilder {
value_validation: bool,
}

/// Array Builder for [`Decimal256Array`]
///
/// See [`Decimal256Array`] for example.
#[derive(Debug)]
pub struct Decimal256Builder {
/// Collects the raw bytes of each value as one fixed-size list entry.
builder: FixedSizeListBuilder<UInt8Builder>,
/// Precision handed to the `Decimal256Array` produced by `finish`.
precision: usize,
/// Scale handed to the `Decimal256Array` produced by `finish`.
scale: usize,
}

impl DecimalBuilder {
/// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values
/// Creates a new `DecimalBuilder`, `capacity` is the number of bytes in the values
/// array
pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
let values_builder = UInt8Builder::new(capacity);
Expand Down Expand Up @@ -154,10 +166,59 @@ impl ArrayBuilder for DecimalBuilder {
}
}

impl Decimal256Builder {
    /// Creates a new `Decimal256Builder`.
    ///
    /// `capacity` is the number of bytes in the values array; every element
    /// is stored as a fixed-size list of 32 bytes.
    pub fn new(capacity: usize, precision: usize, scale: usize) -> Self {
        Self {
            builder: FixedSizeListBuilder::new(UInt8Builder::new(capacity), 32),
            precision,
            scale,
        }
    }

    /// Appends the raw bytes of `value` as one new array element.
    ///
    /// # Errors
    ///
    /// Returns `ArrowError::InvalidArgumentError` if the raw value does not
    /// have the same length as this builder's value length, and propagates
    /// any error from the underlying builders.
    #[inline]
    pub fn append_value(&mut self, value: &Decimal256) -> Result<()> {
        let bytes = value.raw_value();

        if self.builder.value_length() == bytes.len() as i32 {
            self.builder.values().append_slice(bytes)?;
            return self.builder.append(true);
        }

        Err(ArrowError::InvalidArgumentError(
            "Byte slice does not have the same length as Decimal256Builder value lengths"
                .to_string(),
        ))
    }

    /// Append a null value to the array.
    ///
    /// Zero bytes are written for the element so the values buffer keeps its
    /// fixed width, and the element is appended with `false`.
    #[inline]
    pub fn append_null(&mut self) -> Result<()> {
        let zeros = vec![0u8; self.builder.value_length() as usize];
        self.builder.values().append_slice(zeros.as_slice())?;
        self.builder.append(false)
    }

    /// Builds the `Decimal256Array` and reset this builder.
    pub fn finish(&mut self) -> Decimal256Array {
        let list = self.builder.finish();
        Decimal256Array::from_fixed_size_list_array(list, self.precision, self.scale)
    }
}

#[cfg(test)]
mod tests {
use super::*;

use crate::array::array_decimal::BasicDecimalArray;
use crate::array::Array;
use crate::datatypes::DataType;
use crate::util::decimal::Decimal128;
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/builder/mod.rs
Expand Up @@ -45,6 +45,7 @@ use super::ArrayRef;
pub use boolean_buffer_builder::BooleanBufferBuilder;
pub use boolean_builder::BooleanBuilder;
pub use buffer_builder::BufferBuilder;
pub use decimal_builder::Decimal256Builder;
pub use decimal_builder::DecimalBuilder;
pub use fixed_size_binary_builder::FixedSizeBinaryBuilder;
pub use fixed_size_list_builder::FixedSizeListBuilder;
Expand Down
16 changes: 16 additions & 0 deletions arrow/src/array/equal_json.rs
Expand Up @@ -16,7 +16,9 @@
// under the License.

use super::*;
use crate::array::BasicDecimalArray;
use crate::datatypes::*;
use crate::util::decimal::BasicDecimal;
use array::Array;
use hex::FromHex;
use serde_json::value::Value::{Null as JNull, Object, String as JString};
Expand Down Expand Up @@ -378,6 +380,20 @@ impl JsonEqual for DecimalArray {
}
}

impl JsonEqual for Decimal256Array {
fn equals_json(&self, json: &[&Value]) -> bool {
if self.len() != json.len() {
return false;
}

(0..self.len()).all(|i| match json[i] {
JString(s) => self.is_valid(i) && (s == &self.value(i).to_string()),
JNull => self.is_null(i),
_ => false,
})
}
}

impl PartialEq<Value> for DecimalArray {
fn eq(&self, json: &Value) -> bool {
match json {
Expand Down
1 change: 1 addition & 0 deletions arrow/src/array/iterator.rs
Expand Up @@ -15,6 +15,7 @@
// specific language governing permissions and limitations
// under the License.

use crate::array::BasicDecimalArray;
use crate::datatypes::ArrowPrimitiveType;

use super::{
Expand Down

0 comments on commit 3fd533e

Please sign in to comment.