From d3137a856ef82fe12eb7fc0e5705fd710e97094d Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Wed, 21 Dec 2022 13:43:56 +0000 Subject: [PATCH] Split out arrow-arith (#2594) --- Cargo.toml | 1 + arrow-arith/Cargo.toml | 57 +++++ .../kernels => arrow-arith/src}/aggregate.rs | 46 ++-- .../kernels => arrow-arith/src}/arithmetic.rs | 237 ++++++++++-------- .../kernels => arrow-arith/src}/arity.rs | 85 +++---- .../kernels => arrow-arith/src}/bitwise.rs | 74 +++--- .../kernels => arrow-arith/src}/boolean.rs | 111 ++++---- arrow-arith/src/lib.rs | 25 ++ .../kernels => arrow-arith/src}/temporal.rs | 73 +++--- arrow/Cargo.toml | 2 +- arrow/src/compute/kernels/mod.rs | 7 +- 11 files changed, 384 insertions(+), 334 deletions(-) create mode 100644 arrow-arith/Cargo.toml rename {arrow/src/compute/kernels => arrow-arith/src}/aggregate.rs (98%) rename {arrow/src/compute/kernels => arrow-arith/src}/arithmetic.rs (95%) rename {arrow/src/compute/kernels => arrow-arith/src}/arity.rs (91%) rename {arrow/src/compute/kernels => arrow-arith/src}/bitwise.rs (81%) rename {arrow/src/compute/kernels => arrow-arith/src}/boolean.rs (92%) create mode 100644 arrow-arith/src/lib.rs rename {arrow/src/compute/kernels => arrow-arith/src}/temporal.rs (95%) diff --git a/Cargo.toml b/Cargo.toml index fb072f7d346..ebecc9eaf07 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ [workspace] members = [ "arrow", + "arrow-arith", "arrow-array", "arrow-buffer", "arrow-cast", diff --git a/arrow-arith/Cargo.toml b/arrow-arith/Cargo.toml new file mode 100644 index 00000000000..854941c2534 --- /dev/null +++ b/arrow-arith/Cargo.toml @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "arrow-arith" +version = "29.0.0" +description = "Arrow arithmetic kernels" +homepage = "https://github.com/apache/arrow-rs" +repository = "https://github.com/apache/arrow-rs" +authors = ["Apache Arrow "] +license = "Apache-2.0" +keywords = ["arrow"] +include = [ + "benches/*.rs", + "src/**/*.rs", + "Cargo.toml", +] +edition = "2021" +rust-version = "1.62" + +[lib] +name = "arrow_arith" +path = "src/lib.rs" +bench = false + +[dependencies] +arrow-array = { version = "29.0.0", path = "../arrow-array" } +arrow-buffer = { version = "29.0.0", path = "../arrow-buffer" } +arrow-data = { version = "29.0.0", path = "../arrow-data" } +arrow-schema = { version = "29.0.0", path = "../arrow-schema" } +chrono = { version = "0.4.23", default-features = false } +half = { version = "2.1", default-features = false } +multiversion = { version = "0.6.1", default-features = false } +num = { version = "0.4", default-features = false, features = ["std"] } + +[dev-dependencies] + +[package.metadata.docs.rs] +features = ["dyn_arith_dict"] + +[features] +dyn_arith_dict = [] +simd = ["arrow-array/simd"] diff --git a/arrow/src/compute/kernels/aggregate.rs b/arrow-arith/src/aggregate.rs similarity index 98% rename from arrow/src/compute/kernels/aggregate.rs rename to arrow-arith/src/aggregate.rs index 4e726974f66..a9503130b0f 100644 --- a/arrow/src/compute/kernels/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -17,19 +17,16 @@ //! Defines aggregations over Arrow arrays. -use arrow_data::bit_iterator::try_for_each_valid_idx; -use arrow_schema::ArrowError; use multiversion::multiversion; -#[allow(unused_imports)] -use std::ops::{Add, Deref}; -use crate::array::{ - as_primitive_array, Array, ArrayAccessor, ArrayIter, BooleanArray, - GenericBinaryArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray, -}; -use crate::datatypes::{ArrowNativeType, ArrowNativeTypeOp, ArrowNumericType, DataType}; -use crate::error::Result; -use crate::util::bit_iterator::BitIndexIterator; +use arrow_array::cast::*; +use arrow_array::iterator::ArrayIter; +use arrow_array::*; +use arrow_buffer::ArrowNativeType; +use arrow_data::bit_iterator::try_for_each_valid_idx; +use arrow_data::bit_iterator::BitIndexIterator; +use arrow_schema::ArrowError; +use arrow_schema::*; /// Generic test for NaN, the optimizer should be able to remove this for integer types. #[inline] @@ -63,10 +60,8 @@ where /// Returns the minimum value in the boolean array. /// /// ``` -/// use arrow::{ -/// array::BooleanArray, -/// compute::min_boolean, -/// }; +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::aggregate::min_boolean; /// /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]); /// assert_eq!(min_boolean(&a), Some(false)) @@ -88,10 +83,8 @@ pub fn min_boolean(array: &BooleanArray) -> Option { /// Returns the maximum value in the boolean array /// /// ``` -/// use arrow::{ -/// array::BooleanArray, -/// compute::max_boolean, -/// }; +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::aggregate::max_boolean; /// /// let a = BooleanArray::from(vec![Some(true), None, Some(false)]); /// assert_eq!(max_boolean(&a), Some(true)) @@ -205,7 +198,7 @@ where /// use `sum_array` instead. pub fn sum_array_checked>( array: A, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -345,7 +338,7 @@ where /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `sum` instead. -pub fn sum_checked(array: &PrimitiveArray) -> Result> +pub fn sum_checked(array: &PrimitiveArray) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -375,7 +368,7 @@ where array.len(), array.offset(), null_count, - Some(buffer.deref()), + Some(buffer.as_slice()), |idx| { unsafe { sum = sum.add_checked(array.value_unchecked(idx))? }; Ok::<_, ArrowError>(()) @@ -390,8 +383,7 @@ where #[cfg(feature = "simd")] mod simd { use super::is_nan; - use crate::array::{Array, PrimitiveArray}; - use crate::datatypes::{ArrowNativeTypeOp, ArrowNumericType}; + use arrow_array::*; use std::marker::PhantomData; pub(super) trait SimdAggregate { @@ -771,10 +763,8 @@ where #[cfg(test)] mod tests { use super::*; - use crate::array::*; - use crate::compute::add; - use crate::datatypes::{Float32Type, Int32Type, Int8Type}; - use arrow_array::types::Float64Type; + use crate::arithmetic::add; + use arrow_array::types::*; #[test] fn test_primitive_array_sum() { diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow-arith/src/arithmetic.rs similarity index 95% rename from arrow/src/compute/kernels/arithmetic.rs rename to arrow-arith/src/arithmetic.rs index 913a2cad6c9..c5f0c1096c3 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow-arith/src/arithmetic.rs @@ -22,29 +22,12 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. -use crate::array::*; -#[cfg(feature = "simd")] -use crate::buffer::MutableBuffer; -use crate::compute::kernels::arity::unary; -use crate::compute::{ - binary, binary_opt, try_binary, try_unary, try_unary_dyn, unary_dyn, -}; -use crate::datatypes::{ - ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type, Date64Type, - IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType, -}; -#[cfg(feature = "dyn_arith_dict")] -use crate::datatypes::{ - Decimal128Type, Decimal256Type, Float32Type, Float64Type, Int16Type, Int32Type, - Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, -}; -use crate::error::{ArrowError, Result}; -use crate::{datatypes, downcast_primitive_array}; +use crate::arity::*; +use arrow_array::cast::*; +use arrow_array::types::*; +use arrow_array::*; +use arrow_schema::*; use num::traits::Pow; -#[cfg(feature = "simd")] -use std::borrow::BorrowMut; -#[cfg(feature = "simd")] -use std::slice::{ChunksExact, ChunksExactMut}; use std::sync::Arc; /// Helper function to perform math lambda function on values from two arrays. If either @@ -58,7 +41,7 @@ pub fn math_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, -) -> Result> +) -> Result, ArrowError> where LT: ArrowNumericType, RT: ArrowNumericType, @@ -76,11 +59,11 @@ fn math_checked_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, -) -> Result> +) -> Result, ArrowError> where LT: ArrowNumericType, RT: ArrowNumericType, - F: Fn(LT::Native, RT::Native) -> Result, + F: Fn(LT::Native, RT::Native) -> Result, LT::Native: ArrowNativeTypeOp, RT::Native: ArrowNativeTypeOp, { @@ -99,11 +82,11 @@ fn math_checked_divide_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, -) -> Result> +) -> Result, ArrowError> where LT: ArrowNumericType, RT: ArrowNumericType, - F: Fn(LT::Native, RT::Native) -> Result, + F: Fn(LT::Native, RT::Native) -> Result, { try_binary(left, right, op) } @@ -122,11 +105,11 @@ fn math_checked_divide_op_on_iters( right: impl Iterator>, op: F, len: usize, - null_bit_buffer: Option, -) -> Result> + null_bit_buffer: Option, +) -> Result, ArrowError> where T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> Result, { let buffer = if null_bit_buffer.is_some() { let values = left.zip(right).map(|(left, right)| { @@ -137,7 +120,7 @@ where } }); // Safety: Iterator comes from a PrimitiveArray which reports its size correctly - unsafe { crate::buffer::Buffer::try_from_trusted_len_iter(values) } + unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) } } else { // no value is null let values = left @@ -145,11 +128,11 @@ where .zip(right.map(|r| r.unwrap())) .map(|(left, right)| op(left, right)); // Safety: Iterator comes from a PrimitiveArray which reports its size correctly - unsafe { crate::buffer::Buffer::try_from_trusted_len_iter(values) } + unsafe { arrow_buffer::Buffer::try_from_trusted_len_iter(values) } }?; let data = unsafe { - ArrayData::new_unchecked( + arrow_data::ArrayData::new_unchecked( T::DATA_TYPE, len, None, @@ -174,7 +157,7 @@ fn simd_checked_modulus( valid_mask: Option, left: T::Simd, right: T::Simd, -) -> Result +) -> Result where T::Native: ArrowNativeTypeOp, { @@ -211,7 +194,7 @@ fn simd_checked_divide( valid_mask: Option, left: T::Simd, right: T::Simd, -) -> Result +) -> Result where T::Native: ArrowNativeTypeOp, { @@ -247,11 +230,11 @@ where #[inline] fn simd_checked_divide_op_remainder( valid_mask: Option, - left_chunks: ChunksExact, - right_chunks: ChunksExact, - result_chunks: ChunksExactMut, + left_chunks: std::slice::ChunksExact, + right_chunks: std::slice::ChunksExact, + result_chunks: std::slice::ChunksExactMut, op: F, -) -> Result<()> +) -> Result<(), ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -296,11 +279,11 @@ fn simd_checked_divide_op( right: &PrimitiveArray, simd_op: SI, scalar_op: SC, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, - SI: Fn(Option, T::Simd, T::Simd) -> Result, + SI: Fn(Option, T::Simd, T::Simd) -> Result, SC: Fn(T::Native, T::Native) -> T::Native, { if left.len() != right.len() { @@ -317,7 +300,8 @@ where let lanes = T::lanes(); let buffer_size = left.len() * std::mem::size_of::(); - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); + let mut result = + arrow_buffer::MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); match &null_bit_buffer { Some(b) => { @@ -332,11 +316,7 @@ where valid_chunks .iter() - .zip( - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())), - ) + .zip((&mut result_chunks).zip((&mut left_chunks).zip(&mut right_chunks))) .try_for_each( |(mut mask, (result_slice, (left_slice, right_slice)))| { // split chunks further into slices corresponding to the vector length @@ -345,7 +325,7 @@ where result_slice .chunks_exact_mut(lanes) .zip(left_slice.chunks_exact(lanes).zip(right_slice.chunks_exact(lanes))) - .try_for_each(|(result_slice, (left_slice, right_slice))| -> Result<()> { + .try_for_each(|(result_slice, (left_slice, right_slice))| -> Result<(), ArrowError> { let simd_left = T::load(left_slice); let simd_right = T::load(right_slice); @@ -376,21 +356,20 @@ where let mut left_chunks = left.values().chunks_exact(lanes); let mut right_chunks = right.values().chunks_exact(lanes); - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) + (&mut result_chunks) + .zip((&mut left_chunks).zip(&mut right_chunks)) .try_for_each( - |(result_slice, (left_slice, right_slice))| -> Result<()> { - let simd_left = T::load(left_slice); - let simd_right = T::load(right_slice); + |(result_slice, (left_slice, right_slice))| -> Result<(), ArrowError> { + let simd_left = T::load(left_slice); + let simd_right = T::load(right_slice); - let simd_result = simd_op(None, simd_left, simd_right)?; + let simd_result = simd_op(None, simd_left, simd_right)?; - T::write(simd_result, result_slice); + T::write(simd_result, result_slice); - Ok(()) - }, - )?; + Ok(()) + }, + )?; simd_checked_divide_op_remainder::( None, @@ -403,7 +382,7 @@ where } let data = unsafe { - ArrayData::new_unchecked( + arrow_data::ArrayData::new_unchecked( T::DATA_TYPE, left.len(), None, @@ -556,7 +535,7 @@ fn math_op_dict( left: &DictionaryArray, right: &DictionaryArray, op: F, -) -> Result> +) -> Result, ArrowError> where K: ArrowNumericType, T: ArrowNumericType, @@ -612,11 +591,11 @@ fn math_checked_op_dict( left: &DictionaryArray, right: &DictionaryArray, op: F, -) -> Result> +) -> Result, ArrowError> where K: ArrowNumericType, T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> Result, T::Native: ArrowNativeTypeOp, { // left and right's value types are supposed to be same as guaranteed by the caller macro now. @@ -646,11 +625,11 @@ fn math_divide_checked_op_dict( left: &DictionaryArray, right: &DictionaryArray, op: F, -) -> Result> +) -> Result, ArrowError> where K: ArrowNumericType, T: ArrowNumericType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> Result, { if left.len() != right.len() { return Err(ArrowError::ComputeError(format!( @@ -699,7 +678,7 @@ fn math_divide_safe_op_dict( left: &DictionaryArray, right: &DictionaryArray, op: F, -) -> Result +) -> Result where K: ArrowNumericType, T: ArrowNumericType, @@ -715,7 +694,7 @@ fn math_safe_divide_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, -) -> Result +) -> Result where LT: ArrowNumericType, RT: ArrowNumericType, @@ -733,7 +712,7 @@ where pub fn add( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -749,7 +728,7 @@ where pub fn add_checked( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -762,7 +741,7 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `add_dyn_checked` instead. -pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result { +pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!(left, right, |a, b| a.add_wrapping(b), math_op_dict) @@ -834,7 +813,10 @@ pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result { /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `add_dyn` instead. -pub fn add_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result { +pub fn add_dyn_checked( + left: &dyn Array, + right: &dyn Array, +) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -914,7 +896,7 @@ pub fn add_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result pub fn add_scalar( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -930,7 +912,7 @@ where pub fn add_scalar_checked( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -946,7 +928,10 @@ where /// For an overflow-checking variant, use `add_scalar_checked_dyn` instead. /// /// This returns an `Err` when the input array is not supported for adding operation. -pub fn add_scalar_dyn(array: &dyn Array, scalar: T::Native) -> Result +pub fn add_scalar_dyn( + array: &dyn Array, + scalar: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -963,7 +948,10 @@ where /// /// As this kernel has the branching costs and also prevents LLVM from vectorising it correctly, /// it is usually much slower than non-checking variant. -pub fn add_scalar_checked_dyn(array: &dyn Array, scalar: T::Native) -> Result +pub fn add_scalar_checked_dyn( + array: &dyn Array, + scalar: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -980,7 +968,7 @@ where pub fn subtract( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -996,7 +984,7 @@ where pub fn subtract_checked( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1009,7 +997,7 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `subtract_dyn_checked` instead. -pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result { +pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!(left, right, |a, b| a.sub_wrapping(b), math_op_dict) @@ -1033,7 +1021,10 @@ pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result { /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `subtract_dyn` instead. -pub fn subtract_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result { +pub fn subtract_dyn_checked( + left: &dyn Array, + right: &dyn Array, +) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -1065,7 +1056,7 @@ pub fn subtract_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1081,7 +1072,7 @@ where pub fn subtract_scalar_checked( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1095,7 +1086,10 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `subtract_scalar_checked_dyn` instead. -pub fn subtract_scalar_dyn(array: &dyn Array, scalar: T::Native) -> Result +pub fn subtract_scalar_dyn( + array: &dyn Array, + scalar: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1112,7 +1106,7 @@ where pub fn subtract_scalar_checked_dyn( array: &dyn Array, scalar: T::Native, -) -> Result +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1125,7 +1119,7 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `negate_checked` instead. -pub fn negate(array: &PrimitiveArray) -> Result> +pub fn negate(array: &PrimitiveArray) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1137,7 +1131,9 @@ where /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `negate` instead. -pub fn negate_checked(array: &PrimitiveArray) -> Result> +pub fn negate_checked( + array: &PrimitiveArray, +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1149,9 +1145,9 @@ where pub fn powf_scalar( array: &PrimitiveArray, raise: T::Native, -) -> Result> +) -> Result, ArrowError> where - T: datatypes::ArrowFloatNumericType, + T: ArrowFloatNumericType, T::Native: Pow, { Ok(unary(array, |x| x.pow(raise))) @@ -1165,7 +1161,7 @@ where pub fn multiply( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1181,7 +1177,7 @@ where pub fn multiply_checked( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1194,7 +1190,7 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `multiply_dyn_checked` instead. -pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result { +pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!(left, right, |a, b| a.mul_wrapping(b), math_op_dict) @@ -1218,7 +1214,10 @@ pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result { /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `multiply_dyn` instead. -pub fn multiply_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result { +pub fn multiply_dyn_checked( + left: &dyn Array, + right: &dyn Array, +) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -1250,9 +1249,9 @@ pub fn multiply_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where - T: datatypes::ArrowNumericType, + T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { Ok(unary(array, |value| value.mul_wrapping(scalar))) @@ -1266,7 +1265,7 @@ where pub fn multiply_scalar_checked( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1280,7 +1279,10 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `multiply_scalar_checked_dyn` instead. -pub fn multiply_scalar_dyn(array: &dyn Array, scalar: T::Native) -> Result +pub fn multiply_scalar_dyn( + array: &dyn Array, + scalar: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1297,7 +1299,7 @@ where pub fn multiply_scalar_checked_dyn( array: &dyn Array, scalar: T::Native, -) -> Result +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1312,7 +1314,7 @@ where pub fn modulus( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1340,7 +1342,7 @@ where pub fn divide_checked( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1370,7 +1372,7 @@ where pub fn divide_opt( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1390,7 +1392,7 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `divide_dyn_checked` instead. -pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { +pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -1432,7 +1434,10 @@ pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { /// /// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, /// use `divide_dyn` instead. -pub fn divide_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result { +pub fn divide_dyn_checked( + left: &dyn Array, + right: &dyn Array, +) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -1465,7 +1470,10 @@ pub fn divide_dyn_checked(left: &dyn Array, right: &dyn Array) -> Result Result { +pub fn divide_dyn_opt( + left: &dyn Array, + right: &dyn Array, +) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { typed_dict_math_op!( @@ -1515,7 +1523,7 @@ pub fn divide_dyn_opt(left: &dyn Array, right: &dyn Array) -> Result { pub fn divide( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1531,7 +1539,7 @@ where pub fn modulus_scalar( array: &PrimitiveArray, modulo: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1549,7 +1557,7 @@ where pub fn divide_scalar( array: &PrimitiveArray, divisor: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1567,7 +1575,10 @@ where /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. /// For an overflow-checking variant, use `divide_scalar_checked_dyn` instead. -pub fn divide_scalar_dyn(array: &dyn Array, divisor: T::Native) -> Result +pub fn divide_scalar_dyn( + array: &dyn Array, + divisor: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1588,7 +1599,7 @@ where pub fn divide_scalar_checked_dyn( array: &dyn Array, divisor: T::Native, -) -> Result +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1611,7 +1622,10 @@ where /// Unlike `divide_scalar_dyn` or `divide_scalar_checked_dyn`, division by zero will get a /// null value instead returning an `Err`, this also doesn't check overflowing, overflowing /// will just wrap the result around. -pub fn divide_scalar_opt_dyn(array: &dyn Array, divisor: T::Native) -> Result +pub fn divide_scalar_opt_dyn( + array: &dyn Array, + divisor: T::Native, +) -> Result where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, @@ -1631,10 +1645,11 @@ where #[cfg(test)] mod tests { use super::*; - use crate::array::Int32Array; - use crate::compute::{binary_mut, try_binary_mut, try_unary_mut, unary_mut}; - use crate::datatypes::{Date64Type, Decimal128Type, Int32Type, Int8Type}; + use arrow_array::builder::{ + BooleanBufferBuilder, BufferBuilder, PrimitiveDictionaryBuilder, + }; use arrow_buffer::i256; + use arrow_data::ArrayDataBuilder; use chrono::NaiveDate; use half::f16; diff --git a/arrow/src/compute/kernels/arity.rs b/arrow-arith/src/arity.rs similarity index 91% rename from arrow/src/compute/kernels/arity.rs rename to arrow-arith/src/arity.rs index 02659a5a773..e89fe7b914a 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow-arith/src/arity.rs @@ -17,17 +17,14 @@ //! Defines kernels suitable to perform operations to primitive arrays. -use crate::array::{ - Array, ArrayAccessor, ArrayData, ArrayIter, ArrayRef, BufferBuilder, DictionaryArray, - PrimitiveArray, -}; -use crate::buffer::Buffer; -use crate::datatypes::{ArrowNumericType, ArrowPrimitiveType}; -use crate::downcast_dictionary_array; -use crate::error::{ArrowError, Result}; -use crate::util::bit_iterator::try_for_each_valid_idx; -use arrow_buffer::MutableBuffer; +use arrow_array::builder::BufferBuilder; +use arrow_array::iterator::ArrayIter; +use arrow_array::*; +use arrow_buffer::{Buffer, MutableBuffer}; +use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::bit_mask::combine_option_bitmap; +use arrow_data::ArrayData; +use arrow_schema::ArrowError; use std::sync::Arc; #[inline] @@ -71,11 +68,14 @@ where } /// See [`PrimitiveArray::try_unary`] -pub fn try_unary(array: &PrimitiveArray, op: F) -> Result> +pub fn try_unary( + array: &PrimitiveArray, + op: F, +) -> Result, ArrowError> where I: ArrowPrimitiveType, O: ArrowPrimitiveType, - F: Fn(I::Native) -> Result, + F: Fn(I::Native) -> Result, { array.try_unary(op) } @@ -84,19 +84,16 @@ where pub fn try_unary_mut( array: PrimitiveArray, op: F, -) -> std::result::Result< - std::result::Result, ArrowError>, - PrimitiveArray, -> +) -> Result, ArrowError>, PrimitiveArray> where I: ArrowPrimitiveType, - F: Fn(I::Native) -> Result, + F: Fn(I::Native) -> Result, { array.try_unary_mut(op) } /// A helper function that applies an infallible unary function to a dictionary array with primitive value type. -fn unary_dict(array: &DictionaryArray, op: F) -> Result +fn unary_dict(array: &DictionaryArray, op: F) -> Result where K: ArrowNumericType, T: ArrowPrimitiveType, @@ -108,11 +105,14 @@ where } /// A helper function that applies a fallible unary function to a dictionary array with primitive value type. -fn try_unary_dict(array: &DictionaryArray, op: F) -> Result +fn try_unary_dict( + array: &DictionaryArray, + op: F, +) -> Result where K: ArrowNumericType, T: ArrowPrimitiveType, - F: Fn(T::Native) -> Result, + F: Fn(T::Native) -> Result, { if std::mem::discriminant(&array.value_type()) != std::mem::discriminant(&T::DATA_TYPE) @@ -130,7 +130,7 @@ where } /// Applies an infallible unary function to an array with primitive values. -pub fn unary_dyn(array: &dyn Array, op: F) -> Result +pub fn unary_dyn(array: &dyn Array, op: F) -> Result where T: ArrowPrimitiveType, F: Fn(T::Native) -> T::Native, @@ -155,10 +155,10 @@ where } /// Applies a fallible unary function to an array with primitive values. -pub fn try_unary_dyn(array: &dyn Array, op: F) -> Result +pub fn try_unary_dyn(array: &dyn Array, op: F) -> Result where T: ArrowPrimitiveType, - F: Fn(T::Native) -> Result, + F: Fn(T::Native) -> Result, { downcast_dictionary_array! { array => if array.values().data_type() == &T::DATA_TYPE { @@ -202,7 +202,7 @@ pub fn binary( a: &PrimitiveArray, b: &PrimitiveArray, op: F, -) -> Result> +) -> Result, ArrowError> where A: ArrowPrimitiveType, B: ArrowPrimitiveType, @@ -258,10 +258,7 @@ pub fn binary_mut( a: PrimitiveArray, b: &PrimitiveArray, op: F, -) -> std::result::Result< - std::result::Result, ArrowError>, - PrimitiveArray, -> +) -> Result, ArrowError>, PrimitiveArray> where T: ArrowPrimitiveType, F: Fn(T::Native, T::Native) -> T::Native, @@ -320,10 +317,10 @@ pub fn try_binary( a: A, b: B, op: F, -) -> Result> +) -> Result, ArrowError> where O: ArrowPrimitiveType, - F: Fn(A::Item, B::Item) -> Result, + F: Fn(A::Item, B::Item) -> Result, { if a.len() != b.len() { return Err(ArrowError::ComputeError( @@ -382,13 +379,10 @@ pub fn try_binary_mut( a: PrimitiveArray, b: &PrimitiveArray, op: F, -) -> std::result::Result< - std::result::Result, ArrowError>, - PrimitiveArray, -> +) -> Result, ArrowError>, PrimitiveArray> where T: ArrowPrimitiveType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> Result, { if a.len() != b.len() { return Ok(Err(ArrowError::ComputeError( @@ -447,10 +441,10 @@ fn try_binary_no_nulls( a: A, b: B, op: F, -) -> Result> +) -> Result, ArrowError> where O: ArrowPrimitiveType, - F: Fn(A::Item, B::Item) -> Result, + F: Fn(A::Item, B::Item) -> Result, { let mut buffer = MutableBuffer::new(len * O::get_byte_width()); for idx in 0..len { @@ -468,13 +462,10 @@ fn try_binary_no_nulls_mut( a: PrimitiveArray, b: &PrimitiveArray, op: F, -) -> std::result::Result< - std::result::Result, ArrowError>, - PrimitiveArray, -> +) -> Result, ArrowError>, PrimitiveArray> where T: ArrowPrimitiveType, - F: Fn(T::Native, T::Native) -> Result, + F: Fn(T::Native, T::Native) -> Result, { let mut builder = a.into_builder()?; let slice = builder.values_slice_mut(); @@ -496,7 +487,7 @@ fn try_binary_opt_no_nulls( a: A, b: B, op: F, -) -> Result> +) -> Result, ArrowError> where O: ArrowPrimitiveType, F: Fn(A::Item, B::Item) -> Option, @@ -524,7 +515,7 @@ pub(crate) fn binary_opt Result> +) -> Result, ArrowError> where O: ArrowPrimitiveType, F: Fn(A::Item, B::Item) -> Option, @@ -563,9 +554,9 @@ where #[cfg(test)] mod tests { use super::*; - use crate::array::{as_primitive_array, Float64Array, PrimitiveDictionaryBuilder}; - use crate::datatypes::{Float64Type, Int32Type, Int8Type}; - use arrow_array::Int32Array; + use arrow_array::builder::*; + use arrow_array::cast::*; + use arrow_array::types::*; #[test] fn test_unary_f64_slice() { diff --git a/arrow/src/compute/kernels/bitwise.rs b/arrow-arith/src/bitwise.rs similarity index 81% rename from arrow/src/compute/kernels/bitwise.rs rename to arrow-arith/src/bitwise.rs index 0b877b32648..08cc246b351 100644 --- a/arrow/src/compute/kernels/bitwise.rs +++ b/arrow-arith/src/bitwise.rs @@ -15,10 +15,9 @@ // specific language governing permissions and limitations // under the License. -use crate::array::PrimitiveArray; -use crate::compute::{binary, unary}; -use crate::datatypes::ArrowNumericType; -use crate::error::Result; +use crate::arity::{binary, unary}; +use arrow_array::*; +use arrow_schema::ArrowError; use std::ops::{BitAnd, BitOr, BitXor, Not}; // The helper function for bitwise operation with two array @@ -26,7 +25,7 @@ fn bitwise_op( left: &PrimitiveArray, right: &PrimitiveArray, op: F, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, F: Fn(T::Native, T::Native) -> T::Native, @@ -39,7 +38,7 @@ where pub fn bitwise_and( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitAnd, @@ -52,7 +51,7 @@ where pub fn bitwise_or( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitOr, @@ -65,7 +64,7 @@ where pub fn bitwise_xor( left: &PrimitiveArray, right: &PrimitiveArray, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitXor, @@ -75,7 +74,7 @@ where /// Perform `!array` operation on array. If array value is null /// then the result is also null. -pub fn bitwise_not(array: &PrimitiveArray) -> Result> +pub fn bitwise_not(array: &PrimitiveArray) -> Result, ArrowError> where T: ArrowNumericType, T::Native: Not, @@ -88,7 +87,7 @@ where pub fn bitwise_and_scalar( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitAnd, @@ -101,7 +100,7 @@ where pub fn bitwise_or_scalar( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitOr, @@ -114,7 +113,7 @@ where pub fn bitwise_xor_scalar( array: &PrimitiveArray, scalar: T::Native, -) -> Result> +) -> Result, ArrowError> where T: ArrowNumericType, T::Native: BitXor, @@ -124,15 +123,10 @@ where #[cfg(test)] mod tests { - use crate::array::{Int32Array, UInt64Array}; - use crate::compute::kernels::bitwise::{ - bitwise_and, bitwise_and_scalar, bitwise_not, bitwise_or, bitwise_or_scalar, - bitwise_xor, bitwise_xor_scalar, - }; - use crate::error::Result; + use super::*; #[test] - fn test_bitwise_and_array() -> Result<()> { + fn test_bitwise_and_array() -> Result<(), ArrowError> { // unsigned value let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4)]); let right = UInt64Array::from(vec![Some(5), Some(10), Some(8), Some(12)]); @@ -150,43 +144,41 @@ mod tests { } #[test] - fn test_bitwise_and_array_scalar() -> Result<()> { + fn test_bitwise_and_array_scalar() { // unsigned value let left = UInt64Array::from(vec![Some(15), Some(2), None, Some(4)]); let scalar = 7; let expected = UInt64Array::from(vec![Some(7), Some(2), None, Some(4)]); - let result = bitwise_and_scalar(&left, scalar)?; + let result = bitwise_and_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); // signed value let left = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let scalar = -20; let expected = Int32Array::from(vec![Some(0), Some(0), None, Some(4)]); - let result = bitwise_and_scalar(&left, scalar)?; + let result = bitwise_and_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); - Ok(()) } #[test] - fn test_bitwise_or_array() -> Result<()> { + fn test_bitwise_or_array() { // unsigned value let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4)]); let right = UInt64Array::from(vec![Some(7), Some(5), Some(8), Some(13)]); let expected = UInt64Array::from(vec![Some(7), Some(7), None, Some(13)]); - let result = bitwise_or(&left, &right)?; + let result = bitwise_or(&left, &right).unwrap(); assert_eq!(expected, result); // signed value let left = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let right = Int32Array::from(vec![Some(-7), Some(-5), Some(8), Some(13)]); let expected = Int32Array::from(vec![Some(-7), Some(-5), None, Some(13)]); - let result = bitwise_or(&left, &right)?; + let result = bitwise_or(&left, &right).unwrap(); assert_eq!(expected, result); - Ok(()) } #[test] - fn test_bitwise_not_array() -> Result<()> { + fn test_bitwise_not_array() { // unsigned value let array = UInt64Array::from(vec![Some(1), Some(2), None, Some(4)]); let expected = UInt64Array::from(vec![ @@ -195,67 +187,63 @@ mod tests { None, Some(18446744073709551611), ]); - let result = bitwise_not(&array)?; + let result = bitwise_not(&array).unwrap(); assert_eq!(expected, result); // signed value let array = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let expected = Int32Array::from(vec![Some(-2), Some(-3), None, Some(-5)]); - let result = bitwise_not(&array)?; + let result = bitwise_not(&array).unwrap(); assert_eq!(expected, result); - Ok(()) } #[test] - fn test_bitwise_or_array_scalar() -> Result<()> { + fn test_bitwise_or_array_scalar() { // unsigned value let left = UInt64Array::from(vec![Some(15), Some(2), None, Some(4)]); let scalar = 7; let expected = UInt64Array::from(vec![Some(15), Some(7), None, Some(7)]); - let result = bitwise_or_scalar(&left, scalar)?; + let result = bitwise_or_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); // signed value let left = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let scalar = 20; let expected = Int32Array::from(vec![Some(21), Some(22), None, Some(20)]); - let result = bitwise_or_scalar(&left, scalar)?; + let result = bitwise_or_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); - Ok(()) } #[test] - fn test_bitwise_xor_array() -> Result<()> { + fn test_bitwise_xor_array() { // unsigned value let left = UInt64Array::from(vec![Some(1), Some(2), None, Some(4)]); let right = UInt64Array::from(vec![Some(7), Some(5), Some(8), Some(13)]); let expected = UInt64Array::from(vec![Some(6), Some(7), None, Some(9)]); - let result = bitwise_xor(&left, &right)?; + let result = bitwise_xor(&left, &right).unwrap(); assert_eq!(expected, result); // signed value let left = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let right = Int32Array::from(vec![Some(-7), Some(5), Some(8), Some(-13)]); let expected = Int32Array::from(vec![Some(-8), Some(7), None, Some(-9)]); - let result = bitwise_xor(&left, &right)?; + let result = bitwise_xor(&left, &right).unwrap(); assert_eq!(expected, result); - Ok(()) } #[test] - fn test_bitwise_xor_array_scalar() -> Result<()> { + fn test_bitwise_xor_array_scalar() { // unsigned value let left = UInt64Array::from(vec![Some(15), Some(2), None, Some(4)]); let scalar = 7; let expected = UInt64Array::from(vec![Some(8), Some(5), None, Some(3)]); - let result = bitwise_xor_scalar(&left, scalar)?; + let result = bitwise_xor_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); // signed value let left = Int32Array::from(vec![Some(1), Some(2), None, Some(4)]); let scalar = -20; let expected = Int32Array::from(vec![Some(-19), Some(-18), None, Some(-24)]); - let result = bitwise_xor_scalar(&left, scalar)?; + let result = bitwise_xor_scalar(&left, scalar).unwrap(); assert_eq!(expected, result); - Ok(()) } } diff --git a/arrow/src/compute/kernels/boolean.rs b/arrow-arith/src/boolean.rs similarity index 92% rename from arrow/src/compute/kernels/boolean.rs rename to arrow-arith/src/boolean.rs index aa42f3d20c0..4c1a02ad749 100644 --- a/arrow/src/compute/kernels/boolean.rs +++ b/arrow-arith/src/boolean.rs @@ -22,17 +22,16 @@ //! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation //! [here](https://doc.rust-lang.org/stable/core/arch/) for more information. -pub use arrow_select::nullif; - -use crate::array::{Array, ArrayData, BooleanArray}; -use crate::buffer::{ +use arrow_array::*; +use arrow_buffer::bit_util::ceil; +use arrow_buffer::buffer::{ bitwise_bin_op_helper, bitwise_quaternary_op_helper, buffer_bin_and, buffer_bin_or, - buffer_unary_not, Buffer, MutableBuffer, + buffer_unary_not, }; -use crate::datatypes::DataType; -use crate::error::{ArrowError, Result}; -use crate::util::bit_util::ceil; +use arrow_buffer::{Buffer, MutableBuffer}; use arrow_data::bit_mask::combine_option_bitmap; +use arrow_data::ArrayData; +use arrow_schema::{ArrowError, DataType}; /// Updates null buffer based on data buffer and null buffer of the operand at other side /// in boolean AND kernel with Kleene logic. In short, because for AND kernel, null AND false @@ -182,7 +181,7 @@ pub(crate) fn binary_boolean_kernel( right: &BooleanArray, op: F, null_op: U, -) -> Result +) -> Result where F: Fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer, U: Fn(&ArrayData, usize, &ArrayData, usize, usize) -> Option, @@ -227,18 +226,17 @@ where /// This function errors when the arrays have different lengths. /// # Example /// ```rust -/// use arrow::array::BooleanArray; -/// use arrow::error::Result; -/// use arrow::compute::kernels::boolean::and; -/// # fn main() -> Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::and; /// let a = BooleanArray::from(vec![Some(false), Some(true), None]); /// let b = BooleanArray::from(vec![Some(true), Some(true), Some(false)]); -/// let and_ab = and(&a, &b)?; +/// let and_ab = and(&a, &b).unwrap(); /// assert_eq!(and_ab, BooleanArray::from(vec![Some(false), Some(true), None])); -/// # Ok(()) -/// # } /// ``` -pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { +pub fn and( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { binary_boolean_kernel(left, right, buffer_bin_and, build_null_buffer_for_and_or) } @@ -261,22 +259,21 @@ pub fn and(left: &BooleanArray, right: &BooleanArray) -> Result { /// # Example /// /// ```rust -/// use arrow::array::BooleanArray; -/// use arrow::error::Result; -/// use arrow::compute::kernels::boolean::and_kleene; -/// # fn main() -> Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::and_kleene; /// let a = BooleanArray::from(vec![Some(true), Some(false), None]); /// let b = BooleanArray::from(vec![None, None, None]); -/// let and_ab = and_kleene(&a, &b)?; +/// let and_ab = and_kleene(&a, &b).unwrap(); /// assert_eq!(and_ab, BooleanArray::from(vec![None, Some(false), None])); -/// # Ok(()) -/// # } /// ``` /// /// # Fails /// /// If the operands have different lengths -pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result { +pub fn and_kleene( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { binary_boolean_kernel( left, right, @@ -291,18 +288,14 @@ pub fn and_kleene(left: &BooleanArray, right: &BooleanArray) -> Result Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::or; /// let a = BooleanArray::from(vec![Some(false), Some(true), None]); /// let b = BooleanArray::from(vec![Some(true), Some(true), Some(false)]); -/// let or_ab = or(&a, &b)?; +/// let or_ab = or(&a, &b).unwrap(); /// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), Some(true), None])); -/// # Ok(()) -/// # } /// ``` -pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { +pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { binary_boolean_kernel(left, right, buffer_bin_or, build_null_buffer_for_and_or) } @@ -325,22 +318,21 @@ pub fn or(left: &BooleanArray, right: &BooleanArray) -> Result { /// # Example /// /// ```rust -/// use arrow::array::BooleanArray; -/// use arrow::error::Result; -/// use arrow::compute::kernels::boolean::or_kleene; -/// # fn main() -> Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::or_kleene; /// let a = BooleanArray::from(vec![Some(true), Some(false), None]); /// let b = BooleanArray::from(vec![None, None, None]); -/// let or_ab = or_kleene(&a, &b)?; +/// let or_ab = or_kleene(&a, &b).unwrap(); /// assert_eq!(or_ab, BooleanArray::from(vec![Some(true), None, None])); -/// # Ok(()) -/// # } /// ``` /// /// # Fails /// /// If the operands have different lengths -pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result { +pub fn or_kleene( + left: &BooleanArray, + right: &BooleanArray, +) -> Result { binary_boolean_kernel(left, right, buffer_bin_or, build_null_buffer_for_or_kleene) } @@ -350,17 +342,13 @@ pub fn or_kleene(left: &BooleanArray, right: &BooleanArray) -> Result Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::not; /// let a = BooleanArray::from(vec![Some(false), Some(true), None]); -/// let not_a = not(&a)?; +/// let not_a = not(&a).unwrap(); /// assert_eq!(not_a, BooleanArray::from(vec![Some(true), Some(false), None])); -/// # Ok(()) -/// # } /// ``` -pub fn not(left: &BooleanArray) -> Result { +pub fn not(left: &BooleanArray) -> Result { let left_offset = left.offset(); let len = left.len(); @@ -391,17 +379,13 @@ pub fn not(left: &BooleanArray) -> Result { /// This function never errors. /// # Example /// ```rust -/// # use arrow::error::Result; -/// use arrow::array::BooleanArray; -/// use arrow::compute::kernels::boolean::is_null; -/// # fn main() -> Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::is_null; /// let a = BooleanArray::from(vec![Some(false), Some(true), None]); -/// let a_is_null = is_null(&a)?; +/// let a_is_null = is_null(&a).unwrap(); /// assert_eq!(a_is_null, BooleanArray::from(vec![false, false, true])); -/// # Ok(()) -/// # } /// ``` -pub fn is_null(input: &dyn Array) -> Result { +pub fn is_null(input: &dyn Array) -> Result { let len = input.len(); let output = match input.data_ref().null_buffer() { @@ -432,17 +416,13 @@ pub fn is_null(input: &dyn Array) -> Result { /// This function never errors. /// # Example /// ```rust -/// # use arrow::error::Result; -/// use arrow::array::BooleanArray; -/// use arrow::compute::kernels::boolean::is_not_null; -/// # fn main() -> Result<()> { +/// # use arrow_array::BooleanArray; +/// # use arrow_arith::boolean::is_not_null; /// let a = BooleanArray::from(vec![Some(false), Some(true), None]); -/// let a_is_not_null = is_not_null(&a)?; +/// let a_is_not_null = is_not_null(&a).unwrap(); /// assert_eq!(a_is_not_null, BooleanArray::from(vec![true, true, false])); -/// # Ok(()) -/// # } /// ``` -pub fn is_not_null(input: &dyn Array) -> Result { +pub fn is_not_null(input: &dyn Array) -> Result { let len = input.len(); let output = match input.data_ref().null_buffer() { @@ -473,7 +453,6 @@ pub fn is_not_null(input: &dyn Array) -> Result { #[cfg(test)] mod tests { use super::*; - use crate::array::{ArrayRef, Int32Array}; use std::sync::Arc; #[test] diff --git a/arrow-arith/src/lib.rs b/arrow-arith/src/lib.rs new file mode 100644 index 00000000000..60d31c972b6 --- /dev/null +++ b/arrow-arith/src/lib.rs @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Arrow arithmetic and aggregation kernels + +pub mod aggregate; +pub mod arithmetic; +pub mod arity; +pub mod bitwise; +pub mod boolean; +pub mod temporal; diff --git a/arrow/src/compute/kernels/temporal.rs b/arrow-arith/src/temporal.rs similarity index 95% rename from arrow/src/compute/kernels/temporal.rs rename to arrow-arith/src/temporal.rs index 15d56f70308..5dcda8758dc 100644 --- a/arrow/src/compute/kernels/temporal.rs +++ b/arrow-arith/src/temporal.rs @@ -17,18 +17,19 @@ //! Defines temporal kernels for time and date related functions. -use arrow_array::{downcast_dictionary_array, downcast_temporal_array}; use chrono::{DateTime, Datelike, NaiveDateTime, NaiveTime, Offset, Timelike}; use std::sync::Arc; -use crate::array::*; -use crate::datatypes::*; -use crate::error::{ArrowError, Result}; +use arrow_array::builder::*; +use arrow_array::iterator::ArrayIter; use arrow_array::temporal_conversions::{ as_datetime, as_datetime_with_timezone, as_time, }; - use arrow_array::timezone::Tz; +use arrow_array::types::*; +use arrow_array::*; +use arrow_buffer::ArrowNativeType; +use arrow_schema::{ArrowError, DataType}; /// This function takes an `ArrayIter` of input array and an extractor `op` which takes /// an input `NaiveTime` and returns time component (e.g. hour) as `i32` value. @@ -98,7 +99,7 @@ fn extract_component_from_datetime_array< mut builder: PrimitiveBuilder, tz: &str, op: F, -) -> Result +) -> Result where F: Fn(DateTime) -> i32, i64: From, @@ -178,7 +179,7 @@ pub fn using_chrono_tz_and_utc_naive_date_time( /// Extracts the hours of a given array as an array of integers within /// the range of [0, 23]. If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn hour_dyn(array: &dyn Array) -> Result { +pub fn hour_dyn(array: &dyn Array) -> Result { match array.data_type().clone() { DataType::Dictionary(_, _) => { downcast_dictionary_array!( @@ -203,7 +204,7 @@ pub fn hour_dyn(array: &dyn Array) -> Result { /// Extracts the hours of a given temporal primitive array as an array of integers within /// the range of [0, 23]. -pub fn hour(array: &PrimitiveArray) -> Result +pub fn hour(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -240,12 +241,12 @@ where /// Extracts the years of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn year_dyn(array: &dyn Array) -> Result { +pub fn year_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "year", |t| t.year()) } /// Extracts the years of a given temporal primitive array as an array of integers -pub fn year(array: &PrimitiveArray) -> Result +pub fn year(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -256,13 +257,13 @@ where /// Extracts the quarter of a given temporal array as an array of integersa within /// the range of [1, 4]. If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn quarter_dyn(array: &dyn Array) -> Result { +pub fn quarter_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "quarter", |t| t.quarter() as i32) } /// Extracts the quarter of a given temporal primitive array as an array of integers within /// the range of [1, 4]. -pub fn quarter(array: &PrimitiveArray) -> Result +pub fn quarter(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -273,13 +274,13 @@ where /// Extracts the month of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn month_dyn(array: &dyn Array) -> Result { +pub fn month_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "month", |t| t.month() as i32) } /// Extracts the month of a given temporal primitive array as an array of integers within /// the range of [1, 12]. -pub fn month(array: &PrimitiveArray) -> Result +pub fn month(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -296,7 +297,7 @@ where /// /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result { +pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "num_days_from_monday", |t| t.num_days_from_monday()) } @@ -306,7 +307,9 @@ pub fn num_days_from_monday_dyn(array: &dyn Array) -> Result { /// Monday is encoded as `0`, Tuesday as `1`, etc. /// /// See also [`num_days_from_sunday`] which starts at Sunday. -pub fn num_days_from_monday(array: &PrimitiveArray) -> Result +pub fn num_days_from_monday( + array: &PrimitiveArray, +) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -323,7 +326,7 @@ where /// /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result { +pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "num_days_from_sunday", |t| t.num_days_from_sunday()) } @@ -333,7 +336,9 @@ pub fn num_days_from_sunday_dyn(array: &dyn Array) -> Result { /// Sunday is encoded as `0`, Monday as `1`, etc. /// /// See also [`num_days_from_monday`] which starts at Monday. -pub fn num_days_from_sunday(array: &PrimitiveArray) -> Result +pub fn num_days_from_sunday( + array: &PrimitiveArray, +) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -344,12 +349,12 @@ where /// Extracts the day of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn day_dyn(array: &dyn Array) -> Result { +pub fn day_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "day", |t| t.day() as i32) } /// Extracts the day of a given temporal primitive array as an array of integers -pub fn day(array: &PrimitiveArray) -> Result +pub fn day(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -361,13 +366,13 @@ where /// The day of year that ranges from 1 to 366. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn doy_dyn(array: &dyn Array) -> Result { +pub fn doy_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "doy", |t| t.ordinal() as i32) } /// Extracts the day of year of a given temporal primitive array as an array of integers /// The day of year that ranges from 1 to 366 -pub fn doy(array: &PrimitiveArray) -> Result +pub fn doy(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, T::Native: ArrowNativeType, @@ -377,7 +382,7 @@ where } /// Extracts the minutes of a given temporal primitive array as an array of integers -pub fn minute(array: &PrimitiveArray) -> Result +pub fn minute(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -388,12 +393,12 @@ where /// Extracts the week of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn week_dyn(array: &dyn Array) -> Result { +pub fn week_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "week", |t| t.iso_week().week() as i32) } /// Extracts the week of a given temporal primitive array as an array of integers -pub fn week(array: &PrimitiveArray) -> Result +pub fn week(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -402,7 +407,7 @@ where } /// Extracts the seconds of a given temporal primitive array as an array of integers -pub fn second(array: &PrimitiveArray) -> Result +pub fn second(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -411,7 +416,7 @@ where } /// Extracts the nanoseconds of a given temporal primitive array as an array of integers -pub fn nanosecond(array: &PrimitiveArray) -> Result +pub fn nanosecond(array: &PrimitiveArray) -> Result where T: ArrowTemporalType + ArrowNumericType, i64: From, @@ -422,12 +427,16 @@ where /// Extracts the nanoseconds of a given temporal primitive array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn nanosecond_dyn(array: &dyn Array) -> Result { +pub fn nanosecond_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "nanosecond", |t| t.nanosecond() as i32) } /// Extracts the time fraction of a given temporal array as an array of integers -fn time_fraction_dyn(array: &dyn Array, name: &str, op: F) -> Result +fn time_fraction_dyn( + array: &dyn Array, + name: &str, + op: F, +) -> Result where F: Fn(NaiveDateTime) -> i32, { @@ -458,7 +467,7 @@ fn time_fraction_internal( array: &PrimitiveArray, name: &str, op: F, -) -> Result +) -> Result where F: Fn(NaiveDateTime) -> i32, T: ArrowTemporalType + ArrowNumericType, @@ -486,14 +495,14 @@ where /// Extracts the minutes of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn minute_dyn(array: &dyn Array) -> Result { +pub fn minute_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "minute", |t| t.minute() as i32) } /// Extracts the seconds of a given temporal array as an array of integers. /// If the given array isn't temporal primitive or dictionary array, /// an `Err` will be returned. -pub fn second_dyn(array: &dyn Array) -> Result { +pub fn second_dyn(array: &dyn Array) -> Result { time_fraction_dyn(array, "second", |t| t.second() as i32) } diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 772c1be7745..0382fff9007 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -45,6 +45,7 @@ ahash = { version = "0.8", default-features = false, features = ["compile-time-r ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } [dependencies] +arrow-arith = { version = "29.0.0", path = "../arrow-arith" } arrow-array = { version = "29.0.0", path = "../arrow-array" } arrow-buffer = { version = "29.0.0", path = "../arrow-buffer" } arrow-cast = { version = "29.0.0", path = "../arrow-cast" } @@ -66,7 +67,6 @@ regex = { version = "1.7.0", default-features = false, features = ["std", "unico chrono = { version = "0.4.23", default-features = false, features = ["clock"] } comfy-table = { version = "6.0", optional = true, default-features = false } pyo3 = { version = "0.17", default-features = false, optional = true } -multiversion = { version = "0.6.1", default-features = false } bitflags = { version = "1.2.1", default-features = false, optional = true } [package.metadata.docs.rs] diff --git a/arrow/src/compute/kernels/mod.rs b/arrow/src/compute/kernels/mod.rs index 837fb73d56d..19f3c27a04f 100644 --- a/arrow/src/compute/kernels/mod.rs +++ b/arrow/src/compute/kernels/mod.rs @@ -17,14 +17,9 @@ //! Computation kernels on Arrow Arrays -pub mod aggregate; -pub mod arithmetic; -pub mod arity; -pub mod bitwise; -pub mod boolean; pub mod limit; -pub mod temporal; +pub use arrow_arith::{aggregate, arithmetic, arity, bitwise, boolean, temporal}; pub use arrow_cast::cast; pub use arrow_cast::parse as cast_utils; pub use arrow_ord::{partition, sort};