From 22a8354d8493d29d43270bce72f69fae10587127 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 13 Sep 2022 12:42:28 +0800 Subject: [PATCH 1/6] update try_binary delete math_checked_op update the return type of checked ops Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/arithmetic.rs | 69 ++++--------------------- arrow/src/compute/kernels/arity.rs | 14 +++-- arrow/src/datatypes/native.rs | 59 +++++++++++++++------ 3 files changed, 62 insertions(+), 80 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 6638ae1e87d..6f846d0ab72 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -76,32 +76,6 @@ where Ok(binary(left, right, op)) } -/// This is similar to `math_op` as it performs given operation between two input primitive arrays. -/// But the given operation can return `None` if overflow is detected. For the case, this function -/// returns an `Err`. -fn math_checked_op( - left: &PrimitiveArray, - right: &PrimitiveArray, - op: F, -) -> Result> -where - LT: ArrowNumericType, - RT: ArrowNumericType, - F: Fn(LT::Native, RT::Native) -> Option, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - try_binary(left, right, |a, b| { - op(a, b).ok_or_else(|| { - ArrowError::ComputeError(format!("Overflow happened on: {:?}, {:?}", a, b)) - }) - }) -} - /// Helper function for operations where a valid `0` on the right array should /// result in an [ArrowError::DivideByZero], namely the division and modulo operations /// @@ -119,7 +93,7 @@ where LT: ArrowNumericType, RT: ArrowNumericType, RT::Native: One + Zero, - F: Fn(LT::Native, RT::Native) -> Option, + F: Fn(LT::Native, RT::Native) -> Result, { if left.len() != right.len() { return Err(ArrowError::ComputeError( @@ -131,12 +105,7 @@ where if r.is_zero() { Err(ArrowError::DivideByZero) } else { - op(l, r).ok_or_else(|| { - ArrowError::ComputeError(format!( - "Overflow happened on: {:?}, {:?}", - l, r - )) - }) + op(l, r) } }) } @@ -723,7 +692,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.add_checked(b)) + try_binary(left, right, |a, b| a.add_checked(b)) } /// Perform `left + right` operation on two arrays. If either left or right value is null @@ -824,11 +793,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary(array, |value| { - value.add_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!("Overflow: adding {:?} to {:?}", scalar, value)) - }) - }) + try_unary(array, |value| value.add_checked(scalar)) } /// Add every value in an array by a scalar. If any value in the array is null then the @@ -871,7 +836,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.sub_checked(b)) + try_binary(left, right, |a, b| a.sub_checked(b)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -924,14 +889,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp + Zero, { - try_unary(array, |value| { - value.sub_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: subtracting {:?} from {:?}", - scalar, value - )) - }) - }) + try_unary(array, |value| value.sub_checked(scalar)) } /// Subtract every value in an array by a scalar. If any value in the array is null then the @@ -999,7 +957,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - math_checked_op(left, right, |a, b| a.mul_checked(b)) + try_binary(left, right, |a, b| a.mul_checked(b)) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -1052,14 +1010,7 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp + Zero + One, { - try_unary(array, |value| { - value.mul_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: multiplying {:?} by {:?}", - value, scalar, - )) - }) - }) + try_unary(array, |value| value.mul_checked(scalar)) } /// Multiply every value in an array by a scalar. If any value in the array is null then the @@ -1095,7 +1046,7 @@ where a % b }); #[cfg(not(feature = "simd"))] - return math_checked_divide_op(left, right, |a, b| Some(a % b)); + return math_checked_divide_op(left, right, |a, b| Ok(a % b)); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1129,7 +1080,7 @@ pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { _ => { downcast_primitive_array!( (left, right) => { - math_checked_divide_op(left, right, |a, b| Some(a / b)).map(|a| Arc::new(a) as ArrayRef) + math_checked_divide_op(left, right, |a, b| Ok(a / b)).map(|a| Arc::new(a) as ArrayRef) } _ => Err(ArrowError::CastError(format!( "Unsupported data type {}, {}", diff --git a/arrow/src/compute/kernels/arity.rs b/arrow/src/compute/kernels/arity.rs index ee3ff5e23a8..e3871fc2283 100644 --- a/arrow/src/compute/kernels/arity.rs +++ b/arrow/src/compute/kernels/arity.rs @@ -215,9 +215,10 @@ where /// /// Like [`try_unary`] the function is only evaluated for non-null indices /// -/// # Panic +/// # Error /// -/// Panics if the arrays have different lengths +/// Return an error if the arrays have different lengths or +/// the operation is under erroneous pub fn try_binary( a: &PrimitiveArray, b: &PrimitiveArray, @@ -229,13 +230,16 @@ where O: ArrowPrimitiveType, F: Fn(A::Native, B::Native) -> Result, { - assert_eq!(a.len(), b.len()); - let len = a.len(); - + if a.len() != b.len() { + return Err(ArrowError::ComputeError( + "Cannot perform a binary operation on arrays of different length".to_string(), + )); + } if a.is_empty() { return Ok(PrimitiveArray::from(ArrayData::new_empty(&O::DATA_TYPE))); } + let len = a.len(); let null_buffer = combine_option_bitmap(&[a.data(), b.data()], len).unwrap(); let null_count = null_buffer .as_ref() diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 444f2b27dce..1adc3dca43c 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -16,7 +16,9 @@ // under the License. use super::DataType; +use crate::error::{ArrowError, Result}; use half::f16; +use num::Zero; mod private { pub trait Sealed {} @@ -116,6 +118,7 @@ pub trait ArrowPrimitiveType: 'static { pub(crate) mod native_op { use super::ArrowNativeType; + use crate::error::Result; use std::ops::{Add, Div, Mul, Sub}; /// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking @@ -135,32 +138,32 @@ pub(crate) mod native_op { + Mul + Div { - fn add_checked(self, rhs: Self) -> Option { - Some(self + rhs) + fn add_checked(self, rhs: Self) -> Result { + Ok(self + rhs) } fn add_wrapping(self, rhs: Self) -> Self { self + rhs } - fn sub_checked(self, rhs: Self) -> Option { - Some(self - rhs) + fn sub_checked(self, rhs: Self) -> Result { + Ok(self - rhs) } fn sub_wrapping(self, rhs: Self) -> Self { self - rhs } - fn mul_checked(self, rhs: Self) -> Option { - Some(self * rhs) + fn mul_checked(self, rhs: Self) -> Result { + Ok(self * rhs) } fn mul_wrapping(self, rhs: Self) -> Self { self * rhs } - fn div_checked(self, rhs: Self) -> Option { - Some(self / rhs) + fn div_checked(self, rhs: Self) -> Result { + Ok(self / rhs) } fn div_wrapping(self, rhs: Self) -> Self { @@ -172,32 +175,56 @@ pub(crate) mod native_op { macro_rules! native_type_op { ($t:tt) => { impl native_op::ArrowNativeTypeOp for $t { - fn add_checked(self, rhs: Self) -> Option { - self.checked_add(rhs) + fn add_checked(self, rhs: Self) -> Result { + self.checked_add(rhs).ok_or_else(|| { + ArrowError::ComputeError(format!( + "Overflow happened on: {:?} + {:?}", + self, rhs + )) + }) } fn add_wrapping(self, rhs: Self) -> Self { self.wrapping_add(rhs) } - fn sub_checked(self, rhs: Self) -> Option { - self.checked_sub(rhs) + fn sub_checked(self, rhs: Self) -> Result { + self.checked_sub(rhs).ok_or_else(|| { + ArrowError::ComputeError(format!( + "Overflow happened on: {:?} - {:?}", + self, rhs + )) + }) } fn sub_wrapping(self, rhs: Self) -> Self { self.wrapping_sub(rhs) } - fn mul_checked(self, rhs: Self) -> Option { - self.checked_mul(rhs) + fn mul_checked(self, rhs: Self) -> Result { + self.checked_mul(rhs).ok_or_else(|| { + ArrowError::ComputeError(format!( + "Overflow happened on: {:?} * {:?}", + self, rhs + )) + }) } fn mul_wrapping(self, rhs: Self) -> Self { self.wrapping_mul(rhs) } - fn div_checked(self, rhs: Self) -> Option { - self.checked_div(rhs) + fn div_checked(self, rhs: Self) -> Result { + if rhs.is_zero() { + Err(ArrowError::DivideByZero) + } else { + self.checked_div(rhs).ok_or_else(|| { + ArrowError::ComputeError(format!( + "Overflow happened on: {:?} / {:?}", + self, rhs + )) + }) + } } fn div_wrapping(self, rhs: Self) -> Self { From c7b91e75697753b633806c1a5826ee3f0170fec1 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 13 Sep 2022 17:32:10 +0800 Subject: [PATCH 2/6] float div not panic on zero Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/arithmetic.rs | 117 ++++++++++++++++-------- arrow/src/datatypes/native.rs | 1 + 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 6f846d0ab72..52ec7349b71 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -95,19 +95,7 @@ where RT::Native: One + Zero, F: Fn(LT::Native, RT::Native) -> Result, { - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - try_binary(left, right, |l, r| { - if r.is_zero() { - Err(ArrowError::DivideByZero) - } else { - op(l, r) - } - }) + try_binary(left, right, op) } /// Helper function for operations where a valid `0` on the right array should @@ -128,16 +116,12 @@ fn math_checked_divide_op_on_iters( where T: ArrowNumericType, T::Native: One + Zero, - F: Fn(T::Native, T::Native) -> T::Native, + F: Fn(T::Native, T::Native) -> Result, { let buffer = if null_bit_buffer.is_some() { let values = left.zip(right).map(|(left, right)| { if let (Some(l), Some(r)) = (left, right) { - if r.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(op(l, r)) - } + op(l, r) } else { Ok(T::default_value()) } @@ -146,15 +130,10 @@ where unsafe { Buffer::try_from_trusted_len_iter(values) } } else { // no value is null - let values = left.map(|l| l.unwrap()).zip(right.map(|r| r.unwrap())).map( - |(left, right)| { - if right.is_zero() { - Err(ArrowError::DivideByZero) - } else { - Ok(op(left, right)) - } - }, - ); + let values = left + .map(|l| l.unwrap()) + .zip(right.map(|r| r.unwrap())) + .map(|(left, right)| op(left, right)); // Safety: Iterator comes from a PrimitiveArray which reports its size correctly unsafe { Buffer::try_from_trusted_len_iter(values) } }?; @@ -621,7 +600,7 @@ where K: ArrowNumericType, T: ArrowNumericType, T::Native: One + Zero, - F: Fn(T::Native, T::Native) -> T::Native, + F: Fn(T::Native, T::Native) -> Result, { if left.len() != right.len() { return Err(ArrowError::ComputeError(format!( @@ -1046,7 +1025,13 @@ where a % b }); #[cfg(not(feature = "simd"))] - return math_checked_divide_op(left, right, |a, b| Ok(a % b)); + return try_binary(left, right, |a, b| { + if b.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(a % b) + } + }); } /// Perform `left / right` operation on two arrays. If either left or right value is null @@ -1075,12 +1060,17 @@ where pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result { match left.data_type() { DataType::Dictionary(_, _) => { - typed_dict_math_op!(left, right, |a, b| a / b, math_divide_checked_op_dict) + typed_dict_math_op!( + left, + right, + |a, b| a.div_checked(b), + math_divide_checked_op_dict + ) } _ => { downcast_primitive_array!( (left, right) => { - math_checked_divide_op(left, right, |a, b| Ok(a / b)).map(|a| Arc::new(a) as ArrayRef) + math_checked_divide_op(left, right, |a, b| a.div_checked(b)).map(|a| Arc::new(a) as ArrayRef) } _ => Err(ArrowError::CastError(format!( "Unsupported data type {}, {}", @@ -1951,31 +1941,61 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_by_zero_with_checked() { + fn test_int_array_divide_by_zero_with_checked() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide_checked(&a, &b).unwrap(); } + #[test] + fn test_float_array_divide_by_zero_with_checked() { + let a = Float32Array::from(vec![1.0, 0.0, -1.0]); + let b = Float32Array::from(vec![0.0, 0.0, 0.0]); + let expected = + Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); + assert_eq!(expected, divide_checked(&a, &b).unwrap()) + } + #[test] #[should_panic(expected = "attempt to divide by zero")] - fn test_primitive_array_divide_by_zero() { + fn test_int_array_divide_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide(&a, &b).unwrap(); } + #[test] + fn test_float_array_divide_by_zero() { + let a = Float32Array::from(vec![1.0, 0.0, -1.0]); + let b = Float32Array::from(vec![0.0, 0.0, 0.0]); + let expected = + Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); + assert_eq!(expected, divide(&a, &b).unwrap()) + } + #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_dyn_by_zero() { + fn test_int_array_divide_dyn_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); divide_dyn(&a, &b).unwrap(); } + #[test] + fn test_float_array_divide_dyn_by_zero() { + let a = Float32Array::from(vec![1.0, 0.0, -1.0]); + let b = Float32Array::from(vec![0.0, 0.0, 0.0]); + let expected = + Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); + assert_eq!( + &expected, + as_primitive_array::(÷_dyn(&a, &b).unwrap()) + ) + } + #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_divide_dyn_by_zero_dict() { + fn test_int_array_divide_dyn_by_zero_dict() { let mut builder = PrimitiveDictionaryBuilder::::with_capacity(1, 1); builder.append(15).unwrap(); @@ -1989,6 +2009,31 @@ mod tests { divide_dyn(&a, &b).unwrap(); } + #[test] + fn test_float_array_divide_dyn_by_zero_dict() { + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); + builder.append(15.0).unwrap(); + builder.append(0.0).unwrap(); + builder.append(-1.5).unwrap(); + let a = builder.finish(); + + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); + builder.append(0.0).unwrap(); + builder.append(0.0).unwrap(); + builder.append(0.0).unwrap(); + let b = builder.finish(); + + let expected = + Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); + + assert_eq!( + &expected, + as_primitive_array::(÷_dyn(&a, &b).unwrap()) + ) + } + #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_modulus_by_zero() { diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 1adc3dca43c..e298f0685fe 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -238,6 +238,7 @@ native_type_op!(i8); native_type_op!(i16); native_type_op!(i32); native_type_op!(i64); +native_type_op!(i128); native_type_op!(u8); native_type_op!(u16); native_type_op!(u32); From 5120125ca8404269c13df9c5848fab78a34e1a49 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Tue, 13 Sep 2022 17:59:36 +0800 Subject: [PATCH 3/6] fix nan test Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/arithmetic.rs | 37 ++++++++++++------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 52ec7349b71..6db4d9f7089 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1951,9 +1951,10 @@ mod tests { fn test_float_array_divide_by_zero_with_checked() { let a = Float32Array::from(vec![1.0, 0.0, -1.0]); let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let expected = - Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); - assert_eq!(expected, divide_checked(&a, &b).unwrap()) + let result = divide_checked(&a, &b).unwrap(); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); } #[test] @@ -1968,9 +1969,10 @@ mod tests { fn test_float_array_divide_by_zero() { let a = Float32Array::from(vec![1.0, 0.0, -1.0]); let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let expected = - Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); - assert_eq!(expected, divide(&a, &b).unwrap()) + let result = divide(&a, &b).unwrap(); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); } #[test] @@ -1985,12 +1987,11 @@ mod tests { fn test_float_array_divide_dyn_by_zero() { let a = Float32Array::from(vec![1.0, 0.0, -1.0]); let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let expected = - Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); - assert_eq!( - &expected, - as_primitive_array::(÷_dyn(&a, &b).unwrap()) - ) + let result = ÷_dyn(&a, &b).unwrap(); + let result = as_primitive_array::(result); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); } #[test] @@ -2025,13 +2026,11 @@ mod tests { builder.append(0.0).unwrap(); let b = builder.finish(); - let expected = - Float32Array::from(vec![f32::INFINITY, f32::NAN, f32::NEG_INFINITY]); - - assert_eq!( - &expected, - as_primitive_array::(÷_dyn(&a, &b).unwrap()) - ) + let result = ÷_dyn(&a, &b).unwrap(); + let result = as_primitive_array::(result); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); } #[test] From b0a0a51d92ea252de7e53d2c690e88e923b04a27 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 14 Sep 2022 17:31:41 +0800 Subject: [PATCH 4/6] add float divide by zero Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/Cargo.toml | 2 +- arrow/src/compute/kernels/arithmetic.rs | 54 ------------------------- arrow/src/datatypes/native.rs | 10 ++++- 3 files changed, 9 insertions(+), 57 deletions(-) diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 2de4db64276..15066d0d13f 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -49,7 +49,7 @@ serde_json = { version = "1.0", default-features = false, features = ["std"], op indexmap = { version = "1.9", default-features = false, features = ["std"] } rand = { version = "0.8", default-features = false, features = ["std", "std_rng"], optional = true } num = { version = "0.4", default-features = false, features = ["std"] } -half = { version = "2.0", default-features = false } +half = { version = "2.0", default-features = false, features = ["num-traits"]} hashbrown = { version = "0.12", default-features = false } csv_crate = { version = "1.1", default-features = false, optional = true, package = "csv" } regex = { version = "1.5.6", default-features = false, features = ["std", "unicode"] } diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 45f46a293ad..f82e05fbc5c 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1973,16 +1973,6 @@ mod tests { divide_checked(&a, &b).unwrap(); } - #[test] - fn test_float_array_divide_by_zero_with_checked() { - let a = Float32Array::from(vec![1.0, 0.0, -1.0]); - let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let result = divide_checked(&a, &b).unwrap(); - assert_eq!(result.value(0), f32::INFINITY); - assert!(result.value(1).is_nan()); - assert_eq!(result.value(2), f32::NEG_INFINITY); - } - #[test] #[should_panic(expected = "attempt to divide by zero")] fn test_int_array_divide_by_zero() { @@ -1991,16 +1981,6 @@ mod tests { divide(&a, &b).unwrap(); } - #[test] - fn test_float_array_divide_by_zero() { - let a = Float32Array::from(vec![1.0, 0.0, -1.0]); - let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let result = divide(&a, &b).unwrap(); - assert_eq!(result.value(0), f32::INFINITY); - assert!(result.value(1).is_nan()); - assert_eq!(result.value(2), f32::NEG_INFINITY); - } - #[test] #[should_panic(expected = "DivideByZero")] fn test_int_array_divide_dyn_by_zero() { @@ -2009,17 +1989,6 @@ mod tests { divide_dyn(&a, &b).unwrap(); } - #[test] - fn test_float_array_divide_dyn_by_zero() { - let a = Float32Array::from(vec![1.0, 0.0, -1.0]); - let b = Float32Array::from(vec![0.0, 0.0, 0.0]); - let result = ÷_dyn(&a, &b).unwrap(); - let result = as_primitive_array::(result); - assert_eq!(result.value(0), f32::INFINITY); - assert!(result.value(1).is_nan()); - assert_eq!(result.value(2), f32::NEG_INFINITY); - } - #[test] #[should_panic(expected = "DivideByZero")] fn test_int_array_divide_dyn_by_zero_dict() { @@ -2036,29 +2005,6 @@ mod tests { divide_dyn(&a, &b).unwrap(); } - #[test] - fn test_float_array_divide_dyn_by_zero_dict() { - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(15.0).unwrap(); - builder.append(0.0).unwrap(); - builder.append(-1.5).unwrap(); - let a = builder.finish(); - - let mut builder = - PrimitiveDictionaryBuilder::::with_capacity(1, 1); - builder.append(0.0).unwrap(); - builder.append(0.0).unwrap(); - builder.append(0.0).unwrap(); - let b = builder.finish(); - - let result = ÷_dyn(&a, &b).unwrap(); - let result = as_primitive_array::(result); - assert_eq!(result.value(0), f32::INFINITY); - assert!(result.value(1).is_nan()); - assert_eq!(result.value(2), f32::NEG_INFINITY); - } - #[test] #[should_panic(expected = "DivideByZero")] fn test_primitive_array_modulus_by_zero() { diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index e298f0685fe..87f087fc0dd 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -118,7 +118,8 @@ pub trait ArrowPrimitiveType: 'static { pub(crate) mod native_op { use super::ArrowNativeType; - use crate::error::Result; + use crate::error::{ArrowError, Result}; + use num::Zero; use std::ops::{Add, Div, Mul, Sub}; /// Trait for ArrowNativeType to provide overflow-checking and non-overflow-checking @@ -137,6 +138,7 @@ pub(crate) mod native_op { + Sub + Mul + Div + + Zero { fn add_checked(self, rhs: Self) -> Result { Ok(self + rhs) @@ -163,7 +165,11 @@ pub(crate) mod native_op { } fn div_checked(self, rhs: Self) -> Result { - Ok(self / rhs) + if self.is_zero() { + Err(ArrowError::DivideByZero) + } else { + Ok(self / rhs) + } } fn div_wrapping(self, rhs: Self) -> Self { From d24397eb49d8d0f0660e3b466b9d866c864d3b36 Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Wed, 14 Sep 2022 17:52:44 +0800 Subject: [PATCH 5/6] add float tests Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/arithmetic.rs | 54 ++++++++++++++++++++++++- arrow/src/datatypes/native.rs | 2 +- 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index f82e05fbc5c..e2deb7e504d 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -1973,6 +1973,14 @@ mod tests { divide_checked(&a, &b).unwrap(); } + #[test] + #[should_panic(expected = "DivideByZero")] + fn test_f32_array_divide_by_zero_with_checked() { + let a = Float32Array::from(vec![15.0]); + let b = Float32Array::from(vec![0.0]); + divide_checked(&a, &b).unwrap(); + } + #[test] #[should_panic(expected = "attempt to divide by zero")] fn test_int_array_divide_by_zero() { @@ -1981,6 +1989,16 @@ mod tests { divide(&a, &b).unwrap(); } + #[test] + fn test_f32_array_divide_by_zero() { + let a = Float32Array::from(vec![1.5, 0.0, -1.5]); + let b = Float32Array::from(vec![0.0, 0.0, 0.0]); + let result = divide(&a, &b).unwrap(); + assert_eq!(result.value(0), f32::INFINITY); + assert!(result.value(1).is_nan()); + assert_eq!(result.value(2), f32::NEG_INFINITY); + } + #[test] #[should_panic(expected = "DivideByZero")] fn test_int_array_divide_dyn_by_zero() { @@ -1989,6 +2007,14 @@ mod tests { divide_dyn(&a, &b).unwrap(); } + #[test] + #[should_panic(expected = "DivideByZero")] + fn test_f32_array_divide_dyn_by_zero() { + let a = Float32Array::from(vec![1.5]); + let b = Float32Array::from(vec![0.0]); + divide_dyn(&a, &b).unwrap(); + } + #[test] #[should_panic(expected = "DivideByZero")] fn test_int_array_divide_dyn_by_zero_dict() { @@ -2007,14 +2033,38 @@ mod tests { #[test] #[should_panic(expected = "DivideByZero")] - fn test_primitive_array_modulus_by_zero() { + fn test_f32_dict_array_divide_dyn_by_zero() { + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); + builder.append(1.5).unwrap(); + let a = builder.finish(); + + let mut builder = + PrimitiveDictionaryBuilder::::with_capacity(1, 1); + builder.append(0.0).unwrap(); + let b = builder.finish(); + + divide_dyn(&a, &b).unwrap(); + } + + #[test] + #[should_panic(expected = "DivideByZero")] + fn test_i32_array_modulus_by_zero() { let a = Int32Array::from(vec![15]); let b = Int32Array::from(vec![0]); modulus(&a, &b).unwrap(); } #[test] - fn test_primitive_array_divide_f64() { + #[should_panic(expected = "DivideByZero")] + fn test_f32_array_modulus_by_zero() { + let a = Float32Array::from(vec![1.5]); + let b = Float32Array::from(vec![0.0]); + modulus(&a, &b).unwrap(); + } + + #[test] + fn test_f64_array_divide() { let a = Float64Array::from(vec![15.0, 15.0, 8.0]); let b = Float64Array::from(vec![5.0, 6.0, 8.0]); let c = divide(&a, &b).unwrap(); diff --git a/arrow/src/datatypes/native.rs b/arrow/src/datatypes/native.rs index 87f087fc0dd..1fdef440946 100644 --- a/arrow/src/datatypes/native.rs +++ b/arrow/src/datatypes/native.rs @@ -165,7 +165,7 @@ pub(crate) mod native_op { } fn div_checked(self, rhs: Self) -> Result { - if self.is_zero() { + if rhs.is_zero() { Err(ArrowError::DivideByZero) } else { Ok(self / rhs) From 70e404ae80a0715263aa4a85732638b10a17c88d Mon Sep 17 00:00:00 2001 From: remzi <13716567376yh@gmail.com> Date: Fri, 16 Sep 2022 09:19:07 +0800 Subject: [PATCH 6/6] fix compile error Signed-off-by: remzi <13716567376yh@gmail.com> --- arrow/src/compute/kernels/arithmetic.rs | 41 +++++-------------------- 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 9f8bd2e1feb..7b91a261c7e 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -807,12 +807,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.add_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!("Overflow: adding {:?} to {:?}", scalar, value)) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.add_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -928,15 +924,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.sub_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: subtracting {:?} from {:?}", - scalar, value - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.sub_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `-` operation on an array. If value is null then the result is also null. @@ -1073,15 +1062,8 @@ where T: ArrowNumericType, T::Native: ArrowNativeTypeOp, { - try_unary_dyn::<_, T>(array, |value| { - value.mul_checked(scalar).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: multiplying {:?} by {:?}", - value, scalar - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.mul_checked(scalar)) + .map(|a| Arc::new(a) as ArrayRef) } /// Perform `left % right` operation on two arrays. If either left or right value is null @@ -1272,15 +1254,8 @@ where return Err(ArrowError::DivideByZero); } - try_unary_dyn::<_, T>(array, |value| { - value.div_checked(divisor).ok_or_else(|| { - ArrowError::CastError(format!( - "Overflow: dividing {:?} by {:?}", - value, divisor - )) - }) - }) - .map(|a| Arc::new(a) as ArrayRef) + try_unary_dyn::<_, T>(array, |value| value.div_checked(divisor)) + .map(|a| Arc::new(a) as ArrayRef) } #[cfg(test)]