New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update try_binary
and checked_ops
, and remove math_checked_op
#2717
Changes from 3 commits
22a8354
c7b91e7
5120125
5bdfb11
b0a0a51
d24397e
fd00bc6
70e404a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,32 +76,6 @@ where | |
Ok(binary(left, right, op)) | ||
} | ||
|
||
/// This is similar to `math_op` as it performs given operation between two input primitive arrays. | ||
/// But the given operation can return `None` if overflow is detected. For the case, this function | ||
/// returns an `Err`. | ||
fn math_checked_op<LT, RT, F>( | ||
left: &PrimitiveArray<LT>, | ||
right: &PrimitiveArray<RT>, | ||
op: F, | ||
) -> Result<PrimitiveArray<LT>> | ||
where | ||
LT: ArrowNumericType, | ||
RT: ArrowNumericType, | ||
F: Fn(LT::Native, RT::Native) -> Option<LT::Native>, | ||
{ | ||
if left.len() != right.len() { | ||
return Err(ArrowError::ComputeError( | ||
"Cannot perform math operation on arrays of different length".to_string(), | ||
)); | ||
} | ||
|
||
try_binary(left, right, |a, b| { | ||
op(a, b).ok_or_else(|| { | ||
ArrowError::ComputeError(format!("Overflow happened on: {:?}, {:?}", a, b)) | ||
}) | ||
}) | ||
} | ||
|
||
/// Helper function for operations where a valid `0` on the right array should | ||
/// result in an [ArrowError::DivideByZero], namely the division and modulo operations | ||
/// | ||
|
@@ -119,26 +93,9 @@ where | |
LT: ArrowNumericType, | ||
RT: ArrowNumericType, | ||
RT::Native: One + Zero, | ||
F: Fn(LT::Native, RT::Native) -> Option<LT::Native>, | ||
F: Fn(LT::Native, RT::Native) -> Result<LT::Native>, | ||
{ | ||
if left.len() != right.len() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried to remove this function but met some error from the type system. I will file a follow-up ticket to remove this. |
||
return Err(ArrowError::ComputeError( | ||
"Cannot perform math operation on arrays of different length".to_string(), | ||
)); | ||
} | ||
|
||
try_binary(left, right, |l, r| { | ||
if r.is_zero() { | ||
Err(ArrowError::DivideByZero) | ||
} else { | ||
op(l, r).ok_or_else(|| { | ||
ArrowError::ComputeError(format!( | ||
"Overflow happened on: {:?}, {:?}", | ||
l, r | ||
)) | ||
}) | ||
} | ||
}) | ||
try_binary(left, right, op) | ||
} | ||
|
||
/// Helper function for operations where a valid `0` on the right array should | ||
|
@@ -159,16 +116,12 @@ fn math_checked_divide_op_on_iters<T, F>( | |
where | ||
T: ArrowNumericType, | ||
T::Native: One + Zero, | ||
F: Fn(T::Native, T::Native) -> T::Native, | ||
F: Fn(T::Native, T::Native) -> Result<T::Native>, | ||
{ | ||
let buffer = if null_bit_buffer.is_some() { | ||
let values = left.zip(right).map(|(left, right)| { | ||
if let (Some(l), Some(r)) = (left, right) { | ||
if r.is_zero() { | ||
Err(ArrowError::DivideByZero) | ||
} else { | ||
Ok(op(l, r)) | ||
} | ||
op(l, r) | ||
} else { | ||
Ok(T::default_value()) | ||
} | ||
|
@@ -177,15 +130,10 @@ where | |
unsafe { Buffer::try_from_trusted_len_iter(values) } | ||
} else { | ||
// no value is null | ||
let values = left.map(|l| l.unwrap()).zip(right.map(|r| r.unwrap())).map( | ||
|(left, right)| { | ||
if right.is_zero() { | ||
Err(ArrowError::DivideByZero) | ||
} else { | ||
Ok(op(left, right)) | ||
} | ||
}, | ||
); | ||
let values = left | ||
.map(|l| l.unwrap()) | ||
.zip(right.map(|r| r.unwrap())) | ||
.map(|(left, right)| op(left, right)); | ||
// Safety: Iterator comes from a PrimitiveArray which reports its size correctly | ||
unsafe { Buffer::try_from_trusted_len_iter(values) } | ||
}?; | ||
|
@@ -652,7 +600,7 @@ where | |
K: ArrowNumericType, | ||
T: ArrowNumericType, | ||
T::Native: One + Zero, | ||
F: Fn(T::Native, T::Native) -> T::Native, | ||
F: Fn(T::Native, T::Native) -> Result<T::Native>, | ||
{ | ||
if left.len() != right.len() { | ||
return Err(ArrowError::ComputeError(format!( | ||
|
@@ -723,7 +671,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
math_checked_op(left, right, |a, b| a.add_checked(b)) | ||
try_binary(left, right, |a, b| a.add_checked(b)) | ||
} | ||
|
||
/// Perform `left + right` operation on two arrays. If either left or right value is null | ||
|
@@ -824,11 +772,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
try_unary(array, |value| { | ||
value.add_checked(scalar).ok_or_else(|| { | ||
ArrowError::CastError(format!("Overflow: adding {:?} to {:?}", scalar, value)) | ||
}) | ||
}) | ||
try_unary(array, |value| value.add_checked(scalar)) | ||
} | ||
|
||
/// Add every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -871,7 +815,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
math_checked_op(left, right, |a, b| a.sub_checked(b)) | ||
try_binary(left, right, |a, b| a.sub_checked(b)) | ||
} | ||
|
||
/// Perform `left - right` operation on two arrays. If either left or right value is null | ||
|
@@ -924,14 +868,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp + Zero, | ||
{ | ||
try_unary(array, |value| { | ||
value.sub_checked(scalar).ok_or_else(|| { | ||
ArrowError::CastError(format!( | ||
"Overflow: subtracting {:?} from {:?}", | ||
scalar, value | ||
)) | ||
}) | ||
}) | ||
try_unary(array, |value| value.sub_checked(scalar)) | ||
} | ||
|
||
/// Subtract every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -999,7 +936,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
math_checked_op(left, right, |a, b| a.mul_checked(b)) | ||
try_binary(left, right, |a, b| a.mul_checked(b)) | ||
} | ||
|
||
/// Perform `left * right` operation on two arrays. If either left or right value is null | ||
|
@@ -1052,14 +989,7 @@ where | |
T: ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp + Zero + One, | ||
{ | ||
try_unary(array, |value| { | ||
value.mul_checked(scalar).ok_or_else(|| { | ||
ArrowError::CastError(format!( | ||
"Overflow: multiplying {:?} by {:?}", | ||
value, scalar, | ||
)) | ||
}) | ||
}) | ||
try_unary(array, |value| value.mul_checked(scalar)) | ||
} | ||
|
||
/// Multiply every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -1095,7 +1025,13 @@ where | |
a % b | ||
}); | ||
#[cfg(not(feature = "simd"))] | ||
return math_checked_divide_op(left, right, |a, b| Some(a % b)); | ||
return try_binary(left, right, |a, b| { | ||
if b.is_zero() { | ||
Err(ArrowError::DivideByZero) | ||
} else { | ||
Ok(a % b) | ||
} | ||
}); | ||
} | ||
|
||
/// Perform `left / right` operation on two arrays. If either left or right value is null | ||
|
@@ -1124,12 +1060,17 @@ where | |
pub fn divide_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> { | ||
match left.data_type() { | ||
DataType::Dictionary(_, _) => { | ||
typed_dict_math_op!(left, right, |a, b| a / b, math_divide_checked_op_dict) | ||
typed_dict_math_op!( | ||
left, | ||
right, | ||
|a, b| a.div_checked(b), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Err...why you changed it to use I'm adding checked variant in another PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this fixes #2647 perhaps? I have to confess to being a little lost now, I had thought we'd collectively agreed that division didn't make sense to have an unchecked variant as Rust always checks division 😅 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh, I meant overflow-checking. I think this change doesn't change division by zero behavior of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How would division overflow? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we're fine the definition of
The checked_div is technically redundant as we've already checked that the value isn't zero, i.e. this code could be simplified to
The definition for floating points is
Which will not check for overflow, as it isn't meaningful to do so for floats? TLDR: I don't think overflow checking for division makes sense? The only types that could overlow, i.e. floats, already have a way to represent that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For "checked" kernel, I meant overflow checking behavior. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Op, I am wrong, sorry. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No worries. I've fixed it in the PR to add checked variant. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add another // check dividebyzero and overflow
fn div_fully_checked(self, rhs: Self) -> Result<Self> {
if rhs.is_zero() {
Err(ArrowError::DivideByZero)
} else {
self.checked_div(rhs).ok_or_else(|| {
ArrowError::ComputeError(format!(
"Overflow happened on: {:?} / {:?}",
self, rhs
))
})
}
}
// only check the dividebyzero error
fn div_checked_divideByZero(self, rhs: Self) -> Result<Self> {}
// no check
fn div_wrapping(self, rhs: Self) -> Self {
self.wrapping_div(rhs)
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think edit: Oh, I misread your comment. But I don't think it is good idea to add too many APIs there. It is pretty easy to do extra division by zero check + wrapping division. |
||
math_divide_checked_op_dict | ||
) | ||
} | ||
_ => { | ||
downcast_primitive_array!( | ||
(left, right) => { | ||
math_checked_divide_op(left, right, |a, b| Some(a / b)).map(|a| Arc::new(a) as ArrayRef) | ||
math_checked_divide_op(left, right, |a, b| a.div_checked(b)).map(|a| Arc::new(a) as ArrayRef) | ||
} | ||
_ => Err(ArrowError::CastError(format!( | ||
"Unsupported data type {}, {}", | ||
|
@@ -2000,31 +1941,62 @@ mod tests { | |
|
||
#[test] | ||
#[should_panic(expected = "DivideByZero")] | ||
fn test_primitive_array_divide_by_zero_with_checked() { | ||
fn test_int_array_divide_by_zero_with_checked() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. rename the test case to make it clearer. |
||
let a = Int32Array::from(vec![15]); | ||
let b = Int32Array::from(vec![0]); | ||
divide_checked(&a, &b).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn test_float_array_divide_by_zero_with_checked() { | ||
let a = Float32Array::from(vec![1.0, 0.0, -1.0]); | ||
let b = Float32Array::from(vec![0.0, 0.0, 0.0]); | ||
let result = divide_checked(&a, &b).unwrap(); | ||
assert_eq!(result.value(0), f32::INFINITY); | ||
assert!(result.value(1).is_nan()); | ||
assert_eq!(result.value(2), f32::NEG_INFINITY); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "attempt to divide by zero")] | ||
fn test_primitive_array_divide_by_zero() { | ||
fn test_int_array_divide_by_zero() { | ||
let a = Int32Array::from(vec![15]); | ||
let b = Int32Array::from(vec![0]); | ||
divide(&a, &b).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn test_float_array_divide_by_zero() { | ||
let a = Float32Array::from(vec![1.0, 0.0, -1.0]); | ||
let b = Float32Array::from(vec![0.0, 0.0, 0.0]); | ||
let result = divide(&a, &b).unwrap(); | ||
assert_eq!(result.value(0), f32::INFINITY); | ||
assert!(result.value(1).is_nan()); | ||
assert_eq!(result.value(2), f32::NEG_INFINITY); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "DivideByZero")] | ||
fn test_primitive_array_divide_dyn_by_zero() { | ||
fn test_int_array_divide_dyn_by_zero() { | ||
let a = Int32Array::from(vec![15]); | ||
let b = Int32Array::from(vec![0]); | ||
divide_dyn(&a, &b).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn test_float_array_divide_dyn_by_zero() { | ||
let a = Float32Array::from(vec![1.0, 0.0, -1.0]); | ||
let b = Float32Array::from(vec![0.0, 0.0, 0.0]); | ||
let result = ÷_dyn(&a, &b).unwrap(); | ||
let result = as_primitive_array::<Float32Type>(result); | ||
assert_eq!(result.value(0), f32::INFINITY); | ||
assert!(result.value(1).is_nan()); | ||
assert_eq!(result.value(2), f32::NEG_INFINITY); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "DivideByZero")] | ||
fn test_primitive_array_divide_dyn_by_zero_dict() { | ||
fn test_int_array_divide_dyn_by_zero_dict() { | ||
let mut builder = | ||
PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(1, 1); | ||
builder.append(15).unwrap(); | ||
|
@@ -2038,6 +2010,29 @@ mod tests { | |
divide_dyn(&a, &b).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn test_float_array_divide_dyn_by_zero_dict() { | ||
let mut builder = | ||
PrimitiveDictionaryBuilder::<Int8Type, Float32Type>::with_capacity(1, 1); | ||
builder.append(15.0).unwrap(); | ||
builder.append(0.0).unwrap(); | ||
builder.append(-1.5).unwrap(); | ||
let a = builder.finish(); | ||
|
||
let mut builder = | ||
PrimitiveDictionaryBuilder::<Int8Type, Float32Type>::with_capacity(1, 1); | ||
builder.append(0.0).unwrap(); | ||
builder.append(0.0).unwrap(); | ||
builder.append(0.0).unwrap(); | ||
let b = builder.finish(); | ||
|
||
let result = ÷_dyn(&a, &b).unwrap(); | ||
let result = as_primitive_array::<Float32Type>(result); | ||
assert_eq!(result.value(0), f32::INFINITY); | ||
assert!(result.value(1).is_nan()); | ||
assert_eq!(result.value(2), f32::NEG_INFINITY); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "DivideByZero")] | ||
fn test_primitive_array_modulus_by_zero() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't need this function any more because this is the same as
try_binary
after we updating thetry_binary
and the type ofop