-
Notifications
You must be signed in to change notification settings - Fork 867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Overflow-checking variant of arithmetic scalar kernels #2650
Changes from 1 commit
5a48454
be51743
bd55844
44b9202
f4a02a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,8 +31,8 @@ use crate::buffer::Buffer; | |
#[cfg(feature = "simd")] | ||
use crate::buffer::MutableBuffer; | ||
use crate::compute::kernels::arity::unary; | ||
use crate::compute::unary_dyn; | ||
use crate::compute::util::combine_option_bitmap; | ||
use crate::compute::{unary_checked, unary_dyn}; | ||
use crate::datatypes; | ||
use crate::datatypes::{ | ||
native_op::ArrowNativeTypeOp, ArrowNumericType, ArrowPrimitiveType, DataType, | ||
|
@@ -952,15 +952,34 @@ pub fn add_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> { | |
|
||
/// Add every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This doesn't detect overflow. Once overflowing, the result will wrap around. | ||
/// For an overflow-checking variant, use `add_scalar_checked` instead. | ||
pub fn add_scalar<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: Add<Output = T::Native>, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
Ok(unary(array, |value| value.add_wrapping(scalar))) | ||
} | ||
|
||
/// Add every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, | ||
/// use `add_scalar` instead. | ||
pub fn add_scalar_checked<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp, | ||
{ | ||
Ok(unary(array, |value| value + scalar)) | ||
unary_checked(array, |value| value.add_checked(scalar)) | ||
} | ||
|
||
/// Add every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -1019,19 +1038,34 @@ pub fn subtract_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> { | |
|
||
/// Subtract every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This doesn't detect overflow. Once overflowing, the result will wrap around. | ||
/// For an overflow-checking variant, use `subtract_scalar_checked` instead. | ||
pub fn subtract_scalar<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: Add<Output = T::Native> | ||
+ Sub<Output = T::Native> | ||
+ Mul<Output = T::Native> | ||
+ Div<Output = T::Native> | ||
+ Zero, | ||
T::Native: ArrowNativeTypeOp + Zero, | ||
{ | ||
Ok(unary(array, |value| value.sub_wrapping(scalar))) | ||
} | ||
|
||
/// Subtract every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, | ||
/// use `subtract_scalar` instead. | ||
pub fn subtract_scalar_checked<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp + Zero, | ||
{ | ||
Ok(unary(array, |value| value - scalar)) | ||
unary_checked(array, |value| value.sub_checked(scalar)) | ||
} | ||
|
||
/// Subtract every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -1115,21 +1149,34 @@ pub fn multiply_dyn(left: &dyn Array, right: &dyn Array) -> Result<ArrayRef> { | |
|
||
/// Multiply every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This doesn't detect overflow. Once overflowing, the result will wrap around. | ||
/// For an overflow-checking variant, use `multiply_scalar_checked` instead. | ||
pub fn multiply_scalar<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: Add<Output = T::Native> | ||
+ Sub<Output = T::Native> | ||
+ Mul<Output = T::Native> | ||
+ Div<Output = T::Native> | ||
+ Rem<Output = T::Native> | ||
+ Zero | ||
+ One, | ||
T::Native: ArrowNativeTypeOp + Zero + One, | ||
{ | ||
Ok(unary(array, |value| value * scalar)) | ||
Ok(unary(array, |value| value.mul_wrapping(scalar))) | ||
} | ||
|
||
/// Multiply every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. | ||
/// | ||
/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, | ||
/// use `multiply_scalar` instead. | ||
pub fn multiply_scalar_checked<T>( | ||
array: &PrimitiveArray<T>, | ||
scalar: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp + Zero + One, | ||
{ | ||
unary_checked(array, |value| value.mul_checked(scalar)) | ||
} | ||
|
||
/// Multiply every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -1236,21 +1283,41 @@ where | |
Ok(unary(array, |a| a % modulo)) | ||
} | ||
|
||
/// Divide every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. If the scalar is zero then it will panic. For a variant returning | ||
/// `Err` on division by zero, use `divide_scalar_checked` instead. | ||
/// | ||
/// This doesn't detect overflow. Once overflowing, the result will wrap around. | ||
/// For an overflow-checking variant, use `divide_scalar_checked` instead. | ||
pub fn divide_scalar<T>( | ||
array: &PrimitiveArray<T>, | ||
divisor: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: ArrowNativeTypeOp + Zero, | ||
{ | ||
Ok(unary(array, |a| a.div_wrapping(divisor))) | ||
} | ||
|
||
/// Divide every value in an array by a scalar. If any value in the array is null then the | ||
/// result is also null. If the scalar is zero then the result of this operation will be | ||
/// `Err(ArrowError::DivideByZero)`. | ||
pub fn divide_scalar<T>( | ||
/// | ||
/// This detects overflow and returns an `Err` for that. For an non-overflow-checking variant, | ||
/// use `divide_scalar` instead. | ||
pub fn divide_scalar_checked<T>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure of the value of a separate checked scalar kernel for division, given it only elides a single check of the scalar divisor. I would be tempted to leave it out for now, at least until #2647 is resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure, I've thought about it and wondered maybe #2647 can be resolved quickly. 😄 Let me remove division first. |
||
array: &PrimitiveArray<T>, | ||
divisor: T::Native, | ||
) -> Result<PrimitiveArray<T>> | ||
where | ||
T: datatypes::ArrowNumericType, | ||
T::Native: Div<Output = T::Native> + Zero, | ||
T::Native: ArrowNativeTypeOp + Zero, | ||
{ | ||
if divisor.is_zero() { | ||
return Err(ArrowError::DivideByZero); | ||
} | ||
Ok(unary(array, |a| a / divisor)) | ||
unary_checked(array, |a| a.div_checked(divisor)) | ||
} | ||
|
||
/// Divide every value in an array by a scalar. If any value in the array is null then the | ||
|
@@ -2245,4 +2312,66 @@ mod tests { | |
let overflow = divide_checked(&a, &b); | ||
overflow.expect_err("overflow should be detected"); | ||
} | ||
|
||
#[test] | ||
fn test_primitive_add_scalar_wrapping_overflow() { | ||
let a = Int32Array::from(vec![i32::MAX, i32::MIN]); | ||
|
||
let wrapped = add_scalar(&a, 1); | ||
let expected = Int32Array::from(vec![-2147483648, -2147483647]); | ||
assert_eq!(expected, wrapped.unwrap()); | ||
|
||
let overflow = add_scalar_checked(&a, 1); | ||
overflow.expect_err("overflow should be detected"); | ||
} | ||
|
||
#[test] | ||
fn test_primitive_subtract_scalar_wrapping_overflow() { | ||
let a = Int32Array::from(vec![-2]); | ||
|
||
let wrapped = subtract_scalar(&a, i32::MAX); | ||
let expected = Int32Array::from(vec![i32::MAX]); | ||
assert_eq!(expected, wrapped.unwrap()); | ||
|
||
let overflow = subtract_scalar_checked(&a, i32::MAX); | ||
overflow.expect_err("overflow should be detected"); | ||
} | ||
|
||
#[test] | ||
fn test_primitive_mul_scalar_wrapping_overflow() { | ||
let a = Int32Array::from(vec![10]); | ||
|
||
let wrapped = multiply_scalar(&a, i32::MAX); | ||
let expected = Int32Array::from(vec![-10]); | ||
assert_eq!(expected, wrapped.unwrap()); | ||
|
||
let overflow = multiply_scalar_checked(&a, i32::MAX); | ||
overflow.expect_err("overflow should be detected"); | ||
} | ||
|
||
#[test] | ||
fn test_primitive_div_scalar_wrapping_overflow() { | ||
let a = Int32Array::from(vec![i32::MIN]); | ||
|
||
let wrapped = divide_scalar(&a, -1); | ||
let expected = Int32Array::from(vec![-2147483648]); | ||
assert_eq!(expected, wrapped.unwrap()); | ||
|
||
let overflow = divide_scalar_checked(&a, -1); | ||
overflow.expect_err("overflow should be detected"); | ||
} | ||
|
||
#[test] | ||
#[should_panic(expected = "attempt to divide by zero")] | ||
fn test_primitive_div_scalar_divide_by_zero() { | ||
let a = Int32Array::from(vec![10]); | ||
divide_scalar(&a, 0).unwrap(); | ||
} | ||
|
||
#[test] | ||
fn test_primitive_div_scalar_checked_divide_by_zero() { | ||
let a = Int32Array::from(vec![10]); | ||
let err = divide_scalar_checked(&a, 0); | ||
err.expect_err("division by zero should be detected"); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -19,6 +19,7 @@ | |||||||||||||
|
||||||||||||||
use crate::array::{Array, ArrayData, ArrayRef, DictionaryArray, PrimitiveArray}; | ||||||||||||||
use crate::buffer::Buffer; | ||||||||||||||
use crate::datatypes::native_op::ArrowNativeTypeOp; | ||||||||||||||
use crate::datatypes::{ | ||||||||||||||
ArrowNumericType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type, | ||||||||||||||
Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type, | ||||||||||||||
|
@@ -83,6 +84,47 @@ where | |||||||||||||
PrimitiveArray::<O>::from(data) | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// A overflow-checking variant of `unary`. | ||||||||||||||
pub(crate) fn unary_checked<I, F, O>( | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you suggesting to rename it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My thought is to align with the method There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure what you mean to "align with the method Based on these comments, let me try to guess what you are suggesting, are you suggesting to change returning type of I'm okay for the change, but where do you think the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for the confused comments for your PR. I think the |
||||||||||||||
array: &PrimitiveArray<I>, | ||||||||||||||
op: F, | ||||||||||||||
) -> Result<PrimitiveArray<O>> | ||||||||||||||
where | ||||||||||||||
I: ArrowPrimitiveType, | ||||||||||||||
O: ArrowPrimitiveType, | ||||||||||||||
F: Fn(I::Native) -> Option<O::Native>, | ||||||||||||||
I::Native: ArrowNativeTypeOp, | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ?? There is a reason that the return type of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The result of |
||||||||||||||
{ | ||||||||||||||
let values: Result<Vec<O::Native>> = array | ||||||||||||||
.values() | ||||||||||||||
.iter() | ||||||||||||||
.map(|v| { | ||||||||||||||
let result = op(*v); | ||||||||||||||
if let Some(r) = result { | ||||||||||||||
Ok(r) | ||||||||||||||
} else { | ||||||||||||||
// Overflow | ||||||||||||||
Err(ArrowError::ComputeError(format!( | ||||||||||||||
"Overflow happened on: {:?}", | ||||||||||||||
*v | ||||||||||||||
))) | ||||||||||||||
} | ||||||||||||||
}) | ||||||||||||||
.collect(); | ||||||||||||||
|
||||||||||||||
let values = values?; | ||||||||||||||
|
||||||||||||||
// JUSTIFICATION | ||||||||||||||
// Benefit | ||||||||||||||
// ~60% speedup | ||||||||||||||
// Soundness | ||||||||||||||
// `values` is an iterator with a known size because arrays are sized. | ||||||||||||||
let buffer = unsafe { Buffer::from_trusted_len_iter(values.into_iter()) }; | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. try_from_trusted_len_iter would allow avoiding collecting into a Vec |
||||||||||||||
|
||||||||||||||
let data = into_primitive_array_data::<_, O>(array, buffer); | ||||||||||||||
Ok(PrimitiveArray::<O>::from(data)) | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
/// A helper function that applies an unary function to a dictionary array with primitive value type. | ||||||||||||||
#[allow(clippy::redundant_closure)] | ||||||||||||||
fn unary_dict<K, F, T>(array: &DictionaryArray<K>, op: F) -> Result<ArrayRef> | ||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
after this https://github.com/apache/arrow-rs/pull/2650/files#r963221429
you can define characteristic error message for this closures