diff --git a/src/compute/arithmetics/decimal/div.rs b/src/compute/arithmetics/decimal/div.rs index 754c90a79e5..f6ca1533e2e 100644 --- a/src/compute/arithmetics/decimal/div.rs +++ b/src/compute/arithmetics/decimal/div.rs @@ -6,11 +6,12 @@ use crate::{ buffer::Buffer, compute::{ arithmetics::{ArrayCheckedDiv, ArrayDiv}, - arity::{binary, binary_checked}, + arity::{binary, binary_checked, unary}, utils::{check_same_len, combine_validities}, }, datatypes::DataType, error::{ArrowError, Result}, + scalar::{PrimitiveScalar, Scalar}, }; use super::{adjusted_precision_scale, get_parameters, max_value, number_digits}; @@ -67,6 +68,49 @@ pub fn div(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveA binary(lhs, rhs, lhs.data_type().clone(), op) } +/// Multiply a decimal [`PrimitiveArray`] with a [`PrimitiveScalar`] with the same precision and scale. If +/// the precision and scale is different, then an InvalidArgumentError is +/// returned. This function panics if the multiplied numbers result in a number +/// larger than the possible number for the selected precision. +pub fn div_scalar(lhs: &PrimitiveArray, rhs: &PrimitiveScalar) -> PrimitiveArray { + let (precision, scale) = get_parameters(lhs.data_type(), rhs.data_type()).unwrap(); + + let rhs = if let Some(rhs) = rhs.value() { + rhs + } else { + return PrimitiveArray::::new_null(lhs.data_type().clone(), lhs.len()); + }; + + let scale = 10i128.pow(scale as u32); + let max = max_value(precision); + + let op = move |a: i128| { + // The division is done using the numbers without scale. + // The dividend is scaled up to maintain precision after the + // division + + // 222.222 --> 222222000 + // 123.456 --> 123456 + // -------- --------- + // 1.800 <-- 1800 + let numeral: i128 = a * scale; + + // The division can overflow if the dividend is divided + // by zero. + let res: i128 = numeral.checked_div(rhs).expect("Found division by zero"); + + assert!( + res.abs() <= max, + "Overflow in multiplication presented for precision {}", + precision + ); + + res + }; + + unary(lhs, op, lhs.data_type().clone()) +} + /// Saturated division of two decimal primitive arrays with the same /// precision and scale. If the precision and scale is different, then an /// InvalidArgumentError is returned. If the result from the division is diff --git a/src/compute/arithmetics/decimal/mul.rs b/src/compute/arithmetics/decimal/mul.rs index 6b8289f418d..df3d77cef86 100644 --- a/src/compute/arithmetics/decimal/mul.rs +++ b/src/compute/arithmetics/decimal/mul.rs @@ -6,11 +6,12 @@ use crate::{ buffer::Buffer, compute::{ arithmetics::{ArrayCheckedMul, ArrayMul, ArraySaturatingMul}, - arity::{binary, binary_checked}, + arity::{binary, binary_checked, unary}, utils::{check_same_len, combine_validities}, }, datatypes::DataType, error::{ArrowError, Result}, + scalar::{PrimitiveScalar, Scalar}, }; use super::{adjusted_precision_scale, get_parameters, max_value, number_digits}; @@ -68,6 +69,52 @@ pub fn mul(lhs: &PrimitiveArray, rhs: &PrimitiveArray) -> PrimitiveA binary(lhs, rhs, lhs.data_type().clone(), op) } +/// Multiply a decimal [`PrimitiveArray`] with a [`PrimitiveScalar`] with the same precision and scale. If +/// the precision and scale is different, then an InvalidArgumentError is +/// returned. This function panics if the multiplied numbers result in a number +/// larger than the possible number for the selected precision. +pub fn mul_scalar(lhs: &PrimitiveArray, rhs: &PrimitiveScalar) -> PrimitiveArray { + let (precision, scale) = get_parameters(lhs.data_type(), rhs.data_type()).unwrap(); + + let rhs = if let Some(rhs) = rhs.value() { + rhs + } else { + return PrimitiveArray::::new_null(lhs.data_type().clone(), lhs.len()); + }; + + let scale = 10i128.pow(scale as u32); + let max = max_value(precision); + + let op = move |a: i128| { + // The multiplication between i128 can overflow if they are + // very large numbers. For that reason a checked + // multiplication is used. + let res: i128 = a + .checked_mul(rhs) + .expect("Mayor overflow for multiplication"); + + // The multiplication is done using the numbers without scale. + // The resulting scale of the value has to be corrected by + // dividing by (10^scale) + + // 111.111 --> 111111 + // 222.222 --> 222222 + // -------- ------- + // 24691.308 <-- 24691308642 + let res = res / scale; + + assert!( + res.abs() <= max, + "Overflow in multiplication presented for precision {}", + precision + ); + + res + }; + + unary(lhs, op, lhs.data_type().clone()) +} + /// Saturated multiplication of two decimal primitive arrays with the same /// precision and scale. If the precision and scale is different, then an /// InvalidArgumentError is returned. If the result from the multiplication is diff --git a/src/compute/arithmetics/mod.rs b/src/compute/arithmetics/mod.rs index 9a47889ee91..02948ef5823 100644 --- a/src/compute/arithmetics/mod.rs +++ b/src/compute/arithmetics/mod.rs @@ -20,6 +20,7 @@ use crate::{ array::Array, bitmap::Bitmap, datatypes::{DataType, IntervalUnit, TimeUnit}, + scalar::Scalar, types::NativeType, }; @@ -117,6 +118,22 @@ pub fn add(lhs: &dyn Array, rhs: &dyn Array) -> Box { ) } +/// Adds an [`Array`] and a [`Scalar`]. +/// # Panic +/// This function panics iff +/// * the opertion is not supported for the logical types (use [`can_add`] to check) +/// * the arrays have a different length +/// * one of the arrays is a timestamp with timezone and the timezone is not valid. +pub fn add_scalar(lhs: &dyn Array, rhs: &dyn Scalar) -> Box { + arith!( + lhs, + rhs, + add_scalar, + duration = add_duration_scalar, + interval = add_interval_scalar + ) +} + /// Returns whether two [`DataType`]s can be added by [`add`]. pub fn can_add(lhs: &DataType, rhs: &DataType) -> bool { use DataType::*; @@ -162,6 +179,22 @@ pub fn sub(lhs: &dyn Array, rhs: &dyn Array) -> Box { ) } +/// Adds an [`Array`] and a [`Scalar`]. +/// # Panic +/// This function panics iff +/// * the opertion is not supported for the logical types (use [`can_sub`] to check) +/// * the arrays have a different length +/// * one of the arrays is a timestamp with timezone and the timezone is not valid. +pub fn sub_scalar(lhs: &dyn Array, rhs: &dyn Scalar) -> Box { + arith!( + lhs, + rhs, + sub_scalar, + duration = sub_duration_scalar, + timestamp = sub_timestamps_scalar + ) +} + /// Returns whether two [`DataType`]s can be subtracted by [`sub`]. pub fn can_sub(lhs: &DataType, rhs: &DataType) -> bool { use DataType::*; @@ -199,6 +232,14 @@ pub fn mul(lhs: &dyn Array, rhs: &dyn Array) -> Box { arith!(lhs, rhs, mul, decimal = mul) } +/// Multiply an [`Array`] with a [`Scalar`]. +/// # Panic +/// This function panics iff +/// * the opertion is not supported for the logical types (use [`can_mul`] to check) +pub fn mul_scalar(lhs: &dyn Array, rhs: &dyn Scalar) -> Box { + arith!(lhs, rhs, mul_scalar, decimal = mul_scalar) +} + /// Returns whether two [`DataType`]s can be multiplied by [`mul`]. pub fn can_mul(lhs: &DataType, rhs: &DataType) -> bool { use DataType::*; @@ -227,6 +268,14 @@ pub fn div(lhs: &dyn Array, rhs: &dyn Array) -> Box { arith!(lhs, rhs, div, decimal = div) } +/// Divide an [`Array`] with a [`Scalar`]. +/// # Panic +/// This function panics iff +/// * the opertion is not supported for the logical types (use [`can_div`] to check) +pub fn div_scalar(lhs: &dyn Array, rhs: &dyn Scalar) -> Box { + arith!(lhs, rhs, div_scalar, decimal = div_scalar) +} + /// Returns whether two [`DataType`]s can be divided by [`div`]. pub fn can_div(lhs: &DataType, rhs: &DataType) -> bool { can_mul(lhs, rhs) diff --git a/src/compute/arithmetics/time.rs b/src/compute/arithmetics/time.rs index 9ef1930e95e..2bfd672694c 100644 --- a/src/compute/arithmetics/time.rs +++ b/src/compute/arithmetics/time.rs @@ -15,9 +15,10 @@ use num_traits::AsPrimitive; use crate::{ array::{Array, PrimitiveArray}, - compute::arity::binary, + compute::arity::{binary, unary}, datatypes::{DataType, TimeUnit}, error::{ArrowError, Result}, + scalar::{PrimitiveScalar, Scalar}, temporal_conversions, types::{months_days_ns, NativeType}, }; @@ -117,6 +118,31 @@ where binary(time, duration, time.data_type().clone(), op) } +/// Adds a duration to a time array (Timestamp, Time and Date). The timeunit +/// enum is used to scale correctly both arrays; adding seconds with seconds, +/// or milliseconds with milliseconds. +pub fn add_duration_scalar( + time: &PrimitiveArray, + duration: &PrimitiveScalar, +) -> PrimitiveArray +where + f64: AsPrimitive, + T: NativeType + Add, +{ + let scale = create_scale(time.data_type(), duration.data_type()).unwrap(); + let duration = if let Some(duration) = duration.value() { + duration + } else { + return PrimitiveArray::::new_null(time.data_type().clone(), time.len()); + }; + + // Closure for the binary operation. The closure contains the scale + // required to add a duration to the timestamp array. + let op = move |a: T| a + (duration as f64 * scale).as_(); + + unary(time, op, time.data_type().clone()) +} + /// Subtract a duration to a time array (Timestamp, Time and Date). The timeunit /// enum is used to scale correctly both arrays; adding seconds with seconds, /// or milliseconds with milliseconds. @@ -173,6 +199,29 @@ where binary(time, duration, time.data_type().clone(), op) } +/// Subtract a duration to a time array (Timestamp, Time and Date). The timeunit +/// enum is used to scale correctly both arrays; adding seconds with seconds, +/// or milliseconds with milliseconds. +pub fn sub_duration_scalar( + time: &PrimitiveArray, + duration: &PrimitiveScalar, +) -> PrimitiveArray +where + f64: AsPrimitive, + T: NativeType + Sub, +{ + let scale = create_scale(time.data_type(), duration.data_type()).unwrap(); + let duration = if let Some(duration) = duration.value() { + duration + } else { + return PrimitiveArray::::new_null(time.data_type().clone(), time.len()); + }; + + let op = move |a: T| a - (duration as f64 * scale).as_(); + + unary(time, op, time.data_type().clone()) +} + /// Calculates the difference between two timestamps returning an array of type /// Duration. The timeunit enum is used to scale correctly both arrays; /// subtracting seconds with seconds, or milliseconds with milliseconds. @@ -228,6 +277,40 @@ pub fn subtract_timestamps( } } +/// Calculates the difference between two timestamps as [`DataType::Duration`] with the same time scale. +pub fn sub_timestamps_scalar( + lhs: &PrimitiveArray, + rhs: &PrimitiveScalar, +) -> Result> { + let (scale, timeunit_a) = + if let (DataType::Timestamp(timeunit_a, None), DataType::Timestamp(timeunit_b, None)) = + (lhs.data_type(), rhs.data_type()) + { + ( + temporal_conversions::timeunit_scale(*timeunit_a, *timeunit_b), + timeunit_a, + ) + } else { + return Err(ArrowError::InvalidArgumentError( + "sub_timestamps_scalar requires both arguments to be timestamps without timezone" + .to_string(), + )); + }; + + let rhs = if let Some(value) = rhs.value() { + value + } else { + return Ok(PrimitiveArray::::new_null( + lhs.data_type().clone(), + lhs.len(), + )); + }; + + let op = move |a| a - (rhs as f64 * scale) as i64; + + Ok(unary(lhs, op, DataType::Duration(*timeunit_a))) +} + /// Adds an interval to a [`DataType::Timestamp`]. pub fn add_interval( timestamp: &PrimitiveArray, @@ -285,3 +368,67 @@ pub fn add_interval( )), } } + +/// Adds an interval to a [`DataType::Timestamp`]. +pub fn add_interval_scalar( + timestamp: &PrimitiveArray, + interval: &PrimitiveScalar, +) -> Result> { + let interval = if let Some(interval) = interval.value() { + interval + } else { + return Ok(PrimitiveArray::::new_null( + timestamp.data_type().clone(), + timestamp.len(), + )); + }; + + match timestamp.data_type().to_logical_type() { + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = temporal_conversions::parse_offset(timezone_str); + match timezone { + Ok(timezone) => Ok(unary( + timestamp, + |timestamp| { + temporal_conversions::add_interval( + timestamp, time_unit, interval, &timezone, + ) + }, + timestamp.data_type().clone(), + )), + #[cfg(feature = "chrono-tz")] + Err(_) => { + let timezone = temporal_conversions::parse_offset_tz(timezone_str)?; + Ok(unary( + timestamp, + |timestamp| { + temporal_conversions::add_interval( + timestamp, time_unit, interval, &timezone, + ) + }, + timestamp.data_type().clone(), + )) + } + #[cfg(not(feature = "chrono-tz"))] + _ => Err(ArrowError::InvalidArgumentError(format!( + "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", + timezone_str + ))), + } + } + DataType::Timestamp(time_unit, None) => { + let time_unit = *time_unit; + Ok(unary( + timestamp, + |timestamp| { + temporal_conversions::add_naive_interval(timestamp, time_unit, interval) + }, + timestamp.data_type().clone(), + )) + } + _ => Err(ArrowError::InvalidArgumentError( + "Adding an interval is only supported for `DataType::Timestamp`".to_string(), + )), + } +} diff --git a/tests/it/compute/arithmetics/time.rs b/tests/it/compute/arithmetics/time.rs index 0df9416bd5c..cbe53381bef 100644 --- a/tests/it/compute/arithmetics/time.rs +++ b/tests/it/compute/arithmetics/time.rs @@ -1,6 +1,7 @@ use arrow2::array::*; -use arrow2::compute::arithmetics::time::{add_duration, subtract_duration, subtract_timestamps}; +use arrow2::compute::arithmetics::time::*; use arrow2::datatypes::{DataType, TimeUnit}; +use arrow2::scalar::*; #[test] fn test_adding_timestamp() { @@ -19,6 +20,15 @@ fn test_adding_timestamp() { ); assert_eq!(result, expected); + + let duration = PrimitiveScalar::from(Some(10i64)).to(DataType::Duration(TimeUnit::Second)); + + let result = add_duration_scalar(×tamp, &duration); + let expected = + PrimitiveArray::from([Some(100010i64), Some(200010i64), None, Some(300010i64)]).to( + DataType::Timestamp(TimeUnit::Second, Some("America/New_York".to_string())), + ); + assert_eq!(result, expected); } #[test]