diff --git a/src/array/display.rs b/src/array/display.rs index 059c5e56feb..c73e2b8d17f 100644 --- a/src/array/display.rs +++ b/src/array/display.rs @@ -66,56 +66,31 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box Strin dyn_primitive!(array, i64, temporal_conversions::time64ns_to_time) } Time64(_) => unreachable!(), // remaining are not valid - Timestamp(TimeUnit::Second, tz) => { + Timestamp(time_unit, tz) => { if let Some(tz) = tz { - let offset = temporal_conversions::parse_offset(tz).unwrap(); - dyn_primitive!(array, i64, |x| { - chrono::DateTime::::from_utc( - temporal_conversions::timestamp_s_to_datetime(x), - offset, - ) - }) - } else { - dyn_primitive!(array, i64, temporal_conversions::timestamp_s_to_datetime) - } - } - Timestamp(TimeUnit::Millisecond, tz) => { - if let Some(tz) = tz { - let offset = temporal_conversions::parse_offset(tz).unwrap(); - dyn_primitive!(array, i64, |x| { - chrono::DateTime::::from_utc( - temporal_conversions::timestamp_ms_to_datetime(x), - offset, - ) - }) - } else { - dyn_primitive!(array, i64, temporal_conversions::timestamp_ms_to_datetime) - } - } - Timestamp(TimeUnit::Microsecond, tz) => { - if let Some(tz) = tz { - let offset = temporal_conversions::parse_offset(tz).unwrap(); - dyn_primitive!(array, i64, |x| { - chrono::DateTime::::from_utc( - temporal_conversions::timestamp_us_to_datetime(x), - offset, - ) - }) + let timezone = temporal_conversions::parse_offset(tz); + match timezone { + Ok(timezone) => { + dyn_primitive!(array, i64, |time| { + temporal_conversions::timestamp_to_datetime(time, *time_unit, &timezone) + }) + } + #[cfg(feature = "chrono-tz")] + Err(_) => { + let timezone = temporal_conversions::parse_offset_tz(tz).unwrap(); + dyn_primitive!(array, i64, |time| { + temporal_conversions::timestamp_to_datetime(time, *time_unit, &timezone) + }) + } + #[cfg(not(feature = "chrono-tz"))] + _ => panic!( + "Invalid Offset format (must be [-]00:00) or chrono-tz feature not active" + ), + } } else { - 
dyn_primitive!(array, i64, temporal_conversions::timestamp_us_to_datetime) - } - } - Timestamp(TimeUnit::Nanosecond, tz) => { - if let Some(tz) = tz { - let offset = temporal_conversions::parse_offset(tz).unwrap(); - dyn_primitive!(array, i64, |x| { - chrono::DateTime::::from_utc( - temporal_conversions::timestamp_ns_to_datetime(x), - offset, - ) + dyn_primitive!(array, i64, |time| { + temporal_conversions::timestamp_to_naive_datetime(time, *time_unit) }) - } else { - dyn_primitive!(array, i64, temporal_conversions::timestamp_ns_to_datetime) } } Interval(IntervalUnit::YearMonth) => { diff --git a/src/compute/arithmetics/mod.rs b/src/compute/arithmetics/mod.rs index 3414634dfc3..3ca6c51b17c 100644 --- a/src/compute/arithmetics/mod.rs +++ b/src/compute/arithmetics/mod.rs @@ -63,7 +63,7 @@ use std::ops::{Add, Div, Mul, Neg, Rem, Sub}; use num_traits::{NumCast, Zero}; -use crate::datatypes::{DataType, TimeUnit}; +use crate::datatypes::{DataType, IntervalUnit, TimeUnit}; use crate::error::{ArrowError, Result}; use crate::types::NativeType; use crate::{array::*, bitmap::Bitmap}; @@ -145,6 +145,11 @@ pub fn arithmetic(lhs: &dyn Array, op: Operator, rhs: &dyn Array) -> Result(lhs, rhs).map(|x| Box::new(x) as Box) } + (Timestamp(_, _), Add, Interval(IntervalUnit::MonthDayNano)) => { + let lhs = lhs.as_any().downcast_ref().unwrap(); + let rhs = rhs.as_any().downcast_ref().unwrap(); + time::add_interval(lhs, rhs).map(|x| Box::new(x) as Box) + } (Time64(TimeUnit::Microsecond), Subtract, Duration(_)) | (Time64(TimeUnit::Nanosecond), Subtract, Duration(_)) | (Date64, Subtract, Duration(_)) @@ -214,6 +219,7 @@ pub fn can_arithmetic(lhs: &DataType, op: Operator, rhs: &DataType) -> bool { | (Time64(TimeUnit::Nanosecond), Add, Duration(_)) | (Timestamp(_, _), Subtract, Duration(_)) | (Timestamp(_, _), Add, Duration(_)) + | (Timestamp(_, _), Add, Interval(IntervalUnit::MonthDayNano)) | (Timestamp(_, None), Subtract, Timestamp(_, None)) ) } @@ -462,6 +468,7 @@ mod tests { 
Duration(TimeUnit::Millisecond), Duration(TimeUnit::Microsecond), Duration(TimeUnit::Nanosecond), + Interval(IntervalUnit::MonthDayNano), ]; let operators = vec![ Operator::Add, diff --git a/src/compute/arithmetics/time.rs b/src/compute/arithmetics/time.rs index f68fd7e7cdf..601457f6e12 100644 --- a/src/compute/arithmetics/time.rs +++ b/src/compute/arithmetics/time.rs @@ -18,8 +18,8 @@ use crate::{ compute::arity::binary, datatypes::{DataType, TimeUnit}, error::{ArrowError, Result}, - temporal_conversions::{timeunit_scale, SECONDS_IN_DAY}, - types::NativeType, + temporal_conversions, + types::{months_days_ns, NativeType}, }; /// Creates the scale required to add or subtract a Duration to a time array @@ -36,20 +36,21 @@ fn create_scale(lhs: &DataType, rhs: &DataType) -> Result { | (DataType::Time32(timeunit_a), DataType::Duration(timeunit_b)) | (DataType::Time64(timeunit_a), DataType::Duration(timeunit_b)) => { // The scale is based on the TimeUnit that each of the numbers have. - timeunit_scale(*timeunit_a, *timeunit_b) + temporal_conversions::timeunit_scale(*timeunit_a, *timeunit_b) } (DataType::Date32, DataType::Duration(timeunit)) => { // Date32 represents the time elapsed time since UNIX epoch // (1970-01-01) in days (32 bits). The duration value has to be // scaled to days to be able to add the value to the Date. - timeunit_scale(TimeUnit::Second, *timeunit) / SECONDS_IN_DAY as f64 + temporal_conversions::timeunit_scale(TimeUnit::Second, *timeunit) + / temporal_conversions::SECONDS_IN_DAY as f64 } (DataType::Date64, DataType::Duration(timeunit)) => { // Date64 represents the time elapsed time since UNIX epoch // (1970-01-01) in milliseconds (64 bits). The duration value has // to be scaled to milliseconds to be able to add the value to the // Date. 
- timeunit_scale(TimeUnit::Millisecond, *timeunit) + temporal_conversions::timeunit_scale(TimeUnit::Millisecond, *timeunit) } _ => { return Err(ArrowError::InvalidArgumentError( @@ -216,7 +217,7 @@ pub fn subtract_timestamps( (DataType::Timestamp(timeunit_a, None), DataType::Timestamp(timeunit_b, None)) => { // Closure for the binary operation. The closure contains the scale // required to calculate the difference between the timestamps. - let scale = timeunit_scale(*timeunit_a, *timeunit_b); + let scale = temporal_conversions::timeunit_scale(*timeunit_a, *timeunit_b); let op = move |a, b| a - (b as f64 * scale) as i64; binary(lhs, rhs, DataType::Duration(*timeunit_a), op) @@ -227,6 +228,64 @@ pub fn subtract_timestamps( } } +/// Adds an interval to a [`DataType::Timestamp`]. +pub fn add_interval( + timestamp: &PrimitiveArray, + interval: &PrimitiveArray, +) -> Result> { + match timestamp.data_type().to_logical_type() { + DataType::Timestamp(time_unit, Some(timezone_str)) => { + let time_unit = *time_unit; + let timezone = temporal_conversions::parse_offset(timezone_str); + match timezone { + Ok(timezone) => binary( + timestamp, + interval, + timestamp.data_type().clone(), + |timestamp, interval| { + temporal_conversions::add_interval( + timestamp, time_unit, interval, &timezone, + ) + }, + ), + #[cfg(feature = "chrono-tz")] + Err(_) => { + let timezone = temporal_conversions::parse_offset_tz(timezone_str)?; + binary( + timestamp, + interval, + timestamp.data_type().clone(), + |timestamp, interval| { + temporal_conversions::add_interval( + timestamp, time_unit, interval, &timezone, + ) + }, + ) + } + #[cfg(not(feature = "chrono-tz"))] + _ => Err(ArrowError::InvalidArgumentError(format!( + "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", + timezone_str + ))), + } + } + DataType::Timestamp(time_unit, None) => { + let time_unit = *time_unit; + binary( + timestamp, + interval, + timestamp.data_type().clone(), + |timestamp, interval| { + 
temporal_conversions::add_naive_interval(timestamp, time_unit, interval) + }, + ) + } + _ => Err(ArrowError::InvalidArgumentError( + "Adding an interval is only supported for `DataType::Timestamp`".to_string(), + )), + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/compute/cast/primitive_to.rs b/src/compute/cast/primitive_to.rs index 508c989a2ca..cfe101ac5bf 100644 --- a/src/compute/cast/primitive_to.rs +++ b/src/compute/cast/primitive_to.rs @@ -5,7 +5,6 @@ use crate::{ bitmap::Bitmap, compute::arity::unary, datatypes::{DataType, TimeUnit}, - error::ArrowError, temporal_conversions::*, types::NativeType, }; @@ -329,17 +328,10 @@ fn chrono_tz_timestamp_to_utf8( time_unit: TimeUnit, timezone_str: &str, ) -> Result> { - let timezone = parse_offset_tz(timezone_str); - if let Some(timezone) = timezone { - Ok(timestamp_to_utf8_impl::( - from, time_unit, timezone, - )) - } else { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed", - timezone_str - ))) - } + let timezone = parse_offset_tz(timezone_str)?; + Ok(timestamp_to_utf8_impl::( + from, time_unit, timezone, + )) } #[cfg(not(feature = "chrono-tz"))] @@ -348,6 +340,7 @@ fn chrono_tz_timestamp_to_utf8( _: TimeUnit, timezone_str: &str, ) -> Result> { + use crate::error::ArrowError; Err(ArrowError::InvalidArgumentError(format!( "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", timezone_str diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index 2610d8d8a67..b098c890778 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -82,15 +82,8 @@ fn chrono_tz_hour( time_unit: TimeUnit, timezone_str: &str, ) -> Result> { - let timezone = parse_offset_tz(timezone_str); - if let Some(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) - } else { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed", - timezone_str - ))) - } + let timezone = parse_offset_tz(timezone_str)?; + 
Ok(extract_impl(array, time_unit, timezone, |x| x.hour())) } #[cfg(not(feature = "chrono-tz"))] @@ -112,15 +105,8 @@ fn chrono_tz_year( time_unit: TimeUnit, timezone_str: &str, ) -> Result> { - let timezone = parse_offset_tz(timezone_str); - if let Some(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) - } else { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed", - timezone_str - ))) - } + let timezone = parse_offset_tz(timezone_str)?; + Ok(extract_impl(array, time_unit, timezone, |x| x.year())) } #[cfg(not(feature = "chrono-tz"))] diff --git a/src/temporal_conversions.rs b/src/temporal_conversions.rs index f4b4ad5f313..32a79a81852 100644 --- a/src/temporal_conversions.rs +++ b/src/temporal_conversions.rs @@ -2,15 +2,18 @@ use chrono::{ format::{parse, Parsed, StrftimeItems}, - FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, + Datelike, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime, }; -use crate::datatypes::{DataType, TimeUnit}; use crate::error::Result; use crate::{ array::{Offset, PrimitiveArray, Utf8Array}, error::ArrowError, }; +use crate::{ + datatypes::{DataType, TimeUnit}, + types::months_days_ns, +}; /// Number of seconds in a day pub const SECONDS_IN_DAY: i64 = 86_400; @@ -98,8 +101,8 @@ pub fn time64ns_to_time(v: i64) -> NaiveTime { /// converts a `i64` representing a `timestamp(s)` to [`NaiveDateTime`] #[inline] -pub fn timestamp_s_to_datetime(v: i64) -> NaiveDateTime { - NaiveDateTime::from_timestamp(v, 0) +pub fn timestamp_s_to_datetime(seconds: i64) -> NaiveDateTime { + NaiveDateTime::from_timestamp(seconds, 0) } /// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`] @@ -135,6 +138,27 @@ pub fn timestamp_ns_to_datetime(v: i64) -> NaiveDateTime { ) } +/// Converts a timestamp in `time_unit` into a [`chrono::NaiveDateTime`] (no timezone is applied).
+#[inline] +pub fn timestamp_to_naive_datetime(timestamp: i64, time_unit: TimeUnit) -> chrono::NaiveDateTime { + match time_unit { + TimeUnit::Second => timestamp_s_to_datetime(timestamp), + TimeUnit::Millisecond => timestamp_ms_to_datetime(timestamp), + TimeUnit::Microsecond => timestamp_us_to_datetime(timestamp), + TimeUnit::Nanosecond => timestamp_ns_to_datetime(timestamp), + } +} + +/// Converts a timestamp in `time_unit` and `timezone` into [`chrono::DateTime`]. +#[inline] +pub fn timestamp_to_datetime( + timestamp: i64, + time_unit: TimeUnit, + timezone: &T, +) -> chrono::DateTime { + timezone.from_utc_datetime(&timestamp_to_naive_datetime(timestamp, time_unit)) +} + /// Calculates the scale factor between two TimeUnits. The function returns the /// scale that should multiply the TimeUnit "b" to have the same time scale as /// the TimeUnit "a". @@ -238,8 +262,10 @@ fn utf8_to_timestamp_ns_impl( #[cfg(feature = "chrono-tz")] #[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -pub(crate) fn parse_offset_tz(tz: &str) -> Option { - tz.parse::().ok() +pub fn parse_offset_tz(timezone: &str) -> Result { + timezone.parse::().map_err(|_| { + ArrowError::InvalidArgumentError(format!("timezone \"{}\" cannot be parsed", timezone)) + }) } #[cfg(feature = "chrono-tz")] @@ -249,15 +275,8 @@ fn chrono_tz_utf_to_timestamp_ns( fmt: &str, timezone: String, ) -> Result> { - let tz = parse_offset_tz(&timezone); - if let Some(tz) = tz { - Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz)) - } else { - Err(ArrowError::InvalidArgumentError(format!( - "timezone \"{}\" cannot be parsed", - timezone - ))) - } + let tz = parse_offset_tz(&timezone)?; + Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz)) } #[cfg(not(feature = "chrono-tz"))] @@ -308,3 +327,72 @@ pub fn utf8_to_naive_timestamp_ns( PrimitiveArray::from_trusted_len_iter(iter).to(DataType::Timestamp(TimeUnit::Nanosecond, None)) } + +fn add_month(year: i32, month: u32, months: i32) -> chrono::NaiveDate { + let new_year =
(year * 12 + (month - 1) as i32 + months) / 12; + let new_month = (year * 12 + (month - 1) as i32 + months) % 12 + 1; + chrono::NaiveDate::from_ymd(new_year, new_month as u32, 1) +} + +fn get_days_between_months(year: i32, month: u32, months: i32) -> i64 { + add_month(year, month, months) + .signed_duration_since(chrono::NaiveDate::from_ymd(year, month, 1)) + .num_days() +} + +/// Adds an `interval` to a `timestamp` in `time_unit` units without timezone. +#[inline] +pub fn add_naive_interval(timestamp: i64, time_unit: TimeUnit, interval: months_days_ns) -> i64 { + // convert the timestamp (in `time_unit` units) to a naive DateTime. + let datetime = match time_unit { + TimeUnit::Second => timestamp_s_to_datetime(timestamp), + TimeUnit::Millisecond => timestamp_ms_to_datetime(timestamp), + TimeUnit::Microsecond => timestamp_us_to_datetime(timestamp), + TimeUnit::Nanosecond => timestamp_ns_to_datetime(timestamp), + }; + + // compute the number of days in the interval, which depends on the particular year and month (leap days) + let delta_days = get_days_between_months(datetime.year(), datetime.month(), interval.months()) + + interval.days() as i64; + + // add; no leap hours are considered + let new_datetime_tz = datetime + + chrono::Duration::nanoseconds(delta_days * 24 * 60 * 60 * 1_000_000_000 + interval.ns()); + + // convert back to the target unit + match time_unit { + TimeUnit::Second => new_datetime_tz.timestamp_millis() / 1000, + TimeUnit::Millisecond => new_datetime_tz.timestamp_millis(), + TimeUnit::Microsecond => new_datetime_tz.timestamp_nanos() / 1000, + TimeUnit::Nanosecond => new_datetime_tz.timestamp_nanos(), + } +} + +/// Adds an `interval` to a `timestamp` in `time_unit` units and timezone `timezone`. +#[inline] +pub fn add_interval( + timestamp: i64, + time_unit: TimeUnit, + interval: months_days_ns, + timezone: &T, +) -> i64 { + // convert the timestamp (in `time_unit` units) to a DateTime in `timezone`.
+ let datetime_tz = timestamp_to_datetime(timestamp, time_unit, timezone); + + // compute the number of days in the interval, which depends on the particular year and month (leap days) + let delta_days = + get_days_between_months(datetime_tz.year(), datetime_tz.month(), interval.months()) + + interval.days() as i64; + + // add; tz will take care of leap hours + let new_datetime_tz = datetime_tz + + chrono::Duration::nanoseconds(delta_days * 24 * 60 * 60 * 1_000_000_000 + interval.ns()); + + // convert back to the target unit + match time_unit { + TimeUnit::Second => new_datetime_tz.timestamp_millis() / 1000, + TimeUnit::Millisecond => new_datetime_tz.timestamp_millis(), + TimeUnit::Microsecond => new_datetime_tz.timestamp_nanos() / 1000, + TimeUnit::Nanosecond => new_datetime_tz.timestamp_nanos(), + } +} diff --git a/tests/it/temporal_conversions.rs b/tests/it/temporal_conversions.rs index 94e933728dc..672c4f04203 100644 --- a/tests/it/temporal_conversions.rs +++ b/tests/it/temporal_conversions.rs @@ -1,5 +1,7 @@ use arrow2::array::*; +use arrow2::datatypes::TimeUnit; use arrow2::temporal_conversions; +use arrow2::types::months_days_ns; #[test] fn naive() { @@ -64,3 +66,76 @@ fn tz_aware_no_timezone() { let r = temporal_conversions::utf8_to_timestamp_ns(&array, fmt, tz).unwrap(); assert_eq!(format!("{}", r), expected); } + +#[test] +fn add_interval_fixed_offset() { + // 1972 is a leap year, so February has a 29th day.
+ let timestamp = 68086800; // Mon Feb 28 1972 01:00:00 GMT+0000 + let timeunit = TimeUnit::Second; + let timezone = temporal_conversions::parse_offset("+01:00").unwrap(); + + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(0, 1, 60_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("1972-02-29 02:01:00 +01:00", format!("{}", r)); + + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(1, 1, 60_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("1972-03-29 02:01:00 +01:00", format!("{}", r)); + + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(24, 1, 60_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("1974-03-01 02:01:00 +01:00", format!("{}", r)); + + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(-1, 1, 60_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("1972-01-29 02:01:00 +01:00", format!("{}", r)); +} + +#[cfg(feature = "chrono-tz")] +#[test] +fn add_interval_timezone() { + // current time is Sun Mar 29 2020 00:00:00 GMT+0000 (Western European Standard Time) + // 1 hour later is Sun Mar 29 2020 02:00:00 GMT+0100 (Western European Summer Time) + let timestamp = 1585440000; + let timeunit = TimeUnit::Second; + let timezone = temporal_conversions::parse_offset_tz("Europe/Lisbon").unwrap(); + + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(0, 0, 60 * 60 * 1_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("2020-03-29 02:00:00 WEST", format!("{}", r)); + + // crosses two summer time 
changes and thus adds only 1 hour + let r = temporal_conversions::add_interval( + timestamp, + timeunit, + months_days_ns::new(7, 0, 60 * 60 * 1_000_000_000), + &timezone, + ); + let r = temporal_conversions::timestamp_to_datetime(r, timeunit, &timezone); + assert_eq!("2020-10-29 01:00:00 WET", format!("{}", r)); +}