From a5cfec4ab44d54b5aeaf4fd7c0505510996f30ae Mon Sep 17 00:00:00 2001 From: VasanthakumarV Date: Wed, 22 Sep 2021 21:30:28 +0530 Subject: [PATCH] Add `month` and `day` temporal extractors --- src/compute/temporal.rs | 130 ++++++++++++++++++++++++++--------- tests/it/compute/temporal.rs | 20 ++++++ 2 files changed, 116 insertions(+), 34 deletions(-) diff --git a/src/compute/temporal.rs b/src/compute/temporal.rs index b098c890778..62c0e690cb5 100644 --- a/src/compute/temporal.rs +++ b/src/compute/temporal.rs @@ -100,21 +100,31 @@ fn chrono_tz_hour( #[cfg(feature = "chrono-tz")] #[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_year( +fn chrono_tz( array: &PrimitiveArray, time_unit: TimeUnit, timezone_str: &str, -) -> Result> { + op: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ let timezone = parse_offset_tz(timezone_str)?; - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + Ok(extract_impl(array, time_unit, timezone, op)) } #[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_year( +fn chrono_tz( _: &PrimitiveArray, _: TimeUnit, timezone_str: &str, -) -> Result> { + _: F, +) -> Result> +where + O: NativeType, + F: Fn(chrono::DateTime) -> O, +{ Err(ArrowError::InvalidArgumentError(format!( "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)", timezone_str @@ -249,54 +259,92 @@ pub fn can_hour(data_type: &DataType) -> bool { /// Extracts the years of a temporal array as [`PrimitiveArray`]. /// Use [`can_year`] to check if this operation is supported for the target [`DataType`]. pub fn year(array: &dyn Array) -> Result> { - let final_data_type = DataType::Int32; + if let DataType::Timestamp(time_unit, Some(timezone_str)) = array.data_type() { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.year())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.year()) + } + } else { + date_like(array, DataType::Int32, |x| x.year()) + } +} + +/// Extracts the months of a temporal array as [`PrimitiveArray`]. +/// Use [`can_month`] to check if this operation is supported for the target [`DataType`]. +pub fn month(array: &dyn Array) -> Result> { + if let DataType::Timestamp(time_unit, Some(timezone_str)) = array.data_type() { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.month())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.month()) + } + } else { + date_like(array, DataType::UInt32, |x| x.month()) + } +} + +/// Extracts the days of a temporal array as [`PrimitiveArray`]. +/// Use [`can_day`] to check if this operation is supported for the target [`DataType`]. +pub fn day(array: &dyn Array) -> Result> { + if let DataType::Timestamp(time_unit, Some(timezone_str)) = array.data_type() { + let time_unit = *time_unit; + let timezone = parse_offset(timezone_str); + + let array = array.as_any().downcast_ref().unwrap(); + + if let Ok(timezone) = timezone { + Ok(extract_impl(array, time_unit, timezone, |x| x.day())) + } else { + chrono_tz(array, time_unit, timezone_str, |x| x.day()) + } + } else { + date_like(array, DataType::UInt32, |x| x.day()) + } +} + +pub fn date_like(array: &dyn Array, data_type: DataType, op: F) -> Result> +where + O: NativeType, + F: Fn(chrono::NaiveDateTime) -> O, +{ match array.data_type() { DataType::Date32 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| date32_to_datetime(x).year(), - final_data_type, - )) + Ok(unary(array, |x| op(date32_to_datetime(x)), data_type)) } DataType::Date64 => { let array = array .as_any() .downcast_ref::>() .unwrap(); - Ok(unary( - array, - |x| date64_to_datetime(x).year(), - final_data_type, - )) + Ok(unary(array, |x| op(date64_to_datetime(x)), data_type)) } DataType::Timestamp(time_unit, None) => { let array = array .as_any() .downcast_ref::>() .unwrap(); - let op = match time_unit { - TimeUnit::Second => |x| timestamp_s_to_datetime(x).year(), - TimeUnit::Millisecond => |x| timestamp_ms_to_datetime(x).year(), - TimeUnit::Microsecond => |x| timestamp_us_to_datetime(x).year(), - TimeUnit::Nanosecond => |x| timestamp_ns_to_datetime(x).year(), + let func = match time_unit { + TimeUnit::Second => timestamp_s_to_datetime, + TimeUnit::Millisecond => timestamp_ms_to_datetime, + TimeUnit::Microsecond => timestamp_us_to_datetime, + TimeUnit::Nanosecond => timestamp_ns_to_datetime, }; - Ok(unary(array, op, final_data_type)) - } - DataType::Timestamp(time_unit, Some(timezone_str)) => { - let time_unit = *time_unit; - let timezone = parse_offset(timezone_str); - - let array = array.as_any().downcast_ref().unwrap(); - - if let Ok(timezone) = timezone { - Ok(extract_impl(array, time_unit, timezone, |x| x.year())) - } else { - chrono_tz_year(array, time_unit, timezone_str) - } + Ok(unary(array, |x| op(func(x)), data_type)) } dt => Err(ArrowError::NotYetImplemented(format!( "\"year\" does not support type {:?}", @@ -324,3 +372,17 @@ pub fn can_year(data_type: &DataType) -> bool { DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) ) } + +pub fn can_month(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} + +pub fn can_day(data_type: &DataType) -> bool { + matches!( + data_type, + DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _) + ) +} diff --git a/tests/it/compute/temporal.rs b/tests/it/compute/temporal.rs index 160d880a91a..2e9da140a2f 100644 --- a/tests/it/compute/temporal.rs +++ b/tests/it/compute/temporal.rs @@ -78,6 +78,26 @@ fn naive_timestamp_micro_year() { assert_eq!(result, expected); } +#[test] +fn date64_month() { + let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64); + let result = month(&array).unwrap(); + + let expected = UInt32Array::from(&[Some(1), None]); + + assert_eq!(result, expected); +} + +#[test] +fn date64_day() { + let array = Int64Array::from(&[Some(1614764800000), None]).to(DataType::Date64); + let result = day(&array).unwrap(); + + let expected = UInt32Array::from(&[Some(3), None]); + + assert_eq!(result, expected); +} + #[test] fn timestamp_micro_hour() { let array = Int64Array::from(&[Some(1621877130000000), None]).to(DataType::Timestamp(