diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs index d52af32f42c4..a665b7c06bad 100644 --- a/arrow-arith/src/temporal.rs +++ b/arrow-arith/src/temporal.rs @@ -20,6 +20,7 @@ use std::sync::Arc; use arrow_array::cast::AsArray; +use cast::as_primitive_array; use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc}; use arrow_array::temporal_conversions::{ @@ -31,7 +32,7 @@ use arrow_array::timezone::Tz; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::ArrowNativeType; -use arrow_schema::{ArrowError, DataType}; +use arrow_schema::{ArrowError, DataType, IntervalUnit}; /// Valid parts to extract from date/time/timestamp arrays. /// @@ -111,6 +112,7 @@ where /// - Date32/Date64 /// - Time32/Time64 /// - Timestamp +/// - Interval /// /// Returns an [`Int32Array`] unless input was a dictionary type, in which case returns /// the dictionary but with this function applied onto its values. @@ -137,10 +139,21 @@ pub fn date_part(array: &dyn Array, part: DatePart) -> Result { - // todo!(); - // } + DataType::Interval(IntervalUnit::YearMonth) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } + DataType::Interval(IntervalUnit::DayTime) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } + DataType::Interval(IntervalUnit::MonthDayNano) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } DataType::Dictionary(_, _) => { let array = array.as_any_dictionary(); let values = date_part(array.values(), part)?; @@ -387,6 +400,88 @@ impl ExtractDatePartExt for PrimitiveArray { } } +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Year => Ok(self.unary_opt(|d| Some(d / 12))), + DatePart::Month => Ok(self.unary_opt(|d| Some(d % 12))), + + DatePart::Quarter + | DatePart::Week + | DatePart::Day + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear + | DatePart::Hour + | DatePart::Minute + | DatePart::Second + | DatePart::Millisecond + | DatePart::Microsecond + | DatePart::Nanosecond => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))), + DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))), + DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 60 * 1_000)))), + DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000)))), + DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000))), + DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds))), + DatePart::Microsecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000))), + DatePart::Nanosecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000_000))), + + DatePart::Quarter + | DatePart::Year + | DatePart::Month + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months / 12))), + DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months))), + DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days / 7))), + DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days))), + DatePart::Hour => { + Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 * 1_000_000_000)).try_into().ok())) + } + DatePart::Minute => { + Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000)).try_into().ok())) + } + DatePart::Second => { + Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000_000).try_into().ok())) + } + DatePart::Millisecond => { + Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000).try_into().ok())) + } + DatePart::Microsecond => { + Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok())) + } + DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())), + + DatePart::Quarter + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + macro_rules! return_compute_error_with { ($msg:expr, $param:expr) => { return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, $param))) } @@ -1500,4 +1595,203 @@ mod tests { ensure_returns_error(&Time64MicrosecondArray::from(vec![0])); ensure_returns_error(&Time64NanosecondArray::from(vec![0])); } + + // IntervalDayTimeType week, day, hour, miute, second, mili, u, nano; invalid month, year; ignores the other part + // IntervalMonthDayNanoType year -> nano; days don't affect months, time doesn't affect days, time doesn't affect months (and vice versa) + #[test] + fn test_interval_year_month_array() { + let input: IntervalYearMonthArray = vec![0, 5, 24].into(); + + let actual = date_part(&input, DatePart::Year).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(2, actual.value(2)); + + let actual = date_part(&input, DatePart::Month).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(5, actual.value(1)); + assert_eq!(0, actual.value(2)); + + assert!(date_part(&input, DatePart::Day).is_err()); + assert!(date_part(&input, DatePart::Week).is_err()); + } + + #[test] + fn test_interval_day_time_array() { + let input: IntervalDayTimeArray = vec![ + IntervalDayTime::ZERO, + IntervalDayTime::new(10, 42), + IntervalDayTime::new(10, 1042), + IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1), + ] + .into(); + + // Time doesn't affect days. + let actual = date_part(&input, DatePart::Day).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(10, actual.value(1)); + assert_eq!(10, actual.value(2)); + assert_eq!(10, actual.value(3)); + + let actual = date_part(&input, DatePart::Week).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(1, actual.value(1)); + assert_eq!(1, actual.value(2)); + assert_eq!(1, actual.value(3)); + + // Days doesn't affect time. + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000_000, actual.value(1)); + assert_eq!(1_042_000_000, actual.value(2)); + // Overflow returns zero. + assert_eq!(0, actual.value(3)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000, actual.value(1)); + assert_eq!(1_042_000, actual.value(2)); + // Overflow returns zero. + assert_eq!(0, actual.value(3)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + assert_eq!(1042, actual.value(2)); + assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3)); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(1, actual.value(2)); + assert_eq!(24 * 60 * 60, actual.value(3)); + + let actual = date_part(&input, DatePart::Minute).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + assert_eq!(24 * 60, actual.value(3)); + + let actual = date_part(&input, DatePart::Hour).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + assert_eq!(24, actual.value(3)); + + // Month and year are not valid (since days in month varies). + assert!(date_part(&input, DatePart::Month).is_err()); + assert!(date_part(&input, DatePart::Year).is_err()); + } + + #[test] + fn test_interval_month_day_nano_array() { + let input: IntervalMonthDayNanoArray = vec![ + IntervalMonthDayNano::ZERO, + IntervalMonthDayNano::new(5, 10, 42), + IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000 + 1), + ] + .into(); + + // Year and month follow from month, but are not affected by days or nanos. + let actual = date_part(&input, DatePart::Year).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(1, actual.value(2)); + + let actual = date_part(&input, DatePart::Month).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(5, actual.value(1)); + assert_eq!(16, actual.value(2)); + + // Week and day follow from day, but are not affected by months or nanos. + let actual = date_part(&input, DatePart::Week).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(1, actual.value(1)); + assert_eq!(5, actual.value(2)); + + let actual = date_part(&input, DatePart::Day).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(10, actual.value(1)); + assert_eq!(35, actual.value(2)); + + // Times follow from nanos, but are not affected by months or dats. + let actual = date_part(&input, DatePart::Hour).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(24, actual.value(2)); + + let actual = date_part(&input, DatePart::Minute).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(24 * 60, actual.value(2)); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(24 * 60 * 60, actual.value(2)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(24 * 60 * 60 * 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + // Overflow gives zero. + assert_eq!(0, actual.value(2)); + + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + // Overflow gives zero. + assert_eq!(0, actual.value(2)); + } + + #[test] + fn test_interval_array_invalid_parts() { + fn ensure_returns_error(array: &dyn Array) { + let invalid_parts = [ + DatePart::Quarter, + DatePart::DayOfWeekSunday0, + DatePart::DayOfWeekMonday0, + DatePart::DayOfYear, + ]; + + for part in invalid_parts { + let err = date_part(array, part).unwrap_err(); + let expected = format!( + "Compute error: {part} does not support: {}", + array.data_type() + ); + assert_eq!(expected, err.to_string()); + } + } + + ensure_returns_error(&IntervalYearMonthArray::from(vec![0])); + ensure_returns_error(&IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO])); + ensure_returns_error(&IntervalMonthDayNanoArray::from(vec![ + IntervalMonthDayNano::ZERO, + ])); + } }