Skip to content

Commit

Permalink
Implement data_part for intervals (#6071)
Browse files Browse the repository at this point in the history
Signed-off-by: Nick Cameron <[email protected]>
Co-authored-by: Andrew Lamb <[email protected]>
  • Loading branch information
nrc and alamb authored Jul 27, 2024
1 parent f42d242 commit e815d06
Showing 1 changed file with 299 additions and 5 deletions.
304 changes: 299 additions & 5 deletions arrow-arith/src/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use std::sync::Arc;

use arrow_array::cast::AsArray;
use cast::as_primitive_array;
use chrono::{Datelike, NaiveDateTime, Offset, TimeZone, Timelike, Utc};

use arrow_array::temporal_conversions::{
Expand All @@ -31,7 +32,7 @@ use arrow_array::timezone::Tz;
use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::ArrowNativeType;
use arrow_schema::{ArrowError, DataType};
use arrow_schema::{ArrowError, DataType, IntervalUnit};

/// Valid parts to extract from date/time/timestamp arrays.
///
Expand Down Expand Up @@ -111,6 +112,7 @@ where
/// - Date32/Date64
/// - Time32/Time64
/// - Timestamp
/// - Interval
///
/// Returns an [`Int32Array`] unless input was a dictionary type, in which case returns
/// the dictionary but with this function applied onto its values.
Expand All @@ -137,10 +139,21 @@ pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, ArrowErr
let array = Arc::new(array) as ArrayRef;
Ok(array)
}
// TODO: support interval
// DataType::Interval(_) => {
// todo!();
// }
DataType::Interval(IntervalUnit::YearMonth) => {
let array = as_primitive_array::<IntervalYearMonthType>(array).date_part(part)?;
let array = Arc::new(array) as ArrayRef;
Ok(array)
}
DataType::Interval(IntervalUnit::DayTime) => {
let array = as_primitive_array::<IntervalDayTimeType>(array).date_part(part)?;
let array = Arc::new(array) as ArrayRef;
Ok(array)
}
DataType::Interval(IntervalUnit::MonthDayNano) => {
let array = as_primitive_array::<IntervalMonthDayNanoType>(array).date_part(part)?;
let array = Arc::new(array) as ArrayRef;
Ok(array)
}
DataType::Dictionary(_, _) => {
let array = array.as_any_dictionary();
let values = date_part(array.values(), part)?;
Expand Down Expand Up @@ -387,6 +400,88 @@ impl ExtractDatePartExt for PrimitiveArray<TimestampNanosecondType> {
}
}

impl ExtractDatePartExt for PrimitiveArray<IntervalYearMonthType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
match part {
DatePart::Year => Ok(self.unary_opt(|d| Some(d / 12))),
DatePart::Month => Ok(self.unary_opt(|d| Some(d % 12))),

DatePart::Quarter
| DatePart::Week
| DatePart::Day
| DatePart::DayOfWeekSunday0
| DatePart::DayOfWeekMonday0
| DatePart::DayOfYear
| DatePart::Hour
| DatePart::Minute
| DatePart::Second
| DatePart::Millisecond
| DatePart::Microsecond
| DatePart::Nanosecond => {
return_compute_error_with!(format!("{part} does not support"), self.data_type())
}
}
}
}

impl ExtractDatePartExt for PrimitiveArray<IntervalDayTimeType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
match part {
DatePart::Week => Ok(self.unary_opt(|d| Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d| Some(d.days))),
DatePart::Hour => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 60 * 1_000)))),
DatePart::Minute => Ok(self.unary_opt(|d| Some(d.milliseconds / (60 * 1_000)))),
DatePart::Second => Ok(self.unary_opt(|d| Some(d.milliseconds / 1_000))),
DatePart::Millisecond => Ok(self.unary_opt(|d| Some(d.milliseconds))),
DatePart::Microsecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000))),
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.milliseconds.checked_mul(1_000_000))),

DatePart::Quarter
| DatePart::Year
| DatePart::Month
| DatePart::DayOfWeekSunday0
| DatePart::DayOfWeekMonday0
| DatePart::DayOfYear => {
return_compute_error_with!(format!("{part} does not support"), self.data_type())
}
}
}
}

impl ExtractDatePartExt for PrimitiveArray<IntervalMonthDayNanoType> {
fn date_part(&self, part: DatePart) -> Result<Int32Array, ArrowError> {
match part {
DatePart::Year => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months / 12))),
DatePart::Month => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.months))),
DatePart::Week => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days / 7))),
DatePart::Day => Ok(self.unary_opt(|d: IntervalMonthDayNano| Some(d.days))),
DatePart::Hour => {
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 60 * 1_000_000_000)).try_into().ok()))
}
DatePart::Minute => {
Ok(self.unary_opt(|d| (d.nanoseconds / (60 * 1_000_000_000)).try_into().ok()))
}
DatePart::Second => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000_000).try_into().ok()))
}
DatePart::Millisecond => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000_000).try_into().ok()))
}
DatePart::Microsecond => {
Ok(self.unary_opt(|d| (d.nanoseconds / 1_000).try_into().ok()))
}
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())),

DatePart::Quarter
| DatePart::DayOfWeekSunday0
| DatePart::DayOfWeekMonday0
| DatePart::DayOfYear => {
return_compute_error_with!(format!("{part} does not support"), self.data_type())
}
}
}
}

macro_rules! return_compute_error_with {
($msg:expr, $param:expr) => {
return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, $param))) }
Expand Down Expand Up @@ -1500,4 +1595,203 @@ mod tests {
ensure_returns_error(&Time64MicrosecondArray::from(vec![0]));
ensure_returns_error(&Time64NanosecondArray::from(vec![0]));
}

// IntervalDayTimeType week, day, hour, miute, second, mili, u, nano; invalid month, year; ignores the other part
// IntervalMonthDayNanoType year -> nano; days don't affect months, time doesn't affect days, time doesn't affect months (and vice versa)
#[test]
fn test_interval_year_month_array() {
let input: IntervalYearMonthArray = vec![0, 5, 24].into();

let actual = date_part(&input, DatePart::Year).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(2, actual.value(2));

let actual = date_part(&input, DatePart::Month).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(5, actual.value(1));
assert_eq!(0, actual.value(2));

assert!(date_part(&input, DatePart::Day).is_err());
assert!(date_part(&input, DatePart::Week).is_err());
}

#[test]
fn test_interval_day_time_array() {
let input: IntervalDayTimeArray = vec![
IntervalDayTime::ZERO,
IntervalDayTime::new(10, 42),
IntervalDayTime::new(10, 1042),
IntervalDayTime::new(10, MILLISECONDS_IN_DAY as i32 + 1),
]
.into();

// Time doesn't affect days.
let actual = date_part(&input, DatePart::Day).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(10, actual.value(1));
assert_eq!(10, actual.value(2));
assert_eq!(10, actual.value(3));

let actual = date_part(&input, DatePart::Week).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(1, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(1, actual.value(3));

// Days doesn't affect time.
let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42_000_000, actual.value(1));
assert_eq!(1_042_000_000, actual.value(2));
// Overflow returns zero.
assert_eq!(0, actual.value(3));

let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42_000, actual.value(1));
assert_eq!(1_042_000, actual.value(2));
// Overflow returns zero.
assert_eq!(0, actual.value(3));

let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
assert_eq!(1042, actual.value(2));
assert_eq!(MILLISECONDS_IN_DAY as i32 + 1, actual.value(3));

let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));
assert_eq!(24 * 60 * 60, actual.value(3));

let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(24 * 60, actual.value(3));

let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(0, actual.value(2));
assert_eq!(24, actual.value(3));

// Month and year are not valid (since days in month varies).
assert!(date_part(&input, DatePart::Month).is_err());
assert!(date_part(&input, DatePart::Year).is_err());
}

#[test]
fn test_interval_month_day_nano_array() {
let input: IntervalMonthDayNanoArray = vec![
IntervalMonthDayNano::ZERO,
IntervalMonthDayNano::new(5, 10, 42),
IntervalMonthDayNano::new(16, 35, MILLISECONDS_IN_DAY * 1_000_000 + 1),
]
.into();

// Year and month follow from month, but are not affected by days or nanos.
let actual = date_part(&input, DatePart::Year).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(1, actual.value(2));

let actual = date_part(&input, DatePart::Month).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(5, actual.value(1));
assert_eq!(16, actual.value(2));

// Week and day follow from day, but are not affected by months or nanos.
let actual = date_part(&input, DatePart::Week).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(1, actual.value(1));
assert_eq!(5, actual.value(2));

let actual = date_part(&input, DatePart::Day).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(10, actual.value(1));
assert_eq!(35, actual.value(2));

// Times follow from nanos, but are not affected by months or dats.
let actual = date_part(&input, DatePart::Hour).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24, actual.value(2));

let actual = date_part(&input, DatePart::Minute).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60, actual.value(2));

let actual = date_part(&input, DatePart::Second).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60 * 60, actual.value(2));

let actual = date_part(&input, DatePart::Millisecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
assert_eq!(24 * 60 * 60 * 1_000, actual.value(2));

let actual = date_part(&input, DatePart::Microsecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(0, actual.value(1));
// Overflow gives zero.
assert_eq!(0, actual.value(2));

let actual = date_part(&input, DatePart::Nanosecond).unwrap();
let actual = actual.as_primitive::<Int32Type>();
assert_eq!(0, actual.value(0));
assert_eq!(42, actual.value(1));
// Overflow gives zero.
assert_eq!(0, actual.value(2));
}

#[test]
fn test_interval_array_invalid_parts() {
fn ensure_returns_error(array: &dyn Array) {
let invalid_parts = [
DatePart::Quarter,
DatePart::DayOfWeekSunday0,
DatePart::DayOfWeekMonday0,
DatePart::DayOfYear,
];

for part in invalid_parts {
let err = date_part(array, part).unwrap_err();
let expected = format!(
"Compute error: {part} does not support: {}",
array.data_type()
);
assert_eq!(expected, err.to_string());
}
}

ensure_returns_error(&IntervalYearMonthArray::from(vec![0]));
ensure_returns_error(&IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]));
ensure_returns_error(&IntervalMonthDayNanoArray::from(vec![
IntervalMonthDayNano::ZERO,
]));
}
}

0 comments on commit e815d06

Please sign in to comment.