Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support to extract hours and years from timestamps with timezone #412

Merged
merged 1 commit into from
Sep 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 141 additions & 6 deletions src/compute/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,120 @@ use crate::array::*;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::temporal_conversions::*;
use crate::types::NativeType;
use crate::types::NaturalDataType;

use super::arity::unary;

/// Applies `extract` to every value of a timestamp `array`, interpreting each value in
/// `timezone`.
///
/// Each `i64` is converted to a naive UTC datetime according to `time_unit`, the offset
/// that `timezone` has at that instant is attached, and the resulting
/// [`chrono::DateTime`] is handed to `extract`.
///
/// The returned array always carries `A::DATA_TYPE`, so the logical type matches the
/// native type produced by `extract` (e.g. `u32` for hours, `i32` for years) regardless
/// of the time unit.
fn extract_impl<T, A, F>(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone: T,
    extract: F,
) -> PrimitiveArray<A>
where
    T: chrono::TimeZone,
    A: NativeType + NaturalDataType,
    F: Fn(chrono::DateTime<T>) -> A,
{
    // The arms differ only in the function used to decode the raw `i64`.
    match time_unit {
        TimeUnit::Second => {
            let op = |x| {
                let datetime = timestamp_s_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            // Must be `A::DATA_TYPE` like the other arms: a hard-coded `UInt32` would
            // mislabel the output whenever `A` is not `u32` (e.g. `i32` years).
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Millisecond => {
            let op = |x| {
                let datetime = timestamp_ms_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Microsecond => {
            let op = |x| {
                let datetime = timestamp_us_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
        TimeUnit::Nanosecond => {
            let op = |x| {
                let datetime = timestamp_ns_to_datetime(x);
                let offset = timezone.offset_from_utc_datetime(&datetime);
                extract(chrono::DateTime::<T>::from_utc(datetime, offset))
            };
            unary(array, op, A::DATA_TYPE)
        }
    }
}

/// Extracts the hour of each timestamp in `array`, using the timezone obtained by
/// passing `timezone_str` to `parse_offset_tz`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when `parse_offset_tz` yields no
/// timezone for `timezone_str`.
#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
fn chrono_tz_hour(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<u32>> {
    let tz = parse_offset_tz(timezone_str).ok_or_else(|| {
        ArrowError::InvalidArgumentError(format!(
            "timezone \"{}\" cannot be parsed",
            timezone_str
        ))
    })?;
    Ok(extract_impl(array, time_unit, tz, |datetime| datetime.hour()))
}

/// Stand-in for the hour extraction compiled when the `chrono-tz` feature is disabled.
///
/// # Errors
/// Always returns [`ArrowError::InvalidArgumentError`], reporting that `timezone_str`
/// cannot be parsed because the feature is not active. The array and time unit are
/// ignored.
#[cfg(not(feature = "chrono-tz"))]
fn chrono_tz_hour(
    _: &PrimitiveArray<i64>,
    _: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<u32>> {
    let message = format!(
        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
        timezone_str
    );
    Err(ArrowError::InvalidArgumentError(message))
}

/// Extracts the year of each timestamp in `array`, using the timezone obtained by
/// passing `timezone_str` to `parse_offset_tz`.
///
/// # Errors
/// Returns [`ArrowError::InvalidArgumentError`] when `parse_offset_tz` yields no
/// timezone for `timezone_str`.
#[cfg(feature = "chrono-tz")]
#[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))]
fn chrono_tz_year(
    array: &PrimitiveArray<i64>,
    time_unit: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<i32>> {
    let tz = parse_offset_tz(timezone_str).ok_or_else(|| {
        ArrowError::InvalidArgumentError(format!(
            "timezone \"{}\" cannot be parsed",
            timezone_str
        ))
    })?;
    Ok(extract_impl(array, time_unit, tz, |datetime| datetime.year()))
}

/// Stand-in for the year extraction compiled when the `chrono-tz` feature is disabled.
///
/// # Errors
/// Always returns [`ArrowError::InvalidArgumentError`], reporting that `timezone_str`
/// cannot be parsed because the feature is not active. The array and time unit are
/// ignored.
#[cfg(not(feature = "chrono-tz"))]
fn chrono_tz_year(
    _: &PrimitiveArray<i64>,
    _: TimeUnit,
    timezone_str: &str,
) -> Result<PrimitiveArray<i32>> {
    let message = format!(
        "timezone \"{}\" cannot be parsed (feature chrono-tz is not active)",
        timezone_str
    );
    Err(ArrowError::InvalidArgumentError(message))
}

/// Extracts the hours of a temporal array as [`PrimitiveArray<u32>`].
/// Use [`can_hour`] to check if this operation is supported for the target [`DataType`].
pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
let final_data_type = DataType::UInt32;
match array.data_type() {
Expand All @@ -37,7 +147,7 @@ pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
.unwrap();
Ok(unary(array, |x| time32s_to_time(x).hour(), final_data_type))
}
DataType::Time32(TimeUnit::Microsecond) => {
DataType::Time32(TimeUnit::Millisecond) => {
let array = array
.as_any()
.downcast_ref::<PrimitiveArray<i32>>()
Expand Down Expand Up @@ -105,6 +215,18 @@ pub fn hour(array: &dyn Array) -> Result<PrimitiveArray<u32>> {
};
Ok(unary(array, op, final_data_type))
}
DataType::Timestamp(time_unit, Some(timezone_str)) => {
let time_unit = *time_unit;
let timezone = parse_offset(timezone_str);

let array = array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = timezone {
Ok(extract_impl(array, time_unit, timezone, |x| x.hour()))
} else {
chrono_tz_hour(array, time_unit, timezone_str)
}
}
dt => Err(ArrowError::NotYetImplemented(format!(
"\"hour\" does not support type {:?}",
dt
Expand All @@ -129,16 +251,17 @@ pub fn can_hour(data_type: &DataType) -> bool {
matches!(
data_type,
DataType::Time32(TimeUnit::Second)
| DataType::Time32(TimeUnit::Microsecond)
| DataType::Time32(TimeUnit::Millisecond)
| DataType::Time64(TimeUnit::Microsecond)
| DataType::Time64(TimeUnit::Nanosecond)
| DataType::Date32
| DataType::Date64
| DataType::Timestamp(_, None)
| DataType::Timestamp(_, _)
)
}

/// Extracts the hours of a given temporal array as an array of integers
/// Extracts the years of a temporal array as [`PrimitiveArray<i32>`].
/// Use [`can_year`] to check if this operation is supported for the target [`DataType`].
pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
let final_data_type = DataType::Int32;
match array.data_type() {
Expand Down Expand Up @@ -177,6 +300,18 @@ pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
};
Ok(unary(array, op, final_data_type))
}
DataType::Timestamp(time_unit, Some(timezone_str)) => {
let time_unit = *time_unit;
let timezone = parse_offset(timezone_str);

let array = array.as_any().downcast_ref().unwrap();

if let Ok(timezone) = timezone {
Ok(extract_impl(array, time_unit, timezone, |x| x.year()))
} else {
chrono_tz_year(array, time_unit, timezone_str)
}
}
dt => Err(ArrowError::NotYetImplemented(format!(
"\"year\" does not support type {:?}",
dt
Expand All @@ -200,6 +335,6 @@ pub fn year(array: &dyn Array) -> Result<PrimitiveArray<i32>> {
pub fn can_year(data_type: &DataType) -> bool {
matches!(
data_type,
DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, None)
DataType::Date32 | DataType::Date64 | DataType::Timestamp(_, _)
)
}
46 changes: 42 additions & 4 deletions tests/it/compute/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ fn time64_micro_hour() {
}

#[test]
fn timestamp_micro_hour() {
fn naive_timestamp_micro_hour() {
let array = Int64Array::from(&[Some(37800000000), None])
.to(DataType::Timestamp(TimeUnit::Microsecond, None));

Expand All @@ -51,7 +51,7 @@ fn timestamp_micro_hour() {
}

#[test]
fn timestamp_date64_year() {
fn date64_year() {
let array = Int64Array::from(&[Some(1514764800000), None]).to(DataType::Date64);

let result = year(&array).unwrap();
Expand All @@ -60,7 +60,7 @@ fn timestamp_date64_year() {
}

#[test]
fn timestamp_date32_year() {
fn naive_timestamp_date32_year() {
let array = Int32Array::from(&[Some(15147), None]).to(DataType::Date32);

let result = year(&array).unwrap();
Expand All @@ -69,7 +69,7 @@ fn timestamp_date32_year() {
}

#[test]
fn timestamp_micro_year() {
fn naive_timestamp_micro_year() {
let array = Int64Array::from(&[Some(1612025847000000), None])
.to(DataType::Timestamp(TimeUnit::Microsecond, None));

Expand All @@ -78,6 +78,42 @@ fn timestamp_micro_year() {
assert_eq!(result, expected);
}

/// `hour` on a microsecond timestamp carrying a fixed `+01:00` offset.
#[test]
fn timestamp_micro_hour() {
    let data_type = DataType::Timestamp(TimeUnit::Microsecond, Some("+01:00".to_string()));
    let array = Int64Array::from(&[Some(1621877130000000), None]).to(data_type);

    let result = hour(&array).unwrap();

    // The null slot stays null; the valid slot is reported in the +01:00 offset.
    assert_eq!(result, UInt32Array::from(&[Some(18), None]));
}

/// `hour` on microsecond timestamps whose timezone is given by name rather than by
/// a numeric offset (requires the `chrono-tz` feature).
#[cfg(feature = "chrono-tz")]
#[test]
fn timestamp_micro_hour_tz() {
    // Mon May 24 2021 17:25:30 GMT+0000
    let timestamp: i64 = 1621877130000000;

    // Builds the two-slot array (one value, one null) for `timezone` and extracts hours.
    let hours_in = |timezone: &str| {
        let data_type = DataType::Timestamp(TimeUnit::Microsecond, Some(timezone.to_string()));
        let array = Int64Array::from(&[Some(timestamp), None]).to(data_type);
        hour(&array).unwrap()
    };

    let result = hours_in("GMT");
    assert_eq!(result, UInt32Array::from(&[Some(17), None]));

    // (Western European Summer Time in Lisbon) => +1 hour
    let result = hours_in("Europe/Lisbon");
    assert_eq!(result, UInt32Array::from(&[Some(18), None]));
}

#[test]
fn consistency_hour() {
use arrow2::array::new_null_array;
Expand All @@ -101,6 +137,7 @@ fn consistency_hour() {
Timestamp(TimeUnit::Millisecond, None),
Timestamp(TimeUnit::Microsecond, None),
Timestamp(TimeUnit::Nanosecond, None),
Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
Time64(TimeUnit::Microsecond),
Time64(TimeUnit::Nanosecond),
Date32,
Expand Down Expand Up @@ -150,6 +187,7 @@ fn consistency_year() {
Timestamp(TimeUnit::Millisecond, None),
Timestamp(TimeUnit::Microsecond, None),
Timestamp(TimeUnit::Nanosecond, None),
Timestamp(TimeUnit::Nanosecond, Some("+00:00".to_string())),
Time64(TimeUnit::Microsecond),
Time64(TimeUnit::Nanosecond),
Date32,
Expand Down