From 987ecc27ecdf8b5c965367519827c0bbf73274ef Mon Sep 17 00:00:00 2001 From: Sumit Date: Mon, 16 Aug 2021 23:09:46 +0200 Subject: [PATCH] allow casting from Timestamp based arrays to utf8 (#664) The change uses the existing `PrimitiveArray::value_as_datetime` to support casting from `Timestamp(_,_)` to `[Large]Utf8`. --- arrow/src/compute/kernels/cast.rs | 67 +++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/arrow/src/compute/kernels/cast.rs b/arrow/src/compute/kernels/cast.rs index af1e56da4169..7a470954672f 100644 --- a/arrow/src/compute/kernels/cast.rs +++ b/arrow/src/compute/kernels/cast.rs @@ -101,6 +101,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool { (LargeUtf8, Date64) => true, (LargeUtf8, Timestamp(TimeUnit::Nanosecond, None)) => true, (LargeUtf8, _) => DataType::is_numeric(to_type), + (Timestamp(_, _), Utf8) | (Timestamp(_, _), LargeUtf8) => true, (_, Utf8) | (_, LargeUtf8) => { DataType::is_numeric(from_type) || from_type == &Binary } @@ -468,6 +469,20 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), + Timestamp(unit, _) => match unit { + TimeUnit::Nanosecond => { + cast_timestamp_to_string::(array) + } + TimeUnit::Microsecond => { + cast_timestamp_to_string::(array) + } + TimeUnit::Millisecond => { + cast_timestamp_to_string::(array) + } + TimeUnit::Second => { + cast_timestamp_to_string::(array) + } + }, Binary => { let array = array.as_any().downcast_ref::().unwrap(); Ok(Arc::new( @@ -508,6 +523,20 @@ pub fn cast_with_options( Int64 => cast_numeric_to_string::(array), Float32 => cast_numeric_to_string::(array), Float64 => cast_numeric_to_string::(array), + Timestamp(unit, _) => match unit { + TimeUnit::Nanosecond => { + cast_timestamp_to_string::(array) + } + TimeUnit::Microsecond => { + cast_timestamp_to_string::(array) + } + TimeUnit::Millisecond => { +
cast_timestamp_to_string::(array) + } + TimeUnit::Second => { + cast_timestamp_to_string::(array) + } + }, Binary => { let array = array.as_any().downcast_ref::().unwrap(); Ok(Arc::new( @@ -1003,6 +1032,28 @@ where unsafe { PrimitiveArray::::from_trusted_len_iter(iter) } } +/// Cast timestamp types to Utf8/LargeUtf8 +fn cast_timestamp_to_string(array: &ArrayRef) -> Result +where + T: ArrowTemporalType + ArrowNumericType, + i64: From<::Native>, + OffsetSize: StringOffsetSizeTrait, +{ + let array = array.as_any().downcast_ref::>().unwrap(); + + Ok(Arc::new( + (0..array.len()) + .map(|ix| { + if array.is_null(ix) { + None + } else { + array.value_as_datetime(ix).map(|v| v.to_string()) + } + }) + .collect::>(), + )) +} + /// Cast numeric types to Utf8 fn cast_numeric_to_string(array: &ArrayRef) -> Result where @@ -2171,6 +2222,22 @@ mod tests { assert!(c.is_null(2)); } + #[test] + fn test_cast_timestamp_to_string() { + let a = TimestampMillisecondArray::from_opt_vec( + vec![Some(864000000005), Some(1545696000001), None], + Some("UTC".to_string()), + ); + let array = Arc::new(a) as ArrayRef; + dbg!(&array); + let b = cast(&array, &DataType::Utf8).unwrap(); + let c = b.as_any().downcast_ref::().unwrap(); + assert_eq!(&DataType::Utf8, c.data_type()); + assert_eq!("1997-05-19 00:00:00.005", c.value(0)); + assert_eq!("2018-12-25 00:00:00.001", c.value(1)); + assert!(c.is_null(2)); + } + #[test] fn test_cast_between_timestamps() { let a = TimestampMillisecondArray::from_opt_vec(