From 38b8232c2c182702ea450200aeddd4edfff82c64 Mon Sep 17 00:00:00 2001
From: Ritchie Vink
Date: Sun, 6 Feb 2022 09:35:26 +0100
Subject: [PATCH] add json serialization of timestamp/date32/date64 (#814)

---
 src/io/json/write/serialize.rs | 73 ++++++++++++++++++++++++++++++++++++++
 tests/it/io/json/write.rs      | 41 +++++++++++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/src/io/json/write/serialize.rs b/src/io/json/write/serialize.rs
index b74e1c36302..59bab0bf646 100644
--- a/src/io/json/write/serialize.rs
+++ b/src/io/json/write/serialize.rs
@@ -1,9 +1,16 @@
+use chrono::{NaiveDate, NaiveDateTime};
 use lexical_core::ToLexical;
+use std::io::Write;
 use streaming_iterator::StreamingIterator;
 
 use crate::bitmap::utils::zip_validity;
 use crate::chunk::Chunk;
+use crate::datatypes::TimeUnit;
 use crate::io::iterator::BufStreamingIterator;
+use crate::temporal_conversions::{
+    date32_to_date, date64_to_date, timestamp_ms_to_datetime, timestamp_ns_to_datetime,
+    timestamp_s_to_datetime, timestamp_us_to_datetime,
+};
 use crate::util::lexical_to_bytes_mut;
 use crate::{array::*, datatypes::DataType, types::NativeType};
 
@@ -136,6 +143,57 @@ fn list_serializer<'a, O: Offset>(
     ))
 }
 
+/// Builds a boxed [`BufStreamingIterator`] that serializes each slot of `array` to JSON:
+/// valid slots are mapped through `convert` and written as a double-quoted string holding
+/// the `Display` form of the resulting [`NaiveDate`]; null slots are written as `null`.
+fn date_serializer<'a, T, F>(
+    array: &'a PrimitiveArray<T>,
+    convert: F,
+) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync>
+where
+    T: NativeType,
+    F: Fn(T) -> NaiveDate + 'static + Send + Sync,
+{
+    Box::new(BufStreamingIterator::new(
+        array.iter(),
+        move |x, buf| {
+            if let Some(x) = x {
+                let nd = convert(*x);
+                // JSON has no date literal; an unquoted `1972-09-27` is not valid JSON.
+                write!(buf, "\"{}\"", nd).unwrap();
+            } else {
+                buf.extend_from_slice(b"null")
+            }
+        },
+        vec![],
+    ))
+}
+
+/// Builds a boxed [`BufStreamingIterator`] that serializes each slot of `array` to JSON:
+/// valid slots are mapped through `convert` and written as a double-quoted string holding
+/// the `Display` form of the resulting [`NaiveDateTime`]; null slots are written as `null`.
+fn timestamp_serializer<'a, F>(
+    array: &'a PrimitiveArray<i64>,
+    convert: F,
+) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync>
+where
+    F: Fn(i64) -> NaiveDateTime + 'static + Send + Sync,
+{
+    Box::new(BufStreamingIterator::new(
+        array.iter(),
+        move |x, buf| {
+            if let Some(x) = x {
+                let ndt = convert(*x);
+                // JSON has no datetime literal; the value must be emitted as a string.
+                write!(buf, "\"{}\"", ndt).unwrap();
+            } else {
+                buf.extend_from_slice(b"null")
+            }
+        },
+        vec![],
+    ))
+}
+
 fn new_serializer<'a>(
     array: &'a dyn Array,
 ) -> Box<dyn StreamingIterator<Item = [u8]> + 'a + Send + Sync> {
@@ -156,6 +214,21 @@ fn new_serializer<'a>(
         DataType::Struct(_) => struct_serializer(array.as_any().downcast_ref().unwrap()),
         DataType::List(_) => list_serializer::<i32>(array.as_any().downcast_ref().unwrap()),
         DataType::LargeList(_) => list_serializer::<i64>(array.as_any().downcast_ref().unwrap()),
+        DataType::Date32 => date_serializer(array.as_any().downcast_ref().unwrap(), date32_to_date),
+        DataType::Date64 => date_serializer(array.as_any().downcast_ref().unwrap(), date64_to_date),
+        DataType::Timestamp(tu, tz) => {
+            if tz.is_some() {
+                todo!("still have to implement timezone")
+            } else {
+                let convert = match tu {
+                    TimeUnit::Nanosecond => timestamp_ns_to_datetime,
+                    TimeUnit::Microsecond => timestamp_us_to_datetime,
+                    TimeUnit::Millisecond => timestamp_ms_to_datetime,
+                    TimeUnit::Second => timestamp_s_to_datetime,
+                };
+                timestamp_serializer(array.as_any().downcast_ref().unwrap(), convert)
+            }
+        }
         other => todo!("Writing {:?} to JSON", other),
     }
 }
diff --git a/tests/it/io/json/write.rs b/tests/it/io/json/write.rs
index dc10fbff724..6145fa0ddbc 100644
--- a/tests/it/io/json/write.rs
+++ b/tests/it/io/json/write.rs
@@ -324,3 +324,44 @@ fn write_quotation_marks_in_utf8() -> Result<()> {
     );
     Ok(())
 }
+
+#[test]
+fn write_date32() -> Result<()> {
+    let a = PrimitiveArray::from_data(DataType::Date32, vec![1000i32, 8000, 10000].into(), None);
+
+    let batch = Chunk::try_new(vec![&a as &dyn Array]).unwrap();
+
+    let buf = write_batch(
+        batch,
+        vec!["c1".to_string()],
+        json_write::LineDelimited::default(),
+    )?;
+
+    assert_eq!(
+        String::from_utf8(buf).unwrap().as_bytes(),
+        b"{\"c1\":\"1972-09-27\"}\n{\"c1\":\"1991-11-27\"}\n{\"c1\":\"1997-05-19\"}\n"
+    );
+    Ok(())
+}
+#[test]
+fn write_timestamp() -> Result<()> {
+    let a = PrimitiveArray::from_data(
+        DataType::Timestamp(TimeUnit::Second, None),
+        vec![10i64, 1 << 32, 1 << 33].into(),
+        None,
+    );
+
+    let batch = Chunk::try_new(vec![&a as &dyn Array]).unwrap();
+
+    let buf = write_batch(
+        batch,
+        vec!["c1".to_string()],
+        json_write::LineDelimited::default(),
+    )?;
+
+    assert_eq!(
+        String::from_utf8(buf).unwrap().as_bytes(),
+        b"{\"c1\":\"1970-01-01 00:00:10\"}\n{\"c1\":\"2106-02-07 06:28:16\"}\n{\"c1\":\"2242-03-16 12:56:32\"}\n"
+    );
+    Ok(())
+}