Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Fixed issue with Time32/Time64 datatype in csv reader #1425

Merged
merged 5 commits into from
Mar 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 34 additions & 1 deletion src/io/csv/read_utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use chrono::Datelike;
use chrono::{Datelike, Timelike};

use crate::{
array::*,
Expand Down Expand Up @@ -202,6 +202,29 @@ pub(crate) fn deserialize_column<B: ByteRecordGeneric>(
.and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
.map(|x| x.timestamp_millis())
}),
Time32(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
let factor = get_factor_from_timeunit(time_unit);
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveTime>().ok())
.map(|x| {
(x.hour() * 3_600 * factor
+ x.minute() * 60 * factor
+ x.second() * factor
+ x.nanosecond() / (1_000_000_000 / factor)) as i32
})
}),
Time64(time_unit) => deserialize_primitive(rows, column, datatype, |bytes| {
let factor: u64 = get_factor_from_timeunit(time_unit).into();
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveTime>().ok())
.map(|x| {
(x.hour() as u64 * 3_600 * factor
+ x.minute() as u64 * 60 * factor
+ x.second() as u64 * factor
+ x.nanosecond() as u64 / (1_000_000_000 / factor))
as i64
})
}),
Timestamp(time_unit, None) => deserialize_primitive(rows, column, datatype, |bytes| {
to_utf8(bytes)
.and_then(|x| x.parse::<chrono::NaiveDateTime>().ok())
Expand Down Expand Up @@ -274,3 +297,13 @@ where
.collect::<Result<Vec<_>>>()
.and_then(Chunk::try_new)
}

/// Returns how many ticks of `time_unit` make up one second.
///
/// The result is `1` for seconds, `1_000` for milliseconds, `1_000_000` for
/// microseconds and `1_000_000_000` for nanoseconds.
fn get_factor_from_timeunit(time_unit: TimeUnit) -> u32 {
    // Each unit is a thousand times finer than the one before it, so the
    // factor is 1000 raised to the unit's position in that ladder.
    let steps_below_second = match time_unit {
        TimeUnit::Second => 0,
        TimeUnit::Millisecond => 1,
        TimeUnit::Microsecond => 2,
        TimeUnit::Nanosecond => 3,
    };
    // 1000^3 = 1_000_000_000 still fits in a u32, so this cannot overflow.
    1_000u32.pow(steps_below_second)
}
6 changes: 3 additions & 3 deletions src/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ pub fn date64_to_date(milliseconds: i64) -> NaiveDate {
date64_to_datetime(milliseconds).date()
}

/// converts a `i32` representing a `time32(s)` to [`NaiveDateTime`]
/// converts a `i32` representing a `time32(s)` to [`NaiveTime`]
#[inline]
pub fn time32s_to_time(v: i32) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(v as u32, 0).expect("invalid time")
Expand All @@ -78,7 +78,7 @@ pub fn time32ms_to_time(v: i32) -> NaiveTime {
.expect("invalid time")
}

/// converts a `i64` representing a `time64(us)` to [`NaiveDateTime`]
/// converts a `i64` representing a `time64(us)` to [`NaiveTime`]
#[inline]
pub fn time64us_to_time(v: i64) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(
Expand All @@ -91,7 +91,7 @@ pub fn time64us_to_time(v: i64) -> NaiveTime {
.expect("invalid time")
}

/// converts a `i64` representing a `time64(ns)` to [`NaiveDateTime`]
/// converts a `i64` representing a `time64(ns)` to [`NaiveTime`]
#[inline]
pub fn time64ns_to_time(v: i64) -> NaiveTime {
NaiveTime::from_num_seconds_from_midnight_opt(
Expand Down
48 changes: 48 additions & 0 deletions tests/it/io/csv/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,54 @@ fn date64() -> Result<()> {
Ok(())
}

/// Reading `HH:MM:SS` cells as `Time32(Second)` yields seconds since midnight.
#[test]
fn time32_s() -> Result<()> {
    let data_type = || DataType::Time32(TimeUnit::Second);
    let csv = "00:00:00,\n23:59:59,\n11:00:11,\n";

    let parsed = test_deserialize(csv, data_type())?;

    // 23:59:59 is 86_399 s and 11:00:11 is 39_611 s after midnight.
    let wanted = Int32Array::from(&[Some(0), Some(86399), Some(39611)]).to(data_type());
    assert_eq!(wanted, parsed.as_ref());
    Ok(())
}

/// Reading `HH:MM:SS.mmm` cells as `Time32(Millisecond)` yields milliseconds
/// since midnight.
#[test]
fn time32_ms() -> Result<()> {
    let data_type = || DataType::Time32(TimeUnit::Millisecond);
    let csv = "00:00:00.000,\n23:59:59.999,\n00:00:00.999,\n";

    let parsed = test_deserialize(csv, data_type())?;

    // 23:59:59.999 is the last representable millisecond of the day.
    let wanted = Int32Array::from(&[Some(0), Some(86_399_999), Some(999)]).to(data_type());
    assert_eq!(wanted, parsed.as_ref());
    Ok(())
}

/// Reading `HH:MM:SS.uuuuuu` cells as `Time64(Microsecond)` yields
/// microseconds since midnight.
#[test]
fn time64_us() -> Result<()> {
    let data_type = || DataType::Time64(TimeUnit::Microsecond);
    let csv = "00:00:00.000000,\n23:59:59.999999,\n00:00:00.000001,\n";

    let parsed = test_deserialize(csv, data_type())?;

    // 23:59:59.999999 is the last representable microsecond of the day.
    let wanted = Int64Array::from(&[Some(0), Some(86_399_999_999), Some(1)]).to(data_type());
    assert_eq!(wanted, parsed.as_ref());
    Ok(())
}

/// Reading `HH:MM:SS.nnnnnnnnn` cells as `Time64(Nanosecond)` yields
/// nanoseconds since midnight.
#[test]
fn time64_ns() -> Result<()> {
    let data_type = || DataType::Time64(TimeUnit::Nanosecond);
    let csv = "00:00:00.000000000,\n23:59:59.999999999,\n00:00:00.000000001,\n";

    let parsed = test_deserialize(csv, data_type())?;

    // 23:59:59.999999999 is the last representable nanosecond of the day.
    let wanted =
        Int64Array::from(&[Some(0), Some(86_399_999_999_999), Some(1)]).to(data_type());
    assert_eq!(wanted, parsed.as_ref());
    Ok(())
}

#[test]
fn decimal() -> Result<()> {
let result = test_deserialize("1.1,\n1.2,\n1.22,\n1.3,\n", DataType::Decimal(2, 1))?;
Expand Down