Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into cmc/arc_datatype
Browse files Browse the repository at this point in the history
  • Loading branch information
teh-cmc committed Apr 17, 2023
2 parents d274275 + 192decd commit 3c3c6ed
Show file tree
Hide file tree
Showing 7 changed files with 178 additions and 57 deletions.
8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,10 @@ odbc-api = { version = "0.36", optional = true }
ahash = "0.8"

# Support conversion to/from arrow-rs
arrow-buffer = { version = "36.0.0", optional = true }
arrow-schema = { version = "36.0.0", optional = true }
arrow-data = { version = "36.0.0", optional = true }
arrow-array = { version = "36.0.0", optional = true }
arrow-buffer = { version = "37.0.0", optional = true }
arrow-schema = { version = "37.0.0", optional = true }
arrow-data = { version = "37.0.0", optional = true }
arrow-array = { version = "37.0.0", optional = true }

[target.wasm32-unknown-unknown.dependencies]
getrandom = { version = "0.2", features = ["js"] }
Expand Down
21 changes: 21 additions & 0 deletions src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ impl From<Field> for arrow_schema::Field {
#[cfg(feature = "arrow")]
impl From<arrow_schema::Field> for Field {
fn from(value: arrow_schema::Field) -> Self {
(&value).into()
}
}

#[cfg(feature = "arrow")]
impl From<&arrow_schema::Field> for Field {
fn from(value: &arrow_schema::Field) -> Self {
let data_type = value.data_type().clone().into();
let metadata = value
.metadata()
Expand All @@ -73,3 +80,17 @@ impl From<arrow_schema::Field> for Field {
Self::new(value.name(), data_type, value.is_nullable()).with_metadata(metadata)
}
}

#[cfg(feature = "arrow")]
impl From<arrow_schema::FieldRef> for Field {
fn from(value: arrow_schema::FieldRef) -> Self {
value.as_ref().into()
}
}

#[cfg(feature = "arrow")]
impl From<&arrow_schema::FieldRef> for Field {
fn from(value: &arrow_schema::FieldRef) -> Self {
value.as_ref().into()
}
}
55 changes: 28 additions & 27 deletions src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,9 @@ pub enum DataType {
#[cfg(feature = "arrow")]
impl From<DataType> for arrow_schema::DataType {
fn from(value: DataType) -> Self {
use arrow_schema::Field as ArrowField;
use arrow_schema::UnionFields;

match value {
DataType::Null => Self::Null,
DataType::Boolean => Self::Boolean,
Expand All @@ -218,9 +221,10 @@ impl From<DataType> for arrow_schema::DataType {
DataType::Float16 => Self::Float16,
DataType::Float32 => Self::Float32,
DataType::Float64 => Self::Float64,
DataType::Timestamp(unit, tz) => {
Self::Timestamp(unit.into(), tz.map(Arc::unwrap_or_clone_polyfill))
}
DataType::Timestamp(unit, tz) => Self::Timestamp(
unit.into(),
tz.map(Arc::unwrap_or_clone_polyfill).map(Into::into),
),
DataType::Date32 => Self::Date32,
DataType::Date64 => Self::Date64,
DataType::Time32(unit) => Self::Time32(unit.into()),
Expand All @@ -232,40 +236,37 @@ impl From<DataType> for arrow_schema::DataType {
DataType::LargeBinary => Self::LargeBinary,
DataType::Utf8 => Self::Utf8,
DataType::LargeUtf8 => Self::LargeUtf8,
DataType::List(f) => Self::List(Box::new(Arc::unwrap_or_clone_polyfill(f).into())),
DataType::List(f) => Self::List(Arc::new(Arc::unwrap_or_clone_polyfill(f).into())),
DataType::FixedSizeList(f, size) => {
Self::FixedSizeList(Box::new(Arc::unwrap_or_clone_polyfill(f).into()), size as _)
Self::FixedSizeList(Arc::new(Arc::unwrap_or_clone_polyfill(f).into()), size as _)
}
DataType::LargeList(f) => {
Self::LargeList(Box::new(Arc::unwrap_or_clone_polyfill(f).into()))
Self::LargeList(Arc::new(Arc::unwrap_or_clone_polyfill(f).into()))
}
DataType::Struct(f) => Self::Struct(
Arc::unwrap_or_clone_polyfill(f)
.into_iter()
.map(Into::into)
.map(ArrowField::from)
.collect(),
),
DataType::Union(fields, Some(ids), mode) => {
let ids = Arc::unwrap_or_clone_polyfill(ids)
.into_iter()
.map(|x| x as _)
.collect();
.map(|x| x as _);
let fields = Arc::unwrap_or_clone_polyfill(fields)
.into_iter()
.map(Into::into)
.collect();
Self::Union(fields, ids, mode.into())
.map(ArrowField::from);
Self::Union(UnionFields::new(ids, fields), mode.into())
}
DataType::Union(fields, None, mode) => {
let ids = (0..fields.len() as i8).collect();
let ids = 0..fields.len() as i8;
let fields = Arc::unwrap_or_clone_polyfill(fields)
.into_iter()
.map(Into::into)
.collect();
Self::Union(fields, ids, mode.into())
.map(ArrowField::from);

Check warning on line 265 in src/datatypes/mod.rs

View check run for this annotation

Codecov / codecov/patch

src/datatypes/mod.rs#L263-L265

Added lines #L263 - L265 were not covered by tests
Self::Union(UnionFields::new(ids, fields), mode.into())
}
DataType::Map(f, ordered) => {
Self::Map(Box::new(Arc::unwrap_or_clone_polyfill(f).into()), ordered)
Self::Map(Arc::new(Arc::unwrap_or_clone_polyfill(f).into()), ordered)
}
DataType::Dictionary(key, value, _) => Self::Dictionary(
Box::new(DataType::from(key).into()),
Expand Down Expand Up @@ -296,7 +297,9 @@ impl From<arrow_schema::DataType> for DataType {
DataType::Float16 => Self::Float16,
DataType::Float32 => Self::Float32,
DataType::Float64 => Self::Float64,
DataType::Timestamp(unit, tz) => Self::Timestamp(unit.into(), tz.map(Arc::new)),
DataType::Timestamp(unit, tz) => {
Self::Timestamp(unit.into(), tz.map(|tz| Arc::new(tz.to_string())))
}
DataType::Date32 => Self::Date32,
DataType::Date64 => Self::Date64,
DataType::Time32(unit) => Self::Time32(unit.into()),
Expand All @@ -308,18 +311,16 @@ impl From<arrow_schema::DataType> for DataType {
DataType::LargeBinary => Self::LargeBinary,
DataType::Utf8 => Self::Utf8,
DataType::LargeUtf8 => Self::LargeUtf8,
DataType::List(f) => Self::List(Arc::new((*f).into())),
DataType::FixedSizeList(f, size) => {
Self::FixedSizeList(Arc::new((*f).into()), size as _)
}
DataType::LargeList(f) => Self::LargeList(Arc::new((*f).into())),
DataType::List(f) => Self::List(Arc::new(f.into())),
DataType::FixedSizeList(f, size) => Self::FixedSizeList(Arc::new(f.into()), size as _),
DataType::LargeList(f) => Self::LargeList(Arc::new(f.into())),
DataType::Struct(f) => Self::Struct(Arc::new(f.into_iter().map(Into::into).collect())),
DataType::Union(fields, ids, mode) => {
let ids = Arc::new(ids.into_iter().map(|x| x as _).collect());
let fields = Arc::new(fields.into_iter().map(Into::into).collect());
DataType::Union(fields, mode) => {
let ids = Arc::new(fields.iter().map(|(x, _)| x as _).collect());
let fields = Arc::new(fields.iter().map(|(_, f)| f.into()).collect());
Self::Union(fields, Some(ids), mode.into())
}
DataType::Map(f, ordered) => Self::Map(std::sync::Arc::new((*f).into()), ordered),
DataType::Map(f, ordered) => Self::Map(std::sync::Arc::new(f.into()), ordered),
DataType::Dictionary(key, value) => {
let key = match *key {
DataType::Int8 => IntegerType::Int8,
Expand Down
66 changes: 48 additions & 18 deletions src/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,36 +114,66 @@ pub fn timestamp_s_to_datetime(seconds: i64) -> NaiveDateTime {
/// converts a `i64` representing a `timestamp(ms)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_ms_to_datetime(v: i64) -> NaiveDateTime {
NaiveDateTime::from_timestamp_opt(
// extract seconds from milliseconds
v / MILLISECONDS,
// discard extracted seconds and convert milliseconds to nanoseconds
(v % MILLISECONDS * MICROSECONDS) as u32,
)
if v >= 0 {
NaiveDateTime::from_timestamp_opt(
// extract seconds from milliseconds
v / MILLISECONDS,
// discard extracted seconds and convert milliseconds to nanoseconds
(v % MILLISECONDS * MICROSECONDS) as u32,
)
} else {
// note: negative values require 'div_floor' rounding behaviour, which isn't
// yet stabilised (see - https://github.com/rust-lang/rust/issues/88581).
let secs_rem = (v / MILLISECONDS, v % MILLISECONDS);
NaiveDateTime::from_timestamp_opt(
secs_rem.0 - (secs_rem.1 != 0) as i64,
(v % MILLISECONDS * MICROSECONDS).unsigned_abs() as u32,
)
}
.expect("invalid or out-of-range datetime")
}

/// converts a `i64` representing a `timestamp(us)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_us_to_datetime(v: i64) -> NaiveDateTime {
NaiveDateTime::from_timestamp_opt(
// extract seconds from microseconds
v / MICROSECONDS,
// discard extracted seconds and convert microseconds to nanoseconds
(v % MICROSECONDS * MILLISECONDS) as u32,
)
if v >= 0 {
NaiveDateTime::from_timestamp_opt(
// extract seconds from microseconds
v / MICROSECONDS,
// discard extracted seconds and convert microseconds to nanoseconds
(v % MICROSECONDS * MILLISECONDS) as u32,
)
} else {
// note: negative values require 'div_floor' rounding behaviour, which isn't
// yet stabilised (see - https://github.com/rust-lang/rust/issues/88581).
let secs_rem = (v / MICROSECONDS, v % MICROSECONDS);
NaiveDateTime::from_timestamp_opt(
secs_rem.0 - (secs_rem.1 != 0) as i64,
(v % MICROSECONDS * MILLISECONDS).unsigned_abs() as u32,
)
}
.expect("invalid or out-of-range datetime")
}

/// converts a `i64` representing a `timestamp(ns)` to [`NaiveDateTime`]
#[inline]
pub fn timestamp_ns_to_datetime(v: i64) -> NaiveDateTime {
NaiveDateTime::from_timestamp_opt(
// extract seconds from nanoseconds
v / NANOSECONDS,
// discard extracted seconds
(v % NANOSECONDS) as u32,
)
if v >= 0 {
NaiveDateTime::from_timestamp_opt(
// extract seconds from nanoseconds
v / NANOSECONDS,
// discard extracted seconds
(v % NANOSECONDS) as u32,
)
} else {
// note: negative values require 'div_floor' rounding behaviour, which isn't
// yet stabilised (see - https://github.com/rust-lang/rust/issues/88581).
let secs_rem = (v / NANOSECONDS, v % NANOSECONDS);
NaiveDateTime::from_timestamp_opt(
secs_rem.0 - (secs_rem.1 != 0) as i64,
(v % NANOSECONDS).unsigned_abs() as u32,
)
}
.expect("invalid or out-of-range datetime")
}

Expand Down
2 changes: 1 addition & 1 deletion tests/it/arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fn test_arrow2_roundtrip(array: &dyn arrow_array::Array) {
assert_eq!(back.len(), array.len());

match array.data_type() {
d @ arrow_schema::DataType::Union(_, _, arrow_schema::UnionMode::Sparse) => {
d @ arrow_schema::DataType::Union(_, arrow_schema::UnionMode::Sparse) => {
// Temporary workaround https://github.com/apache/arrow-rs/issues/4044
let data = array.to_data();
let type_ids = data.buffers()[0].slice_with_length(data.offset(), data.len());
Expand Down
82 changes: 75 additions & 7 deletions tests/it/temporal_conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ use arrow2::temporal_conversions;
use arrow2::types::months_days_ns;
use std::sync::Arc;

use chrono::NaiveDateTime;

#[test]
fn naive() {
let expected = "Timestamp(Nanosecond, None)[1996-12-19 16:39:57, 1996-12-19 13:39:57, None]";
Expand All @@ -28,10 +30,10 @@ fn naive() {

#[test]
fn naive_scalar() {
let fmt = "%Y-%m-%dT%H:%M:%S:z";
let fmt = "%Y-%m-%dT%H:%M:%S.%9f%z";
let str = "2023-04-07T12:23:34.123456789Z";

let str = "2023-04-07T12:23:34.000000000Z";
let nanos_expected = 1680870214000000000;
let nanos_expected = 1680870214123456789;

// seconds
let r = temporal_conversions::utf8_to_naive_timestamp_scalar(str, fmt, &TimeUnit::Second);
Expand All @@ -49,10 +51,10 @@ fn naive_scalar() {

#[test]
fn naive_scalar_no_tz() {
let fmt = "%Y-%m-%dT%H:%M:%S";
let fmt = "%Y-%m-%dT%H:%M:%S.%9f";

let str = "2023-04-07T12:23:34.000000000";
let nanos_expected = 1680870214000000000;
let str = "2023-04-07T12:23:34.123456789";
let nanos_expected = 1680870214123456789;

// seconds
let r = temporal_conversions::utf8_to_naive_timestamp_scalar(str, fmt, &TimeUnit::Second);
Expand Down Expand Up @@ -95,7 +97,6 @@ fn scalar_tz_aware_no_timezone() {

let tz = temporal_conversions::parse_offset("-02:00").unwrap();
let str = "2023-04-07T10:23:34.000000000-02:00";
let _nanos_expected = 1680870214000000000 as i64;

// seconds
let r = temporal_conversions::utf8_to_timestamp_scalar(str, fmt, &tz, &TimeUnit::Second);
Expand Down Expand Up @@ -124,6 +125,73 @@ fn naive_no_tz() {
assert_eq!(format!("{r:?}"), expected);
}

#[test]
fn timestamp_to_datetime() {
let fmt = "%Y-%m-%dT%H:%M:%S.%9f";
let ts = 1680870214123456789;

// positive milliseconds
assert_eq!(
temporal_conversions::timestamp_ms_to_datetime(ts / 1_000_000),
NaiveDateTime::parse_from_str("2023-04-07T12:23:34.123000000", fmt).unwrap()
);
// positive microseconds
assert_eq!(
temporal_conversions::timestamp_us_to_datetime(ts / 1_000),
NaiveDateTime::parse_from_str("2023-04-07T12:23:34.123456000", fmt).unwrap()
);
// positive nanoseconds
assert_eq!(
temporal_conversions::timestamp_ns_to_datetime(ts),
NaiveDateTime::parse_from_str("2023-04-07T12:23:34.123456789", fmt).unwrap()
);

let ts = -15548276987654321;

// negative milliseconds
assert_eq!(
temporal_conversions::timestamp_ms_to_datetime(ts / 1_000_000),
NaiveDateTime::parse_from_str("1969-07-05T01:02:03.987000000", fmt).unwrap()
);
// negative microseconds
assert_eq!(
temporal_conversions::timestamp_us_to_datetime(ts / 1_000),
NaiveDateTime::parse_from_str("1969-07-05T01:02:03.987654000", fmt).unwrap()
);
// negative nanoseconds
assert_eq!(
temporal_conversions::timestamp_ns_to_datetime(ts),
NaiveDateTime::parse_from_str("1969-07-05T01:02:03.987654321", fmt).unwrap()
);

let fmt = "%Y-%m-%dT%H:%M:%S";
let ts = -2209075200000000000;
let expected = NaiveDateTime::parse_from_str("1899-12-31T00:00:00", fmt).unwrap();

assert_eq!(
temporal_conversions::timestamp_ms_to_datetime(ts / 1_000_000),
expected
);
assert_eq!(
temporal_conversions::timestamp_us_to_datetime(ts / 1_000),
expected
);
assert_eq!(temporal_conversions::timestamp_ns_to_datetime(ts), expected);
}

#[test]
fn timestamp_to_negative_datetime() {
let fmt = "%Y-%m-%d %H:%M:%S";
let ts = -63135596800000000;
let expected = NaiveDateTime::parse_from_str("-0031-04-24 22:13:20", fmt).unwrap();

assert_eq!(
temporal_conversions::timestamp_ms_to_datetime(ts / 1_000),
expected
);
assert_eq!(temporal_conversions::timestamp_us_to_datetime(ts), expected);
}

#[test]
fn tz_aware() {
let tz = Arc::new("-02:00".to_string());
Expand Down
1 change: 1 addition & 0 deletions tests/it/test_util.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#[allow(dead_code)]
pub fn arrow_test_data() -> String {
"testing/arrow-testing/data".to_string()
}

0 comments on commit 3c3c6ed

Please sign in to comment.