diff --git a/integration-testing/unskip.patch b/integration-testing/unskip.patch index fb84d7d94d2..674bea2b154 100644 --- a/integration-testing/unskip.patch +++ b/integration-testing/unskip.patch @@ -1,5 +1,5 @@ diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py -index d0c4b3d6c..bc4b83e68 100644 +index d0c4b3d6c..936351c80 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1568,8 +1568,7 @@ def get_generated_json_files(tempdir=None): @@ -12,7 +12,7 @@ index d0c4b3d6c..bc4b83e68 100644 generate_decimal256_case() .skip_category('Go') # TODO(ARROW-7948): Decimal + Go -@@ -1579,8 +1578,7 @@ def get_generated_json_files(tempdir=None): +@@ -1579,13 +1578,11 @@ def get_generated_json_files(tempdir=None): generate_datetime_case(), generate_interval_case() @@ -22,7 +22,13 @@ index d0c4b3d6c..bc4b83e68 100644 generate_month_day_nano_interval_case() .skip_category('Go') -@@ -1603,13 +1601,11 @@ def get_generated_json_files(tempdir=None): +- .skip_category('JS') +- .skip_category('Rust'), ++ .skip_category('JS'), + + + generate_map_case() +@@ -1603,13 +1600,11 @@ def get_generated_json_files(tempdir=None): generate_nested_large_offsets_case() .skip_category('Go') diff --git a/src/array/display.rs b/src/array/display.rs index ecf6036e84f..ed403d33cba 100644 --- a/src/array/display.rs +++ b/src/array/display.rs @@ -128,6 +128,15 @@ pub fn get_value_display<'a>(array: &'a dyn Array) -> Box Strin x.milliseconds() )) } + + Interval(IntervalUnit::MonthDayNano) => { + dyn_primitive!(array, months_days_ns, |x: months_days_ns| format!( + "{}m{}d{}ns", + x.months(), + x.days(), + x.ns() + )) + } Duration(TimeUnit::Second) => dyn_primitive!(array, i64, |x| format!("{}s", x)), Duration(TimeUnit::Millisecond) => dyn_primitive!(array, i64, |x| format!("{}ms", x)), Duration(TimeUnit::Microsecond) => dyn_primitive!(array, i64, |x| format!("{}us", x)), diff --git 
a/src/array/equal/mod.rs b/src/array/equal/mod.rs index 32612a1fe07..c4acef75e8c 100644 --- a/src/array/equal/mod.rs +++ b/src/array/equal/mod.rs @@ -1,4 +1,4 @@ -use crate::types::{days_ms, NativeType}; +use crate::types::NativeType; use super::*; diff --git a/src/array/mod.rs b/src/array/mod.rs index 3b23a377e36..7403af5b381 100644 --- a/src/array/mod.rs +++ b/src/array/mod.rs @@ -17,7 +17,7 @@ use std::any::Any; use std::fmt::Display; use crate::error::Result; -use crate::types::days_ms; +use crate::types::{days_ms, months_days_ns}; use crate::{ bitmap::{Bitmap, MutableBitmap}, datatypes::DataType, @@ -186,6 +186,7 @@ macro_rules! with_match_primitive_type {( ) => ({ macro_rules! __with_ty__ {( $_ $T:ident ) => ( $($body)* )} use crate::datatypes::PrimitiveType::*; + use crate::types::{days_ms, months_days_ns}; match $key_type { Int8 => __with_ty__! { i8 }, Int16 => __with_ty__! { i16 }, @@ -193,6 +194,7 @@ macro_rules! with_match_primitive_type {( Int64 => __with_ty__! { i64 }, Int128 => __with_ty__! { i128 }, DaysMs => __with_ty__! { days_ms }, + MonthDayNano => __with_ty__! { months_days_ns }, UInt8 => __with_ty__! { u8 }, UInt16 => __with_ty__! { u16 }, UInt32 => __with_ty__! 
{ u32 }, diff --git a/src/array/primitive/mod.rs b/src/array/primitive/mod.rs index 4410e2e5134..90cdc27c11c 100644 --- a/src/array/primitive/mod.rs +++ b/src/array/primitive/mod.rs @@ -3,7 +3,7 @@ use crate::{ buffer::Buffer, datatypes::*, error::ArrowError, - types::{days_ms, NativeType}, + types::{days_ms, months_days_ns, NativeType}, }; use super::Array; @@ -176,6 +176,8 @@ pub type Int64Array = PrimitiveArray; pub type Int128Array = PrimitiveArray; /// A type definition [`PrimitiveArray`] for [`days_ms`] pub type DaysMsArray = PrimitiveArray; +/// A type definition [`PrimitiveArray`] for [`months_days_ns`] +pub type MonthsDaysNsArray = PrimitiveArray; /// A type definition [`PrimitiveArray`] for `f32` pub type Float32Array = PrimitiveArray; /// A type definition [`PrimitiveArray`] for `f64` @@ -201,6 +203,8 @@ pub type Int64Vec = MutablePrimitiveArray; pub type Int128Vec = MutablePrimitiveArray; /// A type definition [`MutablePrimitiveArray`] for [`days_ms`] pub type DaysMsVec = MutablePrimitiveArray; +/// A type definition [`MutablePrimitiveArray`] for [`months_days_ns`] +pub type MonthsDaysNsVec = MutablePrimitiveArray; /// A type definition [`MutablePrimitiveArray`] for `f32` pub type Float32Vec = MutablePrimitiveArray; /// A type definition [`MutablePrimitiveArray`] for `f64` diff --git a/src/compute/aggregate/memory.rs b/src/compute/aggregate/memory.rs index 1d1cfbe3cfe..9f60f37752c 100644 --- a/src/compute/aggregate/memory.rs +++ b/src/compute/aggregate/memory.rs @@ -1,7 +1,6 @@ use crate::array::*; use crate::bitmap::Bitmap; use crate::datatypes::PhysicalType; -use crate::types::days_ms; fn validity_size(validity: &Option) -> usize { validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0) diff --git a/src/compute/filter.rs b/src/compute/filter.rs index 54102d0e1e3..df752e75a63 100644 --- a/src/compute/filter.rs +++ b/src/compute/filter.rs @@ -15,14 +15,10 @@ // specific language governing permissions and limitations // under the License. 
-use crate::array::growable::make_growable; -use crate::array::growable::Growable; +use crate::array::growable::{make_growable, Growable}; +use crate::bitmap::{utils::SlicesIterator, Bitmap, MutableBitmap}; use crate::record_batch::RecordBatch; -use crate::{array::*, bitmap::Bitmap, types::NativeType}; -use crate::{ - bitmap::{utils::SlicesIterator, MutableBitmap}, - types::days_ms, -}; +use crate::{array::*, types::NativeType}; use crate::{buffer::MutableBuffer, error::Result}; /// Function that can filter arbitrary arrays diff --git a/src/compute/take/mod.rs b/src/compute/take/mod.rs index 751ebfc9f34..65ce13cf815 100644 --- a/src/compute/take/mod.rs +++ b/src/compute/take/mod.rs @@ -21,7 +21,7 @@ use crate::{ array::{new_empty_array, Array, NullArray, PrimitiveArray}, datatypes::DataType, error::Result, - types::{days_ms, Index}, + types::Index, }; mod binary; diff --git a/src/datatypes/mod.rs b/src/datatypes/mod.rs index 9b406e4a208..b7e6a7d3674 100644 --- a/src/datatypes/mod.rs +++ b/src/datatypes/mod.rs @@ -147,6 +147,13 @@ pub enum IntervalUnit { /// Indicates the number of elapsed days and milliseconds, /// stored as 2 contiguous 32-bit integers (8-bytes in total). DayTime, + /// The values are stored contiguously in 16 byte blocks. Months and + /// days are encoded as 32 bit integers and nanoseconds is encoded as a + /// 64 bit integer. All integers are signed. Each field is independent + /// (e.g. there is no constraint that nanoseconds have the same sign + /// as days or that the quantity of nanoseconds represents less + /// than a day's worth of time). 
+ MonthDayNano, } impl DataType { @@ -197,6 +204,9 @@ impl DataType { Float32 => PhysicalType::Primitive(PrimitiveType::Float32), Float64 => PhysicalType::Primitive(PrimitiveType::Float64), Interval(IntervalUnit::DayTime) => PhysicalType::Primitive(PrimitiveType::DaysMs), + Interval(IntervalUnit::MonthDayNano) => { + PhysicalType::Primitive(PrimitiveType::MonthDayNano) + } Binary => PhysicalType::Binary, FixedSizeBinary(_) => PhysicalType::FixedSizeBinary, LargeBinary => PhysicalType::LargeBinary, diff --git a/src/datatypes/physical_type.rs b/src/datatypes/physical_type.rs index aff17e906d1..11dde7e1829 100644 --- a/src/datatypes/physical_type.rs +++ b/src/datatypes/physical_type.rs @@ -45,6 +45,8 @@ pub enum PrimitiveType { Float64, /// Two i32 representing days and ms DaysMs, + /// months_days_ns(i32, i32, i64) + MonthDayNano, } /// The set of physical types: unique in-memory representations of an Arrow array. diff --git a/src/ffi/array.rs b/src/ffi/array.rs index 60af0df0fe2..518102ecbed 100644 --- a/src/ffi/array.rs +++ b/src/ffi/array.rs @@ -20,7 +20,6 @@ use super::ffi::ArrowArrayRef; use crate::array::{BooleanArray, FromFfi}; use crate::error::{ArrowError, Result}; -use crate::types::days_ms; use crate::{array::*, datatypes::PhysicalType}; /// Reads a valid `ffi` interface into a `Box` diff --git a/src/ffi/schema.rs b/src/ffi/schema.rs index 083e006beed..95078c335a9 100644 --- a/src/ffi/schema.rs +++ b/src/ffi/schema.rs @@ -323,6 +323,9 @@ fn to_format(data_type: &DataType) -> String { DataType::Duration(TimeUnit::Nanosecond) => "tDn".to_string(), DataType::Interval(IntervalUnit::YearMonth) => "tiM".to_string(), DataType::Interval(IntervalUnit::DayTime) => "tiD".to_string(), + DataType::Interval(IntervalUnit::MonthDayNano) => { + todo!("Spec for FFI for MonthDayNano still not defined.") + } DataType::Timestamp(unit, tz) => { let unit = match unit { TimeUnit::Second => "s".to_string(), diff --git a/src/io/ipc/convert.rs b/src/io/ipc/convert.rs index 
4f218189966..ea6c6d98c89 100644 --- a/src/io/ipc/convert.rs +++ b/src/io/ipc/convert.rs @@ -255,6 +255,7 @@ fn get_data_type(field: ipc::Field, extension: Extension, may_be_dictionary: boo match interval.unit() { ipc::IntervalUnit::YEAR_MONTH => DataType::Interval(IntervalUnit::YearMonth), ipc::IntervalUnit::DAY_TIME => DataType::Interval(IntervalUnit::DayTime), + ipc::IntervalUnit::MONTH_DAY_NANO => DataType::Interval(IntervalUnit::MonthDayNano), z => panic!("Interval type with unit of {:?} unsupported", z), } } @@ -604,6 +605,7 @@ pub(crate) fn get_fb_field_type<'a>( let interval_unit = match unit { IntervalUnit::YearMonth => ipc::IntervalUnit::YEAR_MONTH, IntervalUnit::DayTime => ipc::IntervalUnit::DAY_TIME, + IntervalUnit::MonthDayNano => ipc::IntervalUnit::MONTH_DAY_NANO, }; builder.add_unit(interval_unit); FbFieldType { diff --git a/src/io/ipc/gen/Schema.rs b/src/io/ipc/gen/Schema.rs index 594551f2103..25cd2880ecd 100644 --- a/src/io/ipc/gen/Schema.rs +++ b/src/io/ipc/gen/Schema.rs @@ -18,7 +18,7 @@ #![allow(dead_code)] #![allow(unused_imports)] -use flatbuffers::EndianScalar; +use flatbuffers::{EndianScalar, Follow}; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify @@ -45,7 +45,7 @@ pub const ENUM_VALUES_METADATA_VERSION: [MetadataVersion; 5] = [ MetadataVersion::V5, ]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct MetadataVersion(pub i16); #[allow(non_upper_case_globals)] @@ -104,7 +104,9 @@ impl flatbuffers::Push for MetadataVersion { type Output = MetadataVersion; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -115,6 +117,7 @@ impl flatbuffers::EndianScalar for MetadataVersion { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn 
from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -127,7 +130,6 @@ impl<'a> flatbuffers::Verifiable for MetadataVersion { v: &mut flatbuffers::Verifier, pos: usize, ) -> Result<(), flatbuffers::InvalidFlatbuffer> { - use flatbuffers::Verifiable; i16::run_verifier(v, pos) } } @@ -171,7 +173,7 @@ pub const ENUM_VALUES_FEATURE: [Feature; 3] = [ /// Enums added to this list should be assigned power-of-two values /// to facilitate exchanging and comparing bitmaps for supported /// features. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct Feature(pub i64); #[allow(non_upper_case_globals)] @@ -225,7 +227,9 @@ impl flatbuffers::Push for Feature { type Output = Feature; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -236,6 +240,7 @@ impl flatbuffers::EndianScalar for Feature { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i64::from_le(self.0); Self(b) @@ -271,7 +276,7 @@ pub const ENUM_MAX_UNION_MODE: i16 = 1; #[allow(non_camel_case_types)] pub const ENUM_VALUES_UNION_MODE: [UnionMode; 2] = [UnionMode::Sparse, UnionMode::Dense]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct UnionMode(pub i16); #[allow(non_upper_case_globals)] @@ -313,7 +318,9 @@ impl flatbuffers::Push for UnionMode { type Output = UnionMode; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -324,6 +331,7 @@ impl flatbuffers::EndianScalar for UnionMode { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn 
from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -360,7 +368,7 @@ pub const ENUM_MAX_PRECISION: i16 = 2; pub const ENUM_VALUES_PRECISION: [Precision; 3] = [Precision::HALF, Precision::SINGLE, Precision::DOUBLE]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct Precision(pub i16); #[allow(non_upper_case_globals)] @@ -404,7 +412,9 @@ impl flatbuffers::Push for Precision { type Output = Precision; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -415,6 +425,7 @@ impl flatbuffers::EndianScalar for Precision { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -450,7 +461,7 @@ pub const ENUM_MAX_DATE_UNIT: i16 = 1; #[allow(non_camel_case_types)] pub const ENUM_VALUES_DATE_UNIT: [DateUnit; 2] = [DateUnit::DAY, DateUnit::MILLISECOND]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct DateUnit(pub i16); #[allow(non_upper_case_globals)] @@ -492,7 +503,9 @@ impl flatbuffers::Push for DateUnit { type Output = DateUnit; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -503,6 +516,7 @@ impl flatbuffers::EndianScalar for DateUnit { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -543,7 +557,7 @@ pub const ENUM_VALUES_TIME_UNIT: [TimeUnit; 4] = [ TimeUnit::NANOSECOND, ]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, 
Hash, Default)] #[repr(transparent)] pub struct TimeUnit(pub i16); #[allow(non_upper_case_globals)] @@ -594,7 +608,9 @@ impl flatbuffers::Push for TimeUnit { type Output = TimeUnit; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -605,6 +621,7 @@ impl flatbuffers::EndianScalar for TimeUnit { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -632,31 +649,37 @@ pub const ENUM_MIN_INTERVAL_UNIT: i16 = 0; since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." )] -pub const ENUM_MAX_INTERVAL_UNIT: i16 = 1; +pub const ENUM_MAX_INTERVAL_UNIT: i16 = 2; #[deprecated( since = "2.0.0", note = "Use associated constants instead. This will no longer be generated in 2021." )] #[allow(non_camel_case_types)] -pub const ENUM_VALUES_INTERVAL_UNIT: [IntervalUnit; 2] = - [IntervalUnit::YEAR_MONTH, IntervalUnit::DAY_TIME]; +pub const ENUM_VALUES_INTERVAL_UNIT: [IntervalUnit; 3] = [ + IntervalUnit::YEAR_MONTH, + IntervalUnit::DAY_TIME, + IntervalUnit::MONTH_DAY_NANO, +]; -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct IntervalUnit(pub i16); #[allow(non_upper_case_globals)] impl IntervalUnit { pub const YEAR_MONTH: Self = Self(0); pub const DAY_TIME: Self = Self(1); + pub const MONTH_DAY_NANO: Self = Self(2); pub const ENUM_MIN: i16 = 0; - pub const ENUM_MAX: i16 = 1; - pub const ENUM_VALUES: &'static [Self] = &[Self::YEAR_MONTH, Self::DAY_TIME]; + pub const ENUM_MAX: i16 = 2; + pub const ENUM_VALUES: &'static [Self] = + &[Self::YEAR_MONTH, Self::DAY_TIME, Self::MONTH_DAY_NANO]; /// Returns the variant's name or "" if unknown. 
pub fn variant_name(self) -> Option<&'static str> { match self { Self::YEAR_MONTH => Some("YEAR_MONTH"), Self::DAY_TIME => Some("DAY_TIME"), + Self::MONTH_DAY_NANO => Some("MONTH_DAY_NANO"), _ => None, } } @@ -683,7 +706,9 @@ impl flatbuffers::Push for IntervalUnit { type Output = IntervalUnit; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -694,6 +719,7 @@ impl flatbuffers::EndianScalar for IntervalUnit { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -755,7 +781,7 @@ pub const ENUM_VALUES_TYPE: [Type; 22] = [ /// ---------------------------------------------------------------------- /// Top-level Type value, enabling extensible type-specific metadata. We can /// add new logical types to Type without breaking backwards compatibility -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct Type(pub u8); #[allow(non_upper_case_globals)] @@ -847,7 +873,6 @@ impl std::fmt::Debug for Type { } } } -pub struct TypeUnionTableOffset {} impl<'a> flatbuffers::Follow<'a> for Type { type Inner = Self; #[inline] @@ -861,7 +886,9 @@ impl flatbuffers::Push for Type { type Output = Type; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -872,6 +899,7 @@ impl flatbuffers::EndianScalar for Type { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = u8::from_le(self.0); Self(b) @@ -890,6 +918,8 @@ impl<'a> flatbuffers::Verifiable for Type { } impl flatbuffers::SimpleToVerifyInSlice for Type {} +pub struct TypeUnionTableOffset {} + #[deprecated( since = "2.0.0", note = 
"Use associated constants instead. This will no longer be generated in 2021." @@ -912,7 +942,7 @@ pub const ENUM_VALUES_DICTIONARY_KIND: [DictionaryKind; 1] = [DictionaryKind::De /// Maintained for forwards compatibility, in the future /// Dictionaries might be explicit maps between integers and values /// allowing for non-contiguous index values -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct DictionaryKind(pub i16); #[allow(non_upper_case_globals)] @@ -952,7 +982,9 @@ impl flatbuffers::Push for DictionaryKind { type Output = DictionaryKind; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -963,6 +995,7 @@ impl flatbuffers::EndianScalar for DictionaryKind { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -1000,7 +1033,7 @@ pub const ENUM_VALUES_ENDIANNESS: [Endianness; 2] = [Endianness::Little, Endiann /// ---------------------------------------------------------------------- /// Endianness of the platform producing the data -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)] #[repr(transparent)] pub struct Endianness(pub i16); #[allow(non_upper_case_globals)] @@ -1042,7 +1075,9 @@ impl flatbuffers::Push for Endianness { type Output = Endianness; #[inline] fn push(&self, dst: &mut [u8], _rest: &[u8]) { - unsafe { flatbuffers::emplace_scalar::(dst, self.0) }; + unsafe { + flatbuffers::emplace_scalar::(dst, self.0); + } } } @@ -1053,6 +1088,7 @@ impl flatbuffers::EndianScalar for Endianness { Self(b) } #[inline] + #[allow(clippy::wrong_self_convention)] fn from_little_endian(self) -> Self { let b = i16::from_le(self.0); Self(b) @@ -1077,6 +1113,11 @@ 
impl flatbuffers::SimpleToVerifyInSlice for Endianness {} #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] pub struct Buffer(pub [u8; 16]); +impl Default for Buffer { + fn default() -> Self { + Self([0; 16]) + } +} impl std::fmt::Debug for Buffer { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { f.debug_struct("Buffer") @@ -1134,7 +1175,7 @@ impl<'a> flatbuffers::Verifiable for Buffer { v.in_buffer::(pos) } } -impl Buffer { +impl<'a> Buffer { #[allow(clippy::too_many_arguments)] pub fn new(offset: i64, length: i64) -> Self { let mut s = Self([0; 16]); @@ -2911,8 +2952,33 @@ pub enum TimestampOffset {} /// leap seconds, as a 64-bit integer. Note that UNIX time does not include /// leap seconds. /// -/// The Timestamp metadata supports both "time zone naive" and "time zone -/// aware" timestamps. Read about the timezone attribute for more detail +/// Date & time libraries often have multiple different data types for temporal +/// data. In order to ease interoperability between different implementations the +/// Arrow project has some recommendations for encoding these types into a Timestamp +/// column. +/// +/// An "instant" represents a single moment in time that has no meaningful time zone +/// or the time zone is unknown. A column of instants can also contain values from +/// multiple time zones. To encode an instant set the timezone string to "UTC". +/// +/// A "zoned date-time" represents a single moment in time that has a meaningful +/// reference time zone. To encode a zoned date-time as a Timestamp set the timezone +/// string to the name of the timezone. There is some ambiguity between an instant +/// and a zoned date-time with the UTC time zone. Both of these are stored the same. +/// Typically, this distinction does not matter. If it does, then an application should +/// use custom metadata or an extension type to distinguish between the two cases. 
+/// +/// An "offset date-time" represents a single moment in time combined with a meaningful +/// offset from UTC. To encode an offset date-time as a Timestamp set the timezone string +/// to the numeric time zone offset string (e.g. "+03:00"). +/// +/// A "local date-time" does not represent a single moment in time. It represents a wall +/// clock time combined with a date. Because of daylight savings time there may be multiple +/// instants that correspond to a single local date-time in any given time zone. A +/// local date-time is often stored as a struct or a Date32/Time64 pair. However, it can +/// also be encoded into a Timestamp column. To do so the value should be the time +/// elapsed from the Unix epoch so that a wall clock in UTC would display the desired time. +/// The timezone string should be set to null or the empty string. pub struct Timestamp<'a> { pub _tab: flatbuffers::Table<'a>, } @@ -2963,11 +3029,9 @@ impl<'a> Timestamp<'a> { /// Whether a timezone string is present indicates different semantics about /// the data: /// - /// * If the time zone is null or equal to an empty string, the data is "time - /// zone naive" and shall be displayed *as is* to the user, not localized - /// to the locale of the user. This data can be though of as UTC but - /// without having "UTC" as the time zone, it is not considered to be - /// localized to any time zone + /// * If the time zone is null or an empty string, the data is a local date-time + /// and does not represent a single moment in time. Instead it represents a wall clock + /// time and care should be taken to avoid interpreting it semantically as an instant. 
/// /// * If the time zone is set to a valid value, values can be displayed as /// "localized" to that time zone, even though the underlying 64-bit diff --git a/src/io/ipc/read/deserialize.rs b/src/io/ipc/read/deserialize.rs index 663ec064280..d32904115cb 100644 --- a/src/io/ipc/read/deserialize.rs +++ b/src/io/ipc/read/deserialize.rs @@ -11,10 +11,10 @@ use std::{ use gen::Schema::MetadataVersion; +use crate::array::*; use crate::datatypes::{DataType, PhysicalType}; use crate::error::Result; use crate::io::ipc::gen::Message::BodyCompression; -use crate::{array::*, types::days_ms}; use super::super::gen; use super::array::*; diff --git a/src/io/ipc/write/serialize.rs b/src/io/ipc/write/serialize.rs index 220f7b28fc3..4c4f7b1555b 100644 --- a/src/io/ipc/write/serialize.rs +++ b/src/io/ipc/write/serialize.rs @@ -22,7 +22,7 @@ use crate::{ endianess::is_native_little_endian, io::ipc::gen::Message, trusted_len::TrustedLen, - types::{days_ms, NativeType}, + types::NativeType, }; use crate::io::ipc::gen::Schema; diff --git a/src/io/json_integration/read.rs b/src/io/json_integration/read.rs index cba5271985c..6a6cdfd7de0 100644 --- a/src/io/json_integration/read.rs +++ b/src/io/json_integration/read.rs @@ -27,7 +27,7 @@ use crate::{ datatypes::{DataType, PhysicalType, PrimitiveType, Schema}, error::{ArrowError, Result}, record_batch::RecordBatch, - types::{days_ms, NativeType}, + types::{days_ms, months_days_ns, NativeType}, }; use super::{ArrowJsonBatch, ArrowJsonColumn, ArrowJsonDictionaryBatch}; @@ -58,7 +58,7 @@ fn to_offsets(offsets: Option<&Vec>) -> Buffer { .collect() } -fn to_interval(value: &Value) -> days_ms { +fn to_days_ms(value: &Value) -> days_ms { if let Value::Object(v) = value { let days = v.get("days").unwrap(); let milliseconds = v.get("milliseconds").unwrap(); @@ -75,7 +75,26 @@ fn to_interval(value: &Value) -> days_ms { } } -fn to_primitive_interval( +fn to_months_days_ns(value: &Value) -> months_days_ns { + if let Value::Object(v) = value { + let 
months = v.get("months").unwrap(); + let days = v.get("days").unwrap(); + let nanoseconds = v.get("nanoseconds").unwrap(); + match (months, days, nanoseconds) { + (Value::Number(months), Value::Number(days), Value::Number(nanoseconds)) => { + let months = months.as_i64().unwrap() as i32; + let days = days.as_i64().unwrap() as i32; + let nanoseconds = nanoseconds.as_i64().unwrap(); + months_days_ns::new(months, days, nanoseconds) + } + (_, _, _) => panic!(), + } + } else { + panic!() + } +} + +fn to_primitive_days_ms( json_col: &ArrowJsonColumn, data_type: DataType, ) -> PrimitiveArray { @@ -85,11 +104,26 @@ fn to_primitive_interval( .as_ref() .unwrap() .iter() - .map(to_interval) + .map(to_days_ms) .collect(); PrimitiveArray::::from_data(data_type, values, validity) } +fn to_primitive_months_days_ns( + json_col: &ArrowJsonColumn, + data_type: DataType, +) -> PrimitiveArray { + let validity = to_validity(&json_col.validity); + let values = json_col + .data + .as_ref() + .unwrap() + .iter() + .map(to_months_days_ns) + .collect(); + PrimitiveArray::::from_data(data_type, values, validity) +} + fn to_decimal(json_col: &ArrowJsonColumn, data_type: DataType) -> PrimitiveArray { let validity = to_validity(&json_col.validity); let values = json_col @@ -246,8 +280,9 @@ pub fn to_array( Primitive(PrimitiveType::Int32) => Ok(Arc::new(to_primitive::(json_col, data_type))), Primitive(PrimitiveType::Int64) => Ok(Arc::new(to_primitive::(json_col, data_type))), Primitive(PrimitiveType::Int128) => Ok(Arc::new(to_decimal(json_col, data_type))), - Primitive(PrimitiveType::DaysMs) => { - Ok(Arc::new(to_primitive_interval(json_col, data_type))) + Primitive(PrimitiveType::DaysMs) => Ok(Arc::new(to_primitive_days_ms(json_col, data_type))), + Primitive(PrimitiveType::MonthDayNano) => { + Ok(Arc::new(to_primitive_months_days_ns(json_col, data_type))) } Primitive(PrimitiveType::UInt8) => Ok(Arc::new(to_primitive::(json_col, data_type))), Primitive(PrimitiveType::UInt16) => 
Ok(Arc::new(to_primitive::(json_col, data_type))), diff --git a/src/io/json_integration/schema.rs b/src/io/json_integration/schema.rs index 839e27550c6..09dc46d45ad 100644 --- a/src/io/json_integration/schema.rs +++ b/src/io/json_integration/schema.rs @@ -104,6 +104,7 @@ impl ToJson for DataType { DataType::Interval(unit) => json!({"name": "interval", "unit": match unit { IntervalUnit::YearMonth => "YEAR_MONTH", IntervalUnit::DayTime => "DAY_TIME", + IntervalUnit::MonthDayNano => "MONTH_DAY_NANO", }}), DataType::Duration(unit) => json!({"name": "duration", "unit": match unit { TimeUnit::Second => "SECOND", @@ -328,6 +329,7 @@ fn to_data_type(item: &Value, mut children: Vec) -> Result { "interval" => match item.get("unit") { Some(p) if p == "DAY_TIME" => DataType::Interval(IntervalUnit::DayTime), Some(p) if p == "YEAR_MONTH" => DataType::Interval(IntervalUnit::YearMonth), + Some(p) if p == "MONTH_DAY_NANO" => DataType::Interval(IntervalUnit::MonthDayNano), _ => { return Err(ArrowError::Schema( "interval unit missing or invalid".to_string(), diff --git a/src/scalar/equal.rs b/src/scalar/equal.rs index 503f9fc6856..3ce92132305 100644 --- a/src/scalar/equal.rs +++ b/src/scalar/equal.rs @@ -1,4 +1,5 @@ use super::*; +use crate::types::days_ms; impl PartialEq for dyn Scalar { fn eq(&self, other: &Self) -> bool { diff --git a/src/scalar/mod.rs b/src/scalar/mod.rs index da3f2241af3..ad5e1acf1c3 100644 --- a/src/scalar/mod.rs +++ b/src/scalar/mod.rs @@ -2,7 +2,7 @@ //! the zero-dimension of an [`crate::array::Array`]. 
use std::any::Any; -use crate::{array::*, datatypes::*, types::days_ms}; +use crate::{array::*, datatypes::*}; mod equal; mod primitive; diff --git a/src/types/mod.rs b/src/types/mod.rs index 61125a6282d..8e120ee274a 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -125,6 +125,10 @@ natural_type!(i64, DataType::Int64); natural_type!(f32, DataType::Float32); natural_type!(f64, DataType::Float64); natural_type!(days_ms, DataType::Interval(IntervalUnit::DayTime)); +natural_type!( + months_days_ns, + DataType::Interval(IntervalUnit::MonthDayNano) +); natural_type!(i128, DataType::Decimal(32, 32)); // users should set the decimal when creating an array create_relation!(u8, PhysicalType::Primitive(PrimitiveType::UInt8)); @@ -221,3 +225,108 @@ impl days_ms { self.0[1] } } + +/// The in-memory representation of the MonthDayNano variant of the "Interval" logical type. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)] +#[allow(non_camel_case_types)] +#[repr(C)] +pub struct months_days_ns(i32, i32, i64); + +impl std::fmt::Display for months_days_ns { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}m {}d {}ns", self.months(), self.days(), self.ns()) + } +} + +unsafe impl NativeType for months_days_ns { + type Bytes = [u8; 16]; + #[inline] + fn to_le_bytes(&self) -> Self::Bytes { + let months = self.months().to_le_bytes(); + let days = self.days().to_le_bytes(); + let ns = self.ns().to_le_bytes(); + let mut result = [0; 16]; + result[0] = months[0]; + result[1] = months[1]; + result[2] = months[2]; + result[3] = months[3]; + result[4] = days[0]; + result[5] = days[1]; + result[6] = days[2]; + result[7] = days[3]; + (0..8).for_each(|i| { + result[8 + i] = ns[i]; + }); + result + } + + #[inline] + fn to_be_bytes(&self) -> Self::Bytes { + let months = self.months().to_be_bytes(); + let days = self.days().to_be_bytes(); + let ns = self.ns().to_be_bytes(); + let mut result = [0; 16]; + result[0] = months[0]; + result[1] = 
months[1]; + result[2] = months[2]; + result[3] = months[3]; + result[4] = days[0]; + result[5] = days[1]; + result[6] = days[2]; + result[7] = days[3]; + (0..8).for_each(|i| { + result[8 + i] = ns[i]; + }); + result + } + + #[inline] + fn from_be_bytes(bytes: Self::Bytes) -> Self { + let mut months = [0; 4]; + months[0] = bytes[0]; + months[1] = bytes[1]; + months[2] = bytes[2]; + months[3] = bytes[3]; + let mut days = [0; 4]; + days[0] = bytes[4]; + days[1] = bytes[5]; + days[2] = bytes[6]; + days[3] = bytes[7]; + let mut ns = [0; 8]; + (0..8).for_each(|i| { + ns[i] = bytes[8 + i]; + }); + Self( + i32::from_be_bytes(months), + i32::from_be_bytes(days), + i64::from_be_bytes(ns), + ) + } +} + +create_relation!( + months_days_ns, + PhysicalType::Primitive(PrimitiveType::MonthDayNano) +); + +impl months_days_ns { + #[inline] + pub fn new(months: i32, days: i32, nanoseconds: i64) -> Self { + Self(months, days, nanoseconds) + } + + #[inline] + pub fn months(&self) -> i32 { + self.0 + } + + #[inline] + pub fn days(&self) -> i32 { + self.1 + } + + #[inline] + pub fn ns(&self) -> i64 { + self.2 + } +} diff --git a/tests/it/array/primitive/mod.rs b/tests/it/array/primitive/mod.rs index 57173a6415e..0dc9cab6ffc 100644 --- a/tests/it/array/primitive/mod.rs +++ b/tests/it/array/primitive/mod.rs @@ -1,6 +1,12 @@ -use arrow2::{array::*, bitmap::Bitmap, datatypes::*, types::days_ms}; use std::iter::FromIterator; +use arrow2::{ + array::*, + bitmap::Bitmap, + datatypes::*, + types::{days_ms, months_days_ns}, +}; + mod mutable; #[test] @@ -220,3 +226,33 @@ fn display_interval_days_ms() { let array = DaysMsArray::from(&[Some(days_ms::new(1, 1)), None, Some(days_ms::new(2, 2))]); assert_eq!(format!("{}", array), "Interval(DayTime)[1d1ms, , 2d2ms]"); } + +#[test] +fn display_months_days_ns() { + let data = &[ + Some(months_days_ns::new(1, 1, 2)), + None, + Some(months_days_ns::new(2, 3, 3)), + ]; + + let array = MonthsDaysNsArray::from(&data); + + assert_eq!( + format!("{}", array), 
+ "Interval(MonthDayNano)[1m1d2ns, , 2m3d3ns]" + ); +} + +#[test] +fn months_days_ns() { + let data = &[ + months_days_ns::new(1, 1, 2), + months_days_ns::new(1, 1, 3), + months_days_ns::new(2, 3, 3), + ]; + + let array = MonthsDaysNsArray::from_slice(&data); + + let a = array.values().as_slice(); + assert_eq!(a, data.as_ref()); +}