diff --git a/parquet_integration/write_parquet.py b/parquet_integration/write_parquet.py index eb6a27e8c73..2cd0833882c 100644 --- a/parquet_integration/write_parquet.py +++ b/parquet_integration/write_parquet.py @@ -28,6 +28,7 @@ def case_basic_nullable(size=1): pa.field("decimal_9", pa.decimal128(9, 0)), pa.field("decimal_18", pa.decimal128(18, 0)), pa.field("decimal_26", pa.decimal128(26, 0)), + pa.field("timestamp_us", pa.timestamp("us")), ] schema = pa.schema(fields) @@ -43,6 +44,7 @@ def case_basic_nullable(size=1): "decimal_9": decimal * size, "decimal_18": decimal * size, "decimal_26": decimal * size, + "timestamp_us": int64 * size, }, schema, f"basic_nullable_{size*10}.parquet", diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index 27d4e446b4f..e3638304f4b 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -144,13 +144,41 @@ fn dict_read< ) } Timestamp(TimeUnit::Nanosecond, None) => match metadata.descriptor().type_() { - ParquetType::PrimitiveType { physical_type, .. } => match physical_type { - PhysicalType::Int96 => primitive::iter_to_dict_array::( + ParquetType::PrimitiveType { + physical_type, + logical_type, + .. + } => match (physical_type, logical_type) { + (PhysicalType::Int96, _) => primitive::iter_to_dict_array::( iter, metadata, DataType::Timestamp(TimeUnit::Nanosecond, None), int96_to_i64_ns, ), + (_, Some(LogicalType::TIMESTAMP(TimestampType { unit, .. }))) => match unit { + ParquetTimeUnit::MILLIS(_) => { + primitive::iter_to_dict_array::( + iter, + metadata, + data_type, + |x: i64| x * 1_000_000, + ) + } + ParquetTimeUnit::MICROS(_) => { + primitive::iter_to_dict_array::( + iter, + metadata, + data_type, + |x: i64| x * 1_000, + ) + } + ParquetTimeUnit::NANOS(_) => primitive::iter_to_dict_array::( + iter, + metadata, + data_type, + |x: i64| x, + ), + }, _ => primitive::iter_to_dict_array::( iter, metadata, @@ -243,14 +271,33 @@ fn page_iter_to_array match metadata.descriptor().type_() { - ParquetType::PrimitiveType { physical_type, .. } => match physical_type { - PhysicalType::Int96 => primitive::iter_to_array( + ParquetType::PrimitiveType { + physical_type, + logical_type, + .. + } => match (physical_type, logical_type) { + (PhysicalType::Int96, _) => primitive::iter_to_array( iter, metadata, DataType::Timestamp(TimeUnit::Nanosecond, None), nested, int96_to_i64_ns, ), + (_, Some(LogicalType::TIMESTAMP(TimestampType { unit, .. }))) => match unit { + ParquetTimeUnit::MILLIS(_) => { + primitive::iter_to_array(iter, metadata, data_type, nested, |x: i64| { + x * 1_000_000 + }) + } + ParquetTimeUnit::MICROS(_) => { + primitive::iter_to_array(iter, metadata, data_type, nested, |x: i64| { + x * 1_000 + }) + } + ParquetTimeUnit::NANOS(_) => { + primitive::iter_to_array(iter, metadata, data_type, nested, |x: i64| x) + } + }, _ => primitive::iter_to_array(iter, metadata, data_type, nested, |x: i64| x), }, _ => unreachable!(), diff --git a/src/io/parquet/read/schema/metadata.rs b/src/io/parquet/read/schema/metadata.rs index 02ebbae9925..c7cd2370305 100644 --- a/src/io/parquet/read/schema/metadata.rs +++ b/src/io/parquet/read/schema/metadata.rs @@ -127,7 +127,8 @@ mod tests { "string_large", "decimal_9", "decimal_18", - "decimal_26" + "decimal_26", + "timestamp_us" ] ); Ok(()) diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index dec3adc7209..9924de91da8 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -321,6 +321,10 @@ pub fn pyarrow_nullable(column: usize) -> Box { .collect::>(); Box::new(PrimitiveArray::::from(values).to(DataType::Decimal(26, 0))) } + 10 => Box::new( + PrimitiveArray::::from(i64_values) + .to(DataType::Timestamp(TimeUnit::Microsecond, None)), + ), _ => unreachable!(), } } @@ -392,6 +396,13 @@ pub fn pyarrow_nullable_statistics(column: usize) -> Option> max_value: Some(9i128), data_type: DataType::Decimal(26, 0), }), + 10 => Box::new(PrimitiveStatistics:: { + data_type: DataType::Timestamp(TimeUnit::Microsecond, None), + distinct_count: None, + null_count: Some(3), + min_value: Some(0), + max_value: Some(9), + }), _ => unreachable!(), }) } diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs index 04a2e1a9cef..800770248ec 100644 --- a/tests/it/io/parquet/read.rs +++ b/tests/it/io/parquet/read.rs @@ -338,6 +338,11 @@ fn v2_decimal_26_nullable() -> Result<()> { test_pyarrow_integration(9, 2, "basic", false, false, None) } +#[test] +fn v1_timestamp_us_nullable() -> Result<()> { + test_pyarrow_integration(10, 1, "basic", false, false, None) +} + #[test] fn v2_decimal_26_required() -> Result<()> { test_pyarrow_integration(8, 2, "basic", false, true, None)