From 98f6e45d333fb964e286327d48e29242186464be Mon Sep 17 00:00:00 2001 From: Jorge Leitao Date: Wed, 19 Jan 2022 16:12:05 +0100 Subject: [PATCH] Added support to read and write float dict from parquet (#778) --- src/io/parquet/read/mod.rs | 6 ++++++ src/io/parquet/write/dictionary.rs | 2 ++ tests/it/io/parquet/mod.rs | 12 +++++++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/io/parquet/read/mod.rs b/src/io/parquet/read/mod.rs index e4579cdbfd7..76eec05e0e8 100644 --- a/src/io/parquet/read/mod.rs +++ b/src/io/parquet/read/mod.rs @@ -190,6 +190,12 @@ fn dict_read< Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => { primitive::iter_to_dict_array::(iter, metadata, data_type, |x: i64| x) } + Float32 => { + primitive::iter_to_dict_array::(iter, metadata, data_type, |x: f32| x) + } + Float64 => { + primitive::iter_to_dict_array::(iter, metadata, data_type, |x: f64| x) + } Utf8 => binary::iter_to_dict_array::(iter, metadata, data_type), LargeUtf8 => binary::iter_to_dict_array::(iter, metadata, data_type), other => Err(ArrowError::NotYetImplemented(format!( diff --git a/src/io/parquet/write/dictionary.rs b/src/io/parquet/write/dictionary.rs index cad4ea78abf..52b6a407a50 100644 --- a/src/io/parquet/write/dictionary.rs +++ b/src/io/parquet/write/dictionary.rs @@ -142,6 +142,8 @@ pub fn array_to_pages( DataType::UInt16 => dyn_prim!(u16, i32, array, options), DataType::UInt32 => dyn_prim!(u32, i32, array, options), DataType::UInt64 => dyn_prim!(i64, i64, array, options), + DataType::Float32 => dyn_prim!(f32, f32, array, options), + DataType::Float64 => dyn_prim!(f64, f64, array, options), DataType::Utf8 => { let values = array.values().as_any().downcast_ref().unwrap(); diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs index 8654f7ced12..65a79319a43 100644 --- a/tests/it/io/parquet/mod.rs +++ b/tests/it/io/parquet/mod.rs @@ -716,11 +716,21 @@ fn arrow_type() -> Result<()> { let dt1 = DataType::Duration(TimeUnit::Second); let array = PrimitiveArray::::from([Some(1), None, Some(2)]).to(dt1.clone()); let array2 = Utf8Array::::from([Some("a"), None, Some("bb")]); + + let indices = PrimitiveArray::from_values((0..3u64).map(|x| x % 2)); + let values = PrimitiveArray::from_slice([1.0f32, 3.0]); + let array3 = DictionaryArray::from_data(indices, std::sync::Arc::new(values)); + let schema = Schema::from(vec![ Field::new("a1", dt1, true), Field::new("a2", array2.data_type().clone(), true), + Field::new("a3", array3.data_type().clone(), true), ]); - let batch = Chunk::try_new(vec![Arc::new(array) as Arc, Arc::new(array2)])?; + let batch = Chunk::try_new(vec![ + Arc::new(array) as Arc, + Arc::new(array2), + Arc::new(array3), + ])?; let r = integration_write(&schema, &[batch.clone()])?;