Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added support for reading and writing float-valued dictionary arrays from/to parquet #778

Merged
1 commit merged on Jan 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@ fn dict_read<
Int64 | Date64 | Time64(_) | Duration(_) | Timestamp(_, _) => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: i64| x)
}
Float32 => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: f32| x)
}
Float64 => {
primitive::iter_to_dict_array::<K, _, _, _, _, _>(iter, metadata, data_type, |x: f64| x)
}
Utf8 => binary::iter_to_dict_array::<K, i32, _, _>(iter, metadata, data_type),
LargeUtf8 => binary::iter_to_dict_array::<K, i64, _, _>(iter, metadata, data_type),
other => Err(ArrowError::NotYetImplemented(format!(
Expand Down
2 changes: 2 additions & 0 deletions src/io/parquet/write/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ pub fn array_to_pages<K: DictionaryKey>(
DataType::UInt16 => dyn_prim!(u16, i32, array, options),
DataType::UInt32 => dyn_prim!(u32, i32, array, options),
DataType::UInt64 => dyn_prim!(i64, i64, array, options),
DataType::Float32 => dyn_prim!(f32, f32, array, options),
DataType::Float64 => dyn_prim!(f64, f64, array, options),
DataType::Utf8 => {
let values = array.values().as_any().downcast_ref().unwrap();

Expand Down
12 changes: 11 additions & 1 deletion tests/it/io/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -716,11 +716,21 @@ fn arrow_type() -> Result<()> {
let dt1 = DataType::Duration(TimeUnit::Second);
let array = PrimitiveArray::<i64>::from([Some(1), None, Some(2)]).to(dt1.clone());
let array2 = Utf8Array::<i64>::from([Some("a"), None, Some("bb")]);

let indices = PrimitiveArray::from_values((0..3u64).map(|x| x % 2));
let values = PrimitiveArray::from_slice([1.0f32, 3.0]);
let array3 = DictionaryArray::from_data(indices, std::sync::Arc::new(values));

let schema = Schema::from(vec![
Field::new("a1", dt1, true),
Field::new("a2", array2.data_type().clone(), true),
Field::new("a3", array3.data_type().clone(), true),
]);
let batch = Chunk::try_new(vec![Arc::new(array) as Arc<dyn Array>, Arc::new(array2)])?;
let batch = Chunk::try_new(vec![
Arc::new(array) as Arc<dyn Array>,
Arc::new(array2),
Arc::new(array3),
])?;

let r = integration_write(&schema, &[batch.clone()])?;

Expand Down