Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Fixed error in writing FixedSizeListArray (#941)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao authored Apr 21, 2022
1 parent ddede6c commit 76c1ff9
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 4 deletions.
1 change: 1 addition & 0 deletions src/io/parquet/read/deserialize/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ impl<'a> ValuesDictionary<'a> {
}
}

#[derive(Debug)]
enum State<'a> {
Optional(OptionalPageValidity<'a>, BinaryIter<'a>),
Required(Required<'a>),
Expand Down
5 changes: 5 additions & 0 deletions src/io/parquet/read/deserialize/fixed_size_binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use super::super::utils::{
use super::super::DataPages;
use super::utils::FixedSizeBinary;

#[derive(Debug)]
struct Optional<'a> {
values: std::slice::ChunksExact<'a, u8>,
validity: OptionalPageValidity<'a>,
Expand All @@ -37,6 +38,7 @@ impl<'a> Optional<'a> {
}
}

/// Page state that carries only a values iterator (no validity field).
///
/// NOTE(review): presumably used for pages of a required (non-nullable)
/// fixed-size binary column — confirm against the constructor, which is not
/// visible in this hunk.
#[derive(Debug)]
struct Required<'a> {
/// Iterator over exact-size `u8` chunks borrowed for `'a`; the chunk width is
/// chosen where the iterator is created (not shown here).
pub values: std::slice::ChunksExact<'a, u8>,
}
Expand Down Expand Up @@ -77,6 +79,7 @@ impl<'a> FilteredRequired<'a> {
}
}

#[derive(Debug)]
struct RequiredDictionary<'a> {
pub values: hybrid_rle::HybridRleDecoder<'a>,
dict: &'a FixedLenByteArrayPageDict,
Expand All @@ -95,6 +98,7 @@ impl<'a> RequiredDictionary<'a> {
}
}

#[derive(Debug)]
struct OptionalDictionary<'a> {
values: hybrid_rle::HybridRleDecoder<'a>,
validity: OptionalPageValidity<'a>,
Expand All @@ -113,6 +117,7 @@ impl<'a> OptionalDictionary<'a> {
}
}

#[derive(Debug)]
enum State<'a> {
Optional(Optional<'a>),
Required(Required<'a>),
Expand Down
50 changes: 48 additions & 2 deletions src/io/parquet/read/deserialize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ mod struct_;
mod utils;

use crate::{
array::{Array, BinaryArray, ListArray, Utf8Array},
array::{Array, BinaryArray, FixedSizeListArray, ListArray, Utf8Array},
datatypes::{DataType, Field},
error::{ArrowError, Result},
};
Expand Down Expand Up @@ -66,6 +66,15 @@ fn create_list(
validity.and_then(|x| x.into()),
))
}
DataType::FixedSizeList(_, _) => {
// Pop the last entry of `nested.nested` and keep only its validity; the
// first element of the pair is discarded (presumably offsets, which a
// fixed-size list does not need — confirm). Panics if `nested.nested` is empty.
let (_, validity) = nested.nested.pop().unwrap().inner();

// Build the array from the declared data type, the child values and the
// optional validity converted via `into()`.
Arc::new(FixedSizeListArray::new(
data_type,
values,
validity.and_then(|x| x.into()),
))
}
_ => {
return Err(ArrowError::NotYetImplemented(format!(
"Read nested datatype {:?}",
Expand Down Expand Up @@ -103,6 +112,16 @@ where
types.pop();
boolean::iter_to_arrays_nested(columns.pop().unwrap(), init.pop().unwrap(), chunk_size)
}
Int8 => {
// Drop the last entry of `types`, then build the nested iterator from the
// last column and the last init entry (both `unwrap`s panic if empty).
types.pop();
primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init.pop().unwrap(),
field.data_type().clone(),
chunk_size,
// Values arrive as `i32` and are narrowed with an `as` cast, which
// truncates silently when the value does not fit in `i8`.
|x: i32| x as i8,
)
}
Int16 => {
types.pop();
primitive::iter_to_arrays_nested(
Expand All @@ -113,6 +132,16 @@ where
|x: i32| x as i16,
)
}
Int32 => {
// Drop the last entry of `types`, then build the nested iterator from the
// last column and the last init entry (both `unwrap`s panic if empty).
types.pop();
primitive::iter_to_arrays_nested(
columns.pop().unwrap(),
init.pop().unwrap(),
field.data_type().clone(),
chunk_size,
// Identity mapping: values arrive as `i32` and are stored unchanged.
|x: i32| x,
)
}
Int64 => {
types.pop();
primitive::iter_to_arrays_nested(
Expand Down Expand Up @@ -193,7 +222,24 @@ where
let columns = columns.into_iter().rev().collect();
Box::new(struct_::StructIterator::new(columns, fields.clone()))
}
_ => todo!(),
FixedSizeList(inner, _) => {
    // Build the iterator for the inner field from the last column via
    // `columns_to_iter_recursive`; errors are propagated with `?`.
    let iter = columns_to_iter_recursive(
        vec![columns.pop().unwrap()],
        types,
        inner.as_ref().clone(),
        init,
        chunk_size,
    )?;
    // Wrap every chunk of inner values into an array of `field`'s data type.
    // The leftover debug `println!` calls were removed: library code must not
    // write the nested state and array contents to stdout on every chunk.
    let iter = iter.map(move |x| {
        let (mut nested, array) = x?;
        let array = create_list(field.data_type().clone(), &mut nested, array)?;
        Ok((nested, array))
    });
    Box::new(iter) as _
}
other => todo!("{other:?}"),
})
}

Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ pub(super) fn extend_from_decoder<'a, T: Default, P: Pushable<T>, I: Iterator<It
}

/// The state of a partially deserialized page
pub(super) trait PageState<'a> {
pub(super) trait PageState<'a>: std::fmt::Debug {
fn len(&self) -> usize;
}

Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ fn nested_array_to_page(
}
DataType::FixedSizeList(_, size) => {
let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
let offsets = (0..array.len())
let offsets = (0..=array.len())
.map(|x| (*size * x) as i32)
.collect::<Vec<_>>();
list_array_to_page(
Expand Down

0 comments on commit 76c1ff9

Please sign in to comment.