Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
fix double nestedness
Browse files: browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Dec 11, 2022
1 parent b5fe680 commit a22cce5
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 29 deletions.
20 changes: 7 additions & 13 deletions src/io/parquet/write/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,23 +187,17 @@ fn slice_parquet_array<'a>(
}

// NOTE(review): this is a rendered diff whose +/- markers were lost, so the removed and
// added lines of this function are interleaved. The `[removed]` / `[added]` tags below
// follow the "7 additions & 13 deletions" header of this file's hunk; untagged lines are
// unchanged context.
/// Returns the length used to slice the array when writing.
///
/// After this commit: walks `nested` from its last entry to its first and returns
/// `offsets.len() - 1` of the first `List` / `LargeList` entry encountered; if `nested`
/// contains no list entry, returns `array.len()`.
///
/// Before this commit: added up `offsets.len() - 1` over every `List` / `LargeList`
/// entry and fell back to `array.len()` only when that sum was 0.
fn get_max_length(array: &dyn Array, nested: &[Nested]) -> usize {
// [removed: next 2 lines] accumulator and forward iteration of the summing version
let mut sum = 0;
for nested in nested.iter() {
// [added: next 4 lines] explanatory comment and reversed iteration
// get the length that should be sliced.
// that is the inner nested structure that
// dictates how often the primitive should be repeated
for nested in nested.iter().rev() {
match nested {
// [removed: next 6 lines] every list level contributed to the total
Nested::LargeList(l_nested) => {
sum += l_nested.offsets.len() - 1;
}
Nested::List(l_nested) => {
sum += l_nested.offsets.len() - 1;
}
// [added: next 2 lines] return as soon as a list level is found
Nested::LargeList(l_nested) => return l_nested.offsets.len() - 1,
Nested::List(l_nested) => return l_nested.offsets.len() - 1,
_ => {}
}
}
// [removed: next 5 lines] use the sum unless no list level contributed to it
if sum > 0 {
sum
} else {
array.len()
}
// [added: next 1 line] no list level found, so use the array's own length
array.len()
}

/// Returns an iterator of [`Page`].
Expand Down
32 changes: 16 additions & 16 deletions src/io/parquet/write/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,48 +150,48 @@ fn to_nested_recursive<'a>(
}

// NOTE(review): rendered diff with the +/- markers lost; the only change in this function
// is renaming the local `leafs` to `leaves`. Untagged lines are unchanged context.
/// Collects the leaf arrays of `array` into a `Vec` by delegating to
/// `to_leaves_recursive`.
fn to_leaves(array: &dyn Array) -> Vec<&dyn Array> {
// [removed: next 3 lines] old spelling `leafs`
let mut leafs = vec![];
to_leaves_recursive(array, &mut leafs);
leafs
// [added: next 3 lines] new spelling `leaves`
let mut leaves = vec![];
to_leaves_recursive(array, &mut leaves);
leaves
}

// NOTE(review): rendered diff with the +/- markers lost; every changed line is the
// `leafs` -> `leaves` rename, shown as an old line immediately followed by its
// replacement. Untagged lines are unchanged context.
/// Appends the leaf arrays reachable from `array` to the output vector.
///
/// Dispatches on the array's physical type:
/// - `Struct`: recurses into each array returned by `values()`.
/// - `List` / `LargeList`: recurses into the list's `values()` array.
/// - The listed non-nested types (`Null`, `Boolean`, `Primitive`, binary and UTF-8
///   variants, `Dictionary`): the array itself is pushed as a leaf.
/// - Any other physical type hits `todo!` and panics.
///
/// The `downcast_ref(..).unwrap()` calls panic if the concrete array type does not match
/// the physical type (presumably an invariant of the array types; confirm).
// [removed: next 1 line] old signature
fn to_leaves_recursive<'a>(array: &'a dyn Array, leafs: &mut Vec<&'a dyn Array>) {
// [added: next 1 line] new signature
fn to_leaves_recursive<'a>(array: &'a dyn Array, leaves: &mut Vec<&'a dyn Array>) {
use PhysicalType::*;
match array.data_type().to_physical_type() {
Struct => {
let array = array.as_any().downcast_ref::<StructArray>().unwrap();
array
.values()
.iter()
// [removed: next 1 line]
.for_each(|a| to_leaves_recursive(a.as_ref(), leafs));
// [added: next 1 line]
.for_each(|a| to_leaves_recursive(a.as_ref(), leaves));
}
List => {
let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
// [removed: next 1 line]
to_leaves_recursive(array.values().as_ref(), leafs);
// [added: next 1 line]
to_leaves_recursive(array.values().as_ref(), leaves);
}
LargeList => {
let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
// [removed: next 1 line]
to_leaves_recursive(array.values().as_ref(), leafs);
// [added: next 1 line]
to_leaves_recursive(array.values().as_ref(), leaves);
}
Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
// [removed: next 1 line]
| LargeUtf8 | Dictionary(_) => leafs.push(array),
// [added: next 1 line]
| LargeUtf8 | Dictionary(_) => leaves.push(array),
other => todo!("Writing {:?} to parquet not yet implemented", other),
}
}

// NOTE(review): rendered diff with the +/- markers lost; this commit renames the function
// from `to_parquet_leafs` to `to_parquet_leaves` (and its local accordingly), so the old
// and new versions appear back to back, sharing the closing brace.
/// Flattens a `ParquetType` into the list of its primitive (leaf) types by delegating
/// to the recursive helper.
// [removed: next 4 lines] old name and `leafs` spelling
fn to_parquet_leafs(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
let mut leafs = vec![];
to_parquet_leafs_recursive(type_, &mut leafs);
leafs
// [added: next 4 lines] new name and `leaves` spelling
fn to_parquet_leaves(type_: ParquetType) -> Vec<ParquetPrimitiveType> {
let mut leaves = vec![];
to_parquet_leaves_recursive(type_, &mut leaves);
leaves
}

// NOTE(review): rendered diff with the +/- markers lost; each changed line is the
// `leafs` -> `leaves` rename, shown as an old line immediately followed by its
// replacement. Untagged lines are unchanged context.
/// Appends the primitive types contained in `type_` to the output vector.
///
/// A `PrimitiveType` is pushed directly; a `GroupType` is consumed and each of its
/// `fields` is visited recursively, in order.
// [removed: next 1 line] old signature
fn to_parquet_leafs_recursive(type_: ParquetType, leafs: &mut Vec<ParquetPrimitiveType>) {
// [added: next 1 line] new signature
fn to_parquet_leaves_recursive(type_: ParquetType, leaves: &mut Vec<ParquetPrimitiveType>) {
match type_ {
// [removed: next 1 line]
ParquetType::PrimitiveType(primitive) => leafs.push(primitive),
// [added: next 1 line]
ParquetType::PrimitiveType(primitive) => leaves.push(primitive),
ParquetType::GroupType { fields, .. } => {
fields
.into_iter()
// [removed: next 1 line]
.for_each(|type_| to_parquet_leafs_recursive(type_, leafs));
// [added: next 1 line]
.for_each(|type_| to_parquet_leaves_recursive(type_, leaves));
}
}
}
Expand All @@ -206,7 +206,7 @@ pub fn array_to_columns<A: AsRef<dyn Array> + Send + Sync>(
let array = array.as_ref();
let nested = to_nested(array, &type_)?;

let types = to_parquet_leafs(type_);
let types = to_parquet_leaves(type_);

let values = to_leaves(array);

Expand Down

0 comments on commit a22cce5

Please sign in to comment.