From 9ad04ca48f025faca75647187c010fbc574f909f Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Wed, 22 Jun 2022 04:00:57 +0000 Subject: [PATCH] Simpler --- src/io/parquet/read/deserialize/binary/mod.rs | 37 +------------------ .../parquet/read/deserialize/binary/nested.rs | 29 +++++++++++++-- .../parquet/read/deserialize/boolean/mod.rs | 25 +------------ .../read/deserialize/boolean/nested.rs | 22 +++++++++-- .../parquet/read/deserialize/primitive/mod.rs | 33 +---------------- .../read/deserialize/primitive/nested.rs | 23 ++++++++++++ 6 files changed, 73 insertions(+), 96 deletions(-) diff --git a/src/io/parquet/read/deserialize/binary/mod.rs b/src/io/parquet/read/deserialize/binary/mod.rs index 6eee318772d..613b95e9ca9 100644 --- a/src/io/parquet/read/deserialize/binary/mod.rs +++ b/src/io/parquet/read/deserialize/binary/mod.rs @@ -3,40 +3,7 @@ mod dictionary; mod nested; mod utils; -use crate::{ - array::{Array, Offset}, - datatypes::DataType, -}; - -use self::basic::TraitBinaryArray; -use self::nested::ArrayIterator; -use super::{ - nested_utils::{InitNested, NestedArrayIter}, - DataPages, -}; - +pub use self::nested::NestedIter; pub use basic::Iter; pub use dictionary::DictIter; - -/// Converts [`DataPages`] to an [`Iterator`] of [`Array`] -pub fn iter_to_arrays_nested<'a, O, A, I>( - iter: I, - init: Vec, - data_type: DataType, - chunk_size: Option, -) -> NestedArrayIter<'a> -where - I: 'a + DataPages, - A: TraitBinaryArray, - O: Offset, -{ - Box::new( - ArrayIterator::::new(iter, init, data_type, chunk_size).map(|x| { - x.map(|(mut nested, array)| { - let _ = nested.nested.pop().unwrap(); // the primitive - let values = Box::new(array) as Box; - (nested, values) - }) - }), - ) -} +pub use nested::iter_to_arrays_nested; diff --git a/src/io/parquet/read/deserialize/binary/nested.rs b/src/io/parquet/read/deserialize/binary/nested.rs index f7b16fe03ef..b5703812940 100644 --- a/src/io/parquet/read/deserialize/binary/nested.rs +++ b/src/io/parquet/read/deserialize/binary/nested.rs @@ -6,6 +6,7 @@ use parquet2::{ schema::Repetition, }; +use crate::array::Array; use crate::{ array::Offset, bitmap::MutableBitmap, datatypes::DataType, error::Result, io::parquet::read::DataPages, @@ -141,7 +142,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { } } -pub struct ArrayIterator, I: DataPages> { +pub struct NestedIter, I: DataPages> { iter: I, data_type: DataType, init: Vec, @@ -150,7 +151,7 @@ pub struct ArrayIterator, I: DataPages> { phantom_a: std::marker::PhantomData, } -impl, I: DataPages> ArrayIterator { +impl, I: DataPages> NestedIter { pub fn new( iter: I, init: Vec, @@ -168,7 +169,7 @@ impl, I: DataPages> ArrayIterator { } } -impl, I: DataPages> Iterator for ArrayIterator { +impl, I: DataPages> Iterator for NestedIter { type Item = Result<(NestedState, A)>; fn next(&mut self) -> Option { @@ -189,3 +190,25 @@ impl, I: DataPages> Iterator for ArrayIterator } } } + +/// Converts [`DataPages`] to an [`Iterator`] of [`TraitBinaryArray`] +pub fn iter_to_arrays_nested<'a, O, A, I>( + iter: I, + init: Vec, + data_type: DataType, + chunk_size: Option, +) -> NestedArrayIter<'a> +where + I: 'a + DataPages, + A: TraitBinaryArray, + O: Offset, +{ + Box::new( + NestedIter::::new(iter, init, data_type, chunk_size).map(|result| { + let (mut nested, array) = result?; + let _ = nested.nested.pop().unwrap(); // the primitive + let array = Box::new(array) as Box; + Ok((nested, array)) + }), + ) +} diff --git a/src/io/parquet/read/deserialize/boolean/mod.rs b/src/io/parquet/read/deserialize/boolean/mod.rs index dde0a14852a..01ca1fb1122 100644 --- a/src/io/parquet/read/deserialize/boolean/mod.rs +++ b/src/io/parquet/read/deserialize/boolean/mod.rs @@ -1,28 +1,5 @@ mod basic; mod nested; -use self::nested::ArrayIterator; -use super::{ - nested_utils::{InitNested, NestedArrayIter}, - DataPages, -}; - pub use self::basic::Iter; - -/// Converts [`DataPages`] to an [`Iterator`] of [`Array`] -pub fn iter_to_arrays_nested<'a, I: 'a>( - iter: I, - init: Vec, - chunk_size: Option, -) -> NestedArrayIter<'a> -where - I: DataPages, -{ - Box::new(ArrayIterator::new(iter, init, chunk_size).map(|x| { - x.map(|(mut nested, array)| { - let _ = nested.nested.pop().unwrap(); // the primitive - let values = array.boxed(); - (nested, values) - }) - })) -} +pub use nested::iter_to_arrays_nested; diff --git a/src/io/parquet/read/deserialize/boolean/nested.rs b/src/io/parquet/read/deserialize/boolean/nested.rs index 8c20694ed5c..f2b4ccd983f 100644 --- a/src/io/parquet/read/deserialize/boolean/nested.rs +++ b/src/io/parquet/read/deserialize/boolean/nested.rs @@ -101,14 +101,14 @@ impl<'a> NestedDecoder<'a> for BooleanDecoder { /// An iterator adapter over [`DataPages`] assumed to be encoded as boolean arrays #[derive(Debug)] -pub struct ArrayIterator { +pub struct NestedIter { iter: I, init: Vec, items: VecDeque<(NestedState, (MutableBitmap, MutableBitmap))>, chunk_size: Option, } -impl ArrayIterator { +impl NestedIter { pub fn new(iter: I, init: Vec, chunk_size: Option) -> Self { Self { iter, @@ -123,7 +123,7 @@ fn finish(data_type: &DataType, values: MutableBitmap, validity: MutableBitmap) BooleanArray::new(data_type.clone(), values.into(), validity.into()) } -impl Iterator for ArrayIterator { +impl Iterator for NestedIter { type Item = Result<(NestedState, BooleanArray)>; fn next(&mut self) -> Option { @@ -144,3 +144,19 @@ impl Iterator for ArrayIterator { } } } + +/// Converts [`DataPages`] to an [`Iterator`] of [`BooleanArray`] +pub fn iter_to_arrays_nested<'a, I: 'a>( + iter: I, + init: Vec, + chunk_size: Option, +) -> NestedArrayIter<'a> +where + I: DataPages, +{ + Box::new(NestedIter::new(iter, init, chunk_size).map(|result| { + let (mut nested, array) = result?; + let _ = nested.nested.pop().unwrap(); // the primitive + Ok((nested, array.boxed())) + })) +} diff --git a/src/io/parquet/read/deserialize/primitive/mod.rs b/src/io/parquet/read/deserialize/primitive/mod.rs index e49cdb80ea5..b9f87520c8d 100644 --- a/src/io/parquet/read/deserialize/primitive/mod.rs +++ b/src/io/parquet/read/deserialize/primitive/mod.rs @@ -2,35 +2,6 @@ mod basic; mod dictionary; mod nested; -pub use dictionary::DictIter; - -use crate::datatypes::DataType; - -use super::{nested_utils::*, DataPages}; - pub use basic::Iter; -use nested::ArrayIterator; - -/// Converts [`DataPages`] to an [`Iterator`] of [`Array`] -pub fn iter_to_arrays_nested<'a, I, T, P, F>( - iter: I, - init: Vec, - data_type: DataType, - chunk_size: Option, - op: F, -) -> NestedArrayIter<'a> -where - I: 'a + DataPages, - T: crate::types::NativeType, - P: parquet2::types::NativeType, - F: 'a + Copy + Send + Sync + Fn(P) -> T, -{ - Box::new( - ArrayIterator::::new(iter, init, data_type, chunk_size, op).map(|x| { - x.map(|(mut nested, array)| { - let _ = nested.nested.pop().unwrap(); // the primitive - (nested, array.boxed()) - }) - }), - ) -} +pub use dictionary::DictIter; +pub use nested::iter_to_arrays_nested; diff --git a/src/io/parquet/read/deserialize/primitive/nested.rs b/src/io/parquet/read/deserialize/primitive/nested.rs index 587e1967adc..ce6d7a94eb5 100644 --- a/src/io/parquet/read/deserialize/primitive/nested.rs +++ b/src/io/parquet/read/deserialize/primitive/nested.rs @@ -229,3 +229,26 @@ where } } } + +/// Converts [`DataPages`] to an [`Iterator`] of [`Array`] +pub fn iter_to_arrays_nested<'a, I, T, P, F>( + iter: I, + init: Vec, + data_type: DataType, + chunk_size: Option, + op: F, +) -> NestedArrayIter<'a> +where + I: 'a + DataPages, + T: crate::types::NativeType, + P: parquet2::types::NativeType, + F: 'a + Copy + Send + Sync + Fn(P) -> T, +{ + Box::new( + ArrayIterator::::new(iter, init, data_type, chunk_size, op).map(|result| { + let (mut nested, array) = result?; + let _ = nested.nested.pop().unwrap(); // the primitive + Ok((nested, array.boxed())) + }), + ) +}