Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 24, 2022
1 parent a3d23c5 commit ac31893
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 18 deletions.
7 changes: 2 additions & 5 deletions src/io/parquet/read/deserialize/binary/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,12 @@ use std::collections::VecDeque;
use parquet2::{encoding::Encoding, page::DataPage, schema::Repetition};

use crate::{
array::Offset,
bitmap::MutableBitmap,
datatypes::DataType,
error::Result,
array::Offset, bitmap::MutableBitmap, datatypes::DataType, error::Result,
io::parquet::read::DataPages,
};

use super::super::utils::MaybeNext;
use super::super::nested_utils::*;
use super::super::utils::MaybeNext;
use super::utils::Binary;
use super::{
super::utils,
Expand Down
7 changes: 3 additions & 4 deletions src/io/parquet/read/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,13 @@ use super::{infer_schema, read_metadata, FileMetaData, RowGroupDeserializer, Row

type GroupFilter = Arc<dyn Fn(usize, &RowGroupMetaData) -> bool>;

/// An iterator of [`Chunk`] coming from row groups of a paquet file.
/// An iterator of [`Chunk`]s coming from row groups of a parquet file.
///
/// This can be thought of flatten chain of [`Iterator<Item=Chunk>`] - each row group is sequentially
/// This can be thought of as a flattened chain of [`Iterator<Item=Chunk>`] - each row group is sequentially
/// mapped to an [`Iterator<Item=Chunk>`] and each iterator is iterated upon until either the limit
/// or the last iterator ends.
///
/// # Implementation
/// Note that because
/// This iterator mixes IO-bounded and CPU-bounded operations.
pub struct FileReader<R: Read + Seek> {
row_groups: RowGroupReader<R>,
metadata: FileMetaData,
Expand Down
17 changes: 9 additions & 8 deletions src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,9 @@ mod row_group;
pub mod schema;
pub mod statistics;

use std::{
io::{Read, Seek},
sync::Arc,
};

use futures::{AsyncRead, AsyncSeek};

// re-exports of parquet2's relevant APIs
pub use parquet2::{
error::ParquetError,
fallible_streaming_iterator,
Expand All @@ -32,21 +29,25 @@ pub use parquet2::{
FallibleStreamingIterator,
};

use crate::{array::Array, error::Result};

pub use deserialize::{column_iter_to_arrays, get_page_iterator};
pub use file::{FileReader, RowGroupReader};
pub use row_group::*;
pub(crate) use schema::is_type_nullable;
pub use schema::{infer_schema, FileMetaData};

//use simple::nested_utils::{InitNested, NestedArrayIter, NestedState};
use std::{
io::{Read, Seek},
sync::Arc,
};

use crate::{array::Array, error::Result};

/// Trait describing a [`FallibleStreamingIterator`] of [`DataPage`].
///
/// This trait declares no methods of its own: it names the bound
/// `FallibleStreamingIterator<Item = DataPage, Error = ParquetError> + Send + Sync`
/// so that it can be referred to as a single trait. A blanket implementation
/// covers every type that satisfies that bound.
pub trait DataPages:
FallibleStreamingIterator<Item = DataPage, Error = ParquetError> + Send + Sync
{
}

impl<I: FallibleStreamingIterator<Item = DataPage, Error = ParquetError> + Send + Sync> DataPages
for I
{
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/row_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl Iterator for RowGroupDeserializer {
}
}

/// Returns all the parquet columns associated to `field_name`.
/// Returns all [`ColumnChunkMetaData`] associated with `field_name`.
/// For non-nested parquet types, this returns a single column.
pub(super) fn get_field_columns<'a>(
columns: &'a [ColumnChunkMetaData],
Expand Down

0 comments on commit ac31893

Please sign in to comment.