Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Simpler and added docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Aug 1, 2022
1 parent 41a40e7 commit 7d18273
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ impl<O: Offset> TraitBinaryArray<O> for Utf8Array<O> {
}
}

impl<'a, O: Offset> DecodedState<'a> for (Binary<O>, MutableBitmap) {
impl<O: Offset> DecodedState for (Binary<O>, MutableBitmap) {
fn len(&self) -> usize {
self.0.len()
}
Expand Down
8 changes: 4 additions & 4 deletions src/io/parquet/read/deserialize/binary/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,10 @@ impl<'a> Iterator for BinaryIter<'a> {
if self.values.is_empty() {
return None;
}
let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize;
self.values = &self.values[4..];
let result = &self.values[..length];
self.values = &self.values[length..];
let (length, remaining) = self.values.split_at(4);
let length = u32::from_le_bytes(length.try_into().unwrap()) as usize;
let (result, remaining) = remaining.split_at(length);
self.values = remaining;
Some(result)
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/boolean/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ impl<'a> utils::PageState<'a> for State<'a> {
}
}

impl<'a> DecodedState<'a> for (MutableBitmap, MutableBitmap) {
impl DecodedState for (MutableBitmap, MutableBitmap) {
fn len(&self) -> usize {
self.0.len()
}
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/fixed_size_binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ struct BinaryDecoder {
size: usize,
}

impl<'a> DecodedState<'a> for (FixedSizeBinary, MutableBitmap) {
impl DecodedState for (FixedSizeBinary, MutableBitmap) {
fn len(&self) -> usize {
self.0.len()
}
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/nested_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl Nested for NestedStruct {
pub(super) trait NestedDecoder<'a> {
type State: PageState<'a>;
type Dictionary;
type DecodedState: DecodedState<'a>;
type DecodedState: DecodedState;

fn build_state(
&self,
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/deserialize/primitive/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ where
}
}

impl<'a, T: std::fmt::Debug> utils::DecodedState<'a> for (Vec<T>, MutableBitmap) {
impl<T: std::fmt::Debug> utils::DecodedState for (Vec<T>, MutableBitmap) {
fn len(&self) -> usize {
self.0.len()
}
Expand Down
22 changes: 15 additions & 7 deletions src/io/parquet/read/deserialize/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -337,36 +337,40 @@ pub(super) trait PageState<'a>: std::fmt::Debug {
}

/// The state of a partially deserialized page
pub(super) trait DecodedState<'a>: std::fmt::Debug {
// the number of values that this decoder already consumed
pub(super) trait DecodedState: std::fmt::Debug {
// the number of values that the state already has
fn len(&self) -> usize;
}

/// A decoder that knows how to map `State` -> Array
pub(super) trait Decoder<'a> {
/// The state that this decoder derives from a [`DataPage`]. This is bound to the page.
type State: PageState<'a>;
/// The dictionary representation that the decoder uses
type Dict;
type DecodedState: DecodedState<'a>;
/// The target state that this Decoder decodes into.
type DecodedState: DecodedState;

/// Creates a new `Self::State`
fn build_state(
&self,
page: &'a DataPage,
dict: Option<&'a Self::Dict>,
) -> Result<Self::State, Error>;

/// Initializes a new state
/// Initializes a new [`Self::DecodedState`].
fn with_capacity(&self, capacity: usize) -> Self::DecodedState;

/// extends (values, validity) by deserializing items in `State`.
/// It guarantees that the length of `values` is at most `values.len() + remaining`.
/// extends [`Self::DecodedState`] by deserializing items in [`Self::State`].
/// It guarantees that the length of `decoded` is at most `decoded.len() + remaining`.
fn extend_from_state(
&self,
page: &mut Self::State,
decoded: &mut Self::DecodedState,
additional: usize,
);

/// Deserializes a [`DictPage`] into a representation suited for decoding using it.
/// Deserializes a [`DictPage`] into [`Self::Dict`].
fn deserialize_dict(&self, page: &DictPage) -> Self::Dict;
}

Expand Down Expand Up @@ -404,10 +408,14 @@ pub(super) fn extend_from_new_page<'a, T: Decoder<'a>>(
}
}

/// Represents what happened when a new page was consumed
#[derive(Debug)]
pub enum MaybeNext<P> {
/// Whether the page was sufficient to fill `chunk_size`
Some(P),
/// whether there are no more pages or intermediary decoded states
None,
/// Whether the page was insufficient to fill `chunk_size` and a new page is required
More,
}

Expand Down

0 comments on commit 7d18273

Please sign in to comment.