Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Faster required reading
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Feb 16, 2022
1 parent e60d3b1 commit e97db83
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ futures = { version = "0.3", optional = true }
ahash = { version = "0.7", optional = true }

# parquet support
- parquet2 = { git = "https://github.com/jorgecarleitao/parquet2", branch = "pod", optional = true, default_features = false, features = ["stream"] }
+ parquet2 = { version = "0.10", optional = true, default_features = false, features = ["stream"] }

# avro support
avro-schema = { version = "0.2", optional = true }
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/boolean/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl<'a> Optional<'a> {
let (_, _, values_buffer) = split_buffer(page);

Self {
- values: BitmapIter::new(values_buffer, 0, page.num_values()),
+ values: BitmapIter::new(values_buffer, 0, values_buffer.len() * 8),
validity: OptionalPageValidity::new(page),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/io/parquet/read/boolean/nested.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ impl<'a> Decoder<'a, bool, MutableBitmap> for BooleanDecoder {
match (page.encoding(), is_optional) {
(Encoding::Plain, true) => {
let (_, _, values) = utils::split_buffer(page);
- let values = BitmapIter::new(values, 0, page.num_values());
+ let values = BitmapIter::new(values, 0, values.len() * 8);

Ok(State::Optional(Optional::new(page), values))
}
Expand Down
16 changes: 12 additions & 4 deletions src/io/parquet/read/primitive/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ pub(super) struct Values<'a, P>
where
P: ParquetNativeType,
{
- pub values: std::iter::Copied<std::slice::Iter<'a, P>>,
+ pub values: std::slice::ChunksExact<'a, u8>,
+ phantom: std::marker::PhantomData<P>,
}

impl<'a, P> Values<'a, P>
Expand All @@ -33,7 +34,8 @@ where
let (_, _, values) = utils::split_buffer(page);
assert_eq!(values.len() % std::mem::size_of::<P>(), 0);
Self {
- values: decode(values).iter().copied(),
+ values: values.chunks_exact(std::mem::size_of::<P>()),
+ phantom: std::marker::PhantomData,
}
}

Expand Down Expand Up @@ -190,10 +192,16 @@ where
page_validity,
Some(remaining),
values,
- page_values.values.by_ref().map(self.op),
+ page_values.values.by_ref().map(decode).map(self.op),
),
State::Required(page) => {
- values.extend(page.values.by_ref().map(self.op).take(remaining));
+ values.extend(
+ page.values
+ .by_ref()
+ .map(decode)
+ .map(self.op)
+ .take(remaining),
+ );
}
State::OptionalDictionary(page_validity, page_values) => {
let op1 = |index: u32| page_values.dict[index as usize];
Expand Down
7 changes: 4 additions & 3 deletions src/io/parquet/read/primitive/nested.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use std::collections::VecDeque;

use parquet2::{
- encoding::Encoding, page::DataPage, schema::Repetition, types::NativeType as ParquetNativeType,
+ encoding::Encoding, page::DataPage, schema::Repetition, types::decode,
+ types::NativeType as ParquetNativeType,
};

use crate::{
Expand Down Expand Up @@ -122,14 +123,14 @@ where
read_optional_values(
page_validity.definition_levels.by_ref(),
max_def,
- page_values.values.by_ref().map(self.op),
+ page_values.values.by_ref().map(decode).map(self.op),
values,
validity,
remaining,
)
}
State::Required(page) => {
- values.extend(page.values.by_ref().map(self.op).take(remaining));
+ values.extend(page.values.by_ref().map(decode).map(self.op).take(remaining));
}
//State::OptionalDictionary(page) => todo!(),
//State::RequiredDictionary(page) => todo!(),
Expand Down

0 comments on commit e97db83

Please sign in to comment.