This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit a1dd795

Finished migration

jorgecarleitao committed Aug 10, 2022
1 parent f45772f commit a1dd795
Showing 5 changed files with 7 additions and 4 deletions.
Cargo.toml (3 changes: 1 addition & 2 deletions)
@@ -74,8 +74,7 @@ futures = { version = "0.3", optional = true }
 async-stream = { version = "0.3.2", optional = true }
 
 # parquet support
-#parquet2 = { version = "0.14.0", optional = true, default_features = false }
-parquet2 = { git = "https://github.com/jorgecarleitao/parquet2", branch = "delay_dict", optional = true, default_features = false }
+parquet2 = { version = "0.15.0", optional = true, default_features = false, features = ["async"] }
 
 # avro support
 avro-schema = { version = "0.3", optional = true }
benches/read_parquet.rs (2 changes: 1 addition & 1 deletion)
@@ -59,7 +59,7 @@ fn add_benchmark(c: &mut Criterion) {
 
         let buffer = to_buffer(size, true, true, false, false);
         let a = format!("read ts dict 2^{}", i);
-        c.bench_function(&a, |b| b.iter(|| read_batch(&buffer, size, 11).unwrap()));
+        c.bench_function(&a, |b| b.iter(|| read_chunk(&buffer, size, 11).unwrap()));
 
         let a = format!("read utf8 2^{}", i);
         c.bench_function(&a, |b| b.iter(|| read_chunk(&buffer, size, 2).unwrap()));
src/io/parquet/read/deserialize/mod.rs (2 changes: 2 additions & 0 deletions)
@@ -31,12 +31,14 @@ pub fn get_page_iterator<R: Read + Seek>(
     reader: R,
     pages_filter: Option<PageFilter>,
     buffer: Vec<u8>,
+    max_header_size: usize,
 ) -> Result<PageReader<R>> {
     Ok(_get_page_iterator(
         column_metadata,
         reader,
         pages_filter,
         buffer,
+        max_header_size,
     )?)
 }
 
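For downstream callers, this adds one trailing argument to `get_page_iterator`. A minimal usage sketch follows; it is not taken from the repository, the first parameter and the re-export paths sit outside the hunk and are therefore assumptions, and the 1 MiB header budget is only an illustrative value.

use std::io::{Read, Seek};

use arrow2::error::Result;
// Assumed re-export paths; the diff above only shows the function body.
use arrow2::io::parquet::read::{get_page_iterator, ColumnChunkMetaData, PageReader};

// Build a page iterator over one column chunk, passing the new explicit
// `max_header_size` budget (in bytes) introduced by this migration.
fn pages_for_column<R: Read + Seek>(
    column: &ColumnChunkMetaData,
    reader: R,
) -> Result<PageReader<R>> {
    get_page_iterator(
        column,
        reader,
        None,        // no page filter: yield every page
        vec![],      // scratch buffer reused across page reads
        1024 * 1024, // largest Thrift page header we are willing to parse
    )
}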
src/io/parquet/read/row_group.rs (2 changes: 2 additions & 0 deletions)
@@ -180,11 +180,13 @@ pub fn to_deserializer<'a>(
     let (columns, types): (Vec<_>, Vec<_>) = columns
         .into_iter()
         .map(|(column_meta, chunk)| {
+            let len = chunk.len();
             let pages = PageReader::new(
                 std::io::Cursor::new(chunk),
                 column_meta,
                 std::sync::Arc::new(|_, _| true),
                 vec![],
+                len * 2 + 1024,
             );
             (
                 BasicDecompressor::new(pages, vec![]),
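Here the new budget is derived from the column chunk itself: twice its byte length plus 1 KiB of slack, presumably so even tiny chunks leave room for a page header. A self-contained sketch of that arithmetic (the helper name is ours, not the crate's):

// Mirrors the expression `len * 2 + 1024` used above when constructing `PageReader`.
fn max_page_header_size(column_chunk_len: usize) -> usize {
    column_chunk_len * 2 + 1024
}

fn main() {
    assert_eq!(max_page_header_size(0), 1024); // an empty chunk still gets 1 KiB of headroom
    assert_eq!(max_page_header_size(4096), 9216); // larger chunks scale with their size
    println!("budget for a 4 KiB chunk: {} bytes", max_page_header_size(4096));
}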
src/io/parquet/write/mod.rs (2 changes: 1 addition & 1 deletion)
@@ -21,7 +21,7 @@ use crate::types::NativeType;
 
 use parquet2::schema::types::PrimitiveType as ParquetPrimitiveType;
 pub use parquet2::{
-    compression::{BrotliLevel, CompressionLevel, CompressionOptions, GzipLevel, ZstdLevel},
+    compression::{BrotliLevel, CompressionOptions, GzipLevel, ZstdLevel},
     encoding::Encoding,
     fallible_streaming_iterator,
     metadata::{Descriptor, FileMetaData, KeyValue, SchemaDescriptor, ThriftFileMetaData},
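With `CompressionLevel` dropped from this re-export list, downstream code keeps choosing codecs through `CompressionOptions` and the per-codec level types that remain exported. A minimal sketch, assuming the variant shapes of parquet2 0.15 (it is not taken from the crate's documentation):

// Re-export path grounded in the `pub use` block above; the variant shape is assumed.
use arrow2::io::parquet::write::CompressionOptions;

// Select zstd with its default level; pass `Some(ZstdLevel)` instead to tune it.
fn default_compression() -> CompressionOptions {
    CompressionOptions::Zstd(None)
}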
