Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Improved performance.
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Dec 9, 2021
1 parent 3e5207a commit bb98a1d
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/io/parquet/read/binary/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,19 +214,23 @@ fn read_plain_optional<O: Offset>(

/// Appends every value of a plain-encoded, required (non-nullable) binary page
/// to `offsets` and `values`.
///
/// `buffer` is walked with `utils::BinaryIter`; each slice it yields is copied
/// to the end of `values`, and the running end offset (starting from the last
/// entry already in `offsets`) is pushed onto `offsets`.
///
/// * `buffer` - the encoded page values.
/// * `_length` - unused.
/// * `additional` - the number of values expected in `buffer`; it is only used
///   to size the up-front reservations, the loop itself consumes whatever the
///   iterator yields.
///
/// # Panics
///
/// Panics if `offsets` is empty, or if a value's length cannot be represented
/// as `O`. In debug builds it also panics if `values` had to reallocate while
/// being filled, i.e. if the up-front reservation turned out to be too small.
pub(super) fn read_plain_required<O: Offset>(
    buffer: &[u8],
    _length: usize,
    additional: usize,
    offsets: &mut MutableBuffer<O>,
    values: &mut MutableBuffer<u8>,
) {
    let mut last_offset = *offsets.as_mut_slice().last().unwrap();

    let values_iterator = utils::BinaryIter::new(buffer);

    // One offset is pushed per value, so make room for all of them at once.
    offsets.reserve(additional);

    // Each value is stored as a 4-byte length prefix followed by its bytes, so
    // the payload is the buffer minus `4 * additional` bytes of prefixes.
    // Saturating arithmetic keeps a page whose `additional` overstates the
    // buffer contents from underflowing into a panic (debug) or an enormous
    // reservation request (release); in that case nothing is reserved up front.
    let payload_len = buffer.len().saturating_sub(additional.saturating_mul(4));
    values.reserve(payload_len);
    let reserved_capacity = values.capacity();

    for value in values_iterator {
        last_offset += O::from_usize(value.len()).unwrap();
        values.extend_from_slice(value);
        offsets.push(last_offset);
    }

    // The reservation above is meant to be exact: filling `values` must not
    // have triggered a reallocation.
    debug_assert_eq!(reserved_capacity, values.capacity());
}

pub(super) fn extend_from_page<O: Offset>(
Expand Down
6 changes: 4 additions & 2 deletions src/io/parquet/read/utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use parquet2::encoding::{get_length, Encoding};
use std::convert::TryInto;

use parquet2::encoding::Encoding;
use parquet2::metadata::ColumnDescriptor;
use parquet2::page::{split_buffer as _split_buffer, DataPage, DataPageHeader};

Expand All @@ -22,7 +24,7 @@ impl<'a> Iterator for BinaryIter<'a> {
if self.values.is_empty() {
return None;
}
let length = get_length(self.values) as usize;
let length = u32::from_le_bytes(self.values[0..4].try_into().unwrap()) as usize;
self.values = &self.values[4..];
let result = &self.values[..length];
self.values = &self.values[length..];
Expand Down

0 comments on commit bb98a1d

Please sign in to comment.