From 7e5b0594de9c8c983616e574e40c4706c9cb1225 Mon Sep 17 00:00:00 2001 From: "Jorge C. Leitao" Date: Thu, 9 Dec 2021 06:40:43 +0000 Subject: [PATCH] Fixed error in declaring size of compressed dict page. --- src/page/page_dict/mod.rs | 9 +++++++-- src/write/compression.rs | 7 ++++++- src/write/page.rs | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/page/page_dict/mod.rs b/src/page/page_dict/mod.rs index b1bb14e42..c61536cf3 100644 --- a/src/page/page_dict/mod.rs +++ b/src/page/page_dict/mod.rs @@ -37,11 +37,16 @@ impl EncodedDictPage { pub struct CompressedDictPage { pub(crate) buffer: Vec, pub(crate) num_values: usize, + pub(crate) uncompressed_page_size: usize, } impl CompressedDictPage { - pub fn new(buffer: Vec, num_values: usize) -> Self { - Self { buffer, num_values } + pub fn new(buffer: Vec, uncompressed_page_size: usize, num_values: usize) -> Self { + Self { + buffer, + uncompressed_page_size, + num_values, + } } } diff --git a/src/write/compression.rs b/src/write/compression.rs index 13e627481..2f13ced8a 100644 --- a/src/write/compression.rs +++ b/src/write/compression.rs @@ -59,12 +59,17 @@ fn compress_dict( mut buffer, num_values, } = page; + let uncompressed_page_size = buffer.len(); if compression != Compression::Uncompressed { compression::compress(compression, &buffer, &mut compressed_buffer)?; } else { std::mem::swap(&mut buffer, &mut compressed_buffer); } - Ok(CompressedDictPage::new(compressed_buffer, num_values)) + Ok(CompressedDictPage::new( + compressed_buffer, + uncompressed_page_size, + num_values, + )) } pub fn compress( diff --git a/src/write/page.rs b/src/write/page.rs index 80fb54f36..cc5d8b2eb 100644 --- a/src/write/page.rs +++ b/src/write/page.rs @@ -127,7 +127,7 @@ fn assemble_data_page_header(compressed_page: &CompressedDataPage) -> ParquetPag fn assemble_dict_page_header(page: &CompressedDictPage) -> ParquetPageHeader { ParquetPageHeader { type_: PageType::DICTIONARY_PAGE, - uncompressed_page_size: page.buffer.len() as i32, + uncompressed_page_size: page.uncompressed_page_size as i32, compressed_page_size: page.buffer.len() as i32, crc: None, data_page_header: None,