Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Fixed error in writing dict-encoded and compressed pages
Browse files Browse the repository at this point in the history
  • Loading branch information
jorgecarleitao committed Oct 10, 2021
1 parent b19dde2 commit 95ecff8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
23 changes: 14 additions & 9 deletions src/io/parquet/write/dictionary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,12 @@ fn encode_keys<K: DictionaryKey>(
}

macro_rules! dyn_prim {
($from:ty, $to:ty, $array:expr) => {{
($from:ty, $to:ty, $array:expr, $options:expr) => {{
let values = $array.values().as_any().downcast_ref().unwrap();

let mut buffer = vec![];
primitive_encode_plain::<$from, $to>(values, false, &mut buffer);
let buffer = utils::compress(buffer, $options, 0)?;

CompressedPage::Dict(CompressedDictPage::new(buffer, values.len()))
}};
Expand All @@ -137,46 +138,50 @@ where
Encoding::PlainDictionary | Encoding::RleDictionary => {
// write DictPage
let dict_page = match array.values().data_type().to_logical_type() {
DataType::Int8 => dyn_prim!(i8, i32, array),
DataType::Int16 => dyn_prim!(i16, i32, array),
DataType::Int8 => dyn_prim!(i8, i32, array, options),
DataType::Int16 => dyn_prim!(i16, i32, array, options),
DataType::Int32 | DataType::Date32 | DataType::Time32(_) => {
dyn_prim!(i32, i32, array)
dyn_prim!(i32, i32, array, options)
}
DataType::Int64
| DataType::Date64
| DataType::Time64(_)
| DataType::Timestamp(_, _)
| DataType::Duration(_) => dyn_prim!(i64, i64, array),
DataType::UInt8 => dyn_prim!(u8, i32, array),
DataType::UInt16 => dyn_prim!(u16, i32, array),
DataType::UInt32 => dyn_prim!(u32, i32, array),
DataType::UInt64 => dyn_prim!(i64, i64, array),
| DataType::Duration(_) => dyn_prim!(i64, i64, array, options),
DataType::UInt8 => dyn_prim!(u8, i32, array, options),
DataType::UInt16 => dyn_prim!(u16, i32, array, options),
DataType::UInt32 => dyn_prim!(u32, i32, array, options),
DataType::UInt64 => dyn_prim!(i64, i64, array, options),
DataType::Utf8 => {
let values = array.values().as_any().downcast_ref().unwrap();

let mut buffer = vec![];
utf8_encode_plain::<i32>(values, false, &mut buffer);
let buffer = utils::compress(buffer, options, 0)?;
CompressedPage::Dict(CompressedDictPage::new(buffer, values.len()))
}
DataType::LargeUtf8 => {
let values = array.values().as_any().downcast_ref().unwrap();

let mut buffer = vec![];
utf8_encode_plain::<i64>(values, false, &mut buffer);
let buffer = utils::compress(buffer, options, 0)?;
CompressedPage::Dict(CompressedDictPage::new(buffer, values.len()))
}
DataType::Binary => {
let values = array.values().as_any().downcast_ref().unwrap();

let mut buffer = vec![];
binary_encode_plain::<i32>(values, false, &mut buffer);
let buffer = utils::compress(buffer, options, 0)?;
CompressedPage::Dict(CompressedDictPage::new(buffer, values.len()))
}
DataType::LargeBinary => {
let values = array.values().as_any().downcast_ref().unwrap();

let mut buffer = vec![];
binary_encode_plain::<i64>(values, false, &mut buffer);
let buffer = utils::compress(buffer, options, 0)?;
CompressedPage::Dict(CompressedDictPage::new(buffer, values.len()))
}
other => {
Expand Down
12 changes: 12 additions & 0 deletions tests/it/io/parquet/write.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,18 @@ fn i32_optional_v2_dict() -> Result<()> {
)
}

#[test]
fn i32_optional_v2_dict_compressed() -> Result<()> {
round_trip(
6,
true,
false,
Version::V2,
Compression::Snappy,
Encoding::RleDictionary,
)
}

// Decimal Testing
#[test]
fn decimal_9_optional_v1() -> Result<()> {
Expand Down

0 comments on commit 95ecff8

Please sign in to comment.