Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Commit

Permalink
Correctly handle negative decimals
Browse files (browse the repository at this point in the history)
  • Loading branch information
mdrach committed Dec 13, 2021
1 parent 6ec9cf5 commit 2fd7a87
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 5 deletions.
5 changes: 3 additions & 2 deletions parquet_integration/write_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ def case_basic_nullable(size=1):


def case_basic_required(size=1):
int64 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
int64 = [-1, 0, 1, 2, 3, 4, 5, 6, 7, 8]
uint32 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
float64 = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0]
string = ["Hello", "bbb", "aa", "", "bbb", "abc", "bbb", "bbb", "def", "aaa"]
boolean = [True, True, False, False, False, True, True, True, True, True]
Expand Down Expand Up @@ -82,7 +83,7 @@ def case_basic_required(size=1):
"string": string * size,
"bool": boolean * size,
"date": int64 * size,
"uint32": int64 * size,
"uint32": uint32 * size,
"decimal_9": decimal * size,
"decimal_18": decimal * size,
"decimal_26": decimal * size,
Expand Down
8 changes: 6 additions & 2 deletions src/io/parquet/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -274,7 +274,8 @@ fn page_iter_to_array<I: FallibleStreamingIterator<Item = DataPage, Error = Parq
n
)))
} else {
let paddings = (0..(16 - *n)).map(|_| 0u8).collect::<Vec<_>>();
let zeros_padding = (0..(16 - *n)).map(|_| 0u8).collect::<Vec<_>>();
let ones_padding = (0..(16 - *n)).map(|_| !0u8).collect::<Vec<_>>();
fixed_size_binary::iter_to_array(
iter,
DataType::FixedSizeBinary(*n as usize),
Expand All @@ -285,7 +286,10 @@ fn page_iter_to_array<I: FallibleStreamingIterator<Item = DataPage, Error = Parq
.into_iter()
.map(|v| {
v.and_then(|v1| {
[&paddings, v1]
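// Sign-extend: pad with the value of the sign bit to correctly handle (two's complement) negative integers.
// NOTE(review): the padding is prepended and the result is decoded with `from_be_bytes`, so the bytes are big-endian and the sign bit is in the FIRST byte of `v1`; the check below reads `v1.last()` (the least significant byte) — confirm whether `v1.first()` was intended.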
let msb_set = v1.last().unwrap_or(&0) >> 7 == 1;
let padding = if msb_set { &ones_padding } else { &zeros_padding };
[padding, v1]
.concat()
.try_into()
.map(i128::from_be_bytes)
Expand Down
2 changes: 1 addition & 1 deletion tests/it/io/parquet/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ pub fn pyarrow_nullable_statistics(column: usize) -> Option<Box<dyn Statistics>>
// these values match the values in `integration`
pub fn pyarrow_required(column: usize) -> Box<dyn Array> {
let i64_values = &[
Some(-1),
Some(0),
Some(1),
Some(2),
Expand All @@ -408,7 +409,6 @@ pub fn pyarrow_required(column: usize) -> Box<dyn Array> {
Some(6),
Some(7),
Some(8),
Some(9),
];

match column {
Expand Down

0 comments on commit 2fd7a87

Please sign in to comment.