
Added more tests (#1102)
jorgecarleitao authored Jun 26, 2022
1 parent 88f05bb commit 8605f45
Showing 4 changed files with 17 additions and 18 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integration-parquet.yml
@@ -46,7 +46,7 @@ jobs:
python -m venv venv
source venv/bin/activate
pip install --upgrade pip
-pip install pyarrow==6 pyspark==3
+pip install pyarrow==8 pyspark==3
python main.py
# test against spark
python main_spark.py
5 changes: 1 addition & 4 deletions arrow-parquet-integration-testing/main.py
@@ -49,11 +49,8 @@ def _expected(file: str):
pyarrow.date64(),
pyarrow.time32("s"),
pyarrow.timestamp("s"),
# the issue here is the second, not the tz
pyarrow.timestamp("s", tz="UTC"),
pyarrow.duration("s"),
pyarrow.duration("ms"),
pyarrow.duration("us"),
pyarrow.duration("ns"),
]


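(For orientation only, not part of this commit: the pyarrow types touched above — seconds-resolution timestamps with a timezone, and durations — are i64-backed arrays on the arrow2 side. A hedged sketch of how such columns would be constructed with arrow2; the values and variable names are invented for illustration.)

```rust
use arrow2::array::Int64Array;
use arrow2::datatypes::{DataType, TimeUnit};

fn main() {
    // seconds-resolution timestamp carrying a UTC timezone (illustrative values)
    let ts_utc = Int64Array::from_slice([0i64, 1_656_201_600])
        .to(DataType::Timestamp(TimeUnit::Second, Some("UTC".to_string())));

    // millisecond-resolution duration (illustrative values)
    let dur_ms = Int64Array::from_slice([1_000i64, 2_500])
        .to(DataType::Duration(TimeUnit::Millisecond));

    assert_eq!(ts_utc.len(), 2);
    assert_eq!(dur_ms.len(), 2);
}
```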
26 changes: 14 additions & 12 deletions arrow-parquet-integration-testing/src/main.rs
@@ -11,8 +11,8 @@ use arrow2::{
        json_integration::read,
        json_integration::ArrowJson,
        parquet::write::{
-            CompressionOptions as ParquetCompression, Encoding, FileWriter, RowGroupIterator,
-            Version as ParquetVersion, WriteOptions,
+            transverse, CompressionOptions as ParquetCompression, Encoding, FileWriter,
+            RowGroupIterator, Version as ParquetVersion, WriteOptions,
        },
    },
};
@@ -174,16 +174,18 @@ fn main() -> Result<()> {
    let encodings = schema
        .fields
        .iter()
-        .map(|x| match x.data_type() {
-            DataType::Dictionary(..) => vec![Encoding::RleDictionary],
-            DataType::Utf8 | DataType::LargeUtf8 => {
-                vec![if args.encoding_utf8 == EncodingScheme::Delta {
-                    Encoding::DeltaLengthByteArray
-                } else {
-                    Encoding::Plain
-                }]
-            }
-            _ => vec![Encoding::Plain],
+        .map(|f| {
+            transverse(&f.data_type, |dt| match dt {
+                DataType::Dictionary(..) => Encoding::RleDictionary,
+                DataType::Utf8 | DataType::LargeUtf8 => {
+                    if args.encoding_utf8 == EncodingScheme::Delta {
+                        Encoding::DeltaLengthByteArray
+                    } else {
+                        Encoding::Plain
+                    }
+                }
+                _ => Encoding::Plain,
+            })
        })
        .collect();

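(Not part of the diff: a rough, self-contained sketch of the new pattern above. `transverse` walks a possibly nested `DataType` and returns one value per parquet leaf column, so `encodings` ends up as a `Vec<Vec<Encoding>>` with one inner vector per field. The schema and field names below are invented for illustration and are not from the test harness.)

```rust
use arrow2::datatypes::{DataType, Field, Schema};
use arrow2::io::parquet::write::{transverse, Encoding};

fn main() {
    // hypothetical schema: one flat utf8 field and one nested struct field
    let schema = Schema::from(vec![
        Field::new("id", DataType::Utf8, false),
        Field::new(
            "point",
            DataType::Struct(vec![
                Field::new("x", DataType::Float64, false),
                Field::new("y", DataType::Float64, false),
            ]),
            false,
        ),
    ]);

    // one Vec<Encoding> per field; nested fields yield one entry per leaf column
    let encodings: Vec<Vec<Encoding>> = schema
        .fields
        .iter()
        .map(|f| {
            transverse(&f.data_type, |dt| match dt {
                DataType::Utf8 | DataType::LargeUtf8 => Encoding::DeltaLengthByteArray,
                _ => Encoding::Plain,
            })
        })
        .collect();

    // expected shape: "id" -> [DeltaLengthByteArray], "point" -> [Plain, Plain]
    println!("{:?}", encodings);
}
```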
2 changes: 1 addition & 1 deletion src/io/parquet/write/pages.rs
@@ -172,7 +172,7 @@ fn to_leafs_recursive<'a>(array: &'a dyn Array, leafs: &mut Vec<&'a dyn Array>)
}
Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
| LargeUtf8 | Dictionary(_) => leafs.push(array),
-_ => todo!(),
+other => todo!("Writing {:?} to parquet not yet implemented", other),
}
}

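(Context, not from the diff: `to_leafs_recursive` flattens a possibly nested array into the flat arrays that back the parquet leaf columns; the change above only makes the fallthrough branch name the unsupported physical type instead of hitting a bare `todo!()`. A simplified, hedged sketch of that recursion — the struct-only nesting handled here is an assumption; the real function covers more nested cases.)

```rust
use arrow2::array::{Array, StructArray};
use arrow2::datatypes::PhysicalType;

// Struct arrays contribute their children's leaves, flat arrays are leaves
// themselves, and anything unhandled panics with the offending physical type.
fn collect_leafs<'a>(array: &'a dyn Array, leafs: &mut Vec<&'a dyn Array>) {
    use PhysicalType::*;
    match array.data_type().to_physical_type() {
        Struct => {
            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
            for child in array.values() {
                collect_leafs(child.as_ref(), leafs);
            }
        }
        Null | Boolean | Primitive(_) | Binary | FixedSizeBinary | LargeBinary | Utf8
        | LargeUtf8 | Dictionary(_) => leafs.push(array),
        other => todo!("Writing {:?} to parquet not yet implemented", other),
    }
}
```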
