From 6654ff7bb7de2134711075afd5225962214aa644 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Sat, 7 May 2022 10:51:09 +0100
Subject: [PATCH] Simpler testing

---
 tests/it/io/mod.rs                 |  2 +-
 tests/it/io/parquet/integration.rs | 41 +++++++++++++++++++++++++++++
 tests/it/io/parquet/mod.rs         | 42 ++----------------------------
 tests/it/io/parquet/read.rs        |  3 +++
 tests/it/io/parquet/write.rs       |  5 ++++
 5 files changed, 52 insertions(+), 41 deletions(-)
 create mode 100644 tests/it/io/parquet/integration.rs

diff --git a/tests/it/io/mod.rs b/tests/it/io/mod.rs
index cfd49263a20..d192b806ee4 100644
--- a/tests/it/io/mod.rs
+++ b/tests/it/io/mod.rs
@@ -7,7 +7,7 @@ mod json;
 #[cfg(feature = "io_json")]
 mod ndjson;
 
-#[cfg(feature = "io_ipc")]
+#[cfg(feature = "io_json_integration")]
 mod ipc;
 
 #[cfg(feature = "io_parquet")]
diff --git a/tests/it/io/parquet/integration.rs b/tests/it/io/parquet/integration.rs
new file mode 100644
index 00000000000..71ce7facaf8
--- /dev/null
+++ b/tests/it/io/parquet/integration.rs
@@ -0,0 +1,41 @@
+use arrow2::error::Result;
+
+use super::{integration_read, integration_write};
+use crate::io::ipc::read_gzip_json;
+
+fn test_file(version: &str, file_name: &str) -> Result<()> {
+    let (schema, _, batches) = read_gzip_json(version, file_name)?;
+
+    // empty batches are not written/read from parquet and can be ignored
+    let batches = batches
+        .into_iter()
+        .filter(|x| !x.is_empty())
+        .collect::<Vec<_>>();
+
+    let data = integration_write(&schema, &batches)?;
+
+    let (read_schema, read_batches) = integration_read(&data)?;
+
+    assert_eq!(schema, read_schema);
+    assert_eq!(batches, read_batches);
+
+    Ok(())
+}
+
+#[test]
+fn roundtrip_100_primitive() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_primitive")?;
+    test_file("1.0.0-bigendian", "generated_primitive")
+}
+
+#[test]
+fn roundtrip_100_dict() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_dictionary")?;
+    test_file("1.0.0-bigendian", "generated_dictionary")
+}
+
+#[test]
+fn roundtrip_100_extension() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_extension")?;
+    test_file("1.0.0-bigendian", "generated_extension")
+}
diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs
index 225f19df9c8..e7786a10ec6 100644
--- a/tests/it/io/parquet/mod.rs
+++ b/tests/it/io/parquet/mod.rs
@@ -6,8 +6,8 @@ use arrow2::{
     io::parquet::read::statistics::*, io::parquet::read::*, io::parquet::write::*,
 };
 
-use crate::io::ipc::read_gzip_json;
-
+#[cfg(feature = "io_json_integration")]
+mod integration;
 mod read;
 mod read_indexes;
 mod write;
@@ -789,7 +789,6 @@ pub fn pyarrow_struct_statistics(column: &str) -> Statistics {
     }
 }
 
-/// Round-trip with parquet using the same integration files used for IPC integration tests.
 fn integration_write(schema: &Schema, batches: &[Chunk<Arc<dyn Array>>]) -> Result<Vec<u8>> {
     let options = WriteOptions {
         write_statistics: true,
@@ -841,43 +840,6 @@ fn integration_read(data: &[u8]) -> Result<IntegrationRead> {
     Ok((schema, batches))
 }
 
-fn test_file(version: &str, file_name: &str) -> Result<()> {
-    let (schema, _, batches) = read_gzip_json(version, file_name)?;
-
-    // empty batches are not written/read from parquet and can be ignored
-    let batches = batches
-        .into_iter()
-        .filter(|x| !x.is_empty())
-        .collect::<Vec<_>>();
-
-    let data = integration_write(&schema, &batches)?;
-
-    let (read_schema, read_batches) = integration_read(&data)?;
-
-    assert_eq!(schema, read_schema);
-    assert_eq!(batches, read_batches);
-
-    Ok(())
-}
-
-#[test]
-fn roundtrip_100_primitive() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_primitive")?;
-    test_file("1.0.0-bigendian", "generated_primitive")
-}
-
-#[test]
-fn roundtrip_100_dict() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_dictionary")?;
-    test_file("1.0.0-bigendian", "generated_dictionary")
-}
-
-#[test]
-fn roundtrip_100_extension() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_extension")?;
-    test_file("1.0.0-bigendian", "generated_extension")
-}
-
 /// Tests that when arrow-specific types (Duration and LargeUtf8) are written to parquet, we can rountrip its
 /// logical types.
 #[test]
diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs
index 0e4d7726389..57b6cbf155d 100644
--- a/tests/it/io/parquet/read.rs
+++ b/tests/it/io/parquet/read.rs
@@ -458,6 +458,7 @@ fn v1_nested_edge_2() -> Result<()> {
     test_pyarrow_integration("null", 1, "nested_edge", false, false, None)
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn all_types() -> Result<()> {
     let path = "testing/parquet-testing/data/alltypes_plain.parquet";
@@ -495,6 +496,7 @@ fn all_types() -> Result<()> {
     Ok(())
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn all_types_chunked() -> Result<()> {
     // this has one batch with 8 elements
@@ -546,6 +548,7 @@ fn all_types_chunked() -> Result<()> {
     Ok(())
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn invalid_utf8() {
     let invalid_data = &[
diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs
index 424f04bd1d2..e412ed593ae 100644
--- a/tests/it/io/parquet/write.rs
+++ b/tests/it/io/parquet/write.rs
@@ -97,6 +97,7 @@ fn int64_optional_v2() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn int64_optional_v2_compressed() -> Result<()> {
     round_trip(
@@ -157,6 +158,7 @@ fn utf8_required_v2() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn utf8_optional_v2_compressed() -> Result<()> {
     round_trip(
@@ -169,6 +171,7 @@ fn utf8_optional_v2_compressed() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn utf8_required_v2_compressed() -> Result<()> {
     round_trip(
@@ -229,6 +232,7 @@ fn bool_required_v2_uncompressed() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn bool_required_v2_compressed() -> Result<()> {
     round_trip(
@@ -386,6 +390,7 @@ fn i32_optional_v2_dict() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn i32_optional_v2_dict_compressed() -> Result<()> {
     round_trip(