This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Allowed feature-specific test runs #985

Merged 1 commit on May 7, 2022
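The change relies on Rust's conditional-compilation attribute: a test or module annotated with #[cfg(feature = "...")] is only compiled when that Cargo feature is enabled, so the suite can be run with just the features that were built, e.g. cargo test --features io_parquet,io_parquet_compression. Below is a minimal sketch of the pattern, assuming a crate that declares an io_parquet_compression feature; the test name and body are hypothetical, only the attribute shape mirrors this PR.

// Compiled (and therefore run by `cargo test`) only when the crate is built
// with `--features io_parquet_compression`; otherwise the whole item is skipped.
#[cfg(feature = "io_parquet_compression")]
#[test]
fn compressed_roundtrip_smoke() {
    // Hypothetical body: the real tests in this PR exercise parquet read/write round-trips.
    assert_eq!(1 + 1, 2);
}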
2 changes: 1 addition & 1 deletion tests/it/io/mod.rs
@@ -7,7 +7,7 @@ mod json;
#[cfg(feature = "io_json")]
mod ndjson;

#[cfg(feature = "io_ipc")]
#[cfg(feature = "io_json_integration")]
mod ipc;

#[cfg(feature = "io_parquet")]
41 changes: 41 additions & 0 deletions tests/it/io/parquet/integration.rs
@@ -0,0 +1,41 @@
use arrow2::error::Result;

use super::{integration_read, integration_write};
use crate::io::ipc::read_gzip_json;

fn test_file(version: &str, file_name: &str) -> Result<()> {
    let (schema, _, batches) = read_gzip_json(version, file_name)?;

    // empty batches are not written/read from parquet and can be ignored
    let batches = batches
        .into_iter()
        .filter(|x| !x.is_empty())
        .collect::<Vec<_>>();

    let data = integration_write(&schema, &batches)?;

    let (read_schema, read_batches) = integration_read(&data)?;

    assert_eq!(schema, read_schema);
    assert_eq!(batches, read_batches);

    Ok(())
}

#[test]
fn roundtrip_100_primitive() -> Result<()> {
    test_file("1.0.0-littleendian", "generated_primitive")?;
    test_file("1.0.0-bigendian", "generated_primitive")
}

#[test]
fn roundtrip_100_dict() -> Result<()> {
    test_file("1.0.0-littleendian", "generated_dictionary")?;
    test_file("1.0.0-bigendian", "generated_dictionary")
}

#[test]
fn roundtrip_100_extension() -> Result<()> {
    test_file("1.0.0-littleendian", "generated_extension")?;
    test_file("1.0.0-bigendian", "generated_extension")
}
42 changes: 2 additions & 40 deletions tests/it/io/parquet/mod.rs
@@ -6,8 +6,8 @@ use arrow2::{
io::parquet::read::statistics::*, io::parquet::read::*, io::parquet::write::*,
};

-use crate::io::ipc::read_gzip_json;
-
+#[cfg(feature = "io_json_integration")]
+mod integration;
mod read;
mod read_indexes;
mod write;
@@ -789,7 +789,6 @@ pub fn pyarrow_struct_statistics(column: &str) -> Statistics {
}
}

-/// Round-trip with parquet using the same integration files used for IPC integration tests.
fn integration_write(schema: &Schema, batches: &[Chunk<Arc<dyn Array>>]) -> Result<Vec<u8>> {
let options = WriteOptions {
write_statistics: true,
@@ -841,43 +840,6 @@ fn integration_read(data: &[u8]) -> Result<IntegrationRead> {
Ok((schema, batches))
}

-fn test_file(version: &str, file_name: &str) -> Result<()> {
-    let (schema, _, batches) = read_gzip_json(version, file_name)?;
-
-    // empty batches are not written/read from parquet and can be ignored
-    let batches = batches
-        .into_iter()
-        .filter(|x| !x.is_empty())
-        .collect::<Vec<_>>();
-
-    let data = integration_write(&schema, &batches)?;
-
-    let (read_schema, read_batches) = integration_read(&data)?;
-
-    assert_eq!(schema, read_schema);
-    assert_eq!(batches, read_batches);
-
-    Ok(())
-}
-
-#[test]
-fn roundtrip_100_primitive() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_primitive")?;
-    test_file("1.0.0-bigendian", "generated_primitive")
-}
-
-#[test]
-fn roundtrip_100_dict() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_dictionary")?;
-    test_file("1.0.0-bigendian", "generated_dictionary")
-}
-
-#[test]
-fn roundtrip_100_extension() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_extension")?;
-    test_file("1.0.0-bigendian", "generated_extension")
-}
-
/// Tests that when arrow-specific types (Duration and LargeUtf8) are written to parquet, we can rountrip its
/// logical types.
#[test]
3 changes: 3 additions & 0 deletions tests/it/io/parquet/read.rs
@@ -458,6 +458,7 @@ fn v1_nested_edge_2() -> Result<()> {
test_pyarrow_integration("null", 1, "nested_edge", false, false, None)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn all_types() -> Result<()> {
let path = "testing/parquet-testing/data/alltypes_plain.parquet";
@@ -495,6 +496,7 @@ fn all_types() -> Result<()> {
Ok(())
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn all_types_chunked() -> Result<()> {
// this has one batch with 8 elements
@@ -546,6 +548,7 @@ fn all_types_chunked() -> Result<()> {
Ok(())
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn invalid_utf8() {
let invalid_data = &[
5 changes: 5 additions & 0 deletions tests/it/io/parquet/write.rs
@@ -97,6 +97,7 @@ fn int64_optional_v2() -> Result<()> {
)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn int64_optional_v2_compressed() -> Result<()> {
round_trip(
@@ -157,6 +158,7 @@ fn utf8_required_v2() -> Result<()> {
)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn utf8_optional_v2_compressed() -> Result<()> {
round_trip(
@@ -169,6 +171,7 @@ fn utf8_optional_v2_compressed() -> Result<()> {
)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn utf8_required_v2_compressed() -> Result<()> {
round_trip(
@@ -229,6 +232,7 @@ fn bool_required_v2_uncompressed() -> Result<()> {
)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn bool_required_v2_compressed() -> Result<()> {
round_trip(
@@ -386,6 +390,7 @@ fn i32_optional_v2_dict() -> Result<()> {
)
}

+#[cfg(feature = "io_parquet_compression")]
#[test]
fn i32_optional_v2_dict_compressed() -> Result<()> {
round_trip(