From 6654ff7bb7de2134711075afd5225962214aa644 Mon Sep 17 00:00:00 2001
From: "Jorge C. Leitao"
Date: Sat, 7 May 2022 10:51:09 +0100
Subject: [PATCH] Simpler testing

---
 tests/it/io/mod.rs                 |  2 +-
 tests/it/io/parquet/integration.rs | 41 +++++++++++++++++++++++++++++
 tests/it/io/parquet/mod.rs         | 42 ++----------------------------
 tests/it/io/parquet/read.rs        |  3 +++
 tests/it/io/parquet/write.rs       |  5 ++++
 5 files changed, 52 insertions(+), 41 deletions(-)
 create mode 100644 tests/it/io/parquet/integration.rs

diff --git a/tests/it/io/mod.rs b/tests/it/io/mod.rs
index cfd49263a20..d192b806ee4 100644
--- a/tests/it/io/mod.rs
+++ b/tests/it/io/mod.rs
@@ -7,7 +7,7 @@ mod json;
 #[cfg(feature = "io_json")]
 mod ndjson;
 
-#[cfg(feature = "io_ipc")]
+#[cfg(feature = "io_json_integration")]
 mod ipc;
 
 #[cfg(feature = "io_parquet")]
diff --git a/tests/it/io/parquet/integration.rs b/tests/it/io/parquet/integration.rs
new file mode 100644
index 00000000000..71ce7facaf8
--- /dev/null
+++ b/tests/it/io/parquet/integration.rs
@@ -0,0 +1,41 @@
+use arrow2::error::Result;
+
+use super::{integration_read, integration_write};
+use crate::io::ipc::read_gzip_json;
+
+fn test_file(version: &str, file_name: &str) -> Result<()> {
+    let (schema, _, batches) = read_gzip_json(version, file_name)?;
+
+    // empty batches are not written/read from parquet and can be ignored
+    let batches = batches
+        .into_iter()
+        .filter(|x| !x.is_empty())
+        .collect::<Vec<_>>();
+
+    let data = integration_write(&schema, &batches)?;
+
+    let (read_schema, read_batches) = integration_read(&data)?;
+
+    assert_eq!(schema, read_schema);
+    assert_eq!(batches, read_batches);
+
+    Ok(())
+}
+
+#[test]
+fn roundtrip_100_primitive() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_primitive")?;
+    test_file("1.0.0-bigendian", "generated_primitive")
+}
+
+#[test]
+fn roundtrip_100_dict() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_dictionary")?;
+    test_file("1.0.0-bigendian", "generated_dictionary")
+}
+
+#[test]
+fn roundtrip_100_extension() -> Result<()> {
+    test_file("1.0.0-littleendian", "generated_extension")?;
+    test_file("1.0.0-bigendian", "generated_extension")
+}
diff --git a/tests/it/io/parquet/mod.rs b/tests/it/io/parquet/mod.rs
index 225f19df9c8..e7786a10ec6 100644
--- a/tests/it/io/parquet/mod.rs
+++ b/tests/it/io/parquet/mod.rs
@@ -6,8 +6,8 @@ use arrow2::{
     io::parquet::read::statistics::*, io::parquet::read::*, io::parquet::write::*,
 };
 
-use crate::io::ipc::read_gzip_json;
-
+#[cfg(feature = "io_json_integration")]
+mod integration;
 mod read;
 mod read_indexes;
 mod write;
@@ -789,7 +789,6 @@ pub fn pyarrow_struct_statistics(column: &str) -> Statistics {
     }
 }
 
-/// Round-trip with parquet using the same integration files used for IPC integration tests.
 fn integration_write(schema: &Schema, batches: &[Chunk<Arc<dyn Array>>]) -> Result<Vec<u8>> {
     let options = WriteOptions {
         write_statistics: true,
@@ -841,43 +840,6 @@ fn integration_read(data: &[u8]) -> Result<IntegrationRead> {
     Ok((schema, batches))
 }
 
-fn test_file(version: &str, file_name: &str) -> Result<()> {
-    let (schema, _, batches) = read_gzip_json(version, file_name)?;
-
-    // empty batches are not written/read from parquet and can be ignored
-    let batches = batches
-        .into_iter()
-        .filter(|x| !x.is_empty())
-        .collect::<Vec<_>>();
-
-    let data = integration_write(&schema, &batches)?;
-
-    let (read_schema, read_batches) = integration_read(&data)?;
-
-    assert_eq!(schema, read_schema);
-    assert_eq!(batches, read_batches);
-
-    Ok(())
-}
-
-#[test]
-fn roundtrip_100_primitive() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_primitive")?;
-    test_file("1.0.0-bigendian", "generated_primitive")
-}
-
-#[test]
-fn roundtrip_100_dict() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_dictionary")?;
-    test_file("1.0.0-bigendian", "generated_dictionary")
-}
-
-#[test]
-fn roundtrip_100_extension() -> Result<()> {
-    test_file("1.0.0-littleendian", "generated_extension")?;
-    test_file("1.0.0-bigendian", "generated_extension")
-}
-
 /// Tests that when arrow-specific types (Duration and LargeUtf8) are written to parquet, we can rountrip its
 /// logical types.
 #[test]
diff --git a/tests/it/io/parquet/read.rs b/tests/it/io/parquet/read.rs
index 0e4d7726389..57b6cbf155d 100644
--- a/tests/it/io/parquet/read.rs
+++ b/tests/it/io/parquet/read.rs
@@ -458,6 +458,7 @@ fn v1_nested_edge_2() -> Result<()> {
     test_pyarrow_integration("null", 1, "nested_edge", false, false, None)
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn all_types() -> Result<()> {
     let path = "testing/parquet-testing/data/alltypes_plain.parquet";
@@ -495,6 +496,7 @@ fn all_types() -> Result<()> {
     Ok(())
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn all_types_chunked() -> Result<()> {
     // this has one batch with 8 elements
@@ -546,6 +548,7 @@ fn all_types_chunked() -> Result<()> {
     Ok(())
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn invalid_utf8() {
     let invalid_data = &[
diff --git a/tests/it/io/parquet/write.rs b/tests/it/io/parquet/write.rs
index 424f04bd1d2..e412ed593ae 100644
--- a/tests/it/io/parquet/write.rs
+++ b/tests/it/io/parquet/write.rs
@@ -97,6 +97,7 @@ fn int64_optional_v2() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn int64_optional_v2_compressed() -> Result<()> {
     round_trip(
@@ -157,6 +158,7 @@ fn utf8_required_v2() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn utf8_optional_v2_compressed() -> Result<()> {
     round_trip(
@@ -169,6 +171,7 @@ fn utf8_optional_v2_compressed() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn utf8_required_v2_compressed() -> Result<()> {
     round_trip(
@@ -229,6 +232,7 @@ fn bool_required_v2_uncompressed() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn bool_required_v2_compressed() -> Result<()> {
     round_trip(
@@ -386,6 +390,7 @@ fn i32_optional_v2_dict() -> Result<()> {
     )
 }
 
+#[cfg(feature = "io_parquet_compression")]
 #[test]
 fn i32_optional_v2_dict_compressed() -> Result<()> {
     round_trip(