Python Integration test documentation and expanded tests

* Fixed README.md, which referred to directories that did not exist; * Updated the pyarrow version used for fixture generation from 4 to 7; * Removed `println!` calls from test functions; and * Expanded the parquet read integration tests to cover more compression types (brotli, gzip and zstd).
jorgecarleitao · Apr 28, 2022 · d8244cc · d8244cc
1 parent 95826dd
commit d8244cc
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 21 deletions.
diff --git a/README.md b/README.md
@@ -81,15 +81,14 @@ There are integration tests against parquet files generated by pyarrow.
 To run them, you will need to run
 
 ```bash
-cd integration-tests
 python3 -m venv venv
 venv/bin/pip install pip --upgrade
-venv/bin/pip install pyarrow==4
-venv/bin/python integration/write_pyarrow.py
+venv/bin/pip install pyarrow==7
+venv/bin/python tests/write_pyarrow.py
 cargo test
 ```
 
-before. This is only needed once (per change in the `integration-tests/integration/write_pyarrow.py`).
+before. This is only needed once (per change in the `tests/write_pyarrow.py`).
 
 ## How to implement page readers
 

diff --git a/tests/it/read/mod.rs b/tests/it/read/mod.rs
@@ -116,14 +116,7 @@ pub fn read_column<R: std::io::Read + std::io::Seek>(
         .fields()
         .iter()
         .enumerate()
-        .filter_map(|(i, x)| {
-            println!("{}", x.name());
-            if x.name() == field {
-                Some(i)
-            } else {
-                None
-            }
-        })
+        .filter_map(|(i, x)| if x.name() == field { Some(i) } else { None })
         .next()
         .unwrap();
 
@@ -153,14 +146,7 @@ pub async fn read_column_async<
         .fields()
         .iter()
         .enumerate()
-        .filter_map(|(i, x)| {
-            println!("{}", x.name());
-            if x.name() == field {
-                Some(i)
-            } else {
-                None
-            }
-        })
+        .filter_map(|(i, x)| if x.name() == field { Some(i) } else { None })
         .next()
         .unwrap();
 
@@ -407,6 +393,16 @@ fn pyarrow_v1_non_dict_int64_optional() -> Result<()> {
     test_pyarrow_integration("basic", "int64", 1, false, false, "")
 }
 
+#[test]
+fn pyarrow_v1_non_dict_int64_optional_brotli() -> Result<()> {
+    test_pyarrow_integration("basic", "int64", 1, false, false, "/brotli")
+}
+
+#[test]
+fn pyarrow_v1_non_dict_int64_optional_gzip() -> Result<()> {
+    test_pyarrow_integration("basic", "int64", 1, false, false, "/gzip")
+}
+
 #[test]
 fn pyarrow_v1_non_dict_int64_optional_snappy() -> Result<()> {
     test_pyarrow_integration("basic", "int64", 1, false, false, "/snappy")
@@ -417,6 +413,11 @@ fn pyarrow_v1_non_dict_int64_optional_lz4() -> Result<()> {
     test_pyarrow_integration("basic", "int64", 1, false, false, "/lz4")
 }
 
+#[test]
+fn pyarrow_v1_non_dict_int64_optional_zstd() -> Result<()> {
+    test_pyarrow_integration("basic", "int64", 1, false, false, "/zstd")
+}
+
 #[test]
 fn pyarrow_v2_non_dict_int64_optional() -> Result<()> {
     test_pyarrow_integration("basic", "int64", 2, false, false, "")

diff --git a/tests/write_pyarrow.py b/tests/write_pyarrow.py
@@ -150,5 +150,5 @@ def write_pyarrow(
 for case in [case_basic_nullable, case_basic_required, case_nested, case_struct]:
     for version in [1, 2]:
         for use_dict in [False, True]:
-            for compression in [None, "snappy", "lz4"]:
+            for compression in [None, "brotli", "lz4", "gzip", "snappy", "zstd"]:
                 write_pyarrow(case, 1, version, use_dict, compression)