Refactor tests to be more independent (#165)

* Converted tests to use pathlib.Path instead of os; converted stream tests to use CliRunner with input=fh.
* Removed slow fixtures and added intermediate files for filter; tweaked some test code to look more consistent.
* Use isolated_filesystem so that test output files are not kept.
* Created a new fixture for test_correct and made the piped version of the test work.
broadinstitute · Sep 24, 2022 · 07865eb
1 parent 11a0723
commit 07865eb
Show file tree

Hide file tree

Showing 14 changed files with 189 additions and 334 deletions.
diff --git a/tests/integration/test_annotate.py b/tests/integration/test_annotate.py
@@ -1,53 +1,45 @@
 import pytest
-
-import subprocess
-import sys
-import os
+import pathlib
 
 from click.testing import CliRunner
 
 from longbow.__main__ import main_entry as longbow
 
 from ..utils import assert_reads_files_equal
-from ..utils import cat_file_to_pipe
 
-################################################################################
 
 TOOL_NAME = "annotate"
 
-TEST_DATA_FOLDER = path = os.path.abspath(
-    __file__ + os.path.sep + "../../" + os.path.sep + "test_data"
-) + os.path.sep
-EXPECTED_DATA_FOLDER = TEST_DATA_FOLDER + TOOL_NAME + os.path.sep
-
-################################################################################
-
-
-@pytest.mark.parametrize("input_bam, expected_bam, model_name", [
-    [TEST_DATA_FOLDER + "mas15_test_input.bam", EXPECTED_DATA_FOLDER + "mas15v2_expected.bam", "mas_15_sc_10x5p_single_none"],
-    [TEST_DATA_FOLDER + "mas10_test_input.bam", EXPECTED_DATA_FOLDER + "mas10v2_expected.bam", "mas_10_sc_10x5p_single_none"],
-])
+TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
+TEST_PARAMS = [
+    [
+        TEST_DATA_FOLDER / "mas15_test_input.bam",
+        TEST_DATA_FOLDER / "annotate" / "mas15v2_expected.bam",
+        "mas_15_sc_10x5p_single_none",
+    ],
+    [
+        TEST_DATA_FOLDER / "mas10_test_input.bam",
+        TEST_DATA_FOLDER / "annotate" / "mas10v2_expected.bam",
+        "mas_10_sc_10x5p_single_none",
+    ],
+]
+
+
+@pytest.mark.parametrize("input_bam, expected_bam, model_name", TEST_PARAMS)
 def test_annotate(tmpdir, input_bam, expected_bam, model_name):
-
-    actual_file = tmpdir.join(f"{TOOL_NAME}_actual_out.{model_name}.bam")
-    args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, input_bam, "-o", str(actual_file)]
+    actual_bam = tmpdir.join(f"{TOOL_NAME}_actual_out.{model_name}.bam")
+    args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, str(input_bam), "-o", str(actual_bam)]
 
     runner = CliRunner()
     result = runner.invoke(longbow, args)
 
-    os.system(f"cp {str(actual_file)} .")
-
     assert result.exit_code == 0
-    assert_reads_files_equal(actual_file, expected_bam, order_matters=True)
+    assert_reads_files_equal(actual_bam, expected_bam, order_matters=True)
 
 
-@pytest.mark.parametrize("input_bam, expected_bam, model_name", [
-    [TEST_DATA_FOLDER + "mas15_test_input.bam", EXPECTED_DATA_FOLDER + "mas15v2_expected.bam", "mas_15_sc_10x5p_single_none"],
-    [TEST_DATA_FOLDER + "mas10_test_input.bam", EXPECTED_DATA_FOLDER + "mas10v2_expected.bam", "mas_10_sc_10x5p_single_none"],
-])
+@pytest.mark.parametrize("input_bam, expected_bam, model_name", TEST_PARAMS)
 def test_annotate_from_pipe(tmpdir, input_bam, expected_bam, model_name):
     actual_bam = tmpdir.join(f"annotate_actual_out.{model_name}.pipe.bam")
-
     args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, "-f", "-o", str(actual_bam), "-"]
 
     runner = CliRunner()

diff --git a/tests/integration/test_convert.py b/tests/integration/test_convert.py
@@ -1,41 +1,33 @@
 import pytest
 from click.testing import CliRunner
 
-import subprocess
-import sys
-import os
-import time
+import pathlib
 import tempfile
 import gzip
 
 import pysam
 
 from longbow.__main__ import main_entry as longbow
 
-from ..utils import assert_reads_files_equal
-from ..utils import cat_file_to_pipe
 
-
-TEST_DATA_FOLDER = path = os.path.abspath(
-    __file__ + os.path.sep + "../../" + os.path.sep + "test_data"
-) + os.path.sep
+TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
 
 
 @pytest.mark.parametrize("input_bam", [
-    [TEST_DATA_FOLDER + "mas15_test_input.bam"],
-    [TEST_DATA_FOLDER + "mas10_test_input.bam"],
+    TEST_DATA_FOLDER / "mas15_test_input.bam",
+    TEST_DATA_FOLDER / "mas10_test_input.bam",
 ])
 def test_convert_from_file(tmpdir, input_bam):
 
     pysam.set_verbosity(0)
-    with pysam.AlignmentFile(input_bam[0], "rb", require_index=False, check_sq=False) as input_file:
+    with pysam.AlignmentFile(input_bam, "rb", require_index=False, check_sq=False) as input_file:
         with tempfile.NamedTemporaryFile(delete=True, suffix=".fq.gz") as tf:
             with gzip.open(tf.name, "wb") as output_fq:
                 for bam_record in input_file:
                     output_fq.write(f'@{bam_record.query_name}\n{bam_record.query_sequence}\n+\n{bam_record.qual}\n'.encode("utf-8"))
 
             actual_bam = tmpdir.join(f"convert_actual_out.bam")
-            args = ["convert", "-f", "-o", actual_bam, input_bam[0]]
+            args = ["convert", "-f", "-o", actual_bam, str(input_bam)]
 
             runner = CliRunner()
             result = runner.invoke(longbow, args)

diff --git a/tests/integration/test_correct.py b/tests/integration/test_correct.py
@@ -1,33 +1,27 @@
 import pytest
-import os
-import sys
-import subprocess
+import pathlib
 
 from click.testing import CliRunner
 
 from longbow.__main__ import main_entry as longbow
 
 from ..utils import assert_reads_files_equal
-from ..utils import cat_file_to_pipe
 from ..utils import convert_sam_to_bam
 
-################################################################################
 
 TOOL_NAME = "correct"
+TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data" / TOOL_NAME
 
-TEST_DATA_FOLDER = path = os.path.abspath(
-    __file__ + os.path.sep + "../../" + os.path.sep + "test_data"
-) + os.path.sep + TOOL_NAME + os.path.sep
 
-
-################################################################################
-
-
-@pytest.mark.parametrize("input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam", [
-    [TEST_DATA_FOLDER + "correct_test_data.sam", TEST_DATA_FOLDER + "correct_expected_corrected_data.sam",
-     TEST_DATA_FOLDER + "correct_expected_uncorrected_data.sam"],
+@pytest.fixture(scope="function", params=[
+    (
+        TEST_DATA_FOLDER / "correct_test_data.sam",
+        TEST_DATA_FOLDER / "correct_expected_corrected_data.sam",
+        TEST_DATA_FOLDER / "correct_expected_uncorrected_data.sam",
+    )
 ])
-def test_correct(tmpdir, input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam):
+def input_data_files(tmpdir, request):
+    input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam = request.param
 
     # Convert test files to bam:
     input_bam = tmpdir.join("input.bam")
@@ -36,33 +30,56 @@ def test_correct(tmpdir, input_sam, expected_bc_corrected_sam, expected_bc_uncor
     expected_bc_corrected_bam = tmpdir.join("expected.bam")
     convert_sam_to_bam(expected_bc_corrected_sam, expected_bc_corrected_bam)
 
+    return input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam
+
+
+def test_correct(tmpdir, input_data_files):
+    input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam = input_data_files
+
     actual_bc_corrected_file = tmpdir.join(f"{TOOL_NAME}_actual_out.mas15.bam")
     actual_bc_uncorrected_file = tmpdir.join(f"{TOOL_NAME}_actual_bc_uncorrected_out.mas15.bam")
-    args = ["correct", "-t", 1, "-m", "mas_15_sc_10x5p_single_none", "-v", "INFO",
-            "-a", f"{TEST_DATA_FOLDER}barcode_allow_list.txt", str(input_bam), "-o", str(actual_bc_corrected_file),
-            "--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)]
+    args = [
+        "correct",
+        "-t", 1,
+        "-m", "mas_15_sc_10x5p_single_none",
+        "-a", str(TEST_DATA_FOLDER / "barcode_allow_list.txt"),
+        str(input_bam),
+        "-o", str(actual_bc_corrected_file),
+        "--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)
+    ]
 
     runner = CliRunner(mix_stderr=False)
-    result = runner.invoke(longbow, args)
+    with runner.isolated_filesystem():
+        result = runner.invoke(longbow, args)
 
     assert result.exit_code == 0
 
-    os.system(f"cp -v {str(actual_bc_corrected_file)} .")
-
     # Equal files result as True:
     assert_reads_files_equal(actual_bc_corrected_file, expected_bc_corrected_bam, order_matters=True)
     assert_reads_files_equal(actual_bc_uncorrected_file, expected_bc_uncorrected_sam, order_matters=True)
 
 
-@pytest.mark.skip(reason="`correct` command currently does not accept data from a pipe")
-def test_correct_from_pipe(tmpdir, extracted_bam_file_from_pipeline):
-    actual_file = tmpdir.join(f"correct_actual_out.pipe.bam")
+def test_correct_from_pipe(tmpdir, input_data_files):
+    input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam = input_data_files
 
-    proc = subprocess.Popen(
-        [sys.executable, "-m", "longbow", "correct", "-t", 1, "-f", "-o", actual_file],
-        stdin=subprocess.PIPE
-    )
+    actual_bc_corrected_file = tmpdir.join(f"{TOOL_NAME}_actual_out.mas15.pipe.bam")
+    actual_bc_uncorrected_file = tmpdir.join(f"{TOOL_NAME}_actual_bc_uncorrected_out.mas15.pipe.bam")
+
+    args = [
+        "correct",
+        "-t", 1,
+        "-m", "mas_15_sc_10x5p_single_none",
+        "-a", str(TEST_DATA_FOLDER / "barcode_allow_list.txt"),
+        "-o", str(actual_bc_corrected_file),
+        "--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)
+    ]
 
-    cat_file_to_pipe(extracted_bam_file_from_pipeline, proc)
+    runner = CliRunner()
+    with runner.isolated_filesystem(), open(input_bam, "rb") as fh:
+        result = runner.invoke(longbow, args, input=fh)
+
+    assert result.exit_code == 0
 
-    assert proc.returncode == 0
+    # Equal files result as True:
+    assert_reads_files_equal(actual_bc_corrected_file, expected_bc_corrected_bam, order_matters=True)
+    assert_reads_files_equal(actual_bc_uncorrected_file, expected_bc_uncorrected_sam, order_matters=True)
diff --git a/tests/integration/test_demultiplex.py b/tests/integration/test_demultiplex.py
@@ -1,54 +1,34 @@
 import pytest
-import os
-import sys
-import subprocess
-import tempfile
+import pathlib
 
 from click.testing import CliRunner
 
 from longbow.__main__ import main_entry as longbow
 
-from ..utils import cat_file_to_pipe
+TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
+TEST_PARAMS = [
+    TEST_DATA_FOLDER / "annotate" / "mas15v2_expected.bam",
+    TEST_DATA_FOLDER / "annotate" / "mas10v2_expected.bam",
+]
 
-TEST_DATA_FOLDER = path = os.path.abspath(
-    __file__ + os.path.sep + "../../" + os.path.sep + "test_data"
-) + os.path.sep
 
-
-@pytest.fixture(scope="module", params=[
-    (TEST_DATA_FOLDER + "mas15_test_input.bam", "mas_15_sc_10x5p_single_none"),
-    (TEST_DATA_FOLDER + "mas10_test_input.bam", "mas_10_sc_10x5p_single_none"),
-])
-def annotated_bam_file_from_pipeline(request):
-    input_bam, model_name = request.param
-
-    with tempfile.NamedTemporaryFile(delete=True) as annotate_bam:
-
-        runner = CliRunner()
-
-        result_annotate = runner.invoke(longbow, ["annotate", "-m", model_name, "-f", "-o", annotate_bam.name, input_bam])
-        assert result_annotate.exit_code == 0
-
-        # Yield file here so that when we return, we get to clean up automatically
-        yield annotate_bam.name
-
-
-def test_demultiplex_from_file(tmpdir, annotated_bam_file_from_pipeline):
-    args = ["demultiplex", "-d", "YN", "-o", "demux", annotated_bam_file_from_pipeline]
+@pytest.mark.parametrize("input_bam", TEST_PARAMS)
+def test_demultiplex_from_file(tmpdir, input_bam):
+    args = ["demultiplex", "-d", "YN", "-o", "demux", str(input_bam)]
 
     runner = CliRunner()
-    result = runner.invoke(longbow, args)
+    with runner.isolated_filesystem():
+        result = runner.invoke(longbow, args)
 
     assert result.exit_code == 0
 
 
-@pytest.mark.skip(reason="this test is broken and I don't know why")
-def test_demultiplex_from_pipe(tmpdir, annotated_bam_file_from_pipeline):
-    proc = subprocess.Popen(
-        [ sys.executable, "-m", "longbow", "demultiplex", "-d", "YN", "-o", "demux", annotated_bam_file_from_pipeline ],
-        stdin=subprocess.PIPE
-    )
+@pytest.mark.parametrize("input_bam", TEST_PARAMS)
+def test_demultiplex_from_pipe(tmpdir, input_bam):
+    args = ["demultiplex", "-d", "YN", "-o" "demux"]
 
-    cat_file_to_pipe(annotated_bam_file_from_pipeline, proc)
+    runner = CliRunner()
+    with runner.isolated_filesystem(), open(input_bam, "rb") as fh:
+        result = runner.invoke(longbow, args, input=fh)
 
-    assert proc.returncode == 0
+    assert result.exit_code == 0
diff --git a/tests/integration/test_extract.py b/tests/integration/test_extract.py
@@ -1,63 +1,37 @@
 import pytest
-import os
-import sys
-import subprocess
-import tempfile
+import pathlib
 
 from click.testing import CliRunner
 
 from longbow.__main__ import main_entry as longbow
 
-from ..utils import cat_file_to_pipe
 
-TEST_DATA_FOLDER = path = os.path.abspath(
-    __file__ + os.path.sep + "../../" + os.path.sep + "test_data"
-) + os.path.sep
+TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
+TEST_PARAMS = [
+    TEST_DATA_FOLDER / "segment" / "mas_15_sc_10x5p_single_none.expected.bam",
+    TEST_DATA_FOLDER / "segment" / "mas_10_sc_10x5p_single_none.expected.bam",
+]
 
 
-@pytest.fixture(scope="module", params=[
-    (TEST_DATA_FOLDER + "mas15_test_input.bam", "mas_15_sc_10x5p_single_none"),
-    (TEST_DATA_FOLDER + "mas10_test_input.bam", "mas_10_sc_10x5p_single_none"),
-])
-def segmented_bam_file_from_pipeline(request):
-    input_bam, model_name = request.param
-
-    with tempfile.NamedTemporaryFile(delete=True) as annotate_bam, \
-         tempfile.NamedTemporaryFile(delete=True) as filter_bam, \
-         tempfile.NamedTemporaryFile(delete=True) as segment_bam:
-
-        runner = CliRunner()
-
-        result_annotate = runner.invoke(longbow, ["annotate", "-t", 1, "-m", model_name, "-f", "-o", annotate_bam.name, input_bam])
-        assert result_annotate.exit_code == 0
-
-        result_filter = runner.invoke(longbow, ["filter", "-m", model_name, "-f", "-o", filter_bam.name, annotate_bam.name])
-        assert result_filter.exit_code == 0
-
-        result_segment = runner.invoke(longbow, ["segment", "-t", 1, "-m", model_name, "-f", "-o", segment_bam.name,  filter_bam.name])
-        assert result_segment.exit_code == 0
-
-        # Yield file here so that when we return, we get to clean up automatically
-        yield segment_bam.name
-
-
-def test_extract_from_file(tmpdir, segmented_bam_file_from_pipeline):
+@pytest.mark.parametrize("input_bam", TEST_PARAMS)
+def test_extract_from_file(tmpdir, input_bam):
     actual_file = tmpdir.join("extract_actual_out.bam")
-    args = ["extract", "-f", "-o", actual_file, segmented_bam_file_from_pipeline]
+    args = ["extract", "-f", "-o", actual_file, str(input_bam)]
 
     runner = CliRunner()
     result = runner.invoke(longbow, args)
 
     assert result.exit_code == 0
 
 
-def test_extract_from_pipe(tmpdir, segmented_bam_file_from_pipeline):
+@pytest.mark.parametrize("input_bam", TEST_PARAMS)
+def test_extract_from_pipe(tmpdir, input_bam):
     actual_file = tmpdir.join("extract_actual_out.pipe.bam")
 
     args = ["extract", "-f", "-o", actual_file]
 
     runner = CliRunner()
-    with open(segmented_bam_file_from_pipeline, "rb") as fh:
+    with open(input_bam, "rb") as fh:
         result = runner.invoke(longbow, args, input=fh)
 
     assert result.exit_code == 0