Skip to content

Commit

Permalink
Refactor tests to be more independent (#165)
Browse files Browse the repository at this point in the history
* converting tests to use pathlib.Path instead of os
converting stream tests to use CliRunner with input=fh

* removed slow fixtures, added intermediate files for filter
tweaked some test code to look more consistent

* use isolated_filesystem to not keep test output files

* created a new fixture for test_correct and made the piped version of the test work
  • Loading branch information
jamestwebber authored Sep 24, 2022
1 parent 11a0723 commit 07865eb
Show file tree
Hide file tree
Showing 14 changed files with 189 additions and 334 deletions.
50 changes: 21 additions & 29 deletions tests/integration/test_annotate.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,45 @@
import pytest

import subprocess
import sys
import os
import pathlib

from click.testing import CliRunner

from longbow.__main__ import main_entry as longbow

from ..utils import assert_reads_files_equal
from ..utils import cat_file_to_pipe

################################################################################

TOOL_NAME = "annotate"

TEST_DATA_FOLDER = path = os.path.abspath(
__file__ + os.path.sep + "../../" + os.path.sep + "test_data"
) + os.path.sep
EXPECTED_DATA_FOLDER = TEST_DATA_FOLDER + TOOL_NAME + os.path.sep

################################################################################


@pytest.mark.parametrize("input_bam, expected_bam, model_name", [
[TEST_DATA_FOLDER + "mas15_test_input.bam", EXPECTED_DATA_FOLDER + "mas15v2_expected.bam", "mas_15_sc_10x5p_single_none"],
[TEST_DATA_FOLDER + "mas10_test_input.bam", EXPECTED_DATA_FOLDER + "mas10v2_expected.bam", "mas_10_sc_10x5p_single_none"],
])
TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
TEST_PARAMS = [
[
TEST_DATA_FOLDER / "mas15_test_input.bam",
TEST_DATA_FOLDER / "annotate" / "mas15v2_expected.bam",
"mas_15_sc_10x5p_single_none",
],
[
TEST_DATA_FOLDER / "mas10_test_input.bam",
TEST_DATA_FOLDER / "annotate" / "mas10v2_expected.bam",
"mas_10_sc_10x5p_single_none",
],
]


@pytest.mark.parametrize("input_bam, expected_bam, model_name", TEST_PARAMS)
def test_annotate(tmpdir, input_bam, expected_bam, model_name):

actual_file = tmpdir.join(f"{TOOL_NAME}_actual_out.{model_name}.bam")
args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, input_bam, "-o", str(actual_file)]
actual_bam = tmpdir.join(f"{TOOL_NAME}_actual_out.{model_name}.bam")
args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, str(input_bam), "-o", str(actual_bam)]

runner = CliRunner()
result = runner.invoke(longbow, args)

os.system(f"cp {str(actual_file)} .")

assert result.exit_code == 0
assert_reads_files_equal(actual_file, expected_bam, order_matters=True)
assert_reads_files_equal(actual_bam, expected_bam, order_matters=True)


@pytest.mark.parametrize("input_bam, expected_bam, model_name", [
[TEST_DATA_FOLDER + "mas15_test_input.bam", EXPECTED_DATA_FOLDER + "mas15v2_expected.bam", "mas_15_sc_10x5p_single_none"],
[TEST_DATA_FOLDER + "mas10_test_input.bam", EXPECTED_DATA_FOLDER + "mas10v2_expected.bam", "mas_10_sc_10x5p_single_none"],
])
@pytest.mark.parametrize("input_bam, expected_bam, model_name", TEST_PARAMS)
def test_annotate_from_pipe(tmpdir, input_bam, expected_bam, model_name):
actual_bam = tmpdir.join(f"annotate_actual_out.{model_name}.pipe.bam")

args = ["annotate", "-t", 1, "-v", "INFO", "-m", model_name, "-f", "-o", str(actual_bam), "-"]

runner = CliRunner()
Expand Down
20 changes: 6 additions & 14 deletions tests/integration/test_convert.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,33 @@
import pytest
from click.testing import CliRunner

import subprocess
import sys
import os
import time
import pathlib
import tempfile
import gzip

import pysam

from longbow.__main__ import main_entry as longbow

from ..utils import assert_reads_files_equal
from ..utils import cat_file_to_pipe


TEST_DATA_FOLDER = path = os.path.abspath(
__file__ + os.path.sep + "../../" + os.path.sep + "test_data"
) + os.path.sep
TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"


@pytest.mark.parametrize("input_bam", [
[TEST_DATA_FOLDER + "mas15_test_input.bam"],
[TEST_DATA_FOLDER + "mas10_test_input.bam"],
TEST_DATA_FOLDER / "mas15_test_input.bam",
TEST_DATA_FOLDER / "mas10_test_input.bam",
])
def test_convert_from_file(tmpdir, input_bam):

pysam.set_verbosity(0)
with pysam.AlignmentFile(input_bam[0], "rb", require_index=False, check_sq=False) as input_file:
with pysam.AlignmentFile(input_bam, "rb", require_index=False, check_sq=False) as input_file:
with tempfile.NamedTemporaryFile(delete=True, suffix=".fq.gz") as tf:
with gzip.open(tf.name, "wb") as output_fq:
for bam_record in input_file:
output_fq.write(f'@{bam_record.query_name}\n{bam_record.query_sequence}\n+\n{bam_record.qual}\n'.encode("utf-8"))

actual_bam = tmpdir.join(f"convert_actual_out.bam")
args = ["convert", "-f", "-o", actual_bam, input_bam[0]]
args = ["convert", "-f", "-o", actual_bam, str(input_bam)]

runner = CliRunner()
result = runner.invoke(longbow, args)
Expand Down
79 changes: 48 additions & 31 deletions tests/integration/test_correct.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,27 @@
import pytest
import os
import sys
import subprocess
import pathlib

from click.testing import CliRunner

from longbow.__main__ import main_entry as longbow

from ..utils import assert_reads_files_equal
from ..utils import cat_file_to_pipe
from ..utils import convert_sam_to_bam

################################################################################

TOOL_NAME = "correct"
TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data" / TOOL_NAME

TEST_DATA_FOLDER = path = os.path.abspath(
__file__ + os.path.sep + "../../" + os.path.sep + "test_data"
) + os.path.sep + TOOL_NAME + os.path.sep


################################################################################


@pytest.mark.parametrize("input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam", [
[TEST_DATA_FOLDER + "correct_test_data.sam", TEST_DATA_FOLDER + "correct_expected_corrected_data.sam",
TEST_DATA_FOLDER + "correct_expected_uncorrected_data.sam"],
@pytest.fixture(scope="function", params=[
(
TEST_DATA_FOLDER / "correct_test_data.sam",
TEST_DATA_FOLDER / "correct_expected_corrected_data.sam",
TEST_DATA_FOLDER / "correct_expected_uncorrected_data.sam",
)
])
def test_correct(tmpdir, input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam):
def input_data_files(tmpdir, request):
input_sam, expected_bc_corrected_sam, expected_bc_uncorrected_sam = request.param

# Convert test files to bam:
input_bam = tmpdir.join("input.bam")
Expand All @@ -36,33 +30,56 @@ def test_correct(tmpdir, input_sam, expected_bc_corrected_sam, expected_bc_uncor
expected_bc_corrected_bam = tmpdir.join("expected.bam")
convert_sam_to_bam(expected_bc_corrected_sam, expected_bc_corrected_bam)

return input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam


def test_correct(tmpdir, input_data_files):
input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam = input_data_files

actual_bc_corrected_file = tmpdir.join(f"{TOOL_NAME}_actual_out.mas15.bam")
actual_bc_uncorrected_file = tmpdir.join(f"{TOOL_NAME}_actual_bc_uncorrected_out.mas15.bam")
args = ["correct", "-t", 1, "-m", "mas_15_sc_10x5p_single_none", "-v", "INFO",
"-a", f"{TEST_DATA_FOLDER}barcode_allow_list.txt", str(input_bam), "-o", str(actual_bc_corrected_file),
"--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)]
args = [
"correct",
"-t", 1,
"-m", "mas_15_sc_10x5p_single_none",
"-a", str(TEST_DATA_FOLDER / "barcode_allow_list.txt"),
str(input_bam),
"-o", str(actual_bc_corrected_file),
"--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)
]

runner = CliRunner(mix_stderr=False)
result = runner.invoke(longbow, args)
with runner.isolated_filesystem():
result = runner.invoke(longbow, args)

assert result.exit_code == 0

os.system(f"cp -v {str(actual_bc_corrected_file)} .")

# Equal files result as True:
assert_reads_files_equal(actual_bc_corrected_file, expected_bc_corrected_bam, order_matters=True)
assert_reads_files_equal(actual_bc_uncorrected_file, expected_bc_uncorrected_sam, order_matters=True)


@pytest.mark.skip(reason="`correct` command currently does not accept data from a pipe")
def test_correct_from_pipe(tmpdir, extracted_bam_file_from_pipeline):
actual_file = tmpdir.join(f"correct_actual_out.pipe.bam")
def test_correct_from_pipe(tmpdir, input_data_files):
input_bam, expected_bc_corrected_bam, expected_bc_uncorrected_sam = input_data_files

proc = subprocess.Popen(
[sys.executable, "-m", "longbow", "correct", "-t", 1, "-f", "-o", actual_file],
stdin=subprocess.PIPE
)
actual_bc_corrected_file = tmpdir.join(f"{TOOL_NAME}_actual_out.mas15.pipe.bam")
actual_bc_uncorrected_file = tmpdir.join(f"{TOOL_NAME}_actual_bc_uncorrected_out.mas15.pipe.bam")

args = [
"correct",
"-t", 1,
"-m", "mas_15_sc_10x5p_single_none",
"-a", str(TEST_DATA_FOLDER / "barcode_allow_list.txt"),
"-o", str(actual_bc_corrected_file),
"--barcode-uncorrectable-bam", str(actual_bc_uncorrected_file)
]

cat_file_to_pipe(extracted_bam_file_from_pipeline, proc)
runner = CliRunner()
with runner.isolated_filesystem(), open(input_bam, "rb") as fh:
result = runner.invoke(longbow, args, input=fh)

assert result.exit_code == 0

assert proc.returncode == 0
# Equal files result as True:
assert_reads_files_equal(actual_bc_corrected_file, expected_bc_corrected_bam, order_matters=True)
assert_reads_files_equal(actual_bc_uncorrected_file, expected_bc_uncorrected_sam, order_matters=True)
56 changes: 18 additions & 38 deletions tests/integration/test_demultiplex.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,34 @@
import pytest
import os
import sys
import subprocess
import tempfile
import pathlib

from click.testing import CliRunner

from longbow.__main__ import main_entry as longbow

from ..utils import cat_file_to_pipe
TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
TEST_PARAMS = [
TEST_DATA_FOLDER / "annotate" / "mas15v2_expected.bam",
TEST_DATA_FOLDER / "annotate" / "mas10v2_expected.bam",
]

TEST_DATA_FOLDER = path = os.path.abspath(
__file__ + os.path.sep + "../../" + os.path.sep + "test_data"
) + os.path.sep


@pytest.fixture(scope="module", params=[
(TEST_DATA_FOLDER + "mas15_test_input.bam", "mas_15_sc_10x5p_single_none"),
(TEST_DATA_FOLDER + "mas10_test_input.bam", "mas_10_sc_10x5p_single_none"),
])
def annotated_bam_file_from_pipeline(request):
input_bam, model_name = request.param

with tempfile.NamedTemporaryFile(delete=True) as annotate_bam:

runner = CliRunner()

result_annotate = runner.invoke(longbow, ["annotate", "-m", model_name, "-f", "-o", annotate_bam.name, input_bam])
assert result_annotate.exit_code == 0

# Yield file here so that when we return, we get to clean up automatically
yield annotate_bam.name


def test_demultiplex_from_file(tmpdir, annotated_bam_file_from_pipeline):
args = ["demultiplex", "-d", "YN", "-o", "demux", annotated_bam_file_from_pipeline]
@pytest.mark.parametrize("input_bam", TEST_PARAMS)
def test_demultiplex_from_file(tmpdir, input_bam):
args = ["demultiplex", "-d", "YN", "-o", "demux", str(input_bam)]

runner = CliRunner()
result = runner.invoke(longbow, args)
with runner.isolated_filesystem():
result = runner.invoke(longbow, args)

assert result.exit_code == 0


@pytest.mark.skip(reason="this test is broken and I don't know why")
def test_demultiplex_from_pipe(tmpdir, annotated_bam_file_from_pipeline):
proc = subprocess.Popen(
[ sys.executable, "-m", "longbow", "demultiplex", "-d", "YN", "-o", "demux", annotated_bam_file_from_pipeline ],
stdin=subprocess.PIPE
)
@pytest.mark.parametrize("input_bam", TEST_PARAMS)
def test_demultiplex_from_pipe(tmpdir, input_bam):
# "-o" and "demux" must be separate argv tokens; without the comma Python's
# implicit string-literal concatenation silently fuses them into "-odemux".
args = ["demultiplex", "-d", "YN", "-o", "demux"]

cat_file_to_pipe(annotated_bam_file_from_pipeline, proc)
runner = CliRunner()
with runner.isolated_filesystem(), open(input_bam, "rb") as fh:
result = runner.invoke(longbow, args, input=fh)

assert proc.returncode == 0
assert result.exit_code == 0
50 changes: 12 additions & 38 deletions tests/integration/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,37 @@
import pytest
import os
import sys
import subprocess
import tempfile
import pathlib

from click.testing import CliRunner

from longbow.__main__ import main_entry as longbow

from ..utils import cat_file_to_pipe

TEST_DATA_FOLDER = path = os.path.abspath(
__file__ + os.path.sep + "../../" + os.path.sep + "test_data"
) + os.path.sep
TEST_DATA_FOLDER = pathlib.Path(__file__).parent.parent / "test_data"
TEST_PARAMS = [
TEST_DATA_FOLDER / "segment" / "mas_15_sc_10x5p_single_none.expected.bam",
TEST_DATA_FOLDER / "segment" / "mas_10_sc_10x5p_single_none.expected.bam",
]


@pytest.fixture(scope="module", params=[
(TEST_DATA_FOLDER + "mas15_test_input.bam", "mas_15_sc_10x5p_single_none"),
(TEST_DATA_FOLDER + "mas10_test_input.bam", "mas_10_sc_10x5p_single_none"),
])
def segmented_bam_file_from_pipeline(request):
input_bam, model_name = request.param

with tempfile.NamedTemporaryFile(delete=True) as annotate_bam, \
tempfile.NamedTemporaryFile(delete=True) as filter_bam, \
tempfile.NamedTemporaryFile(delete=True) as segment_bam:

runner = CliRunner()

result_annotate = runner.invoke(longbow, ["annotate", "-t", 1, "-m", model_name, "-f", "-o", annotate_bam.name, input_bam])
assert result_annotate.exit_code == 0

result_filter = runner.invoke(longbow, ["filter", "-m", model_name, "-f", "-o", filter_bam.name, annotate_bam.name])
assert result_filter.exit_code == 0

result_segment = runner.invoke(longbow, ["segment", "-t", 1, "-m", model_name, "-f", "-o", segment_bam.name, filter_bam.name])
assert result_segment.exit_code == 0

# Yield file here so that when we return, we get to clean up automatically
yield segment_bam.name


def test_extract_from_file(tmpdir, segmented_bam_file_from_pipeline):
@pytest.mark.parametrize("input_bam", TEST_PARAMS)
def test_extract_from_file(tmpdir, input_bam):
actual_file = tmpdir.join("extract_actual_out.bam")
args = ["extract", "-f", "-o", actual_file, segmented_bam_file_from_pipeline]
args = ["extract", "-f", "-o", actual_file, str(input_bam)]

runner = CliRunner()
result = runner.invoke(longbow, args)

assert result.exit_code == 0


def test_extract_from_pipe(tmpdir, segmented_bam_file_from_pipeline):
@pytest.mark.parametrize("input_bam", TEST_PARAMS)
def test_extract_from_pipe(tmpdir, input_bam):
actual_file = tmpdir.join("extract_actual_out.pipe.bam")

args = ["extract", "-f", "-o", actual_file]

runner = CliRunner()
with open(segmented_bam_file_from_pipeline, "rb") as fh:
with open(input_bam, "rb") as fh:
result = runner.invoke(longbow, args, input=fh)

assert result.exit_code == 0
Loading

0 comments on commit 07865eb

Please sign in to comment.