diff --git a/src/longbow/extract/command.py b/src/longbow/extract/command.py index 65bc498d..6600a69c 100644 --- a/src/longbow/extract/command.py +++ b/src/longbow/extract/command.py @@ -185,7 +185,7 @@ def main(pbi, output_bam, force, base_padding, create_barcode_conf_file, # Set up our barcode confidence file here: barcode_conf_file = None if create_barcode_conf_file: - if lb_model.has_barcode_annotation: + if lb_model.has_cell_barcode_annotation: logger.info(f"Creating barcode confidence file: {longbow.utils.constants.BARCODE_CONF_FILE_NAME}") barcode_conf_file = open(longbow.utils.constants.BARCODE_CONF_FILE_NAME, 'w') else: @@ -230,6 +230,7 @@ def main(pbi, output_bam, force, base_padding, create_barcode_conf_file, ) ) else: + # Read is already segmented, so we don't have to do anything else: segmented_reads = [read] extracted_segment = False diff --git a/src/longbow/segment/command.py b/src/longbow/segment/command.py index 0f01b263..f9bfa6e6 100644 --- a/src/longbow/segment/command.py +++ b/src/longbow/segment/command.py @@ -138,21 +138,20 @@ def main(threads, output_bam, create_barcode_conf_file, model, ignore_cbc_and_um else: # Check to see if the model has a CBC / UMI. If not, we should turn on this flag # and warn the user: + has_cbc_or_umi_annotation = False - if lb_model.annotation_segments: - for seg_tag_list in lb_model.annotation_segments.values(): - for seg_tag, seg_pos_tag in seg_tag_list: - if seg_tag == longbow.utils.constants.READ_UMI_TAG or \ - seg_tag == longbow.utils.constants.READ_BARCODE_TAG or \ - seg_tag == longbow.utils.constants.READ_RAW_UMI_TAG or \ - seg_tag == longbow.utils.constants.READ_RAW_BARCODE_TAG: - has_cbc_or_umi_annotation = True - break - if has_cbc_or_umi_annotation: - break + if lb_model.has_umi_annotation: + logger.info("Model has UMI annotation.") + else: + has_cbc_or_umi_annotation = True + + if lb_model.has_cell_barcode_annotation: + logger.info("Model has Cell Barcode annotation.") + else: + has_cbc_or_umi_annotation = True if not has_cbc_or_umi_annotation: - logger.warning("Model does not have CBC or UMI tags. All segments will be emitted.") + logger.warning("Model does not have Cell Barcode or UMI tags. All segments will be emitted.") ignore_cbc_and_umi = True out_header = bam_utils.create_bam_header_with_program_group(logger.name, bam_file.header, models=[lb_model]) @@ -241,13 +240,16 @@ def _sub_process_write_fn( barcode_conf_file = None if create_barcode_conf_file: - if model.has_barcode_annotation: + if model.has_cell_barcode_annotation: logger.info(f"Creating barcode confidence file: {longbow.utils.constants.BARCODE_CONF_FILE_NAME}") barcode_conf_file = open(longbow.utils.constants.BARCODE_CONF_FILE_NAME, 'w') else: logger.warning(f"Model does not have a barcode output, but barcode creation flag was given. " f"Barcode confidence file will NOT be created.") + model_annotates_cbc = model.has_cell_barcode_annotation + model_annotates_umi = model.has_umi_annotation + with pysam.AlignmentFile( out_bam_file_name, "wb", header=out_bam_header ) as out_bam_file: @@ -280,6 +282,8 @@ def _sub_process_write_fn( out_bam_file, barcode_conf_file, ignore_cbc_and_umi, + model_annotates_cbc, + model_annotates_umi ) # Increment our counters: @@ -514,7 +518,7 @@ def segment_read_with_bounded_region_algorithm(read, model, segments=None): def _write_segmented_read( - model, read, segments, delimiters, bam_out, barcode_conf_file, ignore_cbc_and_umi + model, read, segments, delimiters, bam_out, barcode_conf_file, ignore_cbc_and_umi, model_annotates_cbc, model_annotates_umi ): """Split and write out the segments of each read to the given bam output file. @@ -546,6 +550,8 @@ def _write_segmented_read( delim_name, prev_delim_name, ignore_cbc_and_umi, + model_annotates_cbc, + model_annotates_umi, sri ) @@ -570,6 +576,8 @@ def _write_split_array_element( delim_name, prev_delim_name, ignore_cbc_and_umi, + model_annotates_cbc, + model_annotates_umi, split_read_index ): """Write out an individual array element that has been split out according to the given coordinates.""" @@ -579,10 +587,24 @@ def _write_split_array_element( if barcode_conf_file is not None and a.has_tag(longbow.utils.constants.READ_BARCODE_CONF_FACTOR_TAG): barcode_conf_file.write(f"{a.get_tag(longbow.utils.constants.READ_RAW_BARCODE_TAG)}\t{a.get_tag(longbow.utils.constants.READ_BARCODE_CONF_FACTOR_TAG)}\n") - has_cbc_and_umi = bam_utils.has_cbc_and_umi(a) - - if has_cbc_and_umi or ignore_cbc_and_umi: + if ignore_cbc_and_umi: bam_out.write(a) + return True + else: + # We have to check the CBC and UMI: + # TODO: Expand this for other annotation segments. + cbc_ok = (not model_annotates_cbc) or bam_utils.has_cbc(a) + umi_ok = (not model_annotates_umi) or bam_utils.has_umi(a) + + if not cbc_ok: + logger.warning(f"Read {a.query_name} has no CBC. Ignoring.") + return False + elif not umi_ok: + logger.warning(f"Read {a.query_name} has no UMI. Ignoring.") + return False + else: + bam_out.write(a) + return True def create_simple_split_array_element(delim_name, end_coord, model, prev_delim_name, read, segments, start_coord, split_read_index): @@ -659,7 +681,7 @@ def create_simple_split_array_element(delim_name, end_coord, model, prev_delim_n a.set_tag(longbow.utils.constants.READ_BARCODE_CONF_FACTOR_TAG, conf_factor) # Set IsoSeq3-compatible tags: - a.set_tag(longbow.utils.constants.READ_CLIPPED_SEQS_LIST_TAG, ','.join(clipped_tags)) + a.set_tag(longbow.utils.constants.READ_CLIPPED_SEQS_LIST_TAG, ','.join(sorted(clipped_tags))) a.set_tag(longbow.utils.constants.READ_NUM_CONSENSUS_PASSES_TAG, 1) a.set_tag(longbow.utils.constants.READ_ZMW_NAMES_TAG, read.query_name) a.set_tag(longbow.utils.constants.READ_NUM_ZMWS_TAG, 1) diff --git a/src/longbow/utils/bam_utils.py b/src/longbow/utils/bam_utils.py index 265d9d72..9ab5e0d6 100644 --- a/src/longbow/utils/bam_utils.py +++ b/src/longbow/utils/bam_utils.py @@ -410,8 +410,12 @@ def get_confidence_factor_raw_quals(quals: array.array, scale_factor: float = CO ) -def has_cbc_and_umi(read): - return read.has_tag(READ_RAW_BARCODE_TAG) and read.has_tag(READ_RAW_UMI_TAG) +def has_cbc(read): + return read.has_tag(READ_RAW_BARCODE_TAG) + + +def has_umi(read): + return read.has_tag(READ_RAW_UMI_TAG) def get_model_name_from_bam_header(header): diff --git a/src/longbow/utils/constants.py b/src/longbow/utils/constants.py index f4d34030..4a152325 100644 --- a/src/longbow/utils/constants.py +++ b/src/longbow/utils/constants.py @@ -72,6 +72,9 @@ READ_SPATIAL_BARCODE2_TAG = "X2" READ_SPATIAL_BARCODE2_POS_TAG = "XQ" +READ_DEMUX_TAG = "id" +READ_DEMUX_POS_TAG = "ip" + READ_BGZF_VIRTUAL_OFFSET_TAG = "vo" COULD_CORRECT_BARCODE_TAG = "YC" # True IFF barcode correction was able to be performed (including "correction" where the original barcode did not change). False otherwise. diff --git a/src/longbow/utils/model.py b/src/longbow/utils/model.py index 1a5776ba..c71eadc8 100644 --- a/src/longbow/utils/model.py +++ b/src/longbow/utils/model.py @@ -100,21 +100,19 @@ def __init__(self, @property def has_umi_annotation(self): - return self._has_annotation(longbow.utils.constants.READ_RAW_UMI_TAG) + return self._has_annotation(longbow.utils.constants.READ_RAW_UMI_TAG) or self._has_annotation(longbow.utils.constants.READ_UMI_TAG) @property - def has_barcode_annotation(self): - return self._has_annotation(longbow.utils.constants.READ_RAW_BARCODE_TAG) + def has_cell_barcode_annotation(self): + return self._has_annotation(longbow.utils.constants.READ_RAW_BARCODE_TAG) or self._has_annotation(longbow.utils.constants.READ_BARCODE_TAG) def _has_annotation(self, annotation_tag): - if self.annotation_segments is None: - return False - - # Get all annotation tags: - for k, tag_tuple_list in self.annotation_segments.items(): - for tag_tuple in tag_tuple_list: - if annotation_tag in tag_tuple: - return True + if self.annotation_segments: + # Get all annotation tags: + for _, tag_tuple_list in self.annotation_segments.items(): + for tag_tuple in tag_tuple_list: + if annotation_tag in tag_tuple: + return True # If we're still running, we didn't find our annotation tag. # Therefore we don't have the given annotation. @@ -1308,7 +1306,7 @@ def build_pre_configured_model(model_name): # Model naming convention: # prefix (mas, isoseq) # modality (bulk, sc, spatial) - # input library type (10x5p, 103p, slideseq) + # input library type (10x5p, 10x3p, slideseq) # umi style (none, single, dual) # plexity (pbkit, internal, none) # @@ -1411,8 +1409,8 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "cDNA", "CBC"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), ( - longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], "CBC": [(longbow.utils.constants.READ_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG), ( longbow.utils.constants.READ_RAW_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG)], }, @@ -1510,8 +1508,8 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "cDNA", "CBC"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), ( - longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], "CBC": [(longbow.utils.constants.READ_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG), ( longbow.utils.constants.READ_RAW_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG)], }, @@ -1536,7 +1534,7 @@ def build_pre_configured_model(model_name): ("L", "5p_Adapter", "UMI", "SLS", "cDNA", "Poly_A", "sample_index", "3p_Adapter"), ("M", "5p_Adapter", "UMI", "SLS", "cDNA", "Poly_A", "sample_index", "3p_Adapter"), ("N", "5p_Adapter", "UMI", "SLS", "cDNA", "Poly_A", "sample_index", "3p_Adapter"), - ("O", "5p_Adapter", "UMI", "SLS", "cDNA", "Poly_A", "3p_Adapter", "sample_index", "P"), + ("O", "5p_Adapter", "UMI", "SLS", "cDNA", "Poly_A", "sample_index", "3p_Adapter", "P"), ), "adapters": { "5p_Adapter": "TCTACACGACGCTCTTCCGATCT", @@ -1610,8 +1608,9 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "cDNA", "sample_index"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], - "sample_index": [(longbow.utils.constants.READ_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "sample_index": [(longbow.utils.constants.READ_DEMUX_TAG, longbow.utils.constants.READ_DEMUX_POS_TAG)], }, "deprecated": False, }, @@ -1688,8 +1687,8 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "cDNA", "CBC"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), ( - longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], "CBC": [(longbow.utils.constants.READ_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG), ( longbow.utils.constants.READ_RAW_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG)], }, @@ -1796,7 +1795,8 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "SBC2", "SBC1", "cDNA"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], "SBC1": [(longbow.utils.constants.READ_SPATIAL_BARCODE1_TAG, longbow.utils.constants.READ_SPATIAL_BARCODE1_POS_TAG)], "SBC2": [(longbow.utils.constants.READ_SPATIAL_BARCODE2_TAG, @@ -1923,8 +1923,8 @@ def build_pre_configured_model(model_name): "named_random_segments": {"UMI", "cDNA", "CBC"}, "coding_region": "cDNA", "annotation_segments": { - "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), ( - longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], + "UMI": [(longbow.utils.constants.READ_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG), + (longbow.utils.constants.READ_RAW_UMI_TAG, longbow.utils.constants.READ_UMI_POS_TAG)], "CBC": [(longbow.utils.constants.READ_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG), ( longbow.utils.constants.READ_RAW_BARCODE_TAG, longbow.utils.constants.READ_BARCODE_POS_TAG)], }, diff --git a/tests/integration/test_segment.py b/tests/integration/test_segment.py index c767b6b6..caebab4f 100644 --- a/tests/integration/test_segment.py +++ b/tests/integration/test_segment.py @@ -9,52 +9,85 @@ from longbow.__main__ import main_entry as longbow from ..utils import cat_file_to_pipe +from ..utils import convert_sam_to_bam +from ..utils import assert_reads_files_equal + +################################################################################ + +TOOL_NAME = "segment" TEST_DATA_FOLDER = path = os.path.abspath( __file__ + os.path.sep + "../../" + os.path.sep + "test_data" ) + os.path.sep +EXPECTED_DATA_FOLDER = TEST_DATA_FOLDER + TOOL_NAME + os.path.sep + +################################################################################ @pytest.fixture(scope="module", params=[ - (TEST_DATA_FOLDER + "mas15_test_input.bam", "mas_15_sc_10x5p_single_none"), - (TEST_DATA_FOLDER + "mas10_test_input.bam", "mas_10_sc_10x5p_single_none"), + (TEST_DATA_FOLDER + "mas15_test_input.bam", + EXPECTED_DATA_FOLDER + "mas_15_sc_10x5p_single_none.expected.bam", + "mas_15_sc_10x5p_single_none"), + + (TEST_DATA_FOLDER + "mas10_test_input.bam", + EXPECTED_DATA_FOLDER + "mas_10_sc_10x5p_single_none.expected.bam", + "mas_10_sc_10x5p_single_none"), + + (TEST_DATA_FOLDER + "mas_15_bulk_10x5p_single_internal.bam", + EXPECTED_DATA_FOLDER + "mas_15_bulk_10x5p_single_internal.expected.bam", + "mas_15_bulk_10x5p_single_internal"), ]) def filtered_bam_file_from_pipeline(request): - input_bam, model_name = request.param + input_file, expected_file, model_name = request.param + + if input_file.endswith(".bam"): + input_bam = input_file + else: + # Convert test file to bam: + with tempfile.NamedTemporaryFile(delete=True) as input_bam: + convert_sam_to_bam(input_file, input_bam) with tempfile.NamedTemporaryFile(delete=True) as annotate_bam, \ tempfile.NamedTemporaryFile(delete=True) as filter_bam: runner = CliRunner() - result_annotate = runner.invoke(longbow, ["annotate", "-m", model_name, "-f", "-o", annotate_bam.name, input_bam]) + result_annotate = runner.invoke(longbow, ["annotate", "-t", 1, "-m", model_name, + "-f", "-o", annotate_bam.name, input_bam]) assert result_annotate.exit_code == 0 - result_filter = runner.invoke(longbow, ["filter", "-m", model_name, "-f", "-o", filter_bam.name, annotate_bam.name]) + result_filter = runner.invoke(longbow, ["filter", "-m", model_name, + "-f", "-o", filter_bam.name, annotate_bam.name]) assert result_filter.exit_code == 0 # Yield file here so that when we return, we get to clean up automatically - yield filter_bam.name + yield filter_bam.name, expected_file, model_name def test_segment_from_file(tmpdir, filtered_bam_file_from_pipeline): + input_bam_file, expected_bam, model_name = filtered_bam_file_from_pipeline + actual_file = tmpdir.join(f"segment_actual_out.bam") - args = ["segment", "-f", "-o", actual_file, filtered_bam_file_from_pipeline] + args = ["segment", "-t", 1, "-f", "-o", actual_file, input_bam_file] runner = CliRunner() result = runner.invoke(longbow, args) assert result.exit_code == 0 + assert_reads_files_equal(actual_file, expected_bam, order_matters=True) def test_segment_from_pipe(tmpdir, filtered_bam_file_from_pipeline): + input_bam_file, expected_bam, model_name = filtered_bam_file_from_pipeline + actual_file = tmpdir.join(f"filter_actual_out.pipe.bam") proc = subprocess.Popen( - [ sys.executable, "-m", "longbow", "segment", "-f", "-o", actual_file ], + [sys.executable, "-m", "longbow", "segment", "-t", "1", "-f", "-o", actual_file], stdin=subprocess.PIPE ) - cat_file_to_pipe(filtered_bam_file_from_pipeline, proc) + cat_file_to_pipe(input_bam_file, proc) assert proc.returncode == 0 + assert_reads_files_equal(actual_file, expected_bam, order_matters=True) diff --git a/tests/test_data/mas_15_bulk_10x5p_single_internal.bam b/tests/test_data/mas_15_bulk_10x5p_single_internal.bam new file mode 100644 index 00000000..3840be22 Binary files /dev/null and b/tests/test_data/mas_15_bulk_10x5p_single_internal.bam differ diff --git a/tests/test_data/models/mas_15_bulk_10x5p_single_internal.json b/tests/test_data/models/mas_15_bulk_10x5p_single_internal.json index bbeff812..b0662bff 100644 --- a/tests/test_data/models/mas_15_bulk_10x5p_single_internal.json +++ b/tests/test_data/models/mas_15_bulk_10x5p_single_internal.json @@ -150,8 +150,8 @@ "SLS", "cDNA", "Poly_A", - "3p_Adapter", "sample_index", + "3p_Adapter", "P" ] ], @@ -191,22 +191,22 @@ }, "direct_connections": { "3p_Adapter": [ - "L", - "K", - "B", - "G", - "M", "N", "P", "D", - "I", + "E", "C", - "J", + "F", + "M", "O", - "E", - "A", + "B", "H", - "F" + "I", + "K", + "J", + "L", + "A", + "G" ], "A": [ "5p_Adapter" @@ -276,17 +276,17 @@ ] }, "start_element_names": [ - "5p_Adapter", - "A" + "A", + "5p_Adapter" ], "end_element_names": [ - "Poly_A", - "P" + "P", + "Poly_A" ], "named_random_segments": [ - "UMI", + "cDNA", "sample_index", - "cDNA" + "UMI" ], "coding_region": "cDNA", "annotation_segments": { @@ -294,12 +294,16 @@ [ "ZU", "XU" + ], + [ + "XM", + "XU" ] ], "sample_index": [ [ - "CR", - "XB" + "id", + "ip" ] ] } diff --git a/tests/test_data/segment/mas_10_sc_10x5p_single_none.expected.bam b/tests/test_data/segment/mas_10_sc_10x5p_single_none.expected.bam new file mode 100644 index 00000000..9d98ba1b Binary files /dev/null and b/tests/test_data/segment/mas_10_sc_10x5p_single_none.expected.bam differ diff --git a/tests/test_data/segment/mas_15_bulk_10x5p_single_internal.expected.bam b/tests/test_data/segment/mas_15_bulk_10x5p_single_internal.expected.bam new file mode 100644 index 00000000..f277b725 Binary files /dev/null and b/tests/test_data/segment/mas_15_bulk_10x5p_single_internal.expected.bam differ diff --git a/tests/test_data/segment/mas_15_sc_10x5p_single_none.expected.bam b/tests/test_data/segment/mas_15_sc_10x5p_single_none.expected.bam new file mode 100644 index 00000000..a39f63cb Binary files /dev/null and b/tests/test_data/segment/mas_15_sc_10x5p_single_none.expected.bam differ diff --git a/tests/unit/test_model.py b/tests/unit/test_model.py new file mode 100644 index 00000000..e0fca8d4 --- /dev/null +++ b/tests/unit/test_model.py @@ -0,0 +1,42 @@ +import pytest + +import pysam +import tempfile +import os +import json + +from longbow.utils import bam_utils +from longbow.utils import model + + +TEST_DATA_FOLDER = path = os.path.abspath( + __file__ + os.path.sep + "../../" + os.path.sep + "test_data" +) + + +@pytest.mark.parametrize("model_name, expected_value", [ + ["mas_15_sc_10x5p_single_none", True], + ["mas_15_sc_10x3p_single_none", True], + ["mas_15_bulk_10x5p_single_internal", False], + ["mas_10_sc_10x5p_single_none", True], + ["mas_15_spatial_slide-seq_single_none", False], + ["mas_15_bulk_teloprimeV2_single_none", False], + ["isoseq_1_sc_10x5p_single_none", True], +]) +def test_has_cell_barcode_annotation(model_name, expected_value): + lb_model = model.LibraryModel.build_pre_configured_model(model_name) + assert lb_model.has_cell_barcode_annotation == expected_value + + +@pytest.mark.parametrize("model_name, expected_value", [ + ["mas_15_sc_10x5p_single_none", True], + ["mas_15_sc_10x3p_single_none", True], + ["mas_15_bulk_10x5p_single_internal", True], + ["mas_10_sc_10x5p_single_none", True], + ["mas_15_spatial_slide-seq_single_none", True], + ["mas_15_bulk_teloprimeV2_single_none", False], + ["isoseq_1_sc_10x5p_single_none", True], +]) +def test_has_umi_annotation(model_name, expected_value): + lb_model = model.LibraryModel.build_pre_configured_model(model_name) + assert lb_model.has_umi_annotation == expected_value