Skip to content

Commit

Permalink
skip first codon when checking if translation from antismash and biop…
Browse files Browse the repository at this point in the history
…ython match, cds 1 and 2 from test file are manually altered
  • Loading branch information
CatarinaCarolina committed Aug 31, 2023
1 parent 02120ca commit 66e6422
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 9 deletions.
21 changes: 13 additions & 8 deletions src/genbank/cds.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,16 +250,21 @@ def parse(
correct_translation = check_translation(aa_seq, nt_seq, feature)

if not correct_translation:
logging.warning(
"CDS (%s, %s) from %s:"
" translation provided by antiSMASH and generated by biopython"
" do not match, consider checking if there is something"
" special with this CDS",
nt_start,
nt_stop,
parent_gbk.path,
correct_skip_start_translation = check_translation(
aa_seq[1:], nt_seq[1:], feature
)

if not correct_skip_start_translation:
logging.warning(
"CDS (%s, %s) from %s:"
" translation provided by antiSMASH and generated by biopython"
" do not match, consider checking if there is something"
" special with this CDS",
nt_start,
nt_stop,
parent_gbk.path,
)

cds.aa_seq = aa_seq
return cds

Expand Down
57 changes: 57 additions & 0 deletions test/genbank/test_cds.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Contains tests for the CDS class and functions"""

# from python
import logging
from pathlib import Path
from unittest import TestCase

Expand Down Expand Up @@ -411,3 +412,59 @@ def test_parse_translate_aa_seq_cds_0(self):
translation = cds_0.aa_seq

self.assertEqual(expected_translation, translation)

def test_parse_translation_antismash_start_codon_mismatch(self):
    """Tests that a start-codon-only translation mismatch logs no warning

    The CDS feature with location 1537..2157 carries an antiSMASH-style
    translation. CDS.parse is expected not to emit the "translation
    provided by antiSMASH and generated by biopython do not match"
    warning when the two translations differ only at the first residue.
    """

    parent_gbk_file_path = Path(
        "test/test_data/valid_gbk_folder/valid_input_region_cds_no_trans.gbk"
    )
    parent_gbk = GBK.parse(parent_gbk_file_path, "query")

    feature = SeqFeature(FeatureLocation(1536, 2156, strand=1), type="CDS")

    feature.qualifiers["translation"] = [
        "MTYRESARTTTRRIPGAVVPVARRIRGVLLAGLRAVGTRIARSPG"
        "RPVRPQDRAGLGKTHGAVPAGVTVFDDDVPAVTRLDPALLSALRRAATAAADGGVELCV"
        "NSGWRSPEYQSRLLREAVAKYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEH"
        "GADYGLCRVYRNEPWHFELRPEAIEHGCPPLYADPSHDPRLRR"
    ]

    with self.assertLogs(level=logging.INFO) as cm:
        # assertLogs fails when nothing is logged inside the block, so
        # emit a throwaway record to allow asserting on the absence of
        # the mismatch warning
        logging.info("nonsense")
        CDS.parse(feature, parent_gbk)

    # cm.output is a list with one formatted string per captured record
    mismatch_fragment = (
        " translation provided by antiSMASH and generated by biopython"
    )

    warning_logged = any(mismatch_fragment in log for log in cm.output)
    self.assertFalse(warning_logged)

def test_parse_translation_antismash_larger_mismatch(self):
    """Tests that a mismatch beyond the start codon logs a warning

    The CDS feature with location 1537..2157 is given a translation whose
    last four residues are replaced by "XXXX". CDS.parse is expected to
    emit the "translation provided by antiSMASH and generated by
    biopython do not match" warning for it.
    """

    parent_gbk_file_path = Path(
        "test/test_data/valid_gbk_folder/valid_input_region_cds_no_trans.gbk"
    )
    parent_gbk = GBK.parse(parent_gbk_file_path, "query")

    feature = SeqFeature(FeatureLocation(1536, 2156, strand=1), type="CDS")

    feature.qualifiers["translation"] = [
        "MTYRESARTTTRRIPGAVVPVARRIRGVLLAGLRAVGTRIARSPG"
        "RPVRPQDRAGLGKTHGAVPAGVTVFDDDVPAVTRLDPALLSALRRAATAAADGGVELCV"
        "NSGWRSPEYQSRLLREAVAKYGSAAAAARWVATPETSIHVAGKAVDIGPPASASWLSEH"
        "GADYGLCRVYRNEPWHFELRPEAIEHGCPPLYADPSHDPXXXX"
    ]

    with self.assertLogs(level=logging.INFO) as cm:
        # assertLogs fails when nothing is logged inside the block; the
        # throwaway record makes a missing warning surface through the
        # assertion below instead
        logging.info("nonsense")
        CDS.parse(feature, parent_gbk)

    # cm.output is a list with one formatted string per captured record
    mismatch_fragment = (
        " translation provided by antiSMASH and generated by biopython"
    )
    warning_logged = any(mismatch_fragment in log for log in cm.output)

    self.assertTrue(warning_logged)
Original file line number Diff line number Diff line change
Expand Up @@ -3847,7 +3847,7 @@ ORIGIN
1321 tgaggctcag cagttcgaga gcggtgtcgc cgttgcccgc ggtgtccgcc gcgatcgctt
1381 ccaaacgcag gccgtcgcgg atggcttcgg ccagataggg ctcgtcctcg acgatcagca
1441 cgcgcatgcc ccgatggtac gagaggccac tacatatcgt cggcatatgg aaaatcgcat
1501 acgtgccggc aacacatcgc cgacttgaat ggacacatga cctaccgcga gtcggcccgg
1501 acgtgccggc aacacatcgc cgacttgaat ggacacgtga cctaccgcga gtcggcccgg
1561 acgacgaccc gccggattcc cggcgccgtc gtgccggtgg cccgccggat tcgcggggtc
1621 cttctcgccg gcctgcgcgc cgtcggcacg aggattgccc ggtcgcccgg tcgcccggtc
1681 cgcccccagg accgtgccgg cctcggcaag acccacggtg ccgtccccgc cggggtgacg
Expand Down

0 comments on commit 66e6422

Please sign in to comment.