pyhmmer fixes for pharokka proteins and custom hmms

gbouras13 · Jan 19, 2025 · d0dd03d · d0dd03d
1 parent e0b7142
commit d0dd03d
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 4 deletions.
diff --git a/bin/custom_db.py b/bin/custom_db.py
@@ -30,14 +30,15 @@ def run_custom_pyhmmer(custom_hmm, out_dir, threads, gene_predictor, evalue):
 
     # run hmmscan and get all results
     results = []
+    alphabet = pyhmmer.easel.Alphabet.amino() # the amino-acid alphabet must be specified explicitly (see https://github.com/althonos/pyhmmer/issues/80); addresses #357 and #331
     with pyhmmer.plan7.HMMFile(custom_hmm) as hmms:  # hmms
         with pyhmmer.easel.SequenceFile(
-            amino_acid_fasta_file, digital=True
+            amino_acid_fasta_file, digital=True, alphabet=alphabet
         ) as seqs:  # amino acid sequences
             for hits in pyhmmer.hmmer.hmmscan(
                 seqs, hmms, cpus=int(threads), E=float(evalue)
             ):  # run hmmscan
-                protein = hits.query_name.decode()  # get protein from the hit
+                protein = hits.query.name.decode()   # get the protein name from the hits' query; pyhmmer v0.11.0 removed hits.query_name in favour of hits.query.name, so pyhmmer >=0.11.0 is required
                 for hit in hits:
                     if hit.included:
                         # include the hit to the result collection

diff --git a/bin/proteins.py b/bin/proteins.py
@@ -217,14 +217,15 @@ def run_pyhmmer_proteins(input_fasta, db_dir, threads, evalue):
 
     # run hmmscan and get all results
     results = []
+    alphabet = pyhmmer.easel.Alphabet.amino() # the amino-acid alphabet must be specified explicitly (see https://github.com/althonos/pyhmmer/issues/80); addresses #357 and #331
     with pyhmmer.plan7.HMMFile(os.path.join(db_dir, "all_phrogs.h3m")) as hmms:  # hmms
         with pyhmmer.easel.SequenceFile(
-            input_fasta, digital=True
+            input_fasta, digital=True, alphabet=alphabet
         ) as seqs:  # amino acid sequences
             for hits in pyhmmer.hmmer.hmmscan(
                 seqs, hmms, cpus=int(threads), E=float(evalue)
             ):  # run hmmscan
-                protein = hits.query_name.decode()  # get protein from the hit
+                protein = hits.query.name.decode()   # get the protein name from the hits' query; pyhmmer v0.11.0 removed hits.query_name in favour of hits.query.name, so pyhmmer >=0.11.0 is required
                 for hit in hits:
                     if hit.included:
                         # include the hit to the result collection