Skip to content

Commit

Permalink
pyhmmer fixes for pharokka proteins and custom hmms
Browse files Browse the repository at this point in the history
  • Loading branch information
gbouras13 committed Jan 19, 2025
1 parent e0b7142 commit d0dd03d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
5 changes: 3 additions & 2 deletions bin/custom_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,15 @@ def run_custom_pyhmmer(custom_hmm, out_dir, threads, gene_predictor, evalue):

# run hmmscan and get all results
results = []
alphabet = pyhmmer.easel.Alphabet.amino() # the alphabet must be specified explicitly to solve #357 and #331 - see https://github.com/althonos/pyhmmer/issues/80
with pyhmmer.plan7.HMMFile(custom_hmm) as hmms: # hmms
with pyhmmer.easel.SequenceFile(
amino_acid_fasta_file, digital=True
amino_acid_fasta_file, digital=True, alphabet=alphabet
) as seqs: # amino acid sequences
for hits in pyhmmer.hmmer.hmmscan(
seqs, hmms, cpus=int(threads), E=float(evalue)
): # run hmmscan
protein = hits.query_name.decode() # get protein from the hit
protein = hits.query.name.decode() # get the protein name from the hit's query; hits.query_name was removed in pyhmmer v0.11.0 and replaced by hits.query.name, so pyhmmer >=0.11.0 is required
for hit in hits:
if hit.included:
# include the hit to the result collection
Expand Down
5 changes: 3 additions & 2 deletions bin/proteins.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,15 @@ def run_pyhmmer_proteins(input_fasta, db_dir, threads, evalue):

# run hmmscan and get all results
results = []
alphabet = pyhmmer.easel.Alphabet.amino() # the alphabet must be specified explicitly to solve #357 and #331 - see https://github.com/althonos/pyhmmer/issues/80
with pyhmmer.plan7.HMMFile(os.path.join(db_dir, "all_phrogs.h3m")) as hmms: # hmms
with pyhmmer.easel.SequenceFile(
input_fasta, digital=True
input_fasta, digital=True, alphabet=alphabet
) as seqs: # amino acid sequences
for hits in pyhmmer.hmmer.hmmscan(
seqs, hmms, cpus=int(threads), E=float(evalue)
): # run hmmscan
protein = hits.query_name.decode() # get protein from the hit
protein = hits.query.name.decode() # get the protein name from the hit's query; hits.query_name was removed in pyhmmer v0.11.0 and replaced by hits.query.name, so pyhmmer >=0.11.0 is required
for hit in hits:
if hit.included:
# include the hit to the result collection
Expand Down

0 comments on commit d0dd03d

Please sign in to comment.