Skip to content

Commit

Permalink
Merge pull request #298 from gbouras13/dev
Browse files Browse the repository at this point in the history
v 1.5.1
  • Loading branch information
gbouras13 authored Oct 26, 2023
2 parents 1dc340f + ed572d2 commit 77c2d96
Show file tree
Hide file tree
Showing 16 changed files with 344 additions and 111 deletions.
6 changes: 6 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
History
=======

1.5.1 (2023-10-26)
------------------

* Requires `dnaapler` version `>=0.4.0` to accommodate recent changes in dnaapler
* Adds `.svg` format output with `pharokka_plotter.py`

1.5.0 (2023-09-20)
------------------

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,7 @@ The 673 crAss-like genomes were run with `-m` (defaults to `--mmseqs2_only` in v

Benchmarking was conducted on Enterobacteria Phage Lambda (GenBank accession J02459), Staphylococcus Phage SAOMS1 (GenBank accession MW460250), and 673 crAss-like phage genomes in one multiFASTA input taken from Yutin, N., Benler, S., Shmakov, S.A. et al. Analysis of metagenome-assembled viral genomes from the human gut reveals diverse putative CrAss-like phages with unique genomic features. Nat Commun 12, 1044 (2021). https://doi.org/10.1038/s41467-021-21350-w.


For the crAss-like phage genomes, `pharokka` meta mode `-m` was enabled.

| Phage Lambda | `pharokka` PHANOTATE | `pharokka` Prodigal | Prokka with PHROGs |
Expand Down
60 changes: 39 additions & 21 deletions bin/pharokka.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,42 @@
from custom_db import run_custom_pyhmmer
from databases import check_db_installation
from hmm import run_pyhmmer
from input_commands import (check_dependencies, get_input, instantiate_dirs,
instantiate_split_output,
validate_and_extract_genbank, validate_custom_hmm,
validate_fasta, validate_gene_predictor,
validate_meta, validate_terminase,
validate_threads)
from input_commands import (
check_dependencies,
get_input,
instantiate_dirs,
instantiate_split_output,
validate_and_extract_genbank,
validate_custom_hmm,
validate_fasta,
validate_gene_predictor,
validate_meta,
validate_terminase,
validate_threads,
)
from loguru import logger
from post_processing import Pharok, remove_post_processing_files
from processes import (concat_phanotate_meta, concat_trnascan_meta,
convert_gff_to_gbk, reorient_terminase, run_aragorn,
run_dnaapler, run_mash_dist, run_mash_sketch,
run_minced, run_mmseqs, run_phanotate,
run_phanotate_fasta_meta, run_phanotate_txt_meta,
run_pyrodigal, run_pyrodigal_gv, run_trna_scan,
run_trnascan_meta, split_input_fasta, translate_fastas)
from processes import (
concat_phanotate_meta,
concat_trnascan_meta,
convert_gff_to_gbk,
reorient_terminase,
run_aragorn,
run_dnaapler,
run_mash_dist,
run_mash_sketch,
run_minced,
run_mmseqs,
run_phanotate,
run_phanotate_fasta_meta,
run_phanotate_txt_meta,
run_pyrodigal,
run_pyrodigal_gv,
run_trna_scan,
run_trnascan_meta,
split_input_fasta,
translate_fastas,
)
from util import count_contigs, get_version


Expand Down Expand Up @@ -160,14 +181,9 @@ def main():
)

if dnaapler_success == True:
if contig_count == 1:
input_fasta = os.path.join(
input_fasta = os.path.join(
out_dir, "dnaapler/dnaapler_reoriented.fasta"
)
elif contig_count > 1: # dnaapler all
input_fasta = os.path.join(
out_dir, "dnaapler/dnaapler_all_reoriented.fasta"
)
destination_file = os.path.join(
out_dir, f"{prefix}_dnaapler_reoriented.fasta"
)
Expand Down Expand Up @@ -440,7 +456,7 @@ def main():
pharok.extract_terl()

# run mash
if args.skip_mash is False: # skips mash
if args.skip_mash is False: # skips mash
logger.info("Finding the closest match for each contig in INPHARED using mash.")
# in process.py
run_mash_sketch(input_fasta, out_dir, logdir)
Expand All @@ -449,7 +465,9 @@ def main():
pharok.inphared_top_hits()
else:
logger.info("You have chosen --skip_mash.")
logger.info("Skipping finding the closest match for each contig in INPHARED using mash.")
logger.info(
"Skipping finding the closest match for each contig in INPHARED using mash."
)

# delete tmp files
remove_post_processing_files(out_dir, gene_predictor, args.meta)
Expand Down
17 changes: 17 additions & 0 deletions bin/pharokka_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,10 @@ def get_input():
# check if plot already exists
if args.outdir == "":
plot_file = str(args.plot_name) + ".png"
svg_plot_file = str(args.plot_name) + ".svg"
else:
plot_file = os.path.join(args.outdir, f"{args.plot_name}.png")
svg_plot_file = os.path.join(args.outdir, f"{args.plot_name}.svg")

if args.force == True:
if os.path.isfile(plot_file) == True:
Expand All @@ -175,6 +177,20 @@ def get_input():
f"Output plot file {plot_file} already exists and force was not specified. Please specify -f or --force to overwrite the output plot file."
)

if args.force == True:
if os.path.isfile(svg_plot_file) == True:
os.remove(svg_plot_file)
else:
logger.warning(
"--force was specified even though the output plot file does not already exist."
)
logger.warning("Continuing")
else:
if os.path.isfile(svg_plot_file) == True:
logger.error(
f"Output plot file {svg_plot_file} already exists and force was not specified. Please specify -f or --force to overwrite the output plot file."
)

# flag to see if user provided gff and genbank or output directory
gff_genbank_flag = True

Expand Down Expand Up @@ -283,6 +299,7 @@ def get_input():
args.plot_title,
args.truncate,
plot_file,
svg_plot_file,
args.dpi,
args.label_size,
args.label_hypotheticals,
Expand Down
18 changes: 13 additions & 5 deletions bin/pharokka_proteins.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,20 @@
from pathlib import Path

from databases import check_db_installation
from input_commands import (check_dependencies, instantiate_dirs,
validate_fasta, validate_threads)
from input_commands import (
check_dependencies,
instantiate_dirs,
validate_fasta,
validate_threads,
)
from loguru import logger
from post_processing import remove_directory, remove_file
from proteins import (Pharok_Prot, get_input_proteins, run_mmseqs_proteins,
run_pyhmmer_proteins)
from proteins import (
Pharok_Prot,
get_input_proteins,
run_mmseqs_proteins,
run_pyhmmer_proteins,
)
from util import get_version


Expand Down Expand Up @@ -80,7 +88,7 @@ def main():

# dependencies
logger.info("Checking dependencies.")
check_dependencies(False) # to check pharokka_proteins.py, don't need mash
check_dependencies(False) # to check pharokka_proteins.py, don't need mash

# instantiation/checking fasta and gene_predictor
validate_fasta(args.infile)
Expand Down
5 changes: 5 additions & 0 deletions bin/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def create_plot(
plot_title,
truncate,
outfile,
svg_plot_file,
dpi,
label_size,
label_hypotheticals,
Expand Down Expand Up @@ -581,4 +582,8 @@ def create_plot(

dpi = int(dpi)

# save as png
fig.savefig(outfile, dpi=dpi)

# Save the image as an SVG
fig.savefig(svg_plot_file, format='svg', dpi=dpi)
Loading

0 comments on commit 77c2d96

Please sign in to comment.