Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
merge
  • Loading branch information
Verity Hill committed Aug 11, 2023
2 parents 0f6b8f5 + e7ce897 commit 2d66630
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 17 deletions.
3 changes: 3 additions & 0 deletions denv_pipeline/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def main(sysargs = sys.argv[1:]):
parser.add_argument("--outdir", dest="outdir", help="location where files will be stored.")
parser.add_argument("--reference-directory", "-rd", help="location where bed files and reference genomes are")
parser.add_argument("--depth", help="depth to map sequences to. Default=10")
parser.add_argument("--threshold", help="threshold to call consensus positions at, default=0.75",dest="threshold")

parser.add_argument("--temp", dest="temp", action="store_true", help="keep intermediate files")
parser.add_argument("--tempdir", dest="tempdir", help="where the temporary files go")
Expand Down Expand Up @@ -72,6 +73,8 @@ def main(sysargs = sys.argv[1:]):
if "indir" not in config:
config["indir"] = config["outdir"]

error_checks.check_threshold(config)

config["outdir"] = config["outdir"].rstrip("/")
config = set_up_scripts.set_up_temporary_directory_path(config)
config = set_up_scripts.set_up_reference_directory(config)
Expand Down
15 changes: 10 additions & 5 deletions denv_pipeline/scripts/denv_pipeline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ rule mapper:
mapper_script = os.path.join(workflow.current_basedir,"mapper.sh"),
primer_dir = config["reference_directory"],
depth = config["depth"],
threshold = config["threshold"],
tempdir = config["tempdir"],
python_script = os.path.join(workflow.current_basedir,"serotype_caller.py"),
python_script2 = os.path.join(workflow.current_basedir, "make_empty_files.py")
Expand All @@ -35,7 +36,7 @@ rule mapper:
cpus_per_task=2,
runtime=300
run:
shell("{params.mapper_script} {wildcards.sample} {input.read_location}/*R1* {input.read_location}/*R2* {params.primer_dir} {params.python_script} {params.python_script2} {params.depth} {params.tempdir} {log.log} >> {log.log} 2>&1")
shell("{params.mapper_script} {wildcards.sample} {input.read_location}/*R1* {input.read_location}/*R2* {params.primer_dir} {params.python_script} {params.python_script2} {params.depth} {params.threshold} {params.tempdir} {log.log} >> {log.log} 2>&1")

if not os.path.exists(os.path.join(params.tempdir,f"{wildcards.sample}_all_virustype_info.txt")):
shell("touch {params.tempdir}/{wildcards.sample}_all_virustype_info.txt")
Expand Down Expand Up @@ -80,10 +81,14 @@ rule make_qc_plots:
serotype_dict, colour_dict, patch_list = visualisations.prepare_for_plots(input.serotype_calls_file)

visualisations.variant_plot(params.results_dir, input.variant_summary_file, serotype_dict, colour_dict, patch_list)
if config["download"]:
source = os.path.join(params.results_dir, "variant_plot.pdf")
dest = os.path.join(config["outdir"], "downloads")
shell("cp -r {source} {dest}")

if config["ct_file"] and config["ct_column"] and config["id_column"]:
visualisations.ct_plot(params.results_dir, config["ct_file"], config["ct_column"], config["id_column"], input.serotype_calls_file, serotype_dict, colour_dict, patch_list)




if config["download"]:
source = os.path.join(params.results_dir, "ct_plot.pdf")
dest = os.path.join(config["outdir"], "downloads")
shell("cp -r {source} {dest}")
9 changes: 6 additions & 3 deletions denv_pipeline/scripts/mapper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ primer_dir=$4
serotype_caller=$5
empty_file_maker=$6
depth=$7
tempdir=$8
log=$9
threshold=$8
tempdir=$9
log=$10

while IFS= read -r virustype || [[ -n "$virustype" ]]; do

Expand All @@ -22,9 +23,11 @@ while IFS= read -r virustype || [[ -n "$virustype" ]]; do
fi

echo "----->>>>>Mapping reads against serotype "${virustype}" reference sequence"
which bwa
bwa mem -v 1 -t 2 ${fasta} $read1 $read2 | samtools view -bS -F 4 -F 2048 | samtools sort -o ${tempdir}/${fname}.${virustype}.bam >> ${log} 2>&1

echo "----->>>>>Trimming bam file"
which ivar
ivar trim -e -i ${tempdir}/${fname}.${virustype}.bam -b ${bed} -p ${tempdir}/${fname}.${virustype}.trimmed.bam >> ${log} 2>&1

if ! [ -s ${tempdir}/${fname%.*}.${virustype}.trimmed.bam ]; then
Expand All @@ -42,7 +45,7 @@ while IFS= read -r virustype || [[ -n "$virustype" ]]; do
samtools index ${tempdir}/${fname}.${virustype}.sort.bam >> ${log} 2>&1

echo "----->>>>>Generating consensus sequence"
samtools mpileup -aa --reference ${fasta} -A -d 10000 -Q 0 ${tempdir}/${fname}.${virustype}.sort.bam | ivar consensus -t 0.75 -m ${depth} -p ${tempdir}/${fname}.${virustype}.${depth}.cons -i ${consensus_name} >> ${log} 2>&1
samtools mpileup -aa --reference ${fasta} -A -d 10000 -Q 0 ${tempdir}/${fname}.${virustype}.sort.bam | ivar consensus -t ${threshold} -m ${depth} -p ${tempdir}/${fname}.${virustype}.${depth}.cons -i ${consensus_name} >> ${log} 2>&1

echo "----->>>>>Aligning consensus cps sequence against the reference serotype "${virustype}" cps sequence"
nextalign run --reference ${fasta} --output-fasta ${tempdir}/${fname}.${virustype}.${depth}.out.aln ${tempdir}/${fname}.${virustype}.${depth}.cons.fa >> ${log} 2>&1
Expand Down
7 changes: 5 additions & 2 deletions denv_pipeline/scripts/visualisations.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ def prepare_for_plots(final_serotype_calls):
all_viruses.add(l['serotype_called'])

colour_dict = {}
lst = mpl.colormaps['viridis'](range(len(all_viruses)))
custom_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#567CBE","#D58A80", "#ADB3D9"], len(all_viruses))

lst = custom_cmap(range(len(all_viruses)))
for i,j in enumerate(all_viruses):
colour_dict[j] = rgb2hex(lst[i])

patch_list = []
for serotype,hexc in colour_dict.items():
patch_list.append(mpatches.Patch(color=hexc, label=serotype))

return virus_dict, colour_dict, patch_list

def variant_plot(results_dir, variants_summary_file, virus_dict, colour_dict, patch_list):
Expand Down Expand Up @@ -113,3 +115,4 @@ def ct_plot(results_dir, ct_file, ct_column, id_column, final_serotype_calls, vi
plt.legend(handles=patch_list,fontsize=15,frameon=False)

plt.savefig(os.path.join(results_dir, "ct_plot.pdf"), bbox_inches="tight")

21 changes: 14 additions & 7 deletions denv_pipeline/utils/error_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,22 +118,22 @@ def check_primer_dir(config):
def check_env_activated():

if pkgutil.find_loader('snakemake') is None or pkgutil.find_loader('Bio') is None:
sys.stderr.write(green(f"Error: installation not correct. Ensure that environment is activated and you have run 'pip install .'"))
sys.stderr.write(green(f"Error: installation not correct. Ensure that environment is activated and you have run 'pip install .'\n"))
sys.exit(-1)


def check_ct_file(config):

if config["ct_file"] and not config["ct_column"]:
sys.stderr.write(green(f"Error: ct_file specified but no ct_column for ct vs coverage plot. Please provide Ct column name and id column name."))
sys.stderr.write(green(f"Error: ct_file specified but no ct_column for ct vs coverage plot. Please provide Ct column name and id column name.\n"))
sys.exit(-1)

if config["ct_file"] and not config["id_column"]:
sys.stderr.write(green(f"Error: ct_file specified but no id_column for ct vs coverage plot. Please provide Ct column name and id column name."))
sys.stderr.write(green(f"Error: ct_file specified but no id_column for ct vs coverage plot. Please provide Ct column name and id column name.\n"))
sys.exit(-1)

if (config["ct_column"] or config["id_column"]) and not config["ct_file"]:
sys.stderr.write(green(f"Error: ct_column or id_column specified but no ct_file for ct vs coverage plot. Please provide file containing Ct information."))
sys.stderr.write(green(f"Error: ct_column or id_column specified but no ct_file for ct vs coverage plot. Please provide file containing Ct information.\n"))
sys.exit(-1)

if not os.path.exists(config["ct_file"]):
Expand All @@ -145,8 +145,15 @@ def check_ct_file(config):
data = csv.DictReader(f)
headers = data.fieldnames
if config["ct_column"] not in headers:
sys.stderr.write(green(f"Error: {config['ct_column']} not found in ct_file"))
sys.stderr.write(green(f"Error: {config['ct_column']} not found in ct_file\n"))
sys.exit(-1)
if config["id_column"] not in headers:
sys.stderr.write(green(f"Error: {config['id_column']} not found in ct_file"))
sys.exit(-1)
sys.stderr.write(green(f"Error: {config['id_column']} not found in ct_file\n"))
sys.exit(-1)


def check_threshold(config):

if float(config["threshold"]) > 1:
sys.stderr.write(green(f"Error: consensus threshold must be between 0 and 1\n"))
sys.exit(-1)
2 changes: 2 additions & 0 deletions denv_pipeline/utils/set_up_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def get_defaults(config):
config["config"] = False

config["depth"] = 10
config["threshold"] = 0.75
config["tempdir"] = "temporary_files"

config['ct_file'] = False
Expand Down Expand Up @@ -194,6 +195,7 @@ def get_valid_keys():
valid_keys.append("outdir")
valid_keys.append("reference_directory")
valid_keys.append("depth")
valid_keys.append("threshold")
valid_keys.append("temp")
valid_keys.append("tempdir")
valid_keys.append("download")
Expand Down
1 change: 1 addition & 0 deletions tests/dry_run_config.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
depth: 50
threshold: 0.75
ct_file: "test_ct_file.csv"
ct_column: "test_ct"
id_column: "test_id"
Expand Down

0 comments on commit 2d66630

Please sign in to comment.