diff --git a/denv_pipeline/scripts/denv_pipeline.smk b/denv_pipeline/scripts/denv_pipeline.smk index b5396fc..d9e8dd7 100644 --- a/denv_pipeline/scripts/denv_pipeline.smk +++ b/denv_pipeline/scripts/denv_pipeline.smk @@ -79,17 +79,20 @@ rule make_qc_plots: params: results_dir = rules.summary.params.results_dir run: - serotype_dict, colour_dict, patch_list = visualisations.prepare_for_plots(input.serotype_calls_file) - - visualisations.variant_plot(params.results_dir, input.variant_summary_file, serotype_dict, colour_dict, patch_list) - if config["download"]: - source = os.path.join(params.results_dir, "variant_plot.pdf") - dest = os.path.join(config["outdir"], "downloads") - shell("cp -r {source} {dest}") - - if config["ct_file"] and config["ct_column"] and config["id_column"]: - visualisations.ct_plot(params.results_dir, config["ct_file"], config["ct_column"], config["id_column"], input.serotype_calls_file, serotype_dict, colour_dict, patch_list) + result = visualisations.prepare_for_plots(input.serotype_calls_file) + if result: + serotype_dict, colour_dict, patch_list = result + visualisations.variant_plot(params.results_dir, input.variant_summary_file, serotype_dict, colour_dict, patch_list) if config["download"]: - source = os.path.join(params.results_dir, "ct_plot.pdf") + source = os.path.join(params.results_dir, "variant_plot.pdf") dest = os.path.join(config["outdir"], "downloads") shell("cp -r {source} {dest}") + + if config["ct_file"] and config["ct_column"] and config["id_column"]: + visualisations.ct_plot(params.results_dir, config["ct_file"], config["ct_column"], config["id_column"], input.serotype_calls_file, serotype_dict, colour_dict, patch_list) + if config["download"]: + source = os.path.join(params.results_dir, "ct_plot.pdf") + dest = os.path.join(config["outdir"], "downloads") + shell("cp -r {source} {dest}") + else: + shell("touch {output.variant_plot}") diff --git a/denv_pipeline/scripts/make_summary_files.py b/denv_pipeline/scripts/make_summary_files.py index 5382404..1c50a0f 100644 --- a/denv_pipeline/scripts/make_summary_files.py +++ b/denv_pipeline/scripts/make_summary_files.py @@ -17,9 +17,7 @@ def summarise_files(config, per_sample_files, serotype_call_file, top_call_file, with open(file) as f: data = csv.DictReader(f, delimiter="\t") for l in data: - if not l['serotype_called'] in all_coverage[l['sample_id']]: - all_coverage[l['sample_id']][l['serotype_called']] = [] - all_coverage[l['sample_id']][l['serotype_called']].append(l['coverage_untrimmed']) + all_coverage[l['sample_id']][l['reference_sequence_name']] = float(l['coverage_untrimmed']) possible_tops.append(l) all_lines.append(l) @@ -202,22 +200,31 @@ def pull_low_coverage_seqs(config, all_coverage, high_coverage): if name not in high_coverage: lst = [] for sero, cov in cov_dict.items(): - if "sylvatic" not in sero: - lst.append(cov) + lst.append(cov) in_order = sorted(lst, reverse=True) - if len(in_order) > 1: + if len(in_order) > 1: #I'm not sure when it would ever be less than 6, but leaving for now just in case if in_order[0] > (in_order[1] + 5): top = [i for i in cov_dict if cov_dict[i]==in_order[0]][0] else: - top = "NA" + first = [i for i in cov_dict if cov_dict[i]==in_order[0]][0] + second = [i for i in cov_dict if cov_dict[i]==in_order[1]][0] + + if second == f"{first}_sylvatic" or first == f"{second}_sylvatic": + if in_order[0] > (in_order[2] + 5): + top = [i for i in cov_dict if cov_dict[i]==in_order[0]][0] + else: + top = "NA" + else: + top = "NA" + elif len(in_order) == 1: top = in_order[0] else: top = "NA" top_call[name] = top - + low_cov_seqs = set() depth = config["depth"] with open(os.path.join(config["outdir"], "results", "low_coverage_calls.csv"), 'w') as fw: diff --git a/denv_pipeline/scripts/visualisations.py b/denv_pipeline/scripts/visualisations.py index 498fb5a..4090cad 100644 --- a/denv_pipeline/scripts/visualisations.py +++ b/denv_pipeline/scripts/visualisations.py @@ -22,15 +22,18 @@ def prepare_for_plots(final_serotype_calls): colour_dict = {} custom_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#567CBE","#D58A80", "#ADB3D9"], len(all_viruses)) - lst = custom_cmap(range(len(all_viruses))) - for i,j in enumerate(all_viruses): - colour_dict[j] = rgb2hex(lst[i]) - - patch_list = [] - for serotype,hexc in colour_dict.items(): - patch_list.append(mpatches.Patch(color=hexc, label=serotype)) - - return virus_dict, colour_dict, patch_list + if len(all_viruses) == 0: + return + else: + lst = custom_cmap(range(len(all_viruses))) + for i,j in enumerate(all_viruses): + colour_dict[j] = rgb2hex(lst[i]) + + patch_list = [] + for serotype,hexc in colour_dict.items(): + patch_list.append(mpatches.Patch(color=hexc, label=serotype)) + + return virus_dict, colour_dict, patch_list def variant_plot(results_dir, variants_summary_file, virus_dict, colour_dict, patch_list):