Skip to content

Commit

Permalink
fix low coverage script
Browse files Browse the repository at this point in the history
  • Loading branch information
ViralVerity committed Aug 7, 2024
1 parent 677d04d commit a7c7bf1
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 28 deletions.
25 changes: 14 additions & 11 deletions denv_pipeline/scripts/denv_pipeline.smk
Original file line number Diff line number Diff line change
Expand Up @@ -79,17 +79,20 @@ rule make_qc_plots:
params:
results_dir = rules.summary.params.results_dir
run:
serotype_dict, colour_dict, patch_list = visualisations.prepare_for_plots(input.serotype_calls_file)

visualisations.variant_plot(params.results_dir, input.variant_summary_file, serotype_dict, colour_dict, patch_list)
if config["download"]:
source = os.path.join(params.results_dir, "variant_plot.pdf")
dest = os.path.join(config["outdir"], "downloads")
shell("cp -r {source} {dest}")

if config["ct_file"] and config["ct_column"] and config["id_column"]:
visualisations.ct_plot(params.results_dir, config["ct_file"], config["ct_column"], config["id_column"], input.serotype_calls_file, serotype_dict, colour_dict, patch_list)
result = visualisations.prepare_for_plots(input.serotype_calls_file)
if result:
serotype_dict, colour_dict, patch_list = result
visualisations.variant_plot(params.results_dir, input.variant_summary_file, serotype_dict, colour_dict, patch_list)
if config["download"]:
source = os.path.join(params.results_dir, "ct_plot.pdf")
source = os.path.join(params.results_dir, "variant_plot.pdf")
dest = os.path.join(config["outdir"], "downloads")
shell("cp -r {source} {dest}")

if config["ct_file"] and config["ct_column"] and config["id_column"]:
visualisations.ct_plot(params.results_dir, config["ct_file"], config["ct_column"], config["id_column"], input.serotype_calls_file, serotype_dict, colour_dict, patch_list)
if config["download"]:
source = os.path.join(params.results_dir, "ct_plot.pdf")
dest = os.path.join(config["outdir"], "downloads")
shell("cp -r {source} {dest}")
else:
shell("touch {output.variant_plot}")
23 changes: 15 additions & 8 deletions denv_pipeline/scripts/make_summary_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,7 @@ def summarise_files(config, per_sample_files, serotype_call_file, top_call_file,
with open(file) as f:
data = csv.DictReader(f, delimiter="\t")
for l in data:
if not l['serotype_called'] in all_coverage[l['sample_id']]:
all_coverage[l['sample_id']][l['serotype_called']] = []
all_coverage[l['sample_id']][l['serotype_called']].append(l['coverage_untrimmed'])
all_coverage[l['sample_id']][l['reference_sequence_name']] = float(l['coverage_untrimmed'])

possible_tops.append(l)
all_lines.append(l)
Expand Down Expand Up @@ -202,22 +200,31 @@ def pull_low_coverage_seqs(config, all_coverage, high_coverage):
if name not in high_coverage:
lst = []
for sero, cov in cov_dict.items():
if "sylvatic" not in sero:
lst.append(cov)
lst.append(cov)

in_order = sorted(lst, reverse=True)
if len(in_order) > 1:
if len(in_order) > 1: #I'm not sure when it would ever be less than 6, but leaving for now just in case
if in_order[0] > (in_order[1] + 5):
top = [i for i in cov_dict if cov_dict[i]==in_order[0]][0]
else:
top = "NA"
first = [i for i in cov_dict if cov_dict[i]==in_order[0]][0]
second = [i for i in cov_dict if cov_dict[i]==in_order[1]][0]

if second == f"{first}_sylvatic" or first == f"{second}_sylvatic":
if in_order[0] > (in_order[2] + 5):
top = [i for i in cov_dict if cov_dict[i]==in_order[0]][0]
else:
top = "NA"
else:
top = "NA"

elif len(in_order) == 1:
top = in_order[0]
else:
top = "NA"

top_call[name] = top

low_cov_seqs = set()
depth = config["depth"]
with open(os.path.join(config["outdir"], "results", "low_coverage_calls.csv"), 'w') as fw:
Expand Down
21 changes: 12 additions & 9 deletions denv_pipeline/scripts/visualisations.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,18 @@ def prepare_for_plots(final_serotype_calls):
colour_dict = {}
custom_cmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#567CBE","#D58A80", "#ADB3D9"], len(all_viruses))

lst = custom_cmap(range(len(all_viruses)))
for i,j in enumerate(all_viruses):
colour_dict[j] = rgb2hex(lst[i])

patch_list = []
for serotype,hexc in colour_dict.items():
patch_list.append(mpatches.Patch(color=hexc, label=serotype))

return virus_dict, colour_dict, patch_list
if len(all_viruses) == 0:
return
else:
lst = custom_cmap(range(len(all_viruses)))
for i,j in enumerate(all_viruses):
colour_dict[j] = rgb2hex(lst[i])

patch_list = []
for serotype,hexc in colour_dict.items():
patch_list.append(mpatches.Patch(color=hexc, label=serotype))

return virus_dict, colour_dict, patch_list

def variant_plot(results_dir, variants_summary_file, virus_dict, colour_dict, patch_list):

Expand Down

0 comments on commit a7c7bf1

Please sign in to comment.