Skip to content

Commit

Permalink
ExpansionHunter: extract sample sex from a PED file. (#281)
Browse files Browse the repository at this point in the history
* ExpansionHunter: extract sample sex from a PED file.

* Update wdl/ExpansionHunter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update wdl/ExpansionHunterScatter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update wdl/ExpansionHunter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update wdl/ExpansionHunterScatter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update wdl/ExpansionHunter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Add parameter metadata for a few arguments.

* Error out if the sample ID or PED file is not defined, plus a few bug fixes.

* Improve how the sample sex is determined.

* Remove output_prefix & make sample_id required.

* Update wdl/ExpansionHunter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update wdl/ExpansionHunterScatter.wdl

Co-authored-by: Mark Walker <[email protected]>

* Update a comment.

Co-authored-by: Mark Walker <[email protected]>
  • Loading branch information
VJalili and mwalker174 authored Jan 11, 2022
1 parent 6437798 commit 479d2ac
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 28 deletions.
45 changes: 27 additions & 18 deletions wdl/ExpansionHunter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,17 @@ workflow ExpansionHunter {
File reference_fasta
File? reference_fasta_index
File variant_catalog
File? output_prefix
String sample_id
File? ped_file
String expansion_hunter_docker
RuntimeAttr? runtime_attr
}
parameter_meta {
ped_file: "This file is used to extract the sex of the bam_or_cram file."
sample_id: "The ped_file needs to be provided as well to determine sample sex. The ID must match the sample ID given in the second column (`Individual ID` column) of the given PED file. This ID will also be used as an output prefix."
}

Boolean is_bam = basename(bam_or_cram, ".bam") + ".bam" == basename(bam_or_cram)
File bam_or_cram_index_ =
if defined(bam_or_cram_index) then
Expand All @@ -36,23 +42,15 @@ workflow ExpansionHunter {
reference_fasta_index,
reference_fasta + ".fai"])

String output_prefix_ =
if defined(output_prefix) then
select_first([output_prefix])
else
if is_bam then
basename(bam_or_cram, ".bam")
else
basename(bam_or_cram, ".cram")
call RunExpansionHunter {
input:
bam_or_cram = bam_or_cram,
bam_or_cram_index = bam_or_cram_index_,
reference_fasta = reference_fasta,
reference_fasta_index = reference_fasta_index_,
variant_catalog = variant_catalog,
output_prefix = output_prefix_,
sample_id = sample_id,
ped_file = ped_file,
expansion_hunter_docker = expansion_hunter_docker,
runtime_attr_override = runtime_attr,
}
Expand All @@ -72,28 +70,39 @@ task RunExpansionHunter {
File reference_fasta
File reference_fasta_index
File variant_catalog
String output_prefix
String sample_id
File? ped_file
String expansion_hunter_docker
RuntimeAttr? runtime_attr_override
}
output {
File json = "${output_prefix}.json"
File vcf = "${output_prefix}.vcf"
File overlapping_reads = "${output_prefix}_realigned.bam"
File timing = "${output_prefix}_timing.tsv"
File json = "${sample_id}.json"
File vcf = "${sample_id}.vcf"
File overlapping_reads = "${sample_id}_realigned.bam"
File timing = "${sample_id}_timing.tsv"
}

command <<<
set -euxo pipefail

sex=""
if ~{defined(ped_file)}; then
sex=$(awk -F '\t' '{if ($2 == "~{sample_id}") {if ($5 == "1") {print "--sex male"; exit 0} else if ($5 == "2") {print "--sex female"; exit 0}}}' < ~{ped_file} )
if [ "$sex" = "" ]; then
echo "The Sex of the sample defined in the PED file is other than male or female. ExpansionHunter only supports male or female samples."
exit 1
fi
fi

ExpansionHunter \
--reads ~{bam_or_cram} \
--reference ~{reference_fasta} \
--variant-catalog ~{variant_catalog} \
--output-prefix ~{output_prefix} \
--output-prefix ~{sample_id} \
--cache-mates \
--record-timing
--record-timing \
$sex
>>>

RuntimeAttr runtime_attr_str_profile_default = object {
Expand Down
20 changes: 10 additions & 10 deletions wdl/ExpansionHunterScatter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,20 @@ workflow ExpansionHunterScatter {
input {
Array[File] bams_or_crams
Array[File]? bams_or_crams_indexes
Array[String]? sample_ids
Array[String] sample_ids
File? ped_file
File reference_fasta
File? reference_fasta_index
File variant_catalog
String expansion_hunter_docker
RuntimeAttr? runtime_attr
}
parameter_meta {
ped_file: "This file is used to extract the sex of the BAM/CRAM files."
sample_ids: "One ID per sample, in the same order as the files in bams_or_crams. These IDs must match the ID given in the second column (`Individual ID` column) of the given PED file. These IDs will also be used as an output prefix."
}

scatter (i in range(length(bams_or_crams))) {
File bam_or_cram_ = bams_or_crams[i]
Boolean is_bam =
Expand All @@ -28,14 +34,7 @@ workflow ExpansionHunterScatter {
File reference_fasta_index_ = select_first([
reference_fasta_index, reference_fasta + ".fai"])

String output_prefix =
if defined(sample_ids) then
select_first([sample_ids])[i]
else
if is_bam then
basename(bam_or_cram_, ".bam")
else
basename(bam_or_cram_, ".cram")
String sample_id = sample_ids[i]

call ExpansionHunter.ExpansionHunter as expanionHunter {
input:
Expand All @@ -44,7 +43,8 @@ workflow ExpansionHunterScatter {
reference_fasta=reference_fasta,
reference_fasta_index=reference_fasta_index_,
variant_catalog=variant_catalog,
output_prefix=output_prefix,
sample_id=sample_id,
ped_file=ped_file,
expansion_hunter_docker=expansion_hunter_docker,
runtime_attr=runtime_attr
}
Expand Down

0 comments on commit 479d2ac

Please sign in to comment.