Commit 5a48f28: Readding preprocess vcf for vapor

kjaisingh committed Feb 4, 2025
1 parent bdb3bcc · commit 5a48f28
Showing 2 changed files with 95 additions and 3 deletions.
@@ -64,6 +64,7 @@ def __init__(self, record):
 self.length = record.info['SVLEN']
 self.cnv_gt_5kbp = (record.info['SVTYPE'] == 'DEL' or record.info['SVTYPE'] == 'DUP') and self.length >= 5000
 self.gt_50bp = self.length >= 50
+self.is_dragen = 'dragen' in record.info['ALGORITHMS']
 self.is_melt = 'melt' in record.info['ALGORITHMS']
 self.is_scramble = 'scramble' in record.info['ALGORITHMS']
 self.is_manta = 'manta' in record.info['ALGORITHMS']
@@ -164,10 +165,10 @@ def __str__(self):
 if len(sample_intersection) < 0.50 * max_freq:
     continue
 # Determine which to filter
-# Special case if one is a Manta insertion and the other is MEI, keep the MEI
-if first.is_manta and first.svtype == "INS" and second.is_mei:
+# Special case if one is a Dragen/Manta insertion and the other is MEI, keep the MEI
+if (first.is_dragen or first.is_manta) and first.svtype == "INS" and second.is_mei:
     sorted_data_list = [second, first]
-elif second.is_manta and second.svtype == "INS" and first.is_mei:
+elif (second.is_dragen or second.is_manta) and second.svtype == "INS" and first.is_mei:
     sorted_data_list = [first, second]
 else:
     # Otherwise use sorting spec
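For reference, a minimal standalone sketch of the ordering rule in the hunk above, using a hypothetical Candidate stand-in rather than the pipeline's record wrapper: when a Dragen or Manta insertion is paired with an MEI call, the pair is ordered so the MEI is kept and the insertion is filtered.

from collections import namedtuple

# Hypothetical stand-in for the pipeline's record wrapper; only the fields the rule reads.
Candidate = namedtuple("Candidate", ["name", "svtype", "is_dragen", "is_manta", "is_mei"])

def order_pair(first, second):
    """Return [keep, filter] for a duplicate pair, mirroring the special case above."""
    if (first.is_dragen or first.is_manta) and first.svtype == "INS" and second.is_mei:
        return [second, first]  # keep the MEI, filter the Dragen/Manta insertion
    elif (second.is_dragen or second.is_manta) and second.svtype == "INS" and first.is_mei:
        return [first, second]
    # Otherwise the pipeline falls back to its sorting spec (not reproduced here).
    return [first, second]

dragen_ins = Candidate("dragen_INS_1", "INS", True, False, False)
mei_call = Candidate("melt_MEI_1", "INS", False, False, True)
keep, filtered = order_pair(dragen_ins, mei_call)
print(keep.name, filtered.name)  # melt_MEI_1 dragen_INS_1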
91 changes: 91 additions & 0 deletions wdl/PreprocessVcfForVapor.wdl
@@ -0,0 +1,91 @@
version 1.0

workflow PreprocessVcfForVapor {
    input {
        String sample_id            # Sample identifier
        File vcf_path               # Path to the input VCF file
        File contigs_fai            # Path to the contigs file
        Int min_size                # Minimum size for standardization
        String sv_pipeline_docker   # Docker image path for GATK-SV
    }

    call StandardizeVcf {
        input:
            sample_id = sample_id,
            vcf_path = vcf_path,
            contigs_fai = contigs_fai,
            min_size = min_size,
            sv_pipeline_docker = sv_pipeline_docker
    }

    call Vcf2Bed {
        input:
            sample_id = sample_id,
            vcf_path = StandardizeVcf.standardized_vcf,
            sv_pipeline_docker = sv_pipeline_docker
    }

    output {
        File dragen_sr_bed = Vcf2Bed.vcf2bed_vapor
    }
}

task StandardizeVcf {
    input {
        String sample_id
        File vcf_path
        File contigs_fai
        Int min_size
        String sv_pipeline_docker
    }

    command <<<
        set -eu -o pipefail

        svtk standardize \
            --sample-names ~{sample_id} \
            --contigs ~{contigs_fai} \
            --min-size ~{min_size} \
            ~{vcf_path} \
            ~{sample_id}.std_dragen.vcf.gz \
            dragen
    >>>

    output {
        File standardized_vcf = "~{sample_id}.std_dragen.vcf.gz"
    }

    runtime {
        cpu: 1
        memory: "2 GiB"
        disks: "local-disk 2 HDD"
        docker: sv_pipeline_docker
    }
}

task Vcf2Bed {
    input {
        String sample_id
        File vcf_path
        String sv_pipeline_docker
    }

    command <<<
        set -eu -o pipefail

        svtk vcf2bed --info SVTYPE --info SVLEN ~{vcf_path} - | awk '$7 != "BND"' > ~{sample_id}.bed
    >>>

    output {
        File vcf2bed_vapor = "~{sample_id}.bed"
    }

    runtime {
        cpu: 1
        memory: "2 GiB"
        disks: "local-disk 2 HDD"
        docker: sv_pipeline_docker
    }
}
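
For running the new workflow on its own, here is a hedged sketch of an inputs JSON keyed by the workflow's declared inputs; every value below is a placeholder, not a path or image from this repository.

import json

# All values are hypothetical placeholders; substitute real files and the GATK-SV docker image.
inputs = {
    "PreprocessVcfForVapor.sample_id": "sample_001",
    "PreprocessVcfForVapor.vcf_path": "gs://my-bucket/sample_001.dragen_sv.vcf.gz",
    "PreprocessVcfForVapor.contigs_fai": "gs://my-bucket/reference/primary_contigs.fai",
    "PreprocessVcfForVapor.min_size": 50,
    "PreprocessVcfForVapor.sv_pipeline_docker": "<sv-pipeline-docker-image>",
}

with open("PreprocessVcfForVapor.inputs.json", "w") as fh:
    json.dump(inputs, fh, indent=2)

The resulting file could then be passed to a WDL runner, for example: java -jar cromwell.jar run wdl/PreprocessVcfForVapor.wdl --inputs PreprocessVcfForVapor.inputs.json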
