Misspelt track_names variable
kjaisingh committed Feb 7, 2025
1 parent 2f07e58 commit 9e5e7c7
Showing 15 changed files with 62 additions and 11 deletions.
2 changes: 1 addition & 1 deletion src/denovo/denovo_svs.py
@@ -772,7 +772,7 @@ def main():
print("Took %f seconds to process" % delta)

# Filter out INS that are manta or melt only and are SR only, have GQ=0, and FILTER contains 'HIGH_SR_BACKGROUND'
# TODO: Do I also update this to reference Dragen?
# TODO: Do we also have to filter out DRAGEN-only records?
verbose_print('Filtering out INS that are manta or melt only and SR only, with GQ=0 and FILTER contains HIGH_SR_BACKGROUND', verbose)
start = time.time()
remove_ins = bed_child[(bed_child['SVTYPE'] == 'INS') & ((bed_child['ALGORITHMS'] == 'manta') | (bed_child['ALGORITHMS'] == 'melt')) & (bed_child['EVIDENCE_FIX'] == 'SR') & ((bed_child['GQ'] == '0') | (bed_child.FILTER.str.contains('HIGH_SR_BACKGROUND')))]['name_famid'].to_list()
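The new TODO leaves open whether dragen-only insertions should be dropped by the same rule. A minimal sketch of what that might look like, assuming the same `bed_child` columns as in the hunk above and an `isin()` rewrite of the chained equality checks (the `dragen` label and the toy data are hypothetical):

```python
import pandas as pd

# Toy stand-in for bed_child with just the columns the filter uses.
bed_child = pd.DataFrame({
    "SVTYPE": ["INS", "INS", "DEL"],
    "ALGORITHMS": ["manta", "dragen", "dragen"],
    "EVIDENCE_FIX": ["SR", "SR", "PE"],
    "GQ": ["0", "99", "99"],
    "FILTER": ["HIGH_SR_BACKGROUND", "PASS", "PASS"],
    "name_famid": ["ins1_fam1", "ins2_fam1", "del1_fam1"],
})

# Hypothetical extension: isin() replaces the chained equality checks and
# adds 'dragen' to the single-caller list the TODO asks about.
remove_ins = bed_child[
    (bed_child["SVTYPE"] == "INS")
    & bed_child["ALGORITHMS"].isin(["manta", "melt", "dragen"])
    & (bed_child["EVIDENCE_FIX"] == "SR")
    & ((bed_child["GQ"] == "0") | bed_child["FILTER"].str.contains("HIGH_SR_BACKGROUND"))
]["name_famid"].to_list()

print(remove_ins)  # ['ins1_fam1'] for this toy frame
```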
@@ -6,7 +6,7 @@
 set -e
 
 for batch in Phase1 Pilot; do
-  for source in delly lumpy manta wham depth; do
+  for source in delly dragen lumpy manta wham depth; do
     for chrom in $(seq 1 22) X Y; do
       bsub -q normal -o merge_logs/${batch}.${source}.${chrom}.out -sla miket_sc -J "merge_${batch}_${source}_${chrom}" "
       ./merge.sh $batch $source $chrom" > /dev/null
@@ -10,7 +10,7 @@ medianfile=/data/talkowski/Samples/common-mind/matrices/CMC.all.binCov.median
 famfile=/data/talkowski/Samples/common-mind/ref/CMC.fam
 
 for batch in CMC; do
-  for source in delly lumpy manta wham depth; do
+  for source in delly dragen lumpy manta wham depth; do
     for chrom in 1; do
     # for chrom in $(seq 1 22) X Y; do
       for bed in split_beds/${batch}.${source}.${chrom}.*; do
@@ -10,7 +10,7 @@ medianfile=/data/talkowski/Samples/SFARI/deep_sv/asc_540/bincov/matrices/ASC540.
 famfile=/data/talkowski/Samples/SFARI/lists/SFARI_Real.fam
 
 for batch in Phase1 Pilot; do
-  for source in delly lumpy manta wham depth; do
+  for source in delly dragen lumpy manta wham depth; do
     for chrom in 1; do
     # for chrom in $(seq 1 22) X Y; do
       for bed in split_beds/${batch}.${source}.${chrom}.*; do
2 changes: 1 addition & 1 deletion src/sv-pipeline/03_variant_filtering/README.md
@@ -6,7 +6,7 @@ This workflow integrates, filters and genotypes the structural variants (SVs) cal

 ## Manual process
 #### Evidence aggregation
-a. To aggregate evidence for **pesr callers** (eg. delly, lumpy, manta, wham), for each `{source}` and `{chrom}`:
+a. To aggregate evidence for **pesr callers** (e.g. delly, dragen, lumpy, manta, wham), for each `{source}` and `{chrom}`:
 ```
 python scripts/aggregate.py \
 -r ../02_evidence_assessment/02a_rdtest/rdtest/{batch}.{source}.{chrom}.metrics \
 ...
 ```
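For reference, the per-source/per-chromosome loop the README describes could be scripted as below; `scripts/aggregate.py` and the `-r` flag come from the command above, while the batch name is a placeholder and the command's remaining arguments are elided here just as they are in the README:

```python
import subprocess

batch = "Phase1"  # placeholder batch name
sources = ["delly", "dragen", "lumpy", "manta", "wham"]  # pesr callers
chroms = [str(c) for c in range(1, 23)] + ["X", "Y"]

for source in sources:
    for chrom in chroms:
        # Only the -r flag is shown above; the real invocation takes
        # further arguments that are truncated in this view.
        subprocess.run(
            ["python", "scripts/aggregate.py",
             "-r", f"../02_evidence_assessment/02a_rdtest/rdtest/{batch}.{source}.{chrom}.metrics"],
            check=True,
        )
```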
7 changes: 6 additions & 1 deletion src/sv-pipeline/scripts/make_scramble_vcf.py
@@ -428,6 +428,8 @@ def __parse_arguments(argv: List[Text]) -> argparse.Namespace:
help="BAM/CRAM file, must be indexed")
parser.add_argument("--mei-bed", type=str, required=True,
help="Bed file containing MEI intervals from the reference")
parser.add_argument("--dragen-vcf", type=str, required=True,
help="Dragen vcf")
parser.add_argument("--manta-vcf", type=str, required=True,
help="Manta vcf")
parser.add_argument("--sample", type=str, required=True,
@@ -493,11 +495,14 @@ def main():
         l1_size=arguments.l1_size)
     logging.info("Loading MEI bed...")
     mei_trees = create_trees_from_bed_records(arguments.mei_bed, padding=arguments.mei_padding)
-    # TODO: Do I also update this to reference Dragen?
     logging.info("Loading Manta deletions...")
     with pysam.VariantFile(arguments.manta_vcf) as f_manta:
         del_filter_trees = dict()
         add_del_ends_to_trees(vcf=f_manta, trees=del_filter_trees, padding=arguments.del_filter_window)
+    logging.info("Loading Dragen deletions...")
+    with pysam.VariantFile(arguments.dragen_vcf) as f_dragen:
+        # Reuse the same trees so Manta and Dragen deletions are both kept
+        add_del_ends_to_trees(vcf=f_dragen, trees=del_filter_trees, padding=arguments.del_filter_window)
     logging.info("Writing vcf...")
     with pysam.VariantFile(arguments.out, "w", header=header) as vcf, \
             pysam.AlignmentFile(arguments.alignments_file, reference_filename=arguments.reference) as samfile:
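`add_del_ends_to_trees` itself is outside this diff. Judging only from the call sites above, a plausible shape is an interval index over a padded window at each deletion breakpoint; here is a sketch under that assumption, using the third-party `intervaltree` package (the details are guesses, not the code in this repository):

```python
import pysam
from intervaltree import IntervalTree


def add_del_ends_to_trees(vcf: pysam.VariantFile, trees: dict, padding: int) -> None:
    """Index a +/- `padding` window around both ends of every DEL, per contig."""
    for record in vcf:
        if record.info.get("SVTYPE") != "DEL":
            continue
        tree = trees.setdefault(record.chrom, IntervalTree())
        for pos in (record.start, record.stop):
            tree.addi(max(0, pos - padding), pos + padding)
```

Under this reading, passing the same `trees` dict for both callers (as in the corrected hunk) simply unions the Manta and Dragen deletion windows.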
2 changes: 1 addition & 1 deletion src/svtk/svtk/cli/standardize_vcf.py
@@ -30,7 +30,7 @@ def main(argv):
     parser.add_argument('vcf', help='Raw VCF.')
     parser.add_argument('fout', help='Standardized VCF.')
     parser.add_argument('source', help='Source algorithm. '
-                        '[delly,lumpy,manta,wham,melt,scramble,dragen]')
+                        '[delly,dragen,lumpy,manta,wham,melt,scramble]')
     parser.add_argument('-p', '--prefix', help='If provided, variant names '
                         'will be overwritten with this prefix.')
     parser.add_argument('--include-reference-sites', action='store_true',
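With the help string updated, a dragen VCF can be standardized like any other pesr caller's output. A hypothetical invocation, with the positional order (raw VCF, output, source) taken from the argparse setup above and illustrative file names:

```python
import subprocess

# 'dragen' now appears in the documented source list alongside the other callers.
subprocess.run(
    ["svtk", "standardize",
     "--prefix", "sample1_dragen",     # optional: overwrite variant names
     "sample1.dragen.raw.vcf.gz",      # raw VCF (illustrative path)
     "sample1.dragen.std.vcf",         # standardized output
     "dragen"],                        # source algorithm
    check=True,
)
```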
1 change: 1 addition & 0 deletions src/svtk/svtk/cxsv/complex_sv.py
@@ -525,6 +525,7 @@ def report_insertion_strip_CNVs(self):
         self.svtype = 'INS'
 
     # Where Manta calls two insertions flanking a duplication, report just the dup
+    # TODO: Do we also have to do this for Dragen calls?
     def report_manta_tandem_dup(self):
         record = self.dups[0]
         self.cpx_type = record.alts[0].strip('<>')
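For orientation, the pattern the comment describes — two Manta insertions flanking a duplication, collapsed to the duplication alone — can be sketched as follows; the record type and the flanking-distance test are illustrative assumptions, not the logic in `complex_sv.py`:

```python
from typing import List, NamedTuple, Optional


class SV(NamedTuple):
    chrom: str
    start: int
    end: int
    svtype: str


def collapse_flanked_dup(records: List[SV], window: int = 100) -> Optional[SV]:
    """If exactly two INS flank a single DUP within `window` bp, keep just the DUP."""
    dups = [r for r in records if r.svtype == "DUP"]
    ins = sorted((r for r in records if r.svtype == "INS"), key=lambda r: r.start)
    if len(dups) == 1 and len(ins) == 2:
        dup = dups[0]
        left, right = ins
        if abs(left.start - dup.start) <= window and abs(right.start - dup.end) <= window:
            return dup
    return None
```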
2 changes: 2 additions & 0 deletions wdl/GATKSVPipelineBatch.wdl
@@ -14,6 +14,8 @@ import "TestUtils.wdl" as tu
 # Runs GatherSampleEvidence, EvidenceQC, GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch, GenotypeBatch, RegenotypeCNVs,
 # and MakeCohortVcf (CombineBatches, ResolveComplexVariants, GenotypeComplexVariants, and CleanVcf)
+# TODO: Do we also have to include Dragen?
 workflow GATKSVPipelineBatch {
   input {
     # Batch data
2 changes: 2 additions & 0 deletions wdl/GATKSVPipelinePhase1.wdl
@@ -9,6 +9,8 @@ import "Structs.wdl"
 # One mighty WDL to rule them all...
 # Runs GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch
+# TODO: Do we also have to include Dragen?
 workflow GATKSVPipelinePhase1 {
   input {
 
2 changes: 2 additions & 0 deletions wdl/GATKSVPipelineSingleSample.wdl
@@ -23,6 +23,8 @@ import "Structs.wdl"
 # Runs GatherSampleEvidence, EvidenceQC, GatherBatchEvidence, ClusterBatch, FilterBatch.MergePesrVcfs, GenotypeBatch,
 # MakeCohortVcf (CombineBatches, ResolveComplexVariants, GenotypeComplexVariants, and CleanVcf), and AnnotateVcf
+# TODO: Do we also have to include Dragen?
 workflow GATKSVPipelineSingleSample {
   meta {
     allowNestedInputs: true
41 changes: 40 additions & 1 deletion wdl/GatherSampleEvidenceMetrics.wdl
@@ -8,11 +8,13 @@ workflow GatherSampleEvidenceMetrics {
     File? coverage_counts
     File? pesr_disc
     File? pesr_split
+    File? dragen_vcf
     File? manta_vcf
     File? melt_vcf
     File? scramble_vcf
     File? wham_vcf
 
+    File? baseline_dragen_vcf
     File? baseline_manta_vcf
     File? baseline_melt_vcf
     File? baseline_scramble_vcf
@@ -23,6 +25,8 @@ workflow GatherSampleEvidenceMetrics {
     Int min_size = 50
     String sv_pipeline_docker
 
+    RuntimeAttr? runtime_attr_dragen_std
+    RuntimeAttr? runtime_attr_dragen_metrics
     RuntimeAttr? runtime_attr_manta_std
     RuntimeAttr? runtime_attr_manta_metrics
     RuntimeAttr? runtime_attr_melt_std
@@ -35,6 +39,41 @@
     RuntimeAttr? runtime_attr_counts_metrics
   }
+  if (defined(dragen_vcf)) {
+    call tu.StandardizeVCF as Dragen_Std {
+      input:
+        vcf = select_first([dragen_vcf]),
+        sample_id = sample,
+        caller = "dragen",
+        contig_index = contig_index,
+        min_size = min_size,
+        sv_pipeline_docker = sv_pipeline_docker,
+        runtime_attr_override = runtime_attr_dragen_std
+    }
+    if (defined(baseline_dragen_vcf)) {
+      call tu.StandardizeVCF as Dragen_Std_Base {
+        input:
+          vcf = select_first([baseline_dragen_vcf]),
+          sample_id = sample,
+          caller = "dragen",
+          contig_index = contig_index,
+          min_size = min_size,
+          sv_pipeline_docker = sv_pipeline_docker,
+          runtime_attr_override = runtime_attr_dragen_std
+      }
+    }
+    call tu.VCFMetrics as Dragen_Metrics {
+      input:
+        vcf = Dragen_Std.out,
+        baseline_vcf = Dragen_Std_Base.out,
+        samples = [sample],
+        prefix = "dragen_" + sample,
+        types = "DEL,DUP,INS,INV,BND",
+        contig_list = contig_list,
+        sv_pipeline_docker = sv_pipeline_docker,
+        runtime_attr_override = runtime_attr_dragen_metrics
+    }
+  }
   if (defined(manta_vcf)) {
     call tu.StandardizeVCF as Manta_Std {
       input:
@@ -182,6 +221,6 @@
   }
   output {
-    Array[File] sample_metrics_files = select_all([Manta_Metrics.out, Melt_Metrics.out, Scramble_Metrics.out, Wham_Metrics.out, SRMetrics.out, PEMetrics.out, CountsMetrics.out])
+    Array[File] sample_metrics_files = select_all([Dragen_Metrics.out, Manta_Metrics.out, Melt_Metrics.out, Scramble_Metrics.out, Wham_Metrics.out, SRMetrics.out, PEMetrics.out, CountsMetrics.out])
   }
 }
2 changes: 1 addition & 1 deletion wdl/SVConcordance.wdl
@@ -127,7 +127,7 @@ task SVConcordanceTask {
     JVM_MAX_MEM=$(getJavaMem MemTotal)
     echo "JVM memory: $JVM_MAX_MEM"
 
-    TRACK_NAME_CMD=$(
+    TRACK_NAMES_CMD=$(
       if [ ~{if defined(track_names) then "1" else "0"} -eq 1 ]; then
         echo "--track-name ~{sep=' --track-name ' track_names}"
       fi
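The interleaving done by the WDL `sep` expression is easy to misread; in Python terms it behaves roughly like the join below (track names illustrative), which is why the leading `--track-name` is written out before the `~{sep=...}` expansion:

```python
track_names = ["raw", "cleaned"]  # illustrative track names

# Equivalent of: echo "--track-name ~{sep=' --track-name ' track_names}"
track_names_cmd = "--track-name " + " --track-name ".join(track_names)
print(track_names_cmd)  # --track-name raw --track-name cleaned
```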
2 changes: 1 addition & 1 deletion wdl/Scramble.wdl
@@ -15,7 +15,7 @@ workflow Scramble {
     File original_bam_or_cram_file
     File original_bam_or_cram_index
     File counts_file
-    # TODO: Do I also update this to reference Dragen?
+    # TODO: Do we also have to include Dragen?
     File manta_vcf
     String sample_name
     File reference_fasta
2 changes: 1 addition & 1 deletion wdl/TinyResolve.wdl
@@ -8,7 +8,7 @@ import "Utils.wdl" as util
 workflow TinyResolve {
   input {
     Array[String] samples  # Sample ID
-    # TODO: Do I also update this to reference Dragen?
+    # TODO: Do we also have to include Dragen calls?
     File manta_vcf_tar  # tarballed Manta VCFs
     File cytoband
     Array[File] discfile
