From f0771a60a8d725efbe3fc47b2a07599b18f5981e Mon Sep 17 00:00:00 2001 From: epiercehoffman Date: Mon, 16 Aug 2021 15:28:02 -0400 Subject: [PATCH] Module name defenestration (#213) Rename modules to be more descriptive. See PR comment for old => new naming key --- README.md | 279 +++---- ...ATKSVPipelineBatch.ref_panel_1kg.json.tmpl | 64 +- .../cohort_mode_workspace_dashboard.md.tmpl | 102 ++- .../AnnotateVcf.SingleBatch.json.tmpl | 24 + .../AnnotateVcf.json.tmpl | 24 + .../ClusterBatch.json.tmpl | 26 + .../EvidenceQC.json.tmpl | 16 + .../FilterBatch.json.tmpl | 19 + .../GatherBatchEvidence.json.tmpl | 65 ++ .../GatherSampleEvidence.json.tmpl | 36 + .../GenerateBatchMetrics.json.tmpl | 34 + .../GenotypeBatch.SingleBatch.json.tmpl | 29 + .../GenotypeBatch.json.tmpl | 29 + .../MakeCohortVcf.SingleBatch.json.tmpl | 43 ++ .../MakeCohortVcf.json.tmpl | 43 ++ .../MergeBatchSites.json.tmpl | 6 + .../MergeCohortVcfs.json.tmpl | 6 - .../Module00a.json.tmpl | 36 - .../Module00b.json.tmpl | 16 - .../Module00c.json.tmpl | 65 -- .../Module01.json.tmpl | 26 - .../Module02.json.tmpl | 34 - .../Module03.json.tmpl | 19 - .../Module04.SingleBatch.json.tmpl | 29 - .../Module04.json.tmpl | 29 - .../Module04b.SingleBatch.json.tmpl | 25 - .../Module04b.json.tmpl | 25 - .../Module0506.SingleBatch.json.tmpl | 43 -- .../Module0506.json.tmpl | 43 -- .../Module08Annotation.SingleBatch.json.tmpl | 24 - .../Module08Annotation.json.tmpl | 24 - .../RegenotypeCNVs.SingleBatch.json.tmpl | 25 + .../RegenotypeCNVs.json.tmpl | 25 + .../ClusterBatchOutputs.json.tmpl | 7 + .../GatherBatchEvidenceOutputs.json.tmpl | 34 + .../MergeBatchSitesOutputs.json.tmpl | 4 + .../MergeCohortVcfsOutputs.json.tmpl | 4 - .../Module00cOutputs.json.tmpl | 34 - .../Module01Outputs.json.tmpl | 7 - scripts/test/validate.sh | 2 +- .../AnnotateVcf/AnnotateVcf.json.tmpl | 24 + ...ateFunctionalAnnotationResources.json.tmpl | 12 + .../PrepareGencode.json.tmpl} | 0 .../PrepareNoncoding.json.tmpl} | 0 .../ClusterBatch/ClusterBatch.json.tmpl | 26 + .../EvidenceQC/EvidenceQC.json.tmpl | 16 + .../FilterBatch/FilterBatch.json.tmpl | 20 + .../FilterBatch/FilterBatchQc.json.tmpl | 23 + ...GatherBatchEvidence.baf_from_vcf.json.tmpl | 64 ++ .../GatherBatchEvidence.json.tmpl | 64 ++ .../GatherSampleEvidenceBatch.json.tmpl | 38 + .../GenerateBatchMetrics.json.tmpl | 33 + .../GenotypeBatch/GenotypeBatch.json.tmpl | 29 + .../MakeCohortVcf/MakeCohortVcf.json.tmpl | 68 ++ .../MergeBatchSites/MergeBatchSites.json.tmpl | 6 + .../RegenotypeCNVs/RegenotypeCNVs.json.tmpl | 25 + .../TrainGCNV.json.tmpl} | 0 .../batch/GATKSVPipelineBatch.json.tmpl | 64 +- .../module00a/Module00aBatch.json.tmpl | 38 - .../module00b/Module00b.json.tmpl | 16 - .../Module00c.baf_from_vcf.json.tmpl | 64 -- .../module00c/Module00c.json.tmpl | 64 -- .../module01/Module01.json.tmpl | 26 - .../module02/Module02.json.tmpl | 33 - .../module03/Module03.json.tmpl | 20 - .../module03/Module03Qc.json.tmpl | 23 - .../module04/MergeCohortVcfs.test.json.tmpl | 6 - .../module04/Module04.json.tmpl | 29 - .../module04b/Module04b.test.json.tmpl | 25 - .../module0506/Module0506.json.tmpl | 68 -- .../Module08Annotation.test.json.tmpl | 24 - ...odule08Preprocessing.wdl.example.json.tmpl | 12 - wdl/AnnotateExternalAF.wdl | 2 +- wdl/AnnotateExternalAFperContig.wdl | 2 +- wdl/AnnotateILFeatures.wdl | 8 +- wdl/AnnotateVcf.wdl | 181 ++--- wdl/BAFTest.wdl | 4 +- wdl/BatchEvidenceMerging.wdl | 6 +- wdl/CleanVcf.wdl | 693 ++---------------- wdl/CleanVcfChromosome.wdl | 677 +++++++++++++++++ wdl/{Module01.wdl => 
ClusterBatch.wdl} | 8 +- ...e01Metrics.wdl => ClusterBatchMetrics.wdl} | 4 +- wdl/ClusterSingleChromosome.wdl | 2 +- wdl/CollectQcPerSample.wdl | 2 +- ...dule0506Cluster.wdl => CombineBatches.wdl} | 6 +- wdl/Duphold.wdl | 4 +- wdl/{Module00b.wdl => EvidenceQC.wdl} | 2 +- wdl/{Module03.wdl => FilterBatch.wdl} | 8 +- ...le03Metrics.wdl => FilterBatchMetrics.wdl} | 4 +- wdl/{Module03Qc.wdl => FilterBatchQc.wdl} | 4 +- wdl/FilterCleanupQualRecalibration.wdl | 2 +- wdl/GATKSVPipelineBatch.wdl | 139 ++-- wdl/GATKSVPipelineBatchMetrics.wdl | 281 ------- wdl/GATKSVPipelinePhase1.wdl | 208 +++--- wdl/GATKSVPipelineSingleSample.wdl | 213 +++--- ...{Module00c.wdl => GatherBatchEvidence.wdl} | 10 +- ...ics.wdl => GatherBatchEvidenceMetrics.wdl} | 4 +- ...Module00a.wdl => GatherSampleEvidence.wdl} | 8 +- ...atch.wdl => GatherSampleEvidenceBatch.wdl} | 44 +- ...cs.wdl => GatherSampleEvidenceMetrics.wdl} | 2 +- ...{Module02.wdl => GenerateBatchMetrics.wdl} | 12 +- ...cs.wdl => GenerateBatchMetricsMetrics.wdl} | 8 +- ...GenerateFunctionalAnnotationResources.wdl} | 4 +- wdl/{Module04.wdl => GenotypeBatch.wdl} | 14 +- ...04Metrics.wdl => GenotypeBatchMetrics.wdl} | 4 +- ...notype.wdl => GenotypeComplexVariants.wdl} | 6 +- wdl/GenotypeCpxCnvs.wdl | 4 +- wdl/GenotypeCpxCnvsPerBatch.wdl | 4 +- wdl/GenotypeDepthPart2.wdl | 22 +- wdl/GenotypePESRPart2.wdl | 34 +- wdl/Genotype_2.wdl | 10 +- wdl/GermlineCNVCohort.wdl | 2 +- wdl/{Module0506.wdl => MakeCohortVcf.wdl} | 68 +- ...06Metrics.wdl => MakeCohortVcfMetrics.wdl} | 4 +- wdl/MasterVcfQc.wdl | 2 +- ...ergeCohortVcfs.wdl => MergeBatchSites.wdl} | 2 +- wdl/Module0506Clean.wdl | 102 --- ...Module07FilterCleanupQualRecalibration.wdl | 2 +- wdl/Module07MinGQ.wdl | 2 +- wdl/Module07Preprocessing.wdl | 72 -- wdl/Module07XfBatchEffect.wdl | 2 +- wdl/Module08Annotation.wdl | 109 --- wdl/Module10Benchmark.wdl | 16 +- wdl/Mosaic.wdl | 2 +- wdl/PETest.wdl | 6 +- wdl/PETestChromosome.wdl | 14 +- wdl/PerSampleExternalBenchmark.wdl | 2 +- wdl/PruneAndAddVafs.wdl | 4 +- wdl/RDTest.wdl | 4 +- wdl/RDTestChromosome.wdl | 6 +- wdl/RdPeSrAnno.wdl | 10 +- wdl/{Module04b.wdl => RegenotypeCNVs.wdl} | 6 +- ...Resolve.wdl => ResolveComplexVariants.wdl} | 6 +- wdl/ResolveCpxSv.wdl | 2 +- wdl/ReviseSVtypeINStoMEI.wdl | 2 +- wdl/ReviseSVtypeINStoMEIperContig.wdl | 2 +- wdl/SRTest.wdl | 6 +- wdl/SRTestChromosome.wdl | 14 +- wdl/ScatterAnnotateVcfByChrom.wdl | 140 ++++ wdl/ScatterCpxGenotyping.wdl | 4 +- wdl/ShardedCluster.wdl | 2 +- wdl/ShardedQcCollection.wdl | 2 +- ...ks02.wdl => TasksGenerateBatchMetrics.wdl} | 0 wdl/{Tasks04.wdl => TasksGenotypeBatch.wdl} | 0 wdl/{Tasks0506.wdl => TasksMakeCohortVcf.wdl} | 0 wdl/TrainPEGenotyping.wdl | 8 +- wdl/TrainRDGenotyping.wdl | 10 +- wdl/TrainSRGenotyping.wdl | 8 +- wdl/VaPoRVcf.wdl | 1 - wdl/VcfClusterSingleChromsome.wdl | 2 +- wdl/XfBatchEffect.wdl | 2 +- wdl/prune_add_af.wdl | 2 +- 152 files changed, 2677 insertions(+), 3008 deletions(-) create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/EvidenceQC.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatch.json.tmpl create mode 100644 
input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherBatchEvidence.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherSampleEvidence.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenerateBatchMetrics.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.SingleBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.SingleBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeBatchSites.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeCohortVcfs.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00a.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00b.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00c.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module01.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module02.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module03.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.SingleBatch.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.SingleBatch.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.SingleBatch.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.SingleBatch.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.SingleBatch.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/ClusterBatchOutputs.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/GatherBatchEvidenceOutputs.json.tmpl create mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeBatchSitesOutputs.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeCohortVcfsOutputs.json.tmpl delete mode 100644 
input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module00cOutputs.json.tmpl delete mode 100644 input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module01Outputs.json.tmpl create mode 100644 test_input_templates/AnnotateVcf/AnnotateVcf.json.tmpl create mode 100644 test_input_templates/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl rename test_input_templates/{module08/PrepareGencode.wdl.example.json.tmpl => AnnotateVcf/PrepareGencode.json.tmpl} (100%) rename test_input_templates/{module08/PrepareNoncoding.wdl.example.json.tmpl => AnnotateVcf/PrepareNoncoding.json.tmpl} (100%) create mode 100644 test_input_templates/ClusterBatch/ClusterBatch.json.tmpl create mode 100644 test_input_templates/EvidenceQC/EvidenceQC.json.tmpl create mode 100644 test_input_templates/FilterBatch/FilterBatch.json.tmpl create mode 100644 test_input_templates/FilterBatch/FilterBatchQc.json.tmpl create mode 100644 test_input_templates/GatherBatchEvidence/GatherBatchEvidence.baf_from_vcf.json.tmpl create mode 100644 test_input_templates/GatherBatchEvidence/GatherBatchEvidence.json.tmpl create mode 100644 test_input_templates/GatherSampleEvidence/GatherSampleEvidenceBatch.json.tmpl create mode 100644 test_input_templates/GenerateBatchMetrics/GenerateBatchMetrics.json.tmpl create mode 100644 test_input_templates/GenotypeBatch/GenotypeBatch.json.tmpl create mode 100644 test_input_templates/MakeCohortVcf/MakeCohortVcf.json.tmpl create mode 100644 test_input_templates/MergeBatchSites/MergeBatchSites.json.tmpl create mode 100644 test_input_templates/RegenotypeCNVs/RegenotypeCNVs.json.tmpl rename test_input_templates/{gcnv/trainGCNV.test.json.tmpl => TrainGCNV/TrainGCNV.json.tmpl} (100%) delete mode 100644 test_input_templates/module00a/Module00aBatch.json.tmpl delete mode 100644 test_input_templates/module00b/Module00b.json.tmpl delete mode 100644 test_input_templates/module00c/Module00c.baf_from_vcf.json.tmpl delete mode 100644 test_input_templates/module00c/Module00c.json.tmpl delete mode 100644 test_input_templates/module01/Module01.json.tmpl delete mode 100644 test_input_templates/module02/Module02.json.tmpl delete mode 100644 test_input_templates/module03/Module03.json.tmpl delete mode 100644 test_input_templates/module03/Module03Qc.json.tmpl delete mode 100644 test_input_templates/module04/MergeCohortVcfs.test.json.tmpl delete mode 100644 test_input_templates/module04/Module04.json.tmpl delete mode 100644 test_input_templates/module04b/Module04b.test.json.tmpl delete mode 100644 test_input_templates/module0506/Module0506.json.tmpl delete mode 100644 test_input_templates/module08/Module08Annotation.test.json.tmpl delete mode 100644 test_input_templates/module08/Module08Preprocessing.wdl.example.json.tmpl create mode 100644 wdl/CleanVcfChromosome.wdl rename wdl/{Module01.wdl => ClusterBatch.wdl} (97%) rename wdl/{Module01Metrics.wdl => ClusterBatchMetrics.wdl} (97%) rename wdl/{Module0506Cluster.wdl => CombineBatches.wdl} (98%) rename wdl/{Module00b.wdl => EvidenceQC.wdl} (99%) rename wdl/{Module03.wdl => FilterBatch.wdl} (98%) rename wdl/{Module03Metrics.wdl => FilterBatchMetrics.wdl} (96%) rename wdl/{Module03Qc.wdl => FilterBatchQc.wdl} (98%) delete mode 100644 wdl/GATKSVPipelineBatchMetrics.wdl rename wdl/{Module00c.wdl => GatherBatchEvidence.wdl} (98%) rename wdl/{Module00cMetrics.wdl => GatherBatchEvidenceMetrics.wdl} (98%) rename wdl/{Module00a.wdl => GatherSampleEvidence.wdl} (98%) rename wdl/{Module00aBatch.wdl => 
GatherSampleEvidenceBatch.wdl} (84%) rename wdl/{Module00aMetrics.wdl => GatherSampleEvidenceMetrics.wdl} (99%) rename wdl/{Module02.wdl => GenerateBatchMetrics.wdl} (97%) rename wdl/{Module02Metrics.wdl => GenerateBatchMetricsMetrics.wdl} (80%) rename wdl/{Module08Preprocessing.wdl => GenerateFunctionalAnnotationResources.wdl} (94%) rename wdl/{Module04.wdl => GenotypeBatch.wdl} (97%) rename wdl/{Module04Metrics.wdl => GenotypeBatchMetrics.wdl} (97%) rename wdl/{Module0506ComplexGenotype.wdl => GenotypeComplexVariants.wdl} (96%) rename wdl/{Module0506.wdl => MakeCohortVcf.wdl} (88%) rename wdl/{Module0506Metrics.wdl => MakeCohortVcfMetrics.wdl} (97%) rename wdl/{MergeCohortVcfs.wdl => MergeBatchSites.wdl} (99%) delete mode 100644 wdl/Module0506Clean.wdl delete mode 100644 wdl/Module07Preprocessing.wdl delete mode 100644 wdl/Module08Annotation.wdl rename wdl/{Module04b.wdl => RegenotypeCNVs.wdl} (99%) rename wdl/{Module0506ComplexResolve.wdl => ResolveComplexVariants.wdl} (98%) create mode 100644 wdl/ScatterAnnotateVcfByChrom.wdl rename wdl/{Tasks02.wdl => TasksGenerateBatchMetrics.wdl} (100%) rename wdl/{Tasks04.wdl => TasksGenotypeBatch.wdl} (100%) rename wdl/{Tasks0506.wdl => TasksMakeCohortVcf.wdl} (100%) diff --git a/README.md b/README.md index 0b8dc1065..60133800b 100644 --- a/README.md +++ b/README.md @@ -11,19 +11,19 @@ A structural variation discovery pipeline for Illumina short-read whole-genome s * [Single-sample mode](#single-sample-mode) * [gCNV model](#gcnv-training-overview) * [Module Descriptions](#descriptions) - * [Module 00a](#module00a) - Raw callers and evidence collection - * [Module 00b](#module00b) - Batch QC - * [gCNV training](#gcnv-training) - gCNV model creation - * [Module 00c](#module00c) - Batch evidence merging, BAF generation, and depth callers - * [Module 01](#module01) - Site clustering - * [Module 02](#module02) - Site metrics - * [Module 03](#module03) - Filtering - * [Gather Cohort VCFs](#gather-vcfs) - Cross-batch site merging - * [Module 04](#module04) - Genotyping - * [Module 04b](#module04b) - Genotype refinement (optional) - * [Module 05/06](#module0506) - Cross-batch integration, complex event resolution, and VCF cleanup + * [GatherSampleEvidence](#gather-sample-evidence) - Raw callers and evidence collection + * [EvidenceQC](#evidence-qc) - Batch QC + * [TrainGCNV](#gcnv-training) - gCNV model creation + * [GatherBatchEvidence](#gather-batch-evidence) - Batch evidence merging, BAF generation, and depth callers + * [ClusterBatch](#cluster-batch) - Site clustering + * [GenerateBatchMetrics](#generate-batch-metrics) - Site metrics + * [FilterBatch](#filter-batch) - Filtering + * [MergeBatchSites](#merge-batch-sites) - Cross-batch site merging + * [GenotypeBatch](#genotype-batch) - Genotyping + * [RegenotypeCNVs](#regenotype-cnvs) - Genotype refinement (optional) + * [MakeCohortVcf](#make-cohort-vcf) - Cross-batch integration, complex event resolution, and VCF cleanup * [Module 07](#module07) - Downstream Filtering - * [Module 08](#module08) - Annotation + * [AnnotateVcf](#annotate-vcf) - Annotation * [Module 09](#module09) - QC and Visualization * Additional modules - Mosaic and de novo * [Troubleshooting](#troubleshooting) @@ -44,7 +44,7 @@ A structural variation discovery pipeline for Illumina short-read whole-genome s ### Data: * Illumina short-read whole-genome CRAMs or BAMs, aligned to hg38 with [bwa-mem](https://github.com/lh3/bwa). BAMs must also be indexed. * Indexed GVCFs produced by GATK HaplotypeCaller, or a jointly genotyped VCF. 
-* Family structure definitions file in [PED format](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format). Sex aneuploidies (detected in [Module 00b](#module00b)) should be entered as sex = 0. +* Family structure definitions file in [PED format](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format). Sex aneuploidies (detected in [EvidenceQC](#evidence-qc)) should be entered as sex = 0. #### Sample ID requirements: @@ -59,13 +59,13 @@ Sample IDs should not: The same requirements apply to family IDs in the PED file, as well as batch IDs and the cohort ID provided as workflow inputs. -Sample IDs are provided to [Module00a](#module00a) directly and need not match sample names from the BAM/CRAM headers or GVCFs. `GetSampleID.wdl` can be used to fetch BAM sample IDs and also generates a set of alternate IDs that are considered safe for this pipeline; alternatively, [this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py) transforms a list of sample IDs to fit these requirements. Currently, sample IDs can be replaced again in [Module 00c](#module00c). +Sample IDs are provided to [GatherSampleEvidence](#gather-sample-evidence) directly and need not match sample names from the BAM/CRAM headers or GVCFs. `GetSampleID.wdl` can be used to fetch BAM sample IDs and also generates a set of alternate IDs that are considered safe for this pipeline; alternatively, [this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py) transforms a list of sample IDs to fit these requirements. Currently, sample IDs can be replaced again in [GatherBatchEvidence](#gather-batch-evidence). The following inputs will need to be updated with the transformed sample IDs: -* Sample ID list for [Module00a](#module00a) or [Module 00c](#module00c) +* Sample ID list for [GatherSampleEvidence](#gather-sample-evidence) or [GatherBatchEvidence](#gather-batch-evidence) * PED file -If using a SNP VCF in [Module 00c](#module00c), it does not need to be re-headered; simply provide the `vcf_samples` argument. +If using a SNP VCF in [GatherBatchEvidence](#gather-batch-evidence), it does not need to be re-headered; simply provide the `vcf_samples` argument. ## Citation @@ -84,20 +84,20 @@ There are two scripts for running the full pipeline: * `wdl/GATKSVPipelineSingleSample.wdl`: Runs GATK-SV on a single sample, given a reference panel #### Inputs -Example workflow inputs can be found in `/inputs`. All required resources are available in public Google buckets. +Example workflow inputs can be found in `/input_templates` or `/test_input_templates`. All required resources are available in public Google buckets. #### MELT **Important**: The example input files contain MELT inputs that are NOT public (see [Requirements](#requirements)). These include: * `GATKSVPipelineSingleSample.melt_docker` and `GATKSVPipelineBatch.melt_docker` - MELT docker URI (see [Docker readme](https://github.com/talkowski-lab/gatk-sv-v1/blob/master/dockerfiles/README.md)) -* `GATKSVPipelineSingleSample.ref_std_melt_vcfs` - Standardized MELT VCFs ([Module00c](#module00c)) +* `GATKSVPipelineSingleSample.ref_std_melt_vcfs` - Standardized MELT VCFs ([GatherBatchEvidence](#gather-batch-evidence)) The input values are provided only as an example and are not publicly accessible. In order to include MELT, these values must be provided by the user. 
MELT can be disabled by deleting these inputs and setting `GATKSVPipelineBatch.use_melt` to `false`. #### Requester pays buckets **Important**: The following parameters must be set when certain input data is in requester pays (RP) buckets: -* `GATKSVPipelineSingleSample.requester_pays_cram` and `GATKSVPipelineBatch.Module00aBatch.requester_pays_crams` - set to `True` if inputs are CRAM format and in an RP bucket, otherwise `False`. +* `GATKSVPipelineSingleSample.requester_pays_cram` and `GATKSVPipelineBatch.GatherSampleEvidenceBatch.requester_pays_crams` - set to `True` if inputs are CRAM format and in an RP bucket, otherwise `False`. * `GATKSVPipelineBatch.GATKSVPipelinePhase1.gcs_project_for_requester_pays` - set to your Google Cloud Project ID if gVCFs are in an RP bucket, otherwise omit this parameter. #### Execution @@ -106,10 +106,11 @@ We recommend running the pipeline on a dedicated [Cromwell](https://github.com/b ``` > mkdir gatksv_run && cd gatksv_run > mkdir wdl && cd wdl -> cp $GATK_SV_V1_ROOT/wdl/*.wdl . +> cp $GATK_SV_ROOT/wdl/*.wdl . > zip dep.zip *.wdl > cd .. -> cp $GATK_SV_V1_ROOT/inputs/GATKSVPipelineBatch.ref_panel_1kg.json GATKSVPipelineBatch.my_run.json +> bash scripts/inputs/build_default_inputs.sh -d $GATK_SV_ROOT +> cp $GATK_SV_ROOT/inputs/GATKSVPipelineBatch.ref_panel_1kg.json GATKSVPipelineBatch.my_run.json > cromshell submit wdl/GATKSVPipelineBatch.wdl GATKSVPipelineBatch.my_run.json cromwell_config.json wdl/dep.zip ``` @@ -117,24 +118,25 @@ where `cromwell_config.json` is a Cromwell [workflow options file](https://cromw ## Pipeline Overview The pipeline consists of a series of modules that perform the following: -* [Module 00a](#module00a): SV evidence collection, including calls from a configurable set of algorithms (Delly, Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). -* [Module 00b](#module00b): Dosage bias scoring and ploidy estimation -* [Module 00c](#module00c): Copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation -* [Module 01](#module01): Variant clustering -* [Module 02](#module02): Variant filtering metric generation -* [Module 03](#module03): Variant filtering; outlier exclusion -* [Module 04](#module04): Genotyping -* [Module 05/06](#module0506): Cross-batch integration; complex variant resolution and re-genotyping; vcf cleanup +* [GatherSampleEvidence](#gather-sample-evidence): SV evidence collection, including calls from a configurable set of algorithms (Delly, Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). 
+* [EvidenceQC](#evidence-qc): Dosage bias scoring and ploidy estimation
+* [GatherBatchEvidence](#gather-batch-evidence): Copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation
+* [ClusterBatch](#cluster-batch): Variant clustering
+* [GenerateBatchMetrics](#generate-batch-metrics): Variant filtering metric generation
+* [FilterBatch](#filter-batch): Variant filtering; outlier exclusion
+* [GenotypeBatch](#genotype-batch): Genotyping
+* [MakeCohortVcf](#make-cohort-vcf): Cross-batch integration; complex variant resolution and re-genotyping; VCF cleanup
* [Module 07](#module07): Downstream filtering, including minGQ, batch effect check, outlier sample removal, and final recalibration;
-* [Module 08](#module08): Annotations, including functional annotation, allele frequency (AF) annotation and AF annotation with external population callsets;
+* [AnnotateVcf](#annotate-vcf): Annotations, including functional annotation, allele frequency (AF) annotation and AF annotation with external population callsets;
* [Module 09](#module09): Visualization, including scripts that generate IGV screenshots and RD plots.
* Additional modules to be added: de novo and mosaic scripts

Repository structure:
-* `/inputs`: Example workflow parameter files for running gCNV training, GATK-SV batch mode, and GATK-SV single-sample mode
+* `/input_templates`: Example workflow parameter file templates for running gCNV training, GATK-SV batch mode, and GATK-SV single-sample mode. Generate parameter files from templates using `scripts/inputs/build_default_inputs.sh`.
+* `/input_values`: Example workflow input values to populate templates. Please note that file inputs may not be publicly available.
* `/dockerfiles`: Resources for building pipeline docker images (see [readme](https://github.com/talkowski-lab/gatk-sv-v1/blob/master/dockerfiles/README.md))
-* `/wdl`: WDLs running the pipeline. There is a master WDL for running each module, e.g. `Module01.wdl`.
+* `/wdl`: WDLs running the pipeline. There is a master WDL for running each module, e.g. `ClusterBatch.wdl`.
* `/scripts`: scripts for running tests, building dockers, and analyzing cromwell metadata files
* `/src`: main pipeline scripts
* `/RdTest`: scripts for depth testing
@@ -143,7 +145,7 @@ Repository structure:
* `/svtest`: Python module for generating various summary metrics from module outputs
* `/svtk`: Python module of tools for SV-related datafile parsing and analysis
* `/WGD`: whole-genome dosage scoring scripts
-* `/test`: WDL test parameter files. Please note that file inputs may not be publicly available.
+* `/test_input_templates`: WDL test parameter file templates. Generate parameter files from templates using `scripts/inputs/build_default_inputs.sh`.

## Cohort mode

A minimum cohort size of 100 with roughly equal number of males and females is r

For larger cohorts, samples should be split up into batches of about 100-500 samples. Refer to the [Batching](#batching) section for further guidance on creating batches.
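+To make the batch-versus-cohort structure concrete, here is a sketch reusing the `cromshell` submission pattern from the Execution section above; the batch names and inputs JSON file names are hypothetical, not pipeline conventions:
+```
+# Hypothetical sketch: batch-level workflows are submitted once per batch with a
+# per-batch inputs JSON, while cohort-level workflows are submitted once for all batches.
+> cromshell submit wdl/GatherBatchEvidence.wdl GatherBatchEvidence.batch1.json cromwell_config.json wdl/dep.zip
+> cromshell submit wdl/GatherBatchEvidence.wdl GatherBatchEvidence.batch2.json cromwell_config.json wdl/dep.zip
+> cromshell submit wdl/MakeCohortVcf.wdl MakeCohortVcf.my_cohort.json cromwell_config.json wdl/dep.zip
+```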
The pipeline should be executed as follows: -* Modules [00a](#module00a) and [00b](#module00b) can be run on arbitrary cohort partitions -* Modules [00c](#module00c), [01](#module01), [02](#module02), and [03](#module03) are run separately per batch -* [Module 04](#module04) is run separately per batch, using filtered variants ([Module 03](#module03) output) combined across all batches -* [Module 05/06](#module0506) and beyond are run on all batches together +* Modules [GatherSampleEvidence](#gather-sample-evidence) and [EvidenceQC](#evidence-qc) can be run on arbitrary cohort partitions +* Modules [GatherBatchEvidence](#gather-batch-evidence), [ClusterBatch](#cluster-batch), [GenerateBatchMetrics](#generate-batch-metrics), and [FilterBatch](#filter-batch) are run separately per batch +* [GenotypeBatch](#genotype-batch) is run separately per batch, using filtered variants ([FilterBatch](#filter-batch) output) combined across all batches +* [MakeCohortVcf](#make-cohort-vcf) and beyond are run on all batches together -Note: [Module 00c](#module00c) requires a [trained gCNV model](#gcnv-training). +Note: [GatherBatchEvidence](#gather-batch-evidence) requires a [trained gCNV model](#gcnv-training). #### Batching -For larger cohorts, samples should be split up into batches of about 100-500 samples with similar characteristics. We recommend batching based on overall coverage and dosage score (WGD), which can be generated in [Module 00b](#module00b). An example batching process is outlined below: +For larger cohorts, samples should be split up into batches of about 100-500 samples with similar characteristics. We recommend batching based on overall coverage and dosage score (WGD), which can be generated in [EvidenceQC](#evidence-qc). An example batching process is outlined below: 1. Divide the cohort into PCR+ and PCR- samples -2. Partition the samples by median coverage from [Module00b](#module00b), grouping samples with similar median coverage together. The end goal is to divide the cohort into roughly equal-sized batches of about 100-500 samples; if your partitions based on coverage are larger or uneven, you can partition the cohort further in the next step to obtain the final batches. -3. Optionally, divide the samples further by dosage score (WGD) from [Module00b](#module00b), grouping samples with similar WGD score together, to obtain roughly equal-sized batches of about 100-500 samples -4. Maintain a roughly equal sex balance within each batch, based on sex assignments from [Module00b](#module00b) +2. Partition the samples by median coverage from [EvidenceQC](#evidence-qc), grouping samples with similar median coverage together. The end goal is to divide the cohort into roughly equal-sized batches of about 100-500 samples; if your partitions based on coverage are larger or uneven, you can partition the cohort further in the next step to obtain the final batches. +3. Optionally, divide the samples further by dosage score (WGD) from [EvidenceQC](#evidence-qc), grouping samples with similar WGD score together, to obtain roughly equal-sized batches of about 100-500 samples +4. Maintain a roughly equal sex balance within each batch, based on sex assignments from [EvidenceQC](#evidence-qc) ## Single-sample mode `GATKSVPipelineSingleSample.wdl` runs the pipeline on a single sample using a fixed reference panel. 
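+Submission follows the same `cromshell` pattern shown in the Execution section above; a minimal sketch (the inputs JSON file name here is hypothetical):
+```
+# Hypothetical sketch: run the full single-sample pipeline for one sample against a
+# fixed reference panel, reusing the WDL dependency zip built earlier.
+> cromshell submit wdl/GATKSVPipelineSingleSample.wdl GATKSVPipelineSingleSample.my_sample.json cromwell_config.json wdl/dep.zip
+```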
An example reference panel containing 156 samples from the [NYGC 1000G Terra workspace](https://app.terra.bio/#workspaces/anvil-datastorage/1000G-high-coverage-2019) is provided with `inputs/GATKSVPipelineSingleSample.ref_panel_1kg.na12878.json`. -Custom reference panels can be generated by running `GATKSVPipelineBatch.wdl` and `trainGCNV.wdl` and using the outputs to replace the following single-sample workflow inputs: +Custom reference panels can be generated by running `GATKSVPipelineBatch.wdl` and `TrainGCNV.wdl` and using the outputs to replace the following single-sample workflow inputs: * `GATKSVPipelineSingleSample.ref_ped_file` : `batch.ped` - Manually created (see [data requirements](#requirements)) -* `GATKSVPipelineSingleSample.contig_ploidy_model_tar` : `batch-contig-ploidy-model.tar.gz` - gCNV contig ploidy model ([gCNV training](#gcnv-training)) -* `GATKSVPipelineSingleSample.gcnv_model_tars` : `batch-model-files-*.tar.gz` - gCNV model tarballs ([gCNV training](#gcnv-training)) -* `GATKSVPipelineSingleSample.ref_pesr_disc_files` - `sample.disc.txt.gz` - Paired-end evidence files ([Module 00a](#module00a)) -* `GATKSVPipelineSingleSample.ref_pesr_split_files` - `sample.split.txt.gz` - Split read evidence files ([Module 00a](#module00a)) -* `GATKSVPipelineSingleSample.ref_panel_bincov_matrix`: `batch.RD.txt.gz` - Read counts matrix ([Module 00c](#module00c)) -* `GATKSVPipelineSingleSample.ref_panel_del_bed` : `batch.DEL.bed.gz` - Depth deletion calls ([Module 00c](#module00c)) -* `GATKSVPipelineSingleSample.ref_panel_dup_bed` : `batch.DUP.bed.gz` - Depth duplication calls ([Module 00c](#module00c)) +* `GATKSVPipelineSingleSample.contig_ploidy_model_tar` : `batch-contig-ploidy-model.tar.gz` - gCNV contig ploidy model ([TrainGCNV](#gcnv-training)) +* `GATKSVPipelineSingleSample.gcnv_model_tars` : `batch-model-files-*.tar.gz` - gCNV model tarballs ([TrainGCNV](#gcnv-training)) +* `GATKSVPipelineSingleSample.ref_pesr_disc_files` - `sample.disc.txt.gz` - Paired-end evidence files ([GatherSampleEvidence](#gather-sample-evidence)) +* `GATKSVPipelineSingleSample.ref_pesr_split_files` - `sample.split.txt.gz` - Split read evidence files ([GatherSampleEvidence](#gather-sample-evidence)) +* `GATKSVPipelineSingleSample.ref_panel_bincov_matrix`: `batch.RD.txt.gz` - Read counts matrix ([GatherBatchEvidence](#gather-batch-evidence)) +* `GATKSVPipelineSingleSample.ref_panel_del_bed` : `batch.DEL.bed.gz` - Depth deletion calls ([GatherBatchEvidence](#gather-batch-evidence)) +* `GATKSVPipelineSingleSample.ref_panel_dup_bed` : `batch.DUP.bed.gz` - Depth duplication calls ([GatherBatchEvidence](#gather-batch-evidence)) * `GATKSVPipelineSingleSample.ref_samples` - Reference panel sample IDs -* `GATKSVPipelineSingleSample.ref_std_manta_vcfs` - `std_XXX.manta.sample.vcf.gz` - Standardized Manta VCFs ([Module 00c](#module00c)) -* `GATKSVPipelineSingleSample.ref_std_melt_vcfs` - `std_XXX.melt.sample.vcf.gz` - Standardized Melt VCFs ([Module 00c](#module00c)) -* `GATKSVPipelineSingleSample.ref_std_wham_vcfs` - `std_XXX.wham.sample.vcf.gz` - Standardized Wham VCFs ([Module 00c](#module00c)) -* `GATKSVPipelineSingleSample.cutoffs` : `batch.cutoffs` - Filtering cutoffs ([Module 03](#module03)) -* `GATKSVPipelineSingleSample.genotype_pesr_pesr_sepcutoff` : `genotype_pesr.pesr_sepcutoff.txt` - Genotyping cutoffs ([Module 04](#module04)) -* `GATKSVPipelineSingleSample.genotype_pesr_depth_sepcutoff` : `genotype_pesr.depth_sepcutoff.txt` - Genotyping cutoffs ([Module 04](#module04)) -* 
`GATKSVPipelineSingleSample.genotype_depth_pesr_sepcutoff` : `genotype_depth.pesr_sepcutoff.txt` - Genotyping cutoffs ([Module 04](#module04))
-* `GATKSVPipelineSingleSample.genotype_depth_depth_sepcutoff` : `genotype_depth.depth_sepcutoff.txt` - Genotyping cutoffs ([Module 04](#module04))
-* `GATKSVPipelineSingleSample.PE_metrics` : `pe_metric_file.txt` - Paired-end evidence genotyping metrics ([Module 04](#module04))
-* `GATKSVPipelineSingleSample.SR_metrics` : `sr_metric_file.txt` - Split read evidence genotyping metrics ([Module 04](#module04))
-* `GATKSVPipelineSingleSample.ref_panel_vcf` : `batch.cleaned.vcf.gz` - Final output VCF ([Module 05/06](#module0506))
+* `GATKSVPipelineSingleSample.ref_std_manta_vcfs` - `std_XXX.manta.sample.vcf.gz` - Standardized Manta VCFs ([GatherBatchEvidence](#gather-batch-evidence))
+* `GATKSVPipelineSingleSample.ref_std_melt_vcfs` - `std_XXX.melt.sample.vcf.gz` - Standardized MELT VCFs ([GatherBatchEvidence](#gather-batch-evidence))
+* `GATKSVPipelineSingleSample.ref_std_wham_vcfs` - `std_XXX.wham.sample.vcf.gz` - Standardized Wham VCFs ([GatherBatchEvidence](#gather-batch-evidence))
+* `GATKSVPipelineSingleSample.cutoffs` : `batch.cutoffs` - Filtering cutoffs ([FilterBatch](#filter-batch))
+* `GATKSVPipelineSingleSample.genotype_pesr_pesr_sepcutoff` : `genotype_pesr.pesr_sepcutoff.txt` - Genotyping cutoffs ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.genotype_pesr_depth_sepcutoff` : `genotype_pesr.depth_sepcutoff.txt` - Genotyping cutoffs ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.genotype_depth_pesr_sepcutoff` : `genotype_depth.pesr_sepcutoff.txt` - Genotyping cutoffs ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.genotype_depth_depth_sepcutoff` : `genotype_depth.depth_sepcutoff.txt` - Genotyping cutoffs ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.PE_metrics` : `pe_metric_file.txt` - Paired-end evidence genotyping metrics ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.SR_metrics` : `sr_metric_file.txt` - Split read evidence genotyping metrics ([GenotypeBatch](#genotype-batch))
+* `GATKSVPipelineSingleSample.ref_panel_vcf` : `batch.cleaned.vcf.gz` - Final output VCF ([MakeCohortVcf](#make-cohort-vcf))

## gCNV Training
@@ -199,9 +201,11 @@ Both the cohort and single-sample modes use the GATK gCNV depth calling pipeline

## Module Descriptions

-The following sections briefly describe each module and highlights inter-dependent input/output files. Note that input/output mappings can also be gleaned from `GATKSVPipelineBatch.wdl`, and example input files for each module can be found in `/test`.
+The following sections briefly describe each module and highlight inter-dependent input/output files. Note that input/output mappings can also be gleaned from `GATKSVPipelineBatch.wdl`, and example input files for each module can be found in `/test_input_templates`.
+
+## GatherSampleEvidence
+*Formerly Module00a*
-## Module 00a
Runs raw evidence collection on each sample.

Note: a list of sample IDs must be provided. Refer to the [sample ID requirements](#sampleids) for specifications of allowable sample IDs. IDs that do not meet these requirements may cause errors.
@@ -217,7 +221,9 @@ Note: a list of sample IDs must be provided. Refer to the [sample ID requirement
* B-allele fraction (BAF) file

-## Module 00b
+## EvidenceQC
+*Formerly Module00b*
+
Runs ploidy estimation, dosage scoring, and optionally VCF QC.
The results from this module can be used for QC and batching. For large cohorts, we recommend dividing samples into smaller batches (~500 samples) with ~1:1 male:female ratio. Refer to the [Batching](#batching) section for further guidance on creating batches. @@ -225,54 +231,57 @@ For large cohorts, we recommend dividing samples into smaller batches (~500 samp We also recommend using sex assignments generated from the ploidy estimates and incorporating them into the PED file. #### Prerequisites: -* [Module 00a](#module00a) +* [GatherSampleEvidence](#gather-sample-evidence) #### Inputs: -* Read count files ([Module 00a](#module00a)) -* (Optional) SV call VCFs ([Module 00a](#module00a)) +* Read count files ([GatherSampleEvidence](#gather-sample-evidence)) +* (Optional) SV call VCFs ([GatherSampleEvidence](#gather-sample-evidence)) #### Outputs: * Per-sample dosage scores with plots +* Median coverage per sample * Ploidy estimates, sex assignments, with plots * (Optional) Outlier samples detected by call counts #### Preliminary Sample QC -The purpose of sample filtering at this stage after Module00b is to prevent very poor quality samples from interfering with the results for the rest of the callset. In general, samples that are borderline are okay to leave in, but you should choose filtering thresholds to suit the needs of your cohort and study. There will be future opportunities (as part of [Module03](#module03)) for filtering before the joint genotyping stage if necessary. Here are a few of the basic QC checks that we recommend: +The purpose of sample filtering at this stage after EvidenceQC is to prevent very poor quality samples from interfering with the results for the rest of the callset. In general, samples that are borderline are okay to leave in, but you should choose filtering thresholds to suit the needs of your cohort and study. There will be future opportunities (as part of [FilterBatch](#filter-batch)) for filtering before the joint genotyping stage if necessary. Here are a few of the basic QC checks that we recommend: * Look at the X and Y ploidy plots, and check that sex assignments match your expectations. If there are discrepancies, check for sample swaps and update your PED file before proceeding. * Look at the dosage score (WGD) distribution and check that it is centered around 0 (the distribution of WGD for PCR- samples is expected to be slightly lower than 0, and the distribution of WGD for PCR+ samples is expected to be slightly greater than 0. Refer to the [gnomAD-SV paper](https://doi.org/10.1038/s41586-020-2287-8) for more information on WGD score). Optionally filter outliers. * Look at the low outliers for each SV caller (samples with much lower than typical numbers of SV calls per contig for each caller). An empty low outlier file means there were no outliers below the median and no filtering is necessary. Check that no samples had zero calls. * Look at the high outliers for each SV caller and optionally filter outliers; samples with many more SV calls than average may be poor quality. -## gCNV Training -Trains a gCNV model for use in [Module 00c](#module00c). The WDL can be found at `/gcnv/trainGCNV.wdl`. +## TrainGCNV +Trains a gCNV model for use in [GatherBatchEvidence](#gather-batch-evidence). The WDL can be found at `/wdl/TrainGCNV.wdl`. 
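+A hedged sketch of a typical per-batch submission, reusing the `cromshell` pattern from the Execution section (the inputs JSON file name is hypothetical; a test input template lives at `test_input_templates/TrainGCNV/TrainGCNV.json.tmpl`):
+```
+# Hypothetical sketch: train the gCNV model once per batch; the resulting contig
+# ploidy and gCNV model tarballs are then passed to GatherBatchEvidence.
+> cromshell submit wdl/TrainGCNV.wdl TrainGCNV.batch1.json cromwell_config.json wdl/dep.zip
+```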
#### Prerequisites:
-* [Module 00a](#module00a)
-* (Recommended) [Module 00b](#module00b)
+* [GatherSampleEvidence](#gather-sample-evidence)
+* (Recommended) [EvidenceQC](#evidence-qc)

#### Inputs:
-* Read count files ([Module 00a](#module00a))
+* Read count files ([GatherSampleEvidence](#gather-sample-evidence))

#### Outputs:
* Contig ploidy model tarball
* gCNV model tarballs

-## Module 00c
+## GatherBatchEvidence
+*Formerly Module00c*
+
Runs CNV callers (cn.MOPS, GATK gCNV) and combines single-sample raw evidence into a batch. See [above](#cohort-mode) for more information on batching.

#### Prerequisites:
-* [Module 00a](#module00a)
-* (Recommended) [Module 00b](#module00b)
-* gCNV training
+* [GatherSampleEvidence](#gather-sample-evidence)
+* (Recommended) [EvidenceQC](#evidence-qc)
+* [gCNV training](#gcnv-training)

#### Inputs:
-* PED file (updated with [Module 00b](#module00b) sex assignments, including sex = 0 for sex aneuploidies. Calls will not be made on sex chromosomes when sex = 0 in order to avoid generating many confusing calls or upsetting normalized copy numbers for the batch.)
+* PED file (updated with [EvidenceQC](#evidence-qc) sex assignments, including sex = 0 for sex aneuploidies. Calls will not be made on sex chromosomes when sex = 0 in order to avoid generating many confusing calls or upsetting normalized copy numbers for the batch.)
* Per-sample GVCFs generated with HaplotypeCaller (`gvcfs` input), or a jointly-genotyped VCF (position-sharded, `snp_vcfs` input or `snp_vcfs_shard_list` input). The jointly-genotyped VCF may contain multi-allelic sites and indels, but only biallelic SNVs will be used by the pipeline. We recommend shards of 10 GB or less to lower compute time and resources.
-* Read count, BAF, PE, and SR files ([Module 00a](#module00a))
-* Caller VCFs ([Module 00a](#module00a))
-* Contig ploidy model and gCNV model files (gCNV training)
+* Read count, BAF, PE, and SR files ([GatherSampleEvidence](#gather-sample-evidence))
+* Caller VCFs ([GatherSampleEvidence](#gather-sample-evidence))
+* Contig ploidy model and gCNV model files ([gCNV training](#gcnv-training))

#### Outputs:
* Combined read count matrix, SR, PE, and BAF files
@@ -282,47 +291,53 @@ Runs CNV callers (cn.MOPS, GATK gCNV) and combines single-sample raw evidence int
* (Optional) Evidence QC plots

-## Module 01
+## ClusterBatch
+*Formerly Module01*
+
Clusters SV calls across a batch.

#### Prerequisites:
-* [Module 00c](#module00c)
+* [GatherBatchEvidence](#gather-batch-evidence)

#### Inputs:
-* Standardized call VCFs ([Module 00c](#module00c))
-* Depth-only (DEL/DUP) calls ([Module 00c](#module00c))
+* Standardized call VCFs ([GatherBatchEvidence](#gather-batch-evidence))
+* Depth-only (DEL/DUP) calls ([GatherBatchEvidence](#gather-batch-evidence))

#### Outputs:
* Clustered SV VCFs
* Clustered depth-only call VCF

-## Module 02
+## GenerateBatchMetrics
+*Formerly Module02*
+
Generates variant metrics for filtering.
#### Prerequisites:
-* [Module 01](#module01)
+* [ClusterBatch](#cluster-batch)

#### Inputs:
-* Combined read count matrix, SR, PE, and BAF files ([Module 00c](#module00c))
-* Per-sample median coverage estimates ([Module 00c](#module00c))
-* Clustered SV VCFs ([Module 01](#module01))
-* Clustered depth-only call VCF ([Module 01](#module01))
+* Combined read count matrix, SR, PE, and BAF files ([GatherBatchEvidence](#gather-batch-evidence))
+* Per-sample median coverage estimates ([GatherBatchEvidence](#gather-batch-evidence))
+* Clustered SV VCFs ([ClusterBatch](#cluster-batch))
+* Clustered depth-only call VCF ([ClusterBatch](#cluster-batch))

#### Outputs:
* Metrics file

-## Module 03
+## FilterBatch
+*Formerly Module03*
+
Filters poor-quality variants and outlier samples.

#### Prerequisites:
-* [Module 02](#module02)
+* [GenerateBatchMetrics](#generate-batch-metrics)

#### Inputs:
* Batch PED file
-* Metrics file ([Module 02](#module02))
-* Clustered SV and depth-only call VCFs ([Module 01](#module01))
+* Metrics file ([GenerateBatchMetrics](#generate-batch-metrics))
+* Clustered SV and depth-only call VCFs ([ClusterBatch](#cluster-batch))

#### Outputs:
* Filtered SV (non-depth-only a.k.a. "PESR") VCF with outlier samples excluded
@@ -331,32 +346,35 @@ Filters poor-quality variants and outlier samples.
* PED file with outlier samples excluded

-## Merge Cohort VCFs
-Combines filtered variants across batches. The WDL can be found at: `/wdl/MergeCohortVcfs.wdl`.
+## MergeBatchSites
+*Formerly MergeCohortVcfs*
+
+Combines filtered variants across batches. The WDL can be found at: `/wdl/MergeBatchSites.wdl`.

#### Prerequisites:
-* [Module 03](#module03)
+* [FilterBatch](#filter-batch)

#### Inputs:
-* List of filtered PESR VCFs ([Module 03](#module03))
-* List of filtered depth VCFs ([Module 03](#module03))
+* List of filtered PESR VCFs ([FilterBatch](#filter-batch))
+* List of filtered depth VCFs ([FilterBatch](#filter-batch))

#### Outputs:
* Combined cohort PESR and depth VCFs
-* Cohort and clustered depth variant BED files

-## Module 04
+## GenotypeBatch
+*Formerly Module04*
+
Genotypes a batch of samples across unfiltered variants combined across all batches.

#### Prerequisites:
-* [Module 03](#module03)
-* Merge Cohort VCFs
+* [FilterBatch](#filter-batch)
+* [MergeBatchSites](#merge-batch-sites)

#### Inputs:
-* Batch PESR and depth VCFs ([Module 03](#module03))
-* Cohort PESR and depth VCFs (Merge Cohort VCFs)
-* Batch read count, PE, and SR files ([Module 00c](#module00c))
+* Batch PESR and depth VCFs ([FilterBatch](#filter-batch))
+* Cohort PESR and depth VCFs ([MergeBatchSites](#merge-batch-sites))
+* Batch read count, PE, and SR files ([GatherBatchEvidence](#gather-batch-evidence))

#### Outputs:
* Filtered SV (non-depth-only a.k.a. "PESR") VCF with outlier samples excluded
@@ -367,40 +385,43 @@ Genotypes a batch of samples across unfiltered variants combined across all batc
* (Optional) Depth re-genotyping intervals list

-## Module 04b
+## RegenotypeCNVs
+*Formerly Module04b*
+
Re-genotypes probable mosaic variants across multiple batches.
#### Prerequisites:
-* [Module 04](#module04)
+* [GenotypeBatch](#genotype-batch)

#### Inputs:
-* Per-sample median coverage estimates ([Module 00c](#module00c))
-* Pre-genotyping depth VCFs ([Module 03](#module03))
-* Batch PED files ([Module 03](#module03))
-* Clustered depth variant BED file (Merge Cohort VCFs)
-* Cohort depth VCF (Merge Cohort VCFs)
-* Genotyped depth VCFs ([Module 04](#module04))
-* Genotyped depth RD cutoffs file ([Module 04](#module04))
+* Per-sample median coverage estimates ([GatherBatchEvidence](#gather-batch-evidence))
+* Pre-genotyping depth VCFs ([FilterBatch](#filter-batch))
+* Batch PED files ([FilterBatch](#filter-batch))
+* Cohort depth VCF ([MergeBatchSites](#merge-batch-sites))
+* Genotyped depth VCFs ([GenotypeBatch](#genotype-batch))
+* Genotyped depth RD cutoffs file ([GenotypeBatch](#genotype-batch))

#### Outputs:
* Re-genotyped depth VCFs

-## Module 05/06
+## MakeCohortVcf
+*Formerly Module0506*
+
Combines variants across multiple batches, resolves complex variants, re-genotypes, and performs final VCF clean-up.

#### Prerequisites:
-* [Module 04](#module04)
-* (Optional) [Module 04b](#module04b)
+* [GenotypeBatch](#genotype-batch)
+* (Optional) [RegenotypeCNVs](#regenotype-cnvs)

#### Inputs:
-* RD, PE and SR file URIs ([Module 00c](#module00c))
-* Batch filtered PED file URIs ([Module 03](#module03))
-* Genotyped PESR VCF URIs ([Module 04](#module04))
-* Genotyped depth VCF URIs ([Module 04](#module04) or [04b](#module04b))
-* SR pass variant file URIs ([Module 04](#module04))
-* SR fail variant file URIs ([Module 04](#module04))
-* Genotyping cutoff file URIs ([Module 04](#module04))
+* RD, PE and SR file URIs ([GatherBatchEvidence](#gather-batch-evidence))
+* Batch filtered PED file URIs ([FilterBatch](#filter-batch))
+* Genotyped PESR VCF URIs ([GenotypeBatch](#genotype-batch))
+* Genotyped depth VCF URIs ([GenotypeBatch](#genotype-batch) or [RegenotypeCNVs](#regenotype-cnvs))
+* SR pass variant file URIs ([GenotypeBatch](#genotype-batch))
+* SR fail variant file URIs ([GenotypeBatch](#genotype-batch))
+* Genotyping cutoff file URIs ([GenotypeBatch](#genotype-batch))
* Batch IDs
* Sample ID list URIs

@@ -423,7 +444,9 @@ gs://gatk-sv-resources-public/hg38/v0/sv-resources/ref-panel/1KG/v2/mingq/1KGP_2
* FilterOutlierSamples - remove outlier samples with unusually high or low number of SVs
* FilterCleanupQualRecalibration - sanitize filter columns and recalibrate variant QUAL scores for easier interpretation

-## Module 08 (in development)
+## AnnotateVcf (in development)
+*Formerly Module08Annotation*
+
Add annotations, such as the inferred function and allele frequencies of variants, to the final VCF.

Annotation methods include:
@@ -445,7 +468,7 @@ Visualization methods include:
### VM runs out of memory or disk
* Default pipeline settings are tuned for batches of 100 samples. Larger batches or cohorts may require additional VM resources. Most runtime attributes can be modified through the `RuntimeAttr` inputs.
These are formatted like this in the json: ``` -"ModuleX.runtime_attr_override": { +"MyWorkflow.runtime_attr_override": { "disk_gb": 100, "mem_gb": 16 }, diff --git a/input_templates/GATKSVPipelineBatch.ref_panel_1kg.json.tmpl b/input_templates/GATKSVPipelineBatch.ref_panel_1kg.json.tmpl index bb140efcb..4c731b062 100644 --- a/input_templates/GATKSVPipelineBatch.ref_panel_1kg.json.tmpl +++ b/input_templates/GATKSVPipelineBatch.ref_panel_1kg.json.tmpl @@ -8,7 +8,7 @@ "GATKSVPipelineBatch.batch": {{ ref_panel.batch_name | tojson }}, "GATKSVPipelineBatch.ped_file": {{ ref_panel.ped_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.requester_pays_crams": {{ ref_panel.requester_pays_crams | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.requester_pays_crams": {{ ref_panel.requester_pays_crams | tojson }}, "GATKSVPipelineBatch.sample_ids": {{ ref_panel.samples | tojson }}, "GATKSVPipelineBatch.bam_or_cram_files": {{ ref_panel.bam_or_cram_files | tojson }}, "GATKSVPipelineBatch.gvcfs": {{ ref_panel.gvcfs | tojson }}, @@ -43,15 +43,15 @@ "GATKSVPipelineBatch.primary_contigs_fai" : {{ reference_resources.primary_contigs_fai | tojson }}, "GATKSVPipelineBatch.genome_file" : {{ reference_resources.genome_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.reference_version": {{ reference_resources.reference_version | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.reference_version": {{ reference_resources.reference_version | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, - "GATKSVPipelineBatch.Module00b.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, - "GATKSVPipelineBatch.Module00b.run_vcf_qc": "false", + "GATKSVPipelineBatch.EvidenceQC.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, + "GATKSVPipelineBatch.EvidenceQC.run_vcf_qc": "false", "GATKSVPipelineBatch.GATKSVPipelinePhase1.unpadded_intervals_file" : {{ reference_resources.unpadded_intervals_file | tojson }}, "GATKSVPipelineBatch.GATKSVPipelinePhase1.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, @@ -107,29 +107,29 @@ "GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_table" : {{ ref_panel.outlier_cutoff_table | tojson }}, "GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_nIQR": "999999", - 
"GATKSVPipelineBatch.Module04.n_RD_genotype_bins": "100000", - "GATKSVPipelineBatch.Module04.n_per_split": "5000", - "GATKSVPipelineBatch.Module04.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module04.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, - "GATKSVPipelineBatch.Module04.reference_build": "hg38", - "GATKSVPipelineBatch.Module04.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - - "GATKSVPipelineBatch.Module04b.n_RdTest_bins": "100000", - "GATKSVPipelineBatch.Module04b.n_per_split": "5000", - - "GATKSVPipelineBatch.Module0506.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - "GATKSVPipelineBatch.Module0506.empty_file" : {{ reference_resources.empty_file | tojson }}, - "GATKSVPipelineBatch.Module0506.cytobands": {{ reference_resources.cytobands | tojson }}, - "GATKSVPipelineBatch.Module0506.mei_bed": {{ reference_resources.mei_bed | tojson }}, - "GATKSVPipelineBatch.Module0506.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module0506.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module0506.min_sr_background_fail_batches": 0.5, - "GATKSVPipelineBatch.Module0506.max_shards_per_chrom_clean_vcf_step1": 200, - "GATKSVPipelineBatch.Module0506.min_records_per_shard_clean_vcf_step1": 5000, - "GATKSVPipelineBatch.Module0506.samples_per_clean_vcf_step2_shard": 100, - "GATKSVPipelineBatch.Module0506.random_seed": 0, - "GATKSVPipelineBatch.Module0506.max_shards_per_chrom": 100, - "GATKSVPipelineBatch.Module0506.min_variants_per_shard": 30, + "GATKSVPipelineBatch.GenotypeBatch.n_RD_genotype_bins": "100000", + "GATKSVPipelineBatch.GenotypeBatch.n_per_split": "5000", + "GATKSVPipelineBatch.GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "GATKSVPipelineBatch.GenotypeBatch.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, + "GATKSVPipelineBatch.GenotypeBatch.reference_build": "hg38", + "GATKSVPipelineBatch.GenotypeBatch.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + + "GATKSVPipelineBatch.RegenotypeCNVs.n_RdTest_bins": "100000", + "GATKSVPipelineBatch.RegenotypeCNVs.n_per_split": "5000", + + "GATKSVPipelineBatch.MakeCohortVcf.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.empty_file" : {{ reference_resources.empty_file | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.cytobands": {{ reference_resources.cytobands | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.mei_bed": {{ reference_resources.mei_bed | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.min_sr_background_fail_batches": 0.5, + "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, + "GATKSVPipelineBatch.MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, + "GATKSVPipelineBatch.MakeCohortVcf.samples_per_clean_vcf_step2_shard": 100, + "GATKSVPipelineBatch.MakeCohortVcf.random_seed": 0, + "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom": 100, + "GATKSVPipelineBatch.MakeCohortVcf.min_variants_per_shard": 30, "GATKSVPipelineBatch.BatchQC.qc_definitions" : "gs://gatk-sv-resources-public/test/batch/batch_sv.test_large.qc_definitions.tsv", diff --git 
a/input_templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl b/input_templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl index 69c13e0bc..8591112a5 100644 --- a/input_templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl +++ b/input_templates/terra_workspaces/cohort_mode/cohort_mode_workspace_dashboard.md.tmpl @@ -28,7 +28,7 @@ The following cohort-level or batch-level inputs are also required: |---------|--------|--------------| |`String`|`sample_set_id`|Batch identifier| |`String`|`sample_set_set_id`|Cohort identifier| -|`File`|`cohort_ped_file`|Path to the GCS location of a family structure definitions file in [PED format](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format). Sex aneuploidies (detected in Module00b) should be entered as sex = 0.| +|`File`|`cohort_ped_file`|Path to the GCS location of a family structure definitions file in [PED format](https://gatk.broadinstitute.org/hc/en-us/articles/360035531972-PED-Pedigree-format). Sex aneuploidies (detected in `02-EvidenceQC`) should be entered as sex = 0.| |`Array[File]`|`snp_vcfs`|Paths to the GCS locations of a jointly-genotyped, position-sharded SNP VCF (may contain indels and multiallelic sites). Alternatively, provide a GCS path to a text file containing one SNP VCF shard path per line using the `File` input `snp_vcfs_shard_list`.**| **Only one of `gvcf` or `snp_vcfs` or `snp_vcfs_shard_list` is required @@ -49,23 +49,21 @@ The following are the main pipeline outputs. For more information on the outputs The following workflows are included in this workspace, to be executed in this order: -1. `module00a`: Per-sample SV evidence collection, including calls from a configurable set of algorithms (Delly, Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). -2. `module00b`: Dosage bias scoring and ploidy estimation, run on preliminary batches -3. `train-gCNV`: Per-batch training of a gCNV model for use in `module00c` -4. `module00c`: Per-batch copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation -5. `module01`: Per-batch variant clustering -6. `module02`: Per-batch variant filtering, metric generation -7. `module03`: Per-batch variant filtering; outlier exclusion -8. (Skip for a single batch) `merge-cohort-vcfs`: Site merging of SVs discovered across batches, run on a cohort-level `sample_set_set` -9. `module04`: Per-batch genotyping of all sites in the cohort. Use `module04_single_batch` if you only have one batch. -10. `module04b`: Cohort-level genotype refinement of some depth calls. Use `module04b_single_batch` if you only have one batch. -11. `module0506`: Cohort-level cross-batch integration; complex variant resolution and re-genotyping; VCF cleanup. Use `module0506_single_batch` if you only have one batch. -12. `module08`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets. Use `module08_single_batch` if you only have one batch. +1. `01-GatherSampleEvidence`: Per-sample SV evidence collection, including calls from a configurable set of algorithms (Delly, Manta, MELT, and Wham), read depth (RD), split read positions (SR), and discordant pair positions (PE). +2. `02-EvidenceQC`: Dosage bias scoring and ploidy estimation, run on preliminary batches +3. 
`03-TrainGCNV`: Per-batch training of a gCNV model for use in `04-GatherBatchEvidence` +4. `04-GatherBatchEvidence`: Per-batch copy number variant calling using cn.MOPS and GATK gCNV; B-allele frequency (BAF) generation; call and evidence aggregation +5. `05-ClusterBatch`: Per-batch variant clustering +6. `06-GenerateBatchMetrics`: Per-batch variant filtering, metric generation +7. `07-FilterBatch`: Per-batch variant filtering; outlier exclusion +8. (Skip for a single batch) `08-MergeBatchSites`: Site merging of SVs discovered across batches, run on a cohort-level `sample_set_set` +9. `09-GenotypeBatch`: Per-batch genotyping of all sites in the cohort. Use `09-GenotypeBatch_SingleBatch` if you only have one batch. +10. `10-RegenotypeCNVs`: Cohort-level genotype refinement of some depth calls. Use `10-RegenotypeCNVs_SingleBatch` if you only have one batch. +11. `11-MakeCohortVcf`: Cohort-level cross-batch integration; complex variant resolution and re-genotyping; VCF cleanup. Use `11-MakeCohortVcf_SingleBatch` if you only have one batch. +12. `12-AnnotateVcf`: Cohort VCF annotations, including functional annotation, allele frequency (AF) annotation, and AF annotation with external population callsets. Use `12-AnnotateVcf_SingleBatch` if you only have one batch. Additional modules, such as those for filtering and visualization, are under development. They are not included in this workspace at this time, but the source code can be found in the [GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv). -The metrics workflows in this workspace (`module01-metrics`, `module02-metrics`, etc.) are provided for testing purposes and do not need to be executed. - For detailed instructions on running the pipeline in Terra, see **Step-by-step instructions** below. ### How many samples can I process at once? @@ -102,12 +100,12 @@ Refer to [the Sample ID Requirements section of the README](https://github.com/b The same requirements apply to family IDs in the PED file, batch IDs (`sample_set_id`), and the cohort ID (`sample_set_set_id`). -Sample IDs are provided to `module00a` directly and need not match sample names from the BAM/CRAM headers or GVCFs. We recommend transforming sample IDs using [this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py) prior to uploading your sample data table. (Currently, sample IDs can be replaced again in `module00c`.) The following files will need to be updated with the transformed sample IDs: +Sample IDs are provided to `01-GatherSampleEvidence` directly and need not match sample names from the BAM/CRAM headers or GVCFs. We recommend transforming sample IDs using [this script](https://github.com/talkowski-lab/gnomad_sv_v3/blob/master/sample_id/convert_sample_ids.py) prior to uploading your sample data table. (Currently, sample IDs can be replaced again in `04-GatherBatchEvidence`.) The following files will need to be updated with the transformed sample IDs: * Sample data table (for Terra) * PED file * Sample set membership file (for Terra) -If using a SNP VCF in `module00c`, it does not need to be re-headered; simply provide the `vcf_samples` argument. An easy way to provide these sample IDs is to add a column `vcf_sample_id` to the sample TSV you upload to the workspace (see **Workspace setup** step 2 below), then reference this column as `this.samples.vcf_sample_id` in the `module00c` inputs. 
+If using a SNP VCF in `04-GatherBatchEvidence`, it does not need to be re-headered; simply provide the `vcf_samples` argument. An easy way to provide these sample IDs is to add a column `vcf_sample_id` to the sample TSV you upload to the workspace (see **Workspace setup** step 2 below), then reference this column as `this.samples.vcf_sample_id` in the `04-GatherBatchEvidence` inputs. ### Workspace setup @@ -117,7 +115,7 @@ If using a SNP VCF in `module00c`, it does not need to be re-headered; simply pr 2. In your new workspace, delete the sample, sample_set, and sample_set_set data tables. To do this, go to the *Data* tab of the workspace. Select the `sample` data table. Check the box to select all samples. Click the 3 blue dots that appear, and select "Delete Data". Confirm when prompted. Repeat for any remaining samples and for any remaining entries in the `sample_set` or `sample_set_set` tables. deleting data tables -3. Create and upload a new sample data table for your samples. This should be a tab-separated file (.tsv) with one line per sample, as well as a header (first) line. It should contain the columns `entity:sample_id` (first column), `bam_or_cram_file`, and `requester_pays_cram` at minimum. If you are using GVCFs instead of a joint SNP VCF in Module00c, there should be an additional `gvcf` column (if using a joint SNP VCF, you will provide this directly in `module00c` later on). See the **Required inputs** section above for more information on these inputs. For an example sample data table, refer to the sample data table for the 1000 Genomes samples in this workspace [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl). To upload the TSV file, navigate to the *Data* tab of the workspace and click the `+` button next to "Tables". +3. Create and upload a new sample data table for your samples. This should be a tab-separated file (.tsv) with one line per sample, as well as a header (first) line. It should contain the columns `entity:sample_id` (first column), `bam_or_cram_file`, and `requester_pays_cram` at minimum. If you are using GVCFs instead of a joint SNP VCF in `04-GatherBatchEvidence`, there should be an additional `gvcf` column (if using a joint SNP VCF, you will provide this directly in `04-GatherBatchEvidence` later on). See the **Required inputs** section above for more information on these inputs. For an example sample data table, refer to the sample data table for the 1000 Genomes samples in this workspace [here in the GATK-SV GitHub repository](https://github.com/broadinstitute/gatk-sv/blob/master/input_templates/terra_workspaces/cohort_mode/samples_1kgp.tsv.tmpl). To upload the TSV file, navigate to the *Data* tab of the workspace and click the `+` button next to "Tables". uploading a TSV data table 4. Edit the `cohort_ped_file` item in the Workspace Data table (as shown in the screenshot below) to provide the Google URI to the PED file for your cohort (make sure to share it with your Terra proxy account!). @@ -140,66 +138,66 @@ To create batches (in the `sample_set` table), the easiest way is to upload a ta * Another option is to use the `fiss mop` API call to delete all files that do not appear in one of the Terra data tables. Always ensure that you are completely done with a step and you will not need to return before using this option, as it will break call-caching. 
See [this blog post](https://terra.bio/deleting-intermediate-workflow-outputs/) for more details. This can also be done [via the command line](https://github.com/broadinstitute/fiss/wiki/MOP:-reducing-your-cloud-storage-footprint). * If your workflow fails, check the job manager for the error message. Most issues can be resolved by increasing the memory or disk. Do not delete workflow log files until you are done troubleshooting. If call-caching is enabled, do not delete any files from the failed workflow until you have run it successfully. * To display run costs, see [this article](https://support.terra.bio/hc/en-us/articles/360037862771#h_01EX5ED53HAZ59M29DRCG24CXY) for one-time setup instructions for non-Broad users. -* If you only have one batch, you will need to skip `merge-cohort-vcfs` and use the single-batch versions of all workflows after `module04`. +* If you only have one batch, you will need to skip `08-MergeBatchSites` and use the single-batch versions of all workflows from `09-GenotypeBatch` onward. -#### Module00a +#### 01-GatherSampleEvidence -Read the full Module00a documentation [here](https://github.com/broadinstitute/gatk-sv#module-00a). +Read the full GatherSampleEvidence documentation [here](https://github.com/broadinstitute/gatk-sv#gather-sample-evidence). * This workflow runs on a per-sample level, but you can launch many (a few hundred) samples at once, in arbitrary partitions. Make sure to try just one sample first though! -* It is normal for a few samples in a cohort to run out of memory during Wham SV calling, so we recommend enabling auto-retry for out-of-memory errors for `module00a` only. Before you launch the workflow, click the checkbox reading "Retry with more memory" and set the memory retry factor to 1.8. This action must be performed each time you launch a `module00a` job. -* Please note that most large published joint call sets produced by GATK-SV, including gnomAD-SV, included the tool MELT, a state-of-the-art mobile element insertion (MEI) detector, as part of the pipeline. Due to licensing restrictions, we cannot provide a public docker image for this algorithm. The `module00a` workflow does not use MELT as one of the SV callers by default, which will result in less sensitivity to MEI calls. In order to use MELT, you will need to build your own docker image, share it with your Terra proxy account, enter it in the `melt_docker` input in the `module00a` configuration (as a string, surrounded by double-quotes), and then click "Save". No further changes are necessary beyond `module00a`. -* Successful runs of `module00a` will automatically delete any BAM files generated during the workflow, but BAM files will not be deleted if the run fails. Since BAM files are large, we recommend deleting them to save on storage costs, but only after fixing and re-running the failed workflow, so that it will call-cache. +* It is normal for a few samples in a cohort to run out of memory during Wham SV calling, so we recommend enabling auto-retry for out-of-memory errors for `01-GatherSampleEvidence` only. Before you launch the workflow, click the checkbox reading "Retry with more memory" and set the memory retry factor to 1.8. This action must be performed each time you launch a `01-GatherSampleEvidence` job. +* Please note that most large published joint call sets produced by GATK-SV, including gnomAD-SV, included the tool MELT, a state-of-the-art mobile element insertion (MEI) detector, as part of the pipeline.
Due to licensing restrictions, we cannot provide a public docker image for this algorithm. The `01-GatherSampleEvidence` workflow does not use MELT as one of the SV callers by default, which will result in less sensitivity to MEI calls. In order to use MELT, you will need to build your own docker image, share it with your Terra proxy account, enter it in the `melt_docker` input in the `01-GatherSampleEvidence` configuration (as a string, surrounded by double-quotes), and then click "Save". No further changes are necessary beyond `01-GatherSampleEvidence`. +* Successful runs of `01-GatherSampleEvidence` will automatically delete any BAM files generated during the workflow, but BAM files will not be deleted if the run fails. Since BAM files are large, we recommend deleting them to save on storage costs, but only after fixing and re-running the failed workflow, so that it will call-cache. -#### Module00b +#### 02-EvidenceQC -Read the full Module00b documentation [here](https://github.com/broadinstitute/gatk-sv#module-00b). -* `module00b` is run on arbitrary cohort partitions of up to 500 samples. -* The outputs from Module00b can be used for [preliminary sample QC](https://github.com/broadinstitute/gatk-sv#preliminary-sample-qc) and [batching](https://github.com/broadinstitute/gatk-sv#batching) before moving on to TrainGCNV. +Read the full EvidenceQC documentation [here](https://github.com/broadinstitute/gatk-sv#evidence-qc). +* `02-EvidenceQC` is run on arbitrary cohort partitions of up to 500 samples. +* The outputs from `02-EvidenceQC` can be used for [preliminary sample QC](https://github.com/broadinstitute/gatk-sv#preliminary-sample-qc) and [batching](https://github.com/broadinstitute/gatk-sv#batching) before moving on to TrainGCNV. -#### TrainGCNV +#### 03-TrainGCNV Read the full TrainGCNV documentation [here](https://github.com/broadinstitute/gatk-sv#gcnv-training-1). -* By default, `train-gCNV` is configured to be run once per `sample_set` on 100 randomly-chosen samples from that set to create a gCNV model for each batch. -* Before running this workflow, create the batches (~100-500 samples) you will use for the rest of the pipeline based on sample coverage, WGD score (from `module00b`), and PCR status. These will likely not be the same as the batches you used for `module00b`. +* By default, `03-TrainGCNV` is configured to be run once per `sample_set` on 100 randomly-chosen samples from that set to create a gCNV model for each batch. +* Before running this workflow, create the batches (~100-500 samples) you will use for the rest of the pipeline based on sample coverage, WGD score (from `02-EvidenceQC`), and PCR status. These will likely not be the same as the batches you used for `02-EvidenceQC`. -#### Module00c +#### 04-GatherBatchEvidence -Read the full Module00c documentation [here](https://github.com/broadinstitute/gatk-sv#module-00c). -* Use the same `sample_set` definitions you used for `train-gCNV`. -* The default configuration for `module00c` in this workspace uses sample GVCFs. To use a position-sharded joint SNP VCF instead, delete the `gvcfs` input, provide your file(s) for `snp_vcfs`, and click "Save". The `snp_vcfs` argument should be formatted as an `Array[File]`, ie. `["gs://bucket/shard1.vcf.gz", "gs://bucket/shard2.vcf.gz"]`. Alternatively, provide the input `snp_vcfs_shard_list`: a GCS path to a text file containing one SNP VCF shard path per line (this option is useful if the `Array[File]` of `snp_vcfs` shards is too long for Terra to handle). 
+Read the full GatherBatchEvidence documentation [here](https://github.com/broadinstitute/gatk-sv#gather-batch-evidence). +* Use the same `sample_set` definitions you used for `03-TrainGCNV`. +* The default configuration for `04-GatherBatchEvidence` in this workspace uses sample GVCFs. To use a position-sharded joint SNP VCF instead, delete the `gvcfs` input, provide your file(s) for `snp_vcfs`, and click "Save". The `snp_vcfs` argument should be formatted as an `Array[File]`, ie. `["gs://bucket/shard1.vcf.gz", "gs://bucket/shard2.vcf.gz"]`. Alternatively, provide the input `snp_vcfs_shard_list`: a GCS path to a text file containing one SNP VCF shard path per line (this option is useful if the `Array[File]` of `snp_vcfs` shards is too long for Terra to handle). * If you are using GVCFs in a requester-pays bucket, you must provide the Terra billing project for the workspace to the `gvcf_gcs_project_for_requester_pays` argument as a string, surrounded by double-quotes. -#### Module01 and Module02 +#### 05-ClusterBatch and 06-GenerateBatchMetrics -Read the full documentation for these modules [here](https://github.com/broadinstitute/gatk-sv#module-01). -* Use the same `sample_set` definitions you used for `train-gCNV` and `module00c`. +Read the full documentation for these modules [here](https://github.com/broadinstitute/gatk-sv#cluster-batch). +* Use the same `sample_set` definitions you used for `03-TrainGCNV` and `04-GatherBatchEvidence`. -#### Module03 +#### 07-FilterBatch -Read the full Module03 documentation [here](https://github.com/broadinstitute/gatk-sv#module-03). -* Use the same `sample_set` definitions you used for `train-gCNV` through `module02`. +Read the full FilterBatch documentation [here](https://github.com/broadinstitute/gatk-sv#filter-batch). +* Use the same `sample_set` definitions you used for `03-TrainGCNV` through `06-GenerateBatchMetrics`. * The default value for `outlier_cutoff_nIQR`, which is used to filter samples that have an abnormal number of SV calls, is 10000. This essentially means that no samples are filtered. You should adjust this value depending on your scientific needs. -#### MergeCohortVcfs +#### 08-MergeBatchSites -Read the full MergeCohortVcfs documentation [here](https://github.com/broadinstitute/gatk-sv#merge-cohort-vcfs). +Read the full MergeBatchSites documentation [here](https://github.com/broadinstitute/gatk-sv#merge-batch-sites). * If you only have one batch, skip this workflow. -* For a multi-batch cohort, `merge-cohort-vcfs` is a cohort-level workflow, so it is run on a `sample_set_set` containing all of the batches in the cohort. You can create this `sample_set_set` while you are launching the `merge-cohort-vcfs` workflow: click "Select Data", choose "Create new sample_set_set [...]", check all the batches to include (all of the ones used in `train-gCNV` through `module03`), and give it a name that follows the **Sample ID requirements**. +* For a multi-batch cohort, `08-MergeBatchSites` is a cohort-level workflow, so it is run on a `sample_set_set` containing all of the batches in the cohort. You can create this `sample_set_set` while you are launching the `08-MergeBatchSites` workflow: click "Select Data", choose "Create new sample_set_set [...]", check all the batches to include (all of the ones used in `03-TrainGCNV` through `07-FilterBatch`), and give it a name that follows the **Sample ID requirements**. 
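+For reference, the `08-MergeBatchSites` configuration added in this patch (see `MergeBatchSites.json.tmpl` below) shows how this cohort-level workflow resolves each batch's filtered VCFs through the `sample_sets` relation of the `sample_set_set` you just created:

```json
{
  "MergeBatchSites.sv_pipeline_docker": "${workspace.sv_pipeline_docker}",
  "MergeBatchSites.cohort": "${this.sample_set_set_id}",
  "MergeBatchSites.pesr_vcfs": "${this.sample_sets.filtered_pesr_vcf}",
  "MergeBatchSites.depth_vcfs": "${this.sample_sets.filtered_depth_vcf}"
}
```

+In general, no inputs should need to be edited by hand here: selecting the cohort `sample_set_set` as the root entity lets the `${this.*}` expressions resolve automatically.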
creating a cohort sample_set_set -#### Module04 +#### 09-GenotypeBatch -Read the full Module04 documentation [here](https://github.com/broadinstitute/gatk-sv#module-04). -* Use the same `sample_set` definitions you used for `train-gCNV` through `module03`. -* If you only have one batch, use the `module04_single_batch` version of the workflow. +Read the full GenotypeBatch documentation [here](https://github.com/broadinstitute/gatk-sv#genotype-batch). +* Use the same `sample_set` definitions you used for `03-TrainGCNV` through `07-FilterBatch`. +* If you only have one batch, use the `09-GenotypeBatch_SingleBatch` version of the workflow. -#### Module04b, Module0506, and Module08Annotation +#### 10-RegenotypeCNVs, 11-MakeCohortVcf, and 12-AnnotateVcf -Read the full documentation for [Module04b](https://github.com/broadinstitute/gatk-sv#module-04b), [Module0506](https://github.com/broadinstitute/gatk-sv#module-0506), and [Module08](https://github.com/broadinstitute/gatk-sv#module-08-in-development) on the README. -* For a multi-batch cohort, use the same cohort `sample_set_set` you created and used for `merge-cohort-vcfs`. -* If you only have one batch, use the `module0X_single_batch` version of the workflow. +Read the full documentation for [RegenotypeCNVs](https://github.com/broadinstitute/gatk-sv#regenotype-cnvs), [MakeCohortVcf](https://github.com/broadinstitute/gatk-sv#make-cohort-vcf), and [AnnotateVcf](https://github.com/broadinstitute/gatk-sv#annotate-vcf) on the README. -* For a multi-batch cohort, use the same cohort `sample_set_set` you created and used for `08-MergeBatchSites`. +* If you only have one batch, use the `[...]_SingleBatch` version of the workflow. diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl new file mode 100644 index 000000000..4fc584080 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.SingleBatch.json.tmpl @@ -0,0 +1,24 @@ +{ + "AnnotateVcf.vcf" : "${this.vcf}", + "AnnotateVcf.vcf_idx" : "${this.vcf_index}", + + "AnnotateVcf.protein_coding_gtf" : "${workspace.protein_coding_gtf}", + "AnnotateVcf.linc_rna_gtf" : "${workspace.linc_rna_gtf}", + "AnnotateVcf.promoter_bed" : "${workspace.promoter_bed}", + "AnnotateVcf.noncoding_bed" : "${workspace.noncoding_bed}", + "AnnotateVcf.ref_bed" : "${workspace.external_af_ref_bed}", + "AnnotateVcf.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", + "AnnotateVcf.population" : {{ reference_resources.external_af_population | tojson }}, + + + "AnnotateVcf.contig_list" : "${workspace.primary_contigs_list}", + "AnnotateVcf.ped_file": "${workspace.cohort_ped_file}", + "AnnotateVcf.sv_per_shard" : "5000", + "AnnotateVcf.max_shards_per_chrom_step1" : 200, + "AnnotateVcf.min_records_per_shard_step1" : 5000, + + "AnnotateVcf.prefix" : "${this.sample_set_id}", + + "AnnotateVcf.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", + "AnnotateVcf.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" +} \ No newline at end of file diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl new file mode 100644 index 000000000..5b177dfb7 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/AnnotateVcf.json.tmpl @@ -0,0 +1,24 @@ +{ +
"AnnotateVcf.vcf" : "${this.vcf}", + "AnnotateVcf.vcf_idx" : "${this.vcf_index}", + + "AnnotateVcf.protein_coding_gtf" : "${workspace.protein_coding_gtf}", + "AnnotateVcf.linc_rna_gtf" : "${workspace.linc_rna_gtf}", + "AnnotateVcf.promoter_bed" : "${workspace.promoter_bed}", + "AnnotateVcf.noncoding_bed" : "${workspace.noncoding_bed}", + "AnnotateVcf.ref_bed" : "${workspace.external_af_ref_bed}", + "AnnotateVcf.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", + "AnnotateVcf.population" : {{ reference_resources.external_af_population | tojson }}, + + + "AnnotateVcf.contig_list" : "${workspace.primary_contigs_list}", + "AnnotateVcf.ped_file": "${workspace.cohort_ped_file}", + "AnnotateVcf.sv_per_shard" : "5000", + "AnnotateVcf.max_shards_per_chrom_step1" : 200, + "AnnotateVcf.min_records_per_shard_step1" : 5000, + + "AnnotateVcf.prefix" : "${this.sample_set_set_id}", + + "AnnotateVcf.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", + "AnnotateVcf.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" +} \ No newline at end of file diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl new file mode 100644 index 000000000..871454737 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/ClusterBatch.json.tmpl @@ -0,0 +1,26 @@ +{ + "ClusterBatch.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "ClusterBatch.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + + "ClusterBatch.contigs": "${workspace.primary_contigs_fai}", + "ClusterBatch.depth_flags": "--merge-coordinates", + "ClusterBatch.depth_frac": "0.8", + "ClusterBatch.pesr_svsize": "0", + "ClusterBatch.pesr_frac": "0.1", + "ClusterBatch.pesr_flags": "--preserve-ids", + "ClusterBatch.pesr_exclude_list": "${workspace.pesr_exclude_list}", + "ClusterBatch.pesr_distance": "300", + "ClusterBatch.depth_exclude_list": "${workspace.depth_exclude_list}", + "ClusterBatch.depth_exclude_list_frac_max": "0.5", + + "ClusterBatch.primary_contigs_list": "${workspace.primary_contigs_list}", + "ClusterBatch.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + "ClusterBatch.linux_docker": "${workspace.linux_docker}", + + "ClusterBatch.batch": "${this.sample_set_id}", + "ClusterBatch.del_bed": "${this.merged_dels}", + "ClusterBatch.dup_bed": "${this.merged_dups}", + "ClusterBatch.wham_vcfs": "${this.std_wham_vcf}", + "ClusterBatch.manta_vcfs": "${this.std_manta_vcf}", + "ClusterBatch.melt_vcfs": "${this.std_melt_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/EvidenceQC.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/EvidenceQC.json.tmpl new file mode 100644 index 000000000..ce08553cf --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/EvidenceQC.json.tmpl @@ -0,0 +1,16 @@ +{ + "EvidenceQC.run_vcf_qc" : "true", + "EvidenceQC.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "EvidenceQC.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "EvidenceQC.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", + "EvidenceQC.sv_base_docker": "${workspace.sv_base_docker}", + "EvidenceQC.wgd_scoring_mask": "${workspace.wgd_scoring_mask}", + "EvidenceQC.genome_file": "${workspace.genome_file}", + + "EvidenceQC.batch": "${this.sample_set_id}", + "EvidenceQC.counts": "${this.samples.coverage_counts}", + 
"EvidenceQC.manta_vcfs": "${this.samples.manta_vcf}", + "EvidenceQC.melt_vcfs": "${this.samples.melt_vcf}", + "EvidenceQC.wham_vcfs": "${this.samples.wham_vcf}", + "EvidenceQC.samples": "${this.samples.sample_id}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatch.json.tmpl new file mode 100644 index 000000000..72c2feab2 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/FilterBatch.json.tmpl @@ -0,0 +1,19 @@ +{ + "FilterBatch.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "FilterBatch.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "FilterBatch.linux_docker" : "${workspace.linux_docker}", + + "FilterBatch.outlier_cutoff_nIQR": "10000", + + "FilterBatch.primary_contigs_list": "${workspace.primary_contigs_list}", + "FilterBatch.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + "FilterBatch.ped_file": "${workspace.cohort_ped_file}", + + "FilterBatch.batch": "${this.sample_set_id}", + "FilterBatch.depth_vcf" : "${this.clustered_depth_vcf}", + "FilterBatch.manta_vcf" : "${this.clustered_manta_vcf}", + "FilterBatch.wham_vcf" : "${this.clustered_wham_vcf}", + "FilterBatch.melt_vcf" : "${this.clustered_melt_vcf}", + "FilterBatch.evidence_metrics": "${this.metrics}", + "FilterBatch.evidence_metrics_common": "${this.metrics_common}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherBatchEvidence.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherBatchEvidence.json.tmpl new file mode 100644 index 000000000..0435ba3dc --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherBatchEvidence.json.tmpl @@ -0,0 +1,65 @@ +{ + "GatherBatchEvidence.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "GatherBatchEvidence.sv_base_docker": "${workspace.sv_base_docker}", + "GatherBatchEvidence.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "GatherBatchEvidence.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", + "GatherBatchEvidence.cnmops_docker": "${workspace.cnmops_docker}", + "GatherBatchEvidence.linux_docker" : "${workspace.linux_docker}", + "GatherBatchEvidence.gatk_docker" : "${workspace.gatk_docker}", + "GatherBatchEvidence.gcnv_gatk_docker" : "${workspace.gcnv_gatk_docker}", + "GatherBatchEvidence.condense_counts_docker" : "${workspace.condense_counts_docker}", + + "GatherBatchEvidence.cytoband":"${workspace.cytobands}", + "GatherBatchEvidence.mei_bed":"${workspace.mei_bed}", + "GatherBatchEvidence.cnmops_allo_file": "${workspace.allosome_file}", + "GatherBatchEvidence.cnmops_exclude_list": "${workspace.cnmops_exclude_list}", + "GatherBatchEvidence.cnmops_chrom_file": "${workspace.autosome_file}", + "GatherBatchEvidence.primary_contigs_fai": "${workspace.primary_contigs_fai}", + "GatherBatchEvidence.genome_file": "${workspace.genome_file}", + "GatherBatchEvidence.inclusion_bed": "${workspace.inclusion_bed}", + "GatherBatchEvidence.matrix_qc_distance": "1000000", + "GatherBatchEvidence.min_svsize": "50", + "GatherBatchEvidence.run_matrix_qc": "true", + + "GatherBatchEvidence.unpadded_intervals_file" : "${workspace.unpadded_intervals_file}", + "GatherBatchEvidence.dbsnp_vcf" : "${workspace.dbsnp_vcf}", + "GatherBatchEvidence.ref_fasta": "${workspace.reference_fasta}", + "GatherBatchEvidence.ref_fasta_index": "${workspace.reference_index}", + 
"GatherBatchEvidence.ref_dict": "${workspace.reference_dict}", + + "GatherBatchEvidence.ploidy_sample_psi_scale": "0.001", + "GatherBatchEvidence.contig_ploidy_model_tar" : "${this.contig_ploidy_model_tar}", + "GatherBatchEvidence.gcnv_learning_rate" : 0.03, + "GatherBatchEvidence.gcnv_num_thermal_advi_iters" : 250, + "GatherBatchEvidence.gcnv_max_advi_iter_first_epoch" : 1000, + "GatherBatchEvidence.gcnv_max_advi_iter_subsequent_epochs" : 200, + "GatherBatchEvidence.gcnv_max_training_epochs" : 5, + "GatherBatchEvidence.gcnv_min_training_epochs" : 1, + "GatherBatchEvidence.gcnv_convergence_snr_averaging_window" : 100, + "GatherBatchEvidence.gcnv_convergence_snr_countdown_window" : 10, + "GatherBatchEvidence.gcnv_cnv_coherence_length" : 1000, + "GatherBatchEvidence.gcnv_copy_number_posterior_expectation_mode" : "EXACT", + "GatherBatchEvidence.gcnv_log_emission_sampling_rounds" : 20, + "GatherBatchEvidence.gcnv_p_alt" : 0.000001, + "GatherBatchEvidence.gcnv_sample_psi_scale" : 0.000001, + "GatherBatchEvidence.ref_copy_number_autosomal_contigs": "${workspace.copy_number_autosomal_contigs}", + "GatherBatchEvidence.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, + "GatherBatchEvidence.gcnv_caller_internal_admixing_rate": "0.5", + "GatherBatchEvidence.gcnv_caller_update_convergence_threshold": "0.000001", + "GatherBatchEvidence.gcnv_convergence_snr_trigger_threshold": "0.2", + "GatherBatchEvidence.gcnv_depth_correction_tau": "10000", + "GatherBatchEvidence.gcnv_log_emission_sampling_median_rel_error": "0.001", + "GatherBatchEvidence.gcnv_qs_cutoff": "30", + + "GatherBatchEvidence.batch": "${this.sample_set_id}", + "GatherBatchEvidence.ped_file": "${workspace.cohort_ped_file}", + "GatherBatchEvidence.gcnv_model_tars" : "${this.gcnv_model_tars}", + "GatherBatchEvidence.PE_files": "${this.samples.pesr_disc}", + "GatherBatchEvidence.SR_files": "${this.samples.pesr_split}", + "GatherBatchEvidence.counts": "${this.samples.coverage_counts}", + "GatherBatchEvidence.manta_vcfs": "${this.samples.manta_vcf}", + "GatherBatchEvidence.samples": "${this.samples.sample_id}", + "GatherBatchEvidence.melt_vcfs": "${this.samples.melt_vcf}", + "GatherBatchEvidence.wham_vcfs": "${this.samples.wham_vcf}", + "GatherBatchEvidence.gvcfs": "${this.samples.gvcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherSampleEvidence.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherSampleEvidence.json.tmpl new file mode 100644 index 000000000..2112fe389 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GatherSampleEvidence.json.tmpl @@ -0,0 +1,36 @@ +{ + "GatherSampleEvidence.primary_contigs_list": "${workspace.primary_contigs_list}", + "GatherSampleEvidence.reference_fasta": "${workspace.reference_fasta}", + "GatherSampleEvidence.reference_index": "${workspace.reference_index}", + "GatherSampleEvidence.reference_dict": "${workspace.reference_dict}", + "GatherSampleEvidence.reference_version": "${workspace.reference_version}", + + "GatherSampleEvidence.collect_coverage": "true", + "GatherSampleEvidence.collect_pesr": "true", + + "GatherSampleEvidence.preprocessed_intervals": "${workspace.preprocessed_intervals}", + + "GatherSampleEvidence.delly_exclude_intervals_file": "${workspace.delly_exclude_intervals_file}", + "GatherSampleEvidence.manta_region_bed": "${workspace.manta_region_bed}", + "GatherSampleEvidence.melt_standard_vcf_header": "${workspace.melt_standard_vcf_header}", + + 
"GatherSampleEvidence.wham_include_list_bed_file": "${workspace.wham_include_list_bed_file}", + + "GatherSampleEvidence.samtools_cloud_docker": "${workspace.samtools_cloud_docker}", + "GatherSampleEvidence.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "GatherSampleEvidence.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "GatherSampleEvidence.manta_docker": "${workspace.manta_docker}", + "GatherSampleEvidence.wham_docker": "${workspace.wham_docker}", + "GatherSampleEvidence.genomes_in_the_cloud_docker" : "${workspace.genomes_in_the_cloud_docker}", + "GatherSampleEvidence.gatk_docker" : "${workspace.gatk_docker}", + "GatherSampleEvidence.gatk_docker_pesr_override": "${workspace.gatk_docker_pesr_override}", + "GatherSampleEvidence.cloud_sdk_docker": "${workspace.cloud_sdk_docker}", + + "GatherSampleEvidence.primary_contigs_fai": "${workspace.primary_contigs_fai}", + "GatherSampleEvidence.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + + "GatherSampleEvidence.bam_or_cram_file": "${this.bam_or_cram_file}", + "GatherSampleEvidence.sample_id": "${this.sample_id}", + + "GatherSampleEvidence.requester_pays_crams": "${this.requester_pays_cram}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenerateBatchMetrics.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenerateBatchMetrics.json.tmpl new file mode 100644 index 000000000..343456518 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenerateBatchMetrics.json.tmpl @@ -0,0 +1,34 @@ +{ + "GenerateBatchMetrics.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "GenerateBatchMetrics.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "GenerateBatchMetrics.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "GenerateBatchMetrics.sv_base_docker": "${workspace.sv_base_docker}", + "GenerateBatchMetrics.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + "GenerateBatchMetrics.linux_docker" : "${workspace.linux_docker}", + + "GenerateBatchMetrics.BAF_split_size": "10000", + "GenerateBatchMetrics.RD_split_size": "10000", + "GenerateBatchMetrics.PE_split_size": "10000", + "GenerateBatchMetrics.SR_split_size": "1000", + "GenerateBatchMetrics.common_cnv_size_cutoff": "5000", + "GenerateBatchMetrics.allosome_contigs": "${workspace.allosome_file}", + "GenerateBatchMetrics.autosome_contigs": "${workspace.autosome_file}", + "GenerateBatchMetrics.rmsk": "${workspace.rmsk}", + "GenerateBatchMetrics.segdups": "${workspace.segdups}", + "GenerateBatchMetrics.ref_dict": "${workspace.reference_dict}", + + "GenerateBatchMetrics.primary_contigs_list": "${workspace.primary_contigs_list}", + + "GenerateBatchMetrics.batch": "${this.sample_set_id}", + "GenerateBatchMetrics.ped_file": "${workspace.cohort_ped_file}", + + "GenerateBatchMetrics.discfile": "${this.merged_PE}", + "GenerateBatchMetrics.baf_metrics": "${this.merged_BAF}", + "GenerateBatchMetrics.coveragefile": "${this.merged_bincov}", + "GenerateBatchMetrics.splitfile": "${this.merged_SR}", + "GenerateBatchMetrics.medianfile": "${this.median_cov}", + "GenerateBatchMetrics.depth_vcf" : "${this.clustered_depth_vcf}", + "GenerateBatchMetrics.manta_vcf" : "${this.clustered_manta_vcf}", + "GenerateBatchMetrics.wham_vcf" : "${this.clustered_wham_vcf}", + "GenerateBatchMetrics.melt_vcf" : "${this.clustered_melt_vcf}" +} diff --git 
a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.SingleBatch.json.tmpl new file mode 100644 index 000000000..c1f36e952 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.SingleBatch.json.tmpl @@ -0,0 +1,29 @@ +{ + "GenotypeBatch.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "GenotypeBatch.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "GenotypeBatch.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "GenotypeBatch.linux_docker" : "${workspace.linux_docker}", + + "GenotypeBatch.n_RD_genotype_bins": "100000", + "GenotypeBatch.n_per_split": "5000", + "GenotypeBatch.pesr_exclude_list": "${workspace.pesr_exclude_list}", + "GenotypeBatch.seed_cutoffs": "${workspace.seed_cutoffs}", + "GenotypeBatch.reference_build": "${workspace.reference_build}", + "GenotypeBatch.ref_dict": "${workspace.reference_dict}", + + "GenotypeBatch.primary_contigs_list": "${workspace.primary_contigs_list}", + "GenotypeBatch.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + + "GenotypeBatch.batch": "${this.sample_set_id}", + "GenotypeBatch.rf_cutoffs": "${this.cutoffs}", + "GenotypeBatch.batch_depth_vcf": "${this.filtered_depth_vcf}", + "GenotypeBatch.batch_pesr_vcf": "${this.filtered_pesr_vcf}", + "GenotypeBatch.ped_file": "${workspace.cohort_ped_file}", + "GenotypeBatch.bin_exclude": "${workspace.bin_exclude}", + "GenotypeBatch.discfile": "${this.merged_PE}", + "GenotypeBatch.coveragefile": "${this.merged_bincov}", + "GenotypeBatch.splitfile": "${this.merged_SR}", + "GenotypeBatch.medianfile": "${this.median_cov}", + "GenotypeBatch.cohort_depth_vcf": "${this.filtered_depth_vcf}", + "GenotypeBatch.cohort_pesr_vcf": "${this.filtered_pesr_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.json.tmpl new file mode 100644 index 000000000..b617fa964 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeBatch.json.tmpl @@ -0,0 +1,29 @@ +{ + "GenotypeBatch.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "GenotypeBatch.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "GenotypeBatch.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "GenotypeBatch.linux_docker" : "${workspace.linux_docker}", + + "GenotypeBatch.n_RD_genotype_bins": "100000", + "GenotypeBatch.n_per_split": "5000", + "GenotypeBatch.pesr_exclude_list": "${workspace.pesr_exclude_list}", + "GenotypeBatch.seed_cutoffs": "${workspace.seed_cutoffs}", + "GenotypeBatch.reference_build": "${workspace.reference_build}", + "GenotypeBatch.ref_dict": "${workspace.reference_dict}", + + "GenotypeBatch.primary_contigs_list": "${workspace.primary_contigs_list}", + "GenotypeBatch.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + + "GenotypeBatch.batch": "${this.sample_set_id}", + "GenotypeBatch.rf_cutoffs": "${this.cutoffs}", + "GenotypeBatch.batch_depth_vcf": "${this.filtered_depth_vcf}", + "GenotypeBatch.batch_pesr_vcf": "${this.filtered_pesr_vcf}", + "GenotypeBatch.ped_file": "${workspace.cohort_ped_file}", + "GenotypeBatch.bin_exclude": "${workspace.bin_exclude}", + "GenotypeBatch.discfile": "${this.merged_PE}", + "GenotypeBatch.coveragefile": "${this.merged_bincov}", + 
"GenotypeBatch.splitfile": "${this.merged_SR}", + "GenotypeBatch.medianfile": "${this.median_cov}", + "GenotypeBatch.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", + "GenotypeBatch.cohort_pesr_vcf": "${workspace.cohort_pesr_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.SingleBatch.json.tmpl new file mode 100644 index 000000000..747e2a37f --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.SingleBatch.json.tmpl @@ -0,0 +1,43 @@ +{ + "MakeCohortVcf.bin_exclude": "${workspace.bin_exclude}", + "MakeCohortVcf.contig_list": "${workspace.primary_contigs_fai}", + "MakeCohortVcf.allosome_fai": "${workspace.allosome_file}", + "MakeCohortVcf.cytobands": "${workspace.cytobands}", + "MakeCohortVcf.mei_bed": "${workspace.mei_bed}", + "MakeCohortVcf.pe_exclude_list": "${workspace.pesr_exclude_list}", + "MakeCohortVcf.depth_exclude_list": "${workspace.depth_exclude_list}", + "MakeCohortVcf.empty_file" : "${workspace.empty_file}", + "MakeCohortVcf.ref_dict": "${workspace.reference_dict}", + + "MakeCohortVcf.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, + + "MakeCohortVcf.min_sr_background_fail_batches": 0.5, + "MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, + "MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, + "MakeCohortVcf.samples_per_clean_vcf_step2_shard": 100, + "MakeCohortVcf.random_seed": 0, + "MakeCohortVcf.max_shards_per_chrom": 100, + "MakeCohortVcf.min_variants_per_shard": 30, + + "MakeCohortVcf.linux_docker": "${workspace.linux_docker}", + "MakeCohortVcf.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "MakeCohortVcf.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "MakeCohortVcf.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "MakeCohortVcf.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", + + "MakeCohortVcf.primary_contigs_list": "${workspace.primary_contigs_list}", + "MakeCohortVcf.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + + "MakeCohortVcf.cohort_name": "${this.sample_set_id}", + "MakeCohortVcf.batches": "${this.sample_set_id}", + "MakeCohortVcf.ped_file": "${workspace.cohort_ped_file}", + "MakeCohortVcf.disc_files": "${this.merged_PE}", + "MakeCohortVcf.bincov_files": "${this.merged_bincov}", + "MakeCohortVcf.median_coverage_files": "${this.median_cov}", + "MakeCohortVcf.rf_cutoff_files": "${this.cutoffs}", + "MakeCohortVcf.pesr_vcfs": "${this.genotyped_pesr_vcf}", + "MakeCohortVcf.depth_vcfs": "${this.regenotyped_depth_vcfs}", + "MakeCohortVcf.depth_gt_rd_sep_files": "${this.trained_genotype_depth_depth_sepcutoff}", + "MakeCohortVcf.raw_sr_bothside_pass_files": "${this.sr_bothside_pass}", + "MakeCohortVcf.raw_sr_background_fail_files": "${this.sr_background_fail}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl new file mode 100644 index 000000000..b10aea732 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MakeCohortVcf.json.tmpl @@ -0,0 +1,43 @@ +{ + "MakeCohortVcf.bin_exclude": "${workspace.bin_exclude}", + "MakeCohortVcf.contig_list": "${workspace.primary_contigs_fai}", + "MakeCohortVcf.allosome_fai": "${workspace.allosome_file}", 
+ "MakeCohortVcf.cytobands": "${workspace.cytobands}", + "MakeCohortVcf.mei_bed": "${workspace.mei_bed}", + "MakeCohortVcf.pe_exclude_list": "${workspace.pesr_exclude_list}", + "MakeCohortVcf.depth_exclude_list": "${workspace.depth_exclude_list}", + "MakeCohortVcf.empty_file" : "${workspace.empty_file}", + "MakeCohortVcf.ref_dict": "${workspace.reference_dict}", + + "MakeCohortVcf.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, + + "MakeCohortVcf.min_sr_background_fail_batches": 0.5, + "MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, + "MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, + "MakeCohortVcf.samples_per_clean_vcf_step2_shard": 100, + "MakeCohortVcf.random_seed": 0, + "MakeCohortVcf.max_shards_per_chrom": 100, + "MakeCohortVcf.min_variants_per_shard": 30, + + "MakeCohortVcf.linux_docker": "${workspace.linux_docker}", + "MakeCohortVcf.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "MakeCohortVcf.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "MakeCohortVcf.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "MakeCohortVcf.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", + + "MakeCohortVcf.primary_contigs_list": "${workspace.primary_contigs_list}", + "MakeCohortVcf.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + + "MakeCohortVcf.cohort_name": "${this.sample_set_set_id}", + "MakeCohortVcf.batches": "${this.sample_sets.sample_set_id}", + "MakeCohortVcf.ped_file": "${workspace.cohort_ped_file}", + "MakeCohortVcf.disc_files": "${this.sample_sets.merged_PE}", + "MakeCohortVcf.bincov_files": "${this.sample_sets.merged_bincov}", + "MakeCohortVcf.median_coverage_files": "${this.sample_sets.median_cov}", + "MakeCohortVcf.rf_cutoff_files": "${this.sample_sets.cutoffs}", + "MakeCohortVcf.pesr_vcfs": "${this.sample_sets.genotyped_pesr_vcf}", + "MakeCohortVcf.depth_vcfs": "${this.regenotyped_depth_vcfs}", + "MakeCohortVcf.depth_gt_rd_sep_files": "${this.sample_sets.trained_genotype_depth_depth_sepcutoff}", + "MakeCohortVcf.raw_sr_bothside_pass_files": "${this.sample_sets.sr_bothside_pass}", + "MakeCohortVcf.raw_sr_background_fail_files": "${this.sample_sets.sr_background_fail}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeBatchSites.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeBatchSites.json.tmpl new file mode 100644 index 000000000..95a7daf97 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeBatchSites.json.tmpl @@ -0,0 +1,6 @@ +{ + "MergeBatchSites.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "MergeBatchSites.cohort": "${this.sample_set_set_id}", + "MergeBatchSites.pesr_vcfs": "${this.sample_sets.filtered_pesr_vcf}", + "MergeBatchSites.depth_vcfs": "${this.sample_sets.filtered_depth_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeCohortVcfs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeCohortVcfs.json.tmpl deleted file mode 100644 index dc8a30d41..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/MergeCohortVcfs.json.tmpl +++ /dev/null @@ -1,6 +0,0 @@ -{ - "MergeCohortVcfs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "MergeCohortVcfs.cohort": "${this.sample_set_set_id}", - "MergeCohortVcfs.pesr_vcfs": "${this.sample_sets.filtered_pesr_vcf}", - "MergeCohortVcfs.depth_vcfs": 
"${this.sample_sets.filtered_depth_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00a.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00a.json.tmpl deleted file mode 100644 index d8d255c4d..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00a.json.tmpl +++ /dev/null @@ -1,36 +0,0 @@ -{ - "Module00a.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module00a.reference_fasta": "${workspace.reference_fasta}", - "Module00a.reference_index": "${workspace.reference_index}", - "Module00a.reference_dict": "${workspace.reference_dict}", - "Module00a.reference_version": "${workspace.reference_version}", - - "Module00a.collect_coverage": "true", - "Module00a.collect_pesr": "true", - - "Module00a.preprocessed_intervals": "${workspace.preprocessed_intervals}", - - "Module00a.delly_exclude_intervals_file": "${workspace.delly_exclude_intervals_file}", - "Module00a.manta_region_bed": "${workspace.manta_region_bed}", - "Module00a.melt_standard_vcf_header": "${workspace.melt_standard_vcf_header}", - - "Module00a.wham_include_list_bed_file": "${workspace.wham_include_list_bed_file}", - - "Module00a.samtools_cloud_docker": "${workspace.samtools_cloud_docker}", - "Module00a.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module00a.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module00a.manta_docker": "${workspace.manta_docker}", - "Module00a.wham_docker": "${workspace.wham_docker}", - "Module00a.genomes_in_the_cloud_docker" : "${workspace.genomes_in_the_cloud_docker}", - "Module00a.gatk_docker" : "${workspace.gatk_docker}", - "Module00a.gatk_docker_pesr_override": "${workspace.gatk_docker_pesr_override}", - "Module00a.cloud_sdk_docker": "${workspace.cloud_sdk_docker}", - - "Module00a.primary_contigs_fai": "${workspace.primary_contigs_fai}", - "Module00a.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - - "Module00a.bam_or_cram_file": "${this.bam_or_cram_file}", - "Module00a.sample_id": "${this.sample_id}", - - "Module00a.requester_pays_crams": "${this.requester_pays_cram}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00b.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00b.json.tmpl deleted file mode 100644 index 6e18a3fa4..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00b.json.tmpl +++ /dev/null @@ -1,16 +0,0 @@ -{ - "Module00b.run_vcf_qc" : "true", - "Module00b.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module00b.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module00b.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", - "Module00b.sv_base_docker": "${workspace.sv_base_docker}", - "Module00b.wgd_scoring_mask": "${workspace.wgd_scoring_mask}", - "Module00b.genome_file": "${workspace.genome_file}", - - "Module00b.batch": "${this.sample_set_id}", - "Module00b.counts": "${this.samples.coverage_counts}", - "Module00b.manta_vcfs": "${this.samples.manta_vcf}", - "Module00b.melt_vcfs": "${this.samples.melt_vcf}", - "Module00b.wham_vcfs": "${this.samples.wham_vcf}", - "Module00b.samples": "${this.samples.sample_id}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00c.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00c.json.tmpl deleted file mode 100644 index 5f7003093..000000000 --- 
a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module00c.json.tmpl +++ /dev/null @@ -1,65 +0,0 @@ -{ - "Module00c.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module00c.sv_base_docker": "${workspace.sv_base_docker}", - "Module00c.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module00c.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", - "Module00c.cnmops_docker": "${workspace.cnmops_docker}", - "Module00c.linux_docker" : "${workspace.linux_docker}", - "Module00c.gatk_docker" : "${workspace.gatk_docker}", - "Module00c.gcnv_gatk_docker" : "${workspace.gcnv_gatk_docker}", - "Module00c.condense_counts_docker" : "${workspace.condense_counts_docker}", - - "Module00c.cytoband":"${workspace.cytobands}", - "Module00c.mei_bed":"${workspace.mei_bed}", - "Module00c.cnmops_allo_file": "${workspace.allosome_file}", - "Module00c.cnmops_exclude_list": "${workspace.cnmops_exclude_list}", - "Module00c.cnmops_chrom_file": "${workspace.autosome_file}", - "Module00c.primary_contigs_fai": "${workspace.primary_contigs_fai}", - "Module00c.genome_file": "${workspace.genome_file}", - "Module00c.inclusion_bed": "${workspace.inclusion_bed}", - "Module00c.matrix_qc_distance": "1000000", - "Module00c.min_svsize": "50", - "Module00c.run_matrix_qc": "true", - - "Module00c.unpadded_intervals_file" : "${workspace.unpadded_intervals_file}", - "Module00c.dbsnp_vcf" : "${workspace.dbsnp_vcf}", - "Module00c.ref_fasta": "${workspace.reference_fasta}", - "Module00c.ref_fasta_index": "${workspace.reference_index}", - "Module00c.ref_dict": "${workspace.reference_dict}", - - "Module00c.ploidy_sample_psi_scale": "0.001", - "Module00c.contig_ploidy_model_tar" : "${this.contig_ploidy_model_tar}", - "Module00c.gcnv_learning_rate" : 0.03, - "Module00c.gcnv_num_thermal_advi_iters" : 250, - "Module00c.gcnv_max_advi_iter_first_epoch" : 1000, - "Module00c.gcnv_max_advi_iter_subsequent_epochs" : 200, - "Module00c.gcnv_max_training_epochs" : 5, - "Module00c.gcnv_min_training_epochs" : 1, - "Module00c.gcnv_convergence_snr_averaging_window" : 100, - "Module00c.gcnv_convergence_snr_countdown_window" : 10, - "Module00c.gcnv_cnv_coherence_length" : 1000, - "Module00c.gcnv_copy_number_posterior_expectation_mode" : "EXACT", - "Module00c.gcnv_log_emission_sampling_rounds" : 20, - "Module00c.gcnv_p_alt" : 0.000001, - "Module00c.gcnv_sample_psi_scale" : 0.000001, - "Module00c.ref_copy_number_autosomal_contigs": "${workspace.copy_number_autosomal_contigs}", - "Module00c.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, - "Module00c.gcnv_caller_internal_admixing_rate": "0.5", - "Module00c.gcnv_caller_update_convergence_threshold": "0.000001", - "Module00c.gcnv_convergence_snr_trigger_threshold": "0.2", - "Module00c.gcnv_depth_correction_tau": "10000", - "Module00c.gcnv_log_emission_sampling_median_rel_error": "0.001", - "Module00c.gcnv_qs_cutoff": "30", - - "Module00c.batch": "${this.sample_set_id}", - "Module00c.ped_file": "${workspace.cohort_ped_file}", - "Module00c.gcnv_model_tars" : "${this.gcnv_model_tars}", - "Module00c.PE_files": "${this.samples.pesr_disc}", - "Module00c.SR_files": "${this.samples.pesr_split}", - "Module00c.counts": "${this.samples.coverage_counts}", - "Module00c.manta_vcfs": "${this.samples.manta_vcf}", - "Module00c.samples": "${this.samples.sample_id}", - "Module00c.melt_vcfs": "${this.samples.melt_vcf}", - "Module00c.wham_vcfs": "${this.samples.wham_vcf}", - "Module00c.gvcfs": "${this.samples.gvcf}" -} diff --git 
a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module01.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module01.json.tmpl deleted file mode 100644 index d6173f529..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module01.json.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -{ - "Module01.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module01.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - - "Module01.contigs": "${workspace.primary_contigs_fai}", - "Module01.depth_flags": "--merge-coordinates", - "Module01.depth_frac": "0.8", - "Module01.pesr_svsize": "0", - "Module01.pesr_frac": "0.1", - "Module01.pesr_flags": "--preserve-ids", - "Module01.pesr_exclude_list": "${workspace.pesr_exclude_list}", - "Module01.pesr_distance": "300", - "Module01.depth_exclude_list": "${workspace.depth_exclude_list}", - "Module01.depth_exclude_list_frac_max": "0.5", - - "Module01.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module01.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - "Module01.linux_docker": "${workspace.linux_docker}", - - "Module01.batch": "${this.sample_set_id}", - "Module01.del_bed": "${this.merged_dels}", - "Module01.dup_bed": "${this.merged_dups}", - "Module01.wham_vcfs": "${this.std_wham_vcf}", - "Module01.manta_vcfs": "${this.std_manta_vcf}", - "Module01.melt_vcfs": "${this.std_melt_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module02.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module02.json.tmpl deleted file mode 100644 index 02949cc31..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module02.json.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -{ - "Module02.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module02.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module02.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module02.sv_base_docker": "${workspace.sv_base_docker}", - "Module02.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - "Module02.linux_docker" : "${workspace.linux_docker}", - - "Module02.BAF_split_size": "10000", - "Module02.RD_split_size": "10000", - "Module02.PE_split_size": "10000", - "Module02.SR_split_size": "1000", - "Module02.common_cnv_size_cutoff": "5000", - "Module02.allosome_contigs": "${workspace.allosome_file}", - "Module02.autosome_contigs": "${workspace.autosome_file}", - "Module02.rmsk": "${workspace.rmsk}", - "Module02.segdups": "${workspace.segdups}", - "Module02.ref_dict": "${workspace.reference_dict}", - - "Module02.primary_contigs_list": "${workspace.primary_contigs_list}", - - "Module02.batch": "${this.sample_set_id}", - "Module02.ped_file": "${workspace.cohort_ped_file}", - - "Module02.discfile": "${this.merged_PE}", - "Module02.baf_metrics": "${this.merged_BAF}", - "Module02.coveragefile": "${this.merged_bincov}", - "Module02.splitfile": "${this.merged_SR}", - "Module02.medianfile": "${this.median_cov}", - "Module02.depth_vcf" : "${this.clustered_depth_vcf}", - "Module02.manta_vcf" : "${this.clustered_manta_vcf}", - "Module02.wham_vcf" : "${this.clustered_wham_vcf}", - "Module02.melt_vcf" : "${this.clustered_melt_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module03.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module03.json.tmpl deleted file mode 100644 
index 7b64ba9c2..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module03.json.tmpl +++ /dev/null @@ -1,19 +0,0 @@ -{ - "Module03.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module03.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module03.linux_docker" : "${workspace.linux_docker}", - - "Module03.outlier_cutoff_nIQR": "10000", - - "Module03.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module03.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - "Module03.ped_file": "${workspace.cohort_ped_file}", - - "Module03.batch": "${this.sample_set_id}", - "Module03.depth_vcf" : "${this.clustered_depth_vcf}", - "Module03.manta_vcf" : "${this.clustered_manta_vcf}", - "Module03.wham_vcf" : "${this.clustered_wham_vcf}", - "Module03.melt_vcf" : "${this.clustered_melt_vcf}", - "Module03.evidence_metrics": "${this.metrics}", - "Module03.evidence_metrics_common": "${this.metrics_common}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.SingleBatch.json.tmpl deleted file mode 100644 index 22e91c267..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.SingleBatch.json.tmpl +++ /dev/null @@ -1,29 +0,0 @@ -{ - "Module04.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module04.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module04.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module04.linux_docker" : "${workspace.linux_docker}", - - "Module04.n_RD_genotype_bins": "100000", - "Module04.n_per_split": "5000", - "Module04.pesr_exclude_list": "${workspace.pesr_exclude_list}", - "Module04.seed_cutoffs": "${workspace.seed_cutoffs}", - "Module04.reference_build": "${workspace.reference_build}", - "Module04.ref_dict": "${workspace.reference_dict}", - - "Module04.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module04.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - - "Module04.batch": "${this.sample_set_id}", - "Module04.rf_cutoffs": "${this.cutoffs}", - "Module04.batch_depth_vcf": "${this.filtered_depth_vcf}", - "Module04.batch_pesr_vcf": "${this.filtered_pesr_vcf}", - "Module04.ped_file": "${workspace.cohort_ped_file}", - "Module04.bin_exclude": "${workspace.bin_exclude}", - "Module04.discfile": "${this.merged_PE}", - "Module04.coveragefile": "${this.merged_bincov}", - "Module04.splitfile": "${this.merged_SR}", - "Module04.medianfile": "${this.median_cov}", - "Module04.cohort_depth_vcf": "${this.filtered_depth_vcf}", - "Module04.cohort_pesr_vcf": "${this.filtered_pesr_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.json.tmpl deleted file mode 100644 index 18e40d9b3..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04.json.tmpl +++ /dev/null @@ -1,29 +0,0 @@ -{ - "Module04.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module04.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module04.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module04.linux_docker" : "${workspace.linux_docker}", - - "Module04.n_RD_genotype_bins": "100000", - "Module04.n_per_split": "5000", - "Module04.pesr_exclude_list": 
"${workspace.pesr_exclude_list}", - "Module04.seed_cutoffs": "${workspace.seed_cutoffs}", - "Module04.reference_build": "${workspace.reference_build}", - "Module04.ref_dict": "${workspace.reference_dict}", - - "Module04.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module04.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - - "Module04.batch": "${this.sample_set_id}", - "Module04.rf_cutoffs": "${this.cutoffs}", - "Module04.batch_depth_vcf": "${this.filtered_depth_vcf}", - "Module04.batch_pesr_vcf": "${this.filtered_pesr_vcf}", - "Module04.ped_file": "${workspace.cohort_ped_file}", - "Module04.bin_exclude": "${workspace.bin_exclude}", - "Module04.discfile": "${this.merged_PE}", - "Module04.coveragefile": "${this.merged_bincov}", - "Module04.splitfile": "${this.merged_SR}", - "Module04.medianfile": "${this.median_cov}", - "Module04.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", - "Module04.cohort_pesr_vcf": "${workspace.cohort_pesr_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.SingleBatch.json.tmpl deleted file mode 100644 index 029703d49..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.SingleBatch.json.tmpl +++ /dev/null @@ -1,25 +0,0 @@ -{ - "Module04b.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module04b.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - "Module04b.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module04b.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module04b.n_RdTest_bins": "100000", - "Module04b.n_per_split": "5000", - - "Module04b.cohort": "${this.sample_set_id}", - "Module04b.contig_list": "${workspace.primary_contigs_list}", - "Module04b.regeno_coverage_medians": "${this.regeno_coverage_medians}", - - "Module04b.RD_depth_sepcutoffs": "${this.trained_genotype_depth_depth_sepcutoff}", - - "Module04b.cohort_depth_vcf": "${this.filtered_depth_vcf}", - - "Module04b.ped_file": "${workspace.cohort_ped_file}", - "Module04b.batch_depth_vcfs": "${this.filtered_depth_vcf}", - - "Module04b.depth_vcfs": "${this.genotyped_depth_vcf}", - "Module04b.coveragefiles": "${this.merged_bincov}", - "Module04b.coveragefile_idxs": "${this.merged_bincov_index}", - "Module04b.medianfiles": "${this.median_cov}", - "Module04b.batches": "${this.sample_set_id}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.json.tmpl deleted file mode 100644 index 341f59b90..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module04b.json.tmpl +++ /dev/null @@ -1,25 +0,0 @@ -{ - "Module04b.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module04b.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - "Module04b.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module04b.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module04b.n_RdTest_bins": "100000", - "Module04b.n_per_split": "5000", - - "Module04b.cohort": "${this.sample_set_set_id}", - "Module04b.contig_list": "${workspace.primary_contigs_list}", - "Module04b.regeno_coverage_medians": "${this.sample_sets.regeno_coverage_medians}", - - "Module04b.RD_depth_sepcutoffs": 
"${this.sample_sets.trained_genotype_depth_depth_sepcutoff}", - - "Module04b.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", - - "Module04b.ped_file": "${workspace.cohort_ped_file}", - "Module04b.batch_depth_vcfs": "${this.sample_sets.filtered_depth_vcf}", - - "Module04b.depth_vcfs": "${this.sample_sets.genotyped_depth_vcf}", - "Module04b.coveragefiles": "${this.sample_sets.merged_bincov}", - "Module04b.coveragefile_idxs": "${this.sample_sets.merged_bincov_index}", - "Module04b.medianfiles": "${this.sample_sets.median_cov}", - "Module04b.batches": "${this.sample_sets.sample_set_id}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.SingleBatch.json.tmpl deleted file mode 100644 index c7a8dc8ab..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.SingleBatch.json.tmpl +++ /dev/null @@ -1,43 +0,0 @@ -{ - "Module0506.bin_exclude": "${workspace.bin_exclude}", - "Module0506.contig_list": "${workspace.primary_contigs_fai}", - "Module0506.allosome_fai": "${workspace.allosome_file}", - "Module0506.cytobands": "${workspace.cytobands}", - "Module0506.mei_bed": "${workspace.mei_bed}", - "Module0506.pe_exclude_list": "${workspace.pesr_exclude_list}", - "Module0506.depth_exclude_list": "${workspace.depth_exclude_list}", - "Module0506.empty_file" : "${workspace.empty_file}", - "Module0506.ref_dict": "${workspace.reference_dict}", - - "Module0506.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, - - "Module0506.min_sr_background_fail_batches": 0.5, - "Module0506.max_shards_per_chrom_clean_vcf_step1": 200, - "Module0506.min_records_per_shard_clean_vcf_step1": 5000, - "Module0506.samples_per_clean_vcf_step2_shard": 100, - "Module0506.random_seed": 0, - "Module0506.max_shards_per_chrom": 100, - "Module0506.min_variants_per_shard": 30, - - "Module0506.linux_docker": "${workspace.linux_docker}", - "Module0506.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module0506.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module0506.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module0506.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", - - "Module0506.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module0506.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - - "Module0506.cohort_name": "${this.sample_set_id}", - "Module0506.batches": "${this.sample_set_id}", - "Module0506.ped_file": "${workspace.cohort_ped_file}", - "Module0506.disc_files": "${this.merged_PE}", - "Module0506.bincov_files": "${this.merged_bincov}", - "Module0506.median_coverage_files": "${this.median_cov}", - "Module0506.rf_cutoff_files": "${this.cutoffs}", - "Module0506.pesr_vcfs": "${this.genotyped_pesr_vcf}", - "Module0506.depth_vcfs": "${this.regenotyped_depth_vcfs}", - "Module0506.depth_gt_rd_sep_files": "${this.trained_genotype_depth_depth_sepcutoff}", - "Module0506.raw_sr_bothside_pass_files": "${this.sr_bothside_pass}", - "Module0506.raw_sr_background_fail_files": "${this.sr_background_fail}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.json.tmpl deleted file mode 100644 index 525e22698..000000000 --- 
a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module0506.json.tmpl +++ /dev/null @@ -1,43 +0,0 @@ -{ - "Module0506.bin_exclude": "${workspace.bin_exclude}", - "Module0506.contig_list": "${workspace.primary_contigs_fai}", - "Module0506.allosome_fai": "${workspace.allosome_file}", - "Module0506.cytobands": "${workspace.cytobands}", - "Module0506.mei_bed": "${workspace.mei_bed}", - "Module0506.pe_exclude_list": "${workspace.pesr_exclude_list}", - "Module0506.depth_exclude_list": "${workspace.depth_exclude_list}", - "Module0506.empty_file" : "${workspace.empty_file}", - "Module0506.ref_dict": "${workspace.reference_dict}", - - "Module0506.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, - - "Module0506.min_sr_background_fail_batches": 0.5, - "Module0506.max_shards_per_chrom_clean_vcf_step1": 200, - "Module0506.min_records_per_shard_clean_vcf_step1": 5000, - "Module0506.samples_per_clean_vcf_step2_shard": 100, - "Module0506.random_seed": 0, - "Module0506.max_shards_per_chrom": 100, - "Module0506.min_variants_per_shard": 30, - - "Module0506.linux_docker": "${workspace.linux_docker}", - "Module0506.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", - "Module0506.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", - "Module0506.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", - "Module0506.sv_pipeline_qc_docker": "${workspace.sv_pipeline_qc_docker}", - - "Module0506.primary_contigs_list": "${workspace.primary_contigs_list}", - "Module0506.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", - - "Module0506.cohort_name": "${this.sample_set_set_id}", - "Module0506.batches": "${this.sample_sets.sample_set_id}", - "Module0506.ped_file": "${workspace.cohort_ped_file}", - "Module0506.disc_files": "${this.sample_sets.merged_PE}", - "Module0506.bincov_files": "${this.sample_sets.merged_bincov}", - "Module0506.median_coverage_files": "${this.sample_sets.median_cov}", - "Module0506.rf_cutoff_files": "${this.sample_sets.cutoffs}", - "Module0506.pesr_vcfs": "${this.sample_sets.genotyped_pesr_vcf}", - "Module0506.depth_vcfs": "${this.regenotyped_depth_vcfs}", - "Module0506.depth_gt_rd_sep_files": "${this.sample_sets.trained_genotype_depth_depth_sepcutoff}", - "Module0506.raw_sr_bothside_pass_files": "${this.sample_sets.sr_bothside_pass}", - "Module0506.raw_sr_background_fail_files": "${this.sample_sets.sr_background_fail}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.SingleBatch.json.tmpl deleted file mode 100644 index df7b27140..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.SingleBatch.json.tmpl +++ /dev/null @@ -1,24 +0,0 @@ -{ - "Module08Annotation.vcf" : "${this.vcf}", - "Module08Annotation.vcf_idx" : "${this.vcf_index}", - - "Module08Annotation.protein_coding_gtf" : "${workspace.protein_coding_gtf}", - "Module08Annotation.linc_rna_gtf" : "${workspace.linc_rna_gtf}", - "Module08Annotation.promoter_bed" : "${workspace.promoter_bed}", - "Module08Annotation.noncoding_bed" : "${workspace.noncoding_bed}", - "Module08Annotation.ref_bed" : "${workspace.external_af_ref_bed}", - "Module08Annotation.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", - "Module08Annotation.population" : {{ reference_resources.external_af_population | tojson }}, - - - 
"Module08Annotation.contig_list" : "${workspace.primary_contigs_list}", - "Module08Annotation.ped_file": "${workspace.cohort_ped_file}", - "Module08Annotation.sv_per_shard" : "5000", - "Module08Annotation.max_shards_per_chrom_step1" : 200, - "Module08Annotation.min_records_per_shard_step1" : 5000, - - "Module08Annotation.prefix" : "${this.sample_set_id}", - - "Module08Annotation.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", - "Module08Annotation.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" -} \ No newline at end of file diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.json.tmpl deleted file mode 100644 index 8da20be40..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/Module08Annotation.json.tmpl +++ /dev/null @@ -1,24 +0,0 @@ -{ - "Module08Annotation.vcf" : "${this.vcf}", - "Module08Annotation.vcf_idx" : "${this.vcf_index}", - - "Module08Annotation.protein_coding_gtf" : "${workspace.protein_coding_gtf}", - "Module08Annotation.linc_rna_gtf" : "${workspace.linc_rna_gtf}", - "Module08Annotation.promoter_bed" : "${workspace.promoter_bed}", - "Module08Annotation.noncoding_bed" : "${workspace.noncoding_bed}", - "Module08Annotation.ref_bed" : "${workspace.external_af_ref_bed}", - "Module08Annotation.ref_prefix" : "${workspace.external_af_ref_bed_prefix}", - "Module08Annotation.population" : {{ reference_resources.external_af_population | tojson }}, - - - "Module08Annotation.contig_list" : "${workspace.primary_contigs_list}", - "Module08Annotation.ped_file": "${workspace.cohort_ped_file}", - "Module08Annotation.sv_per_shard" : "5000", - "Module08Annotation.max_shards_per_chrom_step1" : 200, - "Module08Annotation.min_records_per_shard_step1" : 5000, - - "Module08Annotation.prefix" : "${this.sample_set_set_id}", - - "Module08Annotation.sv_base_mini_docker" : "${workspace.sv_base_mini_docker}", - "Module08Annotation.sv_pipeline_docker" : "${workspace.sv_pipeline_docker}" -} \ No newline at end of file diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.SingleBatch.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.SingleBatch.json.tmpl new file mode 100644 index 000000000..2c8307867 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.SingleBatch.json.tmpl @@ -0,0 +1,25 @@ +{ + "RegenotypeCNVs.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "RegenotypeCNVs.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + "RegenotypeCNVs.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "RegenotypeCNVs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "RegenotypeCNVs.n_RdTest_bins": "100000", + "RegenotypeCNVs.n_per_split": "5000", + + "RegenotypeCNVs.cohort": "${this.sample_set_id}", + "RegenotypeCNVs.contig_list": "${workspace.primary_contigs_list}", + "RegenotypeCNVs.regeno_coverage_medians": "${this.regeno_coverage_medians}", + + "RegenotypeCNVs.RD_depth_sepcutoffs": "${this.trained_genotype_depth_depth_sepcutoff}", + + "RegenotypeCNVs.cohort_depth_vcf": "${this.filtered_depth_vcf}", + + "RegenotypeCNVs.ped_file": "${workspace.cohort_ped_file}", + "RegenotypeCNVs.batch_depth_vcfs": "${this.filtered_depth_vcf}", + + "RegenotypeCNVs.depth_vcfs": "${this.genotyped_depth_vcf}", + "RegenotypeCNVs.coveragefiles": 
"${this.merged_bincov}", + "RegenotypeCNVs.coveragefile_idxs": "${this.merged_bincov_index}", + "RegenotypeCNVs.medianfiles": "${this.median_cov}", + "RegenotypeCNVs.batches": "${this.sample_set_id}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.json.tmpl new file mode 100644 index 000000000..82584f798 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/RegenotypeCNVs.json.tmpl @@ -0,0 +1,25 @@ +{ + "RegenotypeCNVs.sv_pipeline_rdtest_docker": "${workspace.sv_pipeline_rdtest_docker}", + "RegenotypeCNVs.sv_pipeline_base_docker": "${workspace.sv_pipeline_base_docker}", + "RegenotypeCNVs.sv_base_mini_docker": "${workspace.sv_base_mini_docker}", + "RegenotypeCNVs.sv_pipeline_docker": "${workspace.sv_pipeline_docker}", + "RegenotypeCNVs.n_RdTest_bins": "100000", + "RegenotypeCNVs.n_per_split": "5000", + + "RegenotypeCNVs.cohort": "${this.sample_set_set_id}", + "RegenotypeCNVs.contig_list": "${workspace.primary_contigs_list}", + "RegenotypeCNVs.regeno_coverage_medians": "${this.sample_sets.regeno_coverage_medians}", + + "RegenotypeCNVs.RD_depth_sepcutoffs": "${this.sample_sets.trained_genotype_depth_depth_sepcutoff}", + + "RegenotypeCNVs.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", + + "RegenotypeCNVs.ped_file": "${workspace.cohort_ped_file}", + "RegenotypeCNVs.batch_depth_vcfs": "${this.sample_sets.filtered_depth_vcf}", + + "RegenotypeCNVs.depth_vcfs": "${this.sample_sets.genotyped_depth_vcf}", + "RegenotypeCNVs.coveragefiles": "${this.sample_sets.merged_bincov}", + "RegenotypeCNVs.coveragefile_idxs": "${this.sample_sets.merged_bincov_index}", + "RegenotypeCNVs.medianfiles": "${this.sample_sets.median_cov}", + "RegenotypeCNVs.batches": "${this.sample_sets.sample_set_id}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/ClusterBatchOutputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/ClusterBatchOutputs.json.tmpl new file mode 100644 index 000000000..0712581a0 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/ClusterBatchOutputs.json.tmpl @@ -0,0 +1,7 @@ +{ + "ClusterBatch.delly_vcf":"${this.clustered_delly_vcf}", + "ClusterBatch.depth_vcf":"${this.clustered_depth_vcf}", + "ClusterBatch.manta_vcf":"${this.clustered_manta_vcf}", + "ClusterBatch.melt_vcf":"${this.clustered_melt_vcf}", + "ClusterBatch.wham_vcf":"${this.clustered_wham_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/GatherBatchEvidenceOutputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/GatherBatchEvidenceOutputs.json.tmpl new file mode 100644 index 000000000..c69b83853 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/GatherBatchEvidenceOutputs.json.tmpl @@ -0,0 +1,34 @@ +{ + "GatherBatchEvidence.BAF_stats": "${this.BAF_stats}", + "GatherBatchEvidence.Matrix_QC_plot": "${this.Matrix_QC_plot}", + "GatherBatchEvidence.PE_stats": "${this.PE_stats}", + "GatherBatchEvidence.RD_stats": "${this.RD_stats}", + "GatherBatchEvidence.SR_stats": "${this.SR_stats}", + "GatherBatchEvidence.cnmops_del": "${this.cnmops_del}", + "GatherBatchEvidence.cnmops_del_index": "${this.cnmops_del_index}", + 
"GatherBatchEvidence.cnmops_dup": "${this.cnmops_dup}", + "GatherBatchEvidence.cnmops_dup_index": "${this.cnmops_dup_index}", + "GatherBatchEvidence.cnmops_large_del": "${this.cnmops_large_del}", + "GatherBatchEvidence.cnmops_large_del_index": "${this.cnmops_large_del_index}", + "GatherBatchEvidence.cnmops_large_dup": "${this.cnmops_large_dup}", + "GatherBatchEvidence.cnmops_large_dup_index": "${this.cnmops_large_dup_index}", + "GatherBatchEvidence.combined_ped_file": "${this.combined_ped_file}", + "GatherBatchEvidence.manta_tloc": "${this.manta_tloc}", + "GatherBatchEvidence.median_cov": "${this.median_cov}", + "GatherBatchEvidence.merged_BAF": "${this.merged_BAF}", + "GatherBatchEvidence.merged_BAF_index": "${this.merged_BAF_index}", + "GatherBatchEvidence.merged_PE": "${this.merged_PE}", + "GatherBatchEvidence.merged_PE_index": "${this.merged_PE_index}", + "GatherBatchEvidence.merged_SR": "${this.merged_SR}", + "GatherBatchEvidence.merged_SR_index": "${this.merged_SR_index}", + "GatherBatchEvidence.merged_bincov": "${this.merged_bincov}", + "GatherBatchEvidence.merged_bincov_index": "${this.merged_bincov_index}", + "GatherBatchEvidence.merged_dels": "${this.merged_dels}", + "GatherBatchEvidence.merged_dups": "${this.merged_dups}", + "GatherBatchEvidence.ploidy_matrix": "${this.ploidy_matrix_00c}", + "GatherBatchEvidence.ploidy_plots": "${this.ploidy_plots_00c}", + "GatherBatchEvidence.std_delly_vcf": "${this.std_delly_vcf}", + "GatherBatchEvidence.std_manta_vcf": "${this.std_manta_vcf}", + "GatherBatchEvidence.std_melt_vcf": "${this.std_melt_vcf}", + "GatherBatchEvidence.std_wham_vcf": "${this.std_wham_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeBatchSitesOutputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeBatchSitesOutputs.json.tmpl new file mode 100644 index 000000000..6e01687d4 --- /dev/null +++ b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeBatchSitesOutputs.json.tmpl @@ -0,0 +1,4 @@ +{ + "MergeBatchSites.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", + "MergeBatchSites.cohort_pesr_vcf": "${workspace.cohort_pesr_vcf}" +} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeCohortVcfsOutputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeCohortVcfsOutputs.json.tmpl deleted file mode 100644 index 2336c99ee..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/MergeCohortVcfsOutputs.json.tmpl +++ /dev/null @@ -1,4 +0,0 @@ -{ - "MergeCohortVcfs.cohort_depth_vcf": "${workspace.cohort_depth_vcf}", - "MergeCohortVcfs.cohort_pesr_vcf": "${workspace.cohort_pesr_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module00cOutputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module00cOutputs.json.tmpl deleted file mode 100644 index ece5aa61b..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module00cOutputs.json.tmpl +++ /dev/null @@ -1,34 +0,0 @@ -{ - "Module00c.BAF_stats": "${this.BAF_stats}", - "Module00c.Matrix_QC_plot": "${this.Matrix_QC_plot}", - "Module00c.PE_stats": "${this.PE_stats}", - "Module00c.RD_stats": "${this.RD_stats}", - "Module00c.SR_stats": 
"${this.SR_stats}", - "Module00c.cnmops_del": "${this.cnmops_del}", - "Module00c.cnmops_del_index": "${this.cnmops_del_index}", - "Module00c.cnmops_dup": "${this.cnmops_dup}", - "Module00c.cnmops_dup_index": "${this.cnmops_dup_index}", - "Module00c.cnmops_large_del": "${this.cnmops_large_del}", - "Module00c.cnmops_large_del_index": "${this.cnmops_large_del_index}", - "Module00c.cnmops_large_dup": "${this.cnmops_large_dup}", - "Module00c.cnmops_large_dup_index": "${this.cnmops_large_dup_index}", - "Module00c.combined_ped_file": "${this.combined_ped_file}", - "Module00c.manta_tloc": "${this.manta_tloc}", - "Module00c.median_cov": "${this.median_cov}", - "Module00c.merged_BAF": "${this.merged_BAF}", - "Module00c.merged_BAF_index": "${this.merged_BAF_index}", - "Module00c.merged_PE": "${this.merged_PE}", - "Module00c.merged_PE_index": "${this.merged_PE_index}", - "Module00c.merged_SR": "${this.merged_SR}", - "Module00c.merged_SR_index": "${this.merged_SR_index}", - "Module00c.merged_bincov": "${this.merged_bincov}", - "Module00c.merged_bincov_index": "${this.merged_bincov_index}", - "Module00c.merged_dels": "${this.merged_dels}", - "Module00c.merged_dups": "${this.merged_dups}", - "Module00c.ploidy_matrix": "${this.ploidy_matrix_00c}", - "Module00c.ploidy_plots": "${this.ploidy_plots_00c}", - "Module00c.std_delly_vcf": "${this.std_delly_vcf}", - "Module00c.std_manta_vcf": "${this.std_manta_vcf}", - "Module00c.std_melt_vcf": "${this.std_melt_vcf}", - "Module00c.std_wham_vcf": "${this.std_wham_vcf}" -} diff --git a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module01Outputs.json.tmpl b/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module01Outputs.json.tmpl deleted file mode 100644 index 4a65a8bbf..000000000 --- a/input_templates/terra_workspaces/cohort_mode/workflow_configurations/output_configurations/Module01Outputs.json.tmpl +++ /dev/null @@ -1,7 +0,0 @@ -{ - "Module01.delly_vcf":"${this.clustered_delly_vcf}", - "Module01.depth_vcf":"${this.clustered_depth_vcf}", - "Module01.manta_vcf":"${this.clustered_manta_vcf}", - "Module01.melt_vcf":"${this.clustered_melt_vcf}", - "Module01.wham_vcf":"${this.clustered_wham_vcf}" -} diff --git a/scripts/test/validate.sh b/scripts/test/validate.sh index cef1baf1d..e88d6302b 100755 --- a/scripts/test/validate.sh +++ b/scripts/test/validate.sh @@ -84,7 +84,7 @@ echo "" echo "#############################################################" echo "${COUNTER} TESTS PASSED SUCCESSFULLY!" 
-if [ $TERRA_VALIDATION = true ]; then +if [ "$TERRA_VALIDATION" = true ]; then echo "" echo "#############################################################" echo "RUNNING TERRA INPUT VALIDATION NOW" diff --git a/test_input_templates/AnnotateVcf/AnnotateVcf.json.tmpl b/test_input_templates/AnnotateVcf/AnnotateVcf.json.tmpl new file mode 100644 index 000000000..4169361fe --- /dev/null +++ b/test_input_templates/AnnotateVcf/AnnotateVcf.json.tmpl @@ -0,0 +1,24 @@ +{ + "AnnotateVcf.vcf" : "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/61a7ce7c-3b3c-4716-977a-ffb6e34464b6/minGQ_filter_workflow_v2/d841eeb6-90c3-4ff2-8b99-7a793c85cfea/call-combine_vcfs/Talkowski_SV_PCR-free_WGS_144.minGQ_filtered.vcf.gz", + "AnnotateVcf.vcf_idx" : "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/61a7ce7c-3b3c-4716-977a-ffb6e34464b6/minGQ_filter_workflow_v2/d841eeb6-90c3-4ff2-8b99-7a793c85cfea/call-combine_vcfs/Talkowski_SV_PCR-free_WGS_144.minGQ_filtered.vcf.gz.tbi", + + "AnnotateVcf.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, + "AnnotateVcf.linc_rna_gtf" : {{ reference_resources.linc_rna_gtf | tojson }}, + "AnnotateVcf.promoter_bed" : {{ reference_resources.promoter_bed | tojson }}, + "AnnotateVcf.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, + "AnnotateVcf.ref_bed" : {{ reference_resources.external_af_ref_bed | tojson }}, + "AnnotateVcf.ref_prefix" : {{ reference_resources.external_af_ref_bed_prefix | tojson }}, + "AnnotateVcf.population" : {{ reference_resources.external_af_population | tojson }}, + + + "AnnotateVcf.contig_list" : {{ reference_resources.primary_contigs_list | tojson }}, + "AnnotateVcf.ped_file": "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/FINAL_full_prenatal_dosage_sex.ped", + "AnnotateVcf.sv_per_shard" : "5000", + "AnnotateVcf.max_shards_per_chrom_step1" : 200, + "AnnotateVcf.min_records_per_shard_step1" : 5000, + + "AnnotateVcf.prefix" : {{ test_batch.batch_name | tojson }}, + + "AnnotateVcf.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "AnnotateVcf.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} +} diff --git a/test_input_templates/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl b/test_input_templates/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl new file mode 100644 index 000000000..3766a0317 --- /dev/null +++ b/test_input_templates/AnnotateVcf/GenerateFunctionalAnnotationResources.json.tmpl @@ -0,0 +1,12 @@ +{ + "GenerateFunctionalAnnotationResources.gencode_annotation_gtf": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.annotation.gtf.gz", + "GenerateFunctionalAnnotationResources.gencode_pc_transcripts_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_transcripts.fa.gz", + "GenerateFunctionalAnnotationResources.gencode_pc_translations_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_translations.fa.gz", + "GenerateFunctionalAnnotationResources.gencode_transcript_source": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.metadata.Transcript_source", + + "GenerateFunctionalAnnotationResources.promoter_window": 1000, + + "GenerateFunctionalAnnotationResources.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GenerateFunctionalAnnotationResources.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} +} + diff --git a/test_input_templates/module08/PrepareGencode.wdl.example.json.tmpl b/test_input_templates/AnnotateVcf/PrepareGencode.json.tmpl similarity index 100% 
rename from test_input_templates/module08/PrepareGencode.wdl.example.json.tmpl rename to test_input_templates/AnnotateVcf/PrepareGencode.json.tmpl diff --git a/test_input_templates/module08/PrepareNoncoding.wdl.example.json.tmpl b/test_input_templates/AnnotateVcf/PrepareNoncoding.json.tmpl similarity index 100% rename from test_input_templates/module08/PrepareNoncoding.wdl.example.json.tmpl rename to test_input_templates/AnnotateVcf/PrepareNoncoding.json.tmpl diff --git a/test_input_templates/ClusterBatch/ClusterBatch.json.tmpl b/test_input_templates/ClusterBatch/ClusterBatch.json.tmpl new file mode 100644 index 000000000..87319fd53 --- /dev/null +++ b/test_input_templates/ClusterBatch/ClusterBatch.json.tmpl @@ -0,0 +1,26 @@ +{ + "ClusterBatch.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "ClusterBatch.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + + "ClusterBatch.contigs": {{ reference_resources.primary_contigs_fai | tojson }}, + "ClusterBatch.depth_flags": "--merge-coordinates", + "ClusterBatch.depth_frac": "0.8", + "ClusterBatch.pesr_svsize": "0", + "ClusterBatch.pesr_frac": "0.1", + "ClusterBatch.pesr_flags": "--preserve-ids", + "ClusterBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "ClusterBatch.pesr_distance": "300", + "ClusterBatch.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "ClusterBatch.depth_exclude_list_frac_max": "0.5", + + "ClusterBatch.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + "ClusterBatch.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + "ClusterBatch.linux_docker": {{ dockers.linux_docker | tojson }}, + + "ClusterBatch.batch": {{ test_batch.batch_name | tojson }}, + "ClusterBatch.del_bed": {{ test_batch.del_bed | tojson }}, + "ClusterBatch.dup_bed": {{ test_batch.dup_bed | tojson }}, + "ClusterBatch.wham_vcfs": {{ test_batch.std_wham_vcfs | tojson }}, + "ClusterBatch.manta_vcfs": {{ test_batch.std_manta_vcfs | tojson }}, + "ClusterBatch.melt_vcfs": {{ test_batch.std_melt_vcfs | tojson }} +} diff --git a/test_input_templates/EvidenceQC/EvidenceQC.json.tmpl b/test_input_templates/EvidenceQC/EvidenceQC.json.tmpl new file mode 100644 index 000000000..c694b6020 --- /dev/null +++ b/test_input_templates/EvidenceQC/EvidenceQC.json.tmpl @@ -0,0 +1,16 @@ +{ + "EvidenceQC.run_vcf_qc" : "true", + "EvidenceQC.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "EvidenceQC.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "EvidenceQC.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, + "EvidenceQC.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, + "EvidenceQC.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, + "EvidenceQC.genome_file": {{ reference_resources.genome_file | tojson }}, + + "EvidenceQC.batch": {{ test_batch.batch_name | tojson }}, + "EvidenceQC.counts": {{ test_batch.counts | tojson }}, + "EvidenceQC.manta_vcfs": {{ test_batch.manta_vcfs | tojson }}, + "EvidenceQC.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, + "EvidenceQC.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, + "EvidenceQC.samples": {{ test_batch.samples | tojson }} +} diff --git a/test_input_templates/FilterBatch/FilterBatch.json.tmpl b/test_input_templates/FilterBatch/FilterBatch.json.tmpl new file mode 100644 index 000000000..3868870e4 --- /dev/null +++ b/test_input_templates/FilterBatch/FilterBatch.json.tmpl @@ -0,0 +1,20 @@ +{ +
"FilterBatch.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "FilterBatch.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "FilterBatch.linux_docker" : {{ dockers.linux_docker | tojson }}, + + "FilterBatch.outlier_cutoff_nIQR": "10000", + + "FilterBatch.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + "FilterBatch.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + "FilterBatch.ped_file": {{ test_batch.ped_file | tojson }}, + + "FilterBatch.batch": {{ test_batch.batch_name | tojson }}, + "FilterBatch.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }}, + "FilterBatch.depth_vcf" : {{ test_batch.merged_depth_vcf | tojson }}, + "FilterBatch.manta_vcf" : {{ test_batch.merged_manta_vcf | tojson }}, + "FilterBatch.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }}, + "FilterBatch.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }}, + "FilterBatch.evidence_metrics": {{ test_batch.evidence_metrics | tojson }}, + "FilterBatch.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }} +} diff --git a/test_input_templates/FilterBatch/FilterBatchQc.json.tmpl b/test_input_templates/FilterBatch/FilterBatchQc.json.tmpl new file mode 100644 index 000000000..26b2a9fd2 --- /dev/null +++ b/test_input_templates/FilterBatch/FilterBatchQc.json.tmpl @@ -0,0 +1,23 @@ +{ + "FilterBatchQc.contig_list": {{ reference_resources.primary_contigs_fai | tojson }}, + + "FilterBatchQc.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, + "FilterBatchQc.hgsv_tarballs": {{ reference_resources.hgsv_tarballs | tojson }}, + "FilterBatchQc.asc_tarballs": {{ reference_resources.asc_tarballs | tojson }}, + "FilterBatchQc.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, + "FilterBatchQc.collins_2017_tarball": {{ reference_resources.collins_2017_tarball | tojson }}, + "FilterBatchQc.werling_2018_tarball": {{ reference_resources.werling_2018_tarball | tojson }}, + + "FilterBatchQc.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "FilterBatchQc.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "FilterBatchQc.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, + + "FilterBatchQc.batch": {{ test_batch.batch_name | tojson }}, + + "FilterBatchQc.ped_file": {{ test_batch.ped_file | tojson }}, + "FilterBatchQc.depth_vcf_noOutliers": {{ test_batch.filtered_depth_vcf | tojson }}, + "FilterBatchQc.merged_pesr_vcf": {{ test_batch.filtered_pesr_vcf | tojson }} +} + + + diff --git a/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.baf_from_vcf.json.tmpl b/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.baf_from_vcf.json.tmpl new file mode 100644 index 000000000..3f71b6620 --- /dev/null +++ b/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.baf_from_vcf.json.tmpl @@ -0,0 +1,64 @@ +{ + "GatherBatchEvidence.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GatherBatchEvidence.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, + "GatherBatchEvidence.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "GatherBatchEvidence.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, + "GatherBatchEvidence.cnmops_docker": {{ dockers.cnmops_docker | tojson }}, + "GatherBatchEvidence.linux_docker" : {{ dockers.linux_docker | tojson }}, + "GatherBatchEvidence.gatk_docker": {{ dockers.gatk_docker | tojson }}, + 
"GatherBatchEvidence.gcnv_gatk_docker" : {{ dockers.gatk_docker | tojson }}, + "GatherBatchEvidence.condense_counts_docker" : {{ dockers.condense_counts_docker | tojson }}, + "GatherBatchEvidence.cytoband":{{ reference_resources.cytobands | tojson }}, + "GatherBatchEvidence.mei_bed":{{ reference_resources.mei_bed | tojson }}, + "GatherBatchEvidence.cnmops_allo_file": {{ reference_resources.allosome_file | tojson }}, + "GatherBatchEvidence.cnmops_exclude_list": {{ reference_resources.cnmops_exclude_list | tojson }}, + "GatherBatchEvidence.cnmops_chrom_file": {{ reference_resources.autosome_file | tojson }}, + "GatherBatchEvidence.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, + "GatherBatchEvidence.genome_file": {{ reference_resources.genome_file | tojson }}, + "GatherBatchEvidence.inclusion_bed": {{ reference_resources.inclusion_bed | tojson }}, + "GatherBatchEvidence.matrix_qc_distance": "1000000", + "GatherBatchEvidence.min_svsize": "50", + "GatherBatchEvidence.run_matrix_qc": "true", + + "GatherBatchEvidence.unpadded_intervals_file" : {{ reference_resources.unpadded_intervals_file | tojson }}, + "GatherBatchEvidence.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, + "GatherBatchEvidence.ref_fasta": {{ reference_resources.reference_fasta | tojson }}, + "GatherBatchEvidence.ref_fasta_index": {{ reference_resources.reference_index | tojson }}, + "GatherBatchEvidence.ref_dict": {{ reference_resources.reference_dict | tojson }}, + + "GatherBatchEvidence.ploidy_sample_psi_scale": "0.001", + "GatherBatchEvidence.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }}, + "GatherBatchEvidence.gcnv_learning_rate" : 0.03, + "GatherBatchEvidence.gcnv_num_thermal_advi_iters" : 250, + "GatherBatchEvidence.gcnv_max_advi_iter_first_epoch" : 1000, + "GatherBatchEvidence.gcnv_max_advi_iter_subsequent_epochs" : 200, + "GatherBatchEvidence.gcnv_max_training_epochs" : 5, + "GatherBatchEvidence.gcnv_min_training_epochs" : 1, + "GatherBatchEvidence.gcnv_convergence_snr_averaging_window" : 100, + "GatherBatchEvidence.gcnv_convergence_snr_countdown_window" : 10, + "GatherBatchEvidence.gcnv_cnv_coherence_length" : 1000, + "GatherBatchEvidence.gcnv_copy_number_posterior_expectation_mode" : "EXACT", + "GatherBatchEvidence.gcnv_log_emission_sampling_rounds" : 20, + "GatherBatchEvidence.gcnv_p_alt" : 0.000001, + "GatherBatchEvidence.gcnv_sample_psi_scale" : 0.000001, + "GatherBatchEvidence.ref_copy_number_autosomal_contigs": {{ reference_resources.copy_number_autosomal_contigs | tojson }}, + "GatherBatchEvidence.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, + "GatherBatchEvidence.gcnv_caller_internal_admixing_rate": "0.5", + "GatherBatchEvidence.gcnv_caller_update_convergence_threshold": "0.000001", + "GatherBatchEvidence.gcnv_convergence_snr_trigger_threshold": "0.2", + "GatherBatchEvidence.gcnv_depth_correction_tau": "10000", + "GatherBatchEvidence.gcnv_log_emission_sampling_median_rel_error": "0.001", + "GatherBatchEvidence.gcnv_qs_cutoff": "30", + + "GatherBatchEvidence.batch": {{ test_batch.batch_name | tojson }}, + "GatherBatchEvidence.ped_file": {{ test_batch.ped_file | tojson }}, + "GatherBatchEvidence.gcnv_model_tars" : {{ test_batch.gcnv_model_tars | tojson }}, + "GatherBatchEvidence.PE_files": {{ test_batch.PE_files | tojson }}, + "GatherBatchEvidence.SR_files": {{ test_batch.SR_files | tojson }}, + "GatherBatchEvidence.counts": {{ test_batch.counts | tojson }}, + "GatherBatchEvidence.manta_vcfs": {{ 
test_batch.manta_vcfs | tojson }}, + "GatherBatchEvidence.samples": {{ test_batch.samples | tojson }}, + "GatherBatchEvidence.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, + "GatherBatchEvidence.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, + "GatherBatchEvidence.snp_vcfs": {{ test_batch.snp_vcfs | tojson }} +} diff --git a/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.json.tmpl b/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.json.tmpl new file mode 100644 index 000000000..56ca69d23 --- /dev/null +++ b/test_input_templates/GatherBatchEvidence/GatherBatchEvidence.json.tmpl @@ -0,0 +1,64 @@ +{ + "GatherBatchEvidence.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GatherBatchEvidence.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, + "GatherBatchEvidence.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "GatherBatchEvidence.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, + "GatherBatchEvidence.cnmops_docker": {{ dockers.cnmops_docker | tojson }}, + "GatherBatchEvidence.linux_docker" : {{ dockers.linux_docker | tojson }}, + "GatherBatchEvidence.gatk_docker": {{ dockers.gatk_docker | tojson }}, + "GatherBatchEvidence.gcnv_gatk_docker" : {{ dockers.gatk_docker | tojson }}, + "GatherBatchEvidence.condense_counts_docker" : {{ dockers.condense_counts_docker | tojson }}, + "GatherBatchEvidence.cytoband":{{ reference_resources.cytobands | tojson }}, + "GatherBatchEvidence.mei_bed":{{ reference_resources.mei_bed | tojson }}, + "GatherBatchEvidence.cnmops_allo_file": {{ reference_resources.allosome_file | tojson }}, + "GatherBatchEvidence.cnmops_exclude_list": {{ reference_resources.cnmops_exclude_list | tojson }}, + "GatherBatchEvidence.cnmops_chrom_file": {{ reference_resources.autosome_file | tojson }}, + "GatherBatchEvidence.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, + "GatherBatchEvidence.genome_file": {{ reference_resources.genome_file | tojson }}, + "GatherBatchEvidence.inclusion_bed": {{ reference_resources.inclusion_bed | tojson }}, + "GatherBatchEvidence.matrix_qc_distance": "1000000", + "GatherBatchEvidence.min_svsize": "50", + "GatherBatchEvidence.run_matrix_qc": "true", + + "GatherBatchEvidence.unpadded_intervals_file" : {{ reference_resources.unpadded_intervals_file | tojson }}, + "GatherBatchEvidence.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, + "GatherBatchEvidence.ref_fasta": {{ reference_resources.reference_fasta | tojson }}, + "GatherBatchEvidence.ref_fasta_index": {{ reference_resources.reference_index | tojson }}, + "GatherBatchEvidence.ref_dict": {{ reference_resources.reference_dict | tojson }}, + + "GatherBatchEvidence.ploidy_sample_psi_scale": "0.001", + "GatherBatchEvidence.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }}, + "GatherBatchEvidence.gcnv_learning_rate" : 0.03, + "GatherBatchEvidence.gcnv_num_thermal_advi_iters" : 250, + "GatherBatchEvidence.gcnv_max_advi_iter_first_epoch" : 1000, + "GatherBatchEvidence.gcnv_max_advi_iter_subsequent_epochs" : 200, + "GatherBatchEvidence.gcnv_max_training_epochs" : 5, + "GatherBatchEvidence.gcnv_min_training_epochs" : 1, + "GatherBatchEvidence.gcnv_convergence_snr_averaging_window" : 100, + "GatherBatchEvidence.gcnv_convergence_snr_countdown_window" : 10, + "GatherBatchEvidence.gcnv_cnv_coherence_length" : 1000, + "GatherBatchEvidence.gcnv_copy_number_posterior_expectation_mode" : "EXACT", + "GatherBatchEvidence.gcnv_log_emission_sampling_rounds" : 20, + 
"GatherBatchEvidence.gcnv_p_alt" : 0.000001, + "GatherBatchEvidence.gcnv_sample_psi_scale" : 0.000001, + "GatherBatchEvidence.ref_copy_number_autosomal_contigs": {{ reference_resources.copy_number_autosomal_contigs | tojson }}, + "GatherBatchEvidence.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, + "GatherBatchEvidence.gcnv_caller_internal_admixing_rate": "0.5", + "GatherBatchEvidence.gcnv_caller_update_convergence_threshold": "0.000001", + "GatherBatchEvidence.gcnv_convergence_snr_trigger_threshold": "0.2", + "GatherBatchEvidence.gcnv_depth_correction_tau": "10000", + "GatherBatchEvidence.gcnv_log_emission_sampling_median_rel_error": "0.001", + "GatherBatchEvidence.gcnv_qs_cutoff": "30", + + "GatherBatchEvidence.batch": {{ test_batch.batch_name | tojson }}, + "GatherBatchEvidence.ped_file": {{ test_batch.ped_file | tojson }}, + "GatherBatchEvidence.gcnv_model_tars" : {{ test_batch.gcnv_model_tars | tojson }}, + "GatherBatchEvidence.PE_files": {{ test_batch.PE_files | tojson }}, + "GatherBatchEvidence.SR_files": {{ test_batch.SR_files | tojson }}, + "GatherBatchEvidence.counts": {{ test_batch.counts | tojson }}, + "GatherBatchEvidence.manta_vcfs": {{ test_batch.manta_vcfs | tojson }}, + "GatherBatchEvidence.samples": {{ test_batch.samples | tojson }}, + "GatherBatchEvidence.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, + "GatherBatchEvidence.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, + "GatherBatchEvidence.gvcfs": {{ test_batch.gvcfs | tojson }} +} diff --git a/test_input_templates/GatherSampleEvidence/GatherSampleEvidenceBatch.json.tmpl b/test_input_templates/GatherSampleEvidence/GatherSampleEvidenceBatch.json.tmpl new file mode 100644 index 000000000..e7f66ae45 --- /dev/null +++ b/test_input_templates/GatherSampleEvidence/GatherSampleEvidenceBatch.json.tmpl @@ -0,0 +1,38 @@ +{ + "GatherSampleEvidenceBatch.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + "GatherSampleEvidenceBatch.reference_fasta": {{ reference_resources.reference_fasta | tojson }}, + "GatherSampleEvidenceBatch.reference_index": {{ reference_resources.reference_index | tojson }}, + "GatherSampleEvidenceBatch.reference_dict": {{ reference_resources.reference_dict | tojson }}, + "GatherSampleEvidenceBatch.reference_version": {{ reference_resources.reference_version | tojson }}, + + "GatherSampleEvidenceBatch.collect_coverage": "true", + "GatherSampleEvidenceBatch.collect_pesr": "true", + + "GatherSampleEvidenceBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, + + "GatherSampleEvidenceBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, + "GatherSampleEvidenceBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, + "GatherSampleEvidenceBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, + + "GatherSampleEvidenceBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, + + "GatherSampleEvidenceBatch.samtools_cloud_docker": {{ dockers.samtools_cloud_docker | tojson }}, + "GatherSampleEvidenceBatch.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GatherSampleEvidenceBatch.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "GatherSampleEvidenceBatch.delly_docker": {{ dockers.delly_docker | tojson }}, + "GatherSampleEvidenceBatch.manta_docker": {{ dockers.manta_docker | tojson }}, + "GatherSampleEvidenceBatch.melt_docker" : {{ 
dockers.melt_docker | tojson }}, + "GatherSampleEvidenceBatch.wham_docker": {{ dockers.wham_docker | tojson }}, + "GatherSampleEvidenceBatch.genomes_in_the_cloud_docker" : {{ dockers.genomes_in_the_cloud_docker | tojson }}, + "GatherSampleEvidenceBatch.gatk_docker": {{ dockers.gatk_docker | tojson }}, + "GatherSampleEvidenceBatch.gatk_docker_pesr_override" : {{ dockers.gatk_docker | tojson }}, + "GatherSampleEvidenceBatch.cloud_sdk_docker": {{ dockers.cloud_sdk_docker | tojson }}, + + "GatherSampleEvidenceBatch.batch": {{ test_batch.batch_name | tojson }}, + "GatherSampleEvidenceBatch.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, + "GatherSampleEvidenceBatch.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + "GatherSampleEvidenceBatch.linux_docker": {{ dockers.linux_docker | tojson }}, + + "GatherSampleEvidenceBatch.bam_or_cram_files": {{ test_batch.bam_or_cram_files | tojson }}, + "GatherSampleEvidenceBatch.sample_ids": {{ test_batch.samples | tojson }} +} diff --git a/test_input_templates/GenerateBatchMetrics/GenerateBatchMetrics.json.tmpl b/test_input_templates/GenerateBatchMetrics/GenerateBatchMetrics.json.tmpl new file mode 100644 index 000000000..1fca2a0ed --- /dev/null +++ b/test_input_templates/GenerateBatchMetrics/GenerateBatchMetrics.json.tmpl @@ -0,0 +1,33 @@ +{ + "GenerateBatchMetrics.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "GenerateBatchMetrics.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, + "GenerateBatchMetrics.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GenerateBatchMetrics.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, + "GenerateBatchMetrics.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + "GenerateBatchMetrics.linux_docker" : {{ dockers.linux_docker | tojson }}, + + "GenerateBatchMetrics.BAF_split_size": "10000", + "GenerateBatchMetrics.RD_split_size": "10000", + "GenerateBatchMetrics.PE_split_size": "10000", + "GenerateBatchMetrics.SR_split_size": "1000", + "GenerateBatchMetrics.common_cnv_size_cutoff": "5000", + "GenerateBatchMetrics.allosome_contigs": {{ reference_resources.allosome_file | tojson }}, + "GenerateBatchMetrics.autosome_contigs": {{ reference_resources.autosome_file | tojson }}, + "GenerateBatchMetrics.rmsk": {{ reference_resources.rmsk | tojson }}, + "GenerateBatchMetrics.segdups": {{ reference_resources.segdups | tojson }}, + "GenerateBatchMetrics.ref_dict": {{ reference_resources.reference_dict | tojson }}, + + "GenerateBatchMetrics.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + + "GenerateBatchMetrics.batch": {{ test_batch.batch_name | tojson }}, + "GenerateBatchMetrics.ped_file": {{ test_batch.ped_file | tojson }}, + "GenerateBatchMetrics.discfile": {{ test_batch.merged_disc_file | tojson }}, + "GenerateBatchMetrics.baf_metrics": {{ test_batch.merged_baf_file | tojson }}, + "GenerateBatchMetrics.coveragefile": {{ test_batch.merged_coverage_file | tojson }}, + "GenerateBatchMetrics.splitfile": {{ test_batch.merged_split_file | tojson }}, + "GenerateBatchMetrics.medianfile": {{ test_batch.medianfile | tojson }}, + "GenerateBatchMetrics.depth_vcf" : {{ test_batch.merged_depth_vcf | tojson }}, + "GenerateBatchMetrics.manta_vcf" : {{ test_batch.merged_manta_vcf | tojson }}, + "GenerateBatchMetrics.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }}, + "GenerateBatchMetrics.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }} +} diff --git 
a/test_input_templates/GenotypeBatch/GenotypeBatch.json.tmpl b/test_input_templates/GenotypeBatch/GenotypeBatch.json.tmpl new file mode 100644 index 000000000..a4ff87282 --- /dev/null +++ b/test_input_templates/GenotypeBatch/GenotypeBatch.json.tmpl @@ -0,0 +1,29 @@ +{ + "GenotypeBatch.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "GenotypeBatch.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "GenotypeBatch.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, + "GenotypeBatch.linux_docker" : {{ dockers.linux_docker | tojson }}, + + "GenotypeBatch.n_RD_genotype_bins": "100000", + "GenotypeBatch.n_per_split": "5000", + "GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "GenotypeBatch.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, + "GenotypeBatch.reference_build": {{ reference_resources.reference_build | tojson }}, + "GenotypeBatch.ref_dict": {{ reference_resources.reference_dict | tojson }}, + + "GenotypeBatch.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + "GenotypeBatch.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + + "GenotypeBatch.batch": {{ test_batch.batch_name | tojson }}, + "GenotypeBatch.rf_cutoffs": {{ test_batch.cutoffs | tojson }}, + "GenotypeBatch.batch_depth_vcf": {{ test_batch.filtered_depth_vcf | tojson }}, + "GenotypeBatch.batch_pesr_vcf": {{ test_batch.filtered_pesr_vcf | tojson }}, + "GenotypeBatch.ped_file": {{ test_batch.ped_file | tojson }}, + "GenotypeBatch.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + "GenotypeBatch.discfile": {{ test_batch.merged_disc_file | tojson }}, + "GenotypeBatch.coveragefile": {{ test_batch.merged_coverage_file | tojson }}, + "GenotypeBatch.splitfile": {{ test_batch.merged_split_file | tojson }}, + "GenotypeBatch.medianfile": {{ test_batch.medianfile | tojson }}, + "GenotypeBatch.cohort_depth_vcf": {{ test_batch.cohort_depth_vcf | tojson }}, + "GenotypeBatch.cohort_pesr_vcf": {{ test_batch.cohort_pesr_vcf | tojson }} +} diff --git a/test_input_templates/MakeCohortVcf/MakeCohortVcf.json.tmpl b/test_input_templates/MakeCohortVcf/MakeCohortVcf.json.tmpl new file mode 100644 index 000000000..34e78bcf1 --- /dev/null +++ b/test_input_templates/MakeCohortVcf/MakeCohortVcf.json.tmpl @@ -0,0 +1,68 @@ +{ + "MakeCohortVcf.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + "MakeCohortVcf.contig_list": {{ reference_resources.primary_contigs_fai | tojson }}, + "MakeCohortVcf.allosome_fai": {{ reference_resources.allosome_file | tojson }}, + "MakeCohortVcf.cytobands": {{ reference_resources.cytobands | tojson }}, + "MakeCohortVcf.mei_bed": {{ reference_resources.mei_bed | tojson }}, + "MakeCohortVcf.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "MakeCohortVcf.empty_file" : {{ reference_resources.empty_file | tojson }}, + "MakeCohortVcf.ref_dict": {{ reference_resources.reference_dict | tojson }}, + + "MakeCohortVcf.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, + "MakeCohortVcf.hgsv_tarballs": {{ reference_resources.hgsv_tarballs | tojson }}, + "MakeCohortVcf.asc_tarballs": {{ reference_resources.asc_tarballs | tojson }}, + "MakeCohortVcf.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, + "MakeCohortVcf.collins_2017_tarball": {{ 
reference_resources.collins_2017_tarball | tojson }}, + "MakeCohortVcf.werling_2018_tarball": {{ reference_resources.werling_2018_tarball | tojson }}, + + "MakeCohortVcf.min_sr_background_fail_batches": 0.5, + "MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, + "MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, + "MakeCohortVcf.samples_per_clean_vcf_step2_shard": 100, + "MakeCohortVcf.random_seed": 0, + "MakeCohortVcf.max_shards_per_chrom": 100, + "MakeCohortVcf.min_variants_per_shard": 30, + + "MakeCohortVcf.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, + "MakeCohortVcf.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + + "MakeCohortVcf.linux_docker": {{ dockers.linux_docker | tojson }}, + "MakeCohortVcf.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "MakeCohortVcf.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "MakeCohortVcf.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, + "MakeCohortVcf.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, + + "MakeCohortVcf.cohort_name": {{ test_batch.batch_name | tojson }}, + "MakeCohortVcf.batches": [ + {{ test_batch.batch_name | tojson }} + ], + "MakeCohortVcf.ped_file": {{ test_batch.ped_file | tojson }}, + "MakeCohortVcf.disc_files": [ + {{ test_batch.merged_disc_file | tojson }} + ], + "MakeCohortVcf.bincov_files": [ + {{ test_batch.merged_coverage_file | tojson }} + ], + "MakeCohortVcf.median_coverage_files": [ + {{ test_batch.medianfile | tojson }} + ], + "MakeCohortVcf.rf_cutoff_files": [ + {{ test_batch.cutoffs | tojson }} + ], + "MakeCohortVcf.pesr_vcfs": [ + {{ test_batch.genotyped_pesr_vcf | tojson }} + ], + "MakeCohortVcf.depth_vcfs": [ + {{ test_batch.genotyped_depth_vcf | tojson }} + ], + "MakeCohortVcf.depth_gt_rd_sep_files": [ + {{ test_batch.depth_gt_rd_sep_file | tojson }} + ], + "MakeCohortVcf.raw_sr_bothside_pass_files": [ + {{ test_batch.raw_sr_bothside_pass_files | tojson }} + ], + "MakeCohortVcf.raw_sr_background_fail_files": [ + {{ test_batch.raw_sr_background_fail_files | tojson }} + ] +} diff --git a/test_input_templates/MergeBatchSites/MergeBatchSites.json.tmpl b/test_input_templates/MergeBatchSites/MergeBatchSites.json.tmpl new file mode 100644 index 000000000..2c66ae9a5 --- /dev/null +++ b/test_input_templates/MergeBatchSites/MergeBatchSites.json.tmpl @@ -0,0 +1,6 @@ +{ + "MergeBatchSites.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "MergeBatchSites.pesr_vcfs": [{{ test_batch.filtered_pesr_vcf | tojson }}], + "MergeBatchSites.depth_vcfs": [{{ test_batch.filtered_depth_vcf | tojson }}], + "MergeBatchSites.cohort": {{ test_batch.batch_name | tojson }} +} diff --git a/test_input_templates/RegenotypeCNVs/RegenotypeCNVs.json.tmpl b/test_input_templates/RegenotypeCNVs/RegenotypeCNVs.json.tmpl new file mode 100644 index 000000000..d375ec921 --- /dev/null +++ b/test_input_templates/RegenotypeCNVs/RegenotypeCNVs.json.tmpl @@ -0,0 +1,25 @@ +{ + "RegenotypeCNVs.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, + "RegenotypeCNVs.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, + "RegenotypeCNVs.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, + "RegenotypeCNVs.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, + "RegenotypeCNVs.n_RdTest_bins": "100000", + "RegenotypeCNVs.n_per_split": "5000", + + "RegenotypeCNVs.cohort": {{ test_batch.batch_name | tojson }}, +
"RegenotypeCNVs.contig_list": {{ reference_resources.primary_contigs_list | tojson }}, + "RegenotypeCNVs.regeno_coverage_medians": {{ test_batch.regeno_coverage_medians | tojson }}, + + "RegenotypeCNVs.RD_depth_sepcutoffs": [{{ test_batch.depth_gt_rd_sep_file | tojson }}], + + "RegenotypeCNVs.cohort_depth_vcf": {{ test_batch.cohort_depth_vcf | tojson }}, + + "RegenotypeCNVs.ped_file": {{ test_batch.ped_file | tojson }}, + "RegenotypeCNVs.batch_depth_vcfs": [{{ test_batch.filtered_depth_vcf | tojson }}], + + "RegenotypeCNVs.depth_vcfs": [{{ test_batch.genotyped_depth_vcf | tojson }}], + "RegenotypeCNVs.coveragefiles": [{{ test_batch.merged_coverage_file | tojson }}], + "RegenotypeCNVs.coveragefile_idxs": [{{ test_batch.merged_coverage_file_idx| tojson }}], + "RegenotypeCNVs.medianfiles": [{{ test_batch.medianfile | tojson }}], + "RegenotypeCNVs.batches": [{{ test_batch.batch_name | tojson }}] +} diff --git a/test_input_templates/gcnv/trainGCNV.test.json.tmpl b/test_input_templates/TrainGCNV/TrainGCNV.json.tmpl similarity index 100% rename from test_input_templates/gcnv/trainGCNV.test.json.tmpl rename to test_input_templates/TrainGCNV/TrainGCNV.json.tmpl diff --git a/test_input_templates/batch/GATKSVPipelineBatch.json.tmpl b/test_input_templates/batch/GATKSVPipelineBatch.json.tmpl index eb0bda881..77eadd6d0 100644 --- a/test_input_templates/batch/GATKSVPipelineBatch.json.tmpl +++ b/test_input_templates/batch/GATKSVPipelineBatch.json.tmpl @@ -40,15 +40,15 @@ "GATKSVPipelineBatch.samtools_cloud_docker": {{ dockers.samtools_cloud_docker | tojson }}, "GATKSVPipelineBatch.cloud_sdk_docker": {{ dockers.cloud_sdk_docker | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.reference_version": {{ reference_resources.reference_version | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, - "GATKSVPipelineBatch.Module00aBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.reference_version": {{ reference_resources.reference_version | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, + "GATKSVPipelineBatch.GatherSampleEvidenceBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, - "GATKSVPipelineBatch.Module00b.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, - "GATKSVPipelineBatch.Module00b.run_vcf_qc": "false", + "GATKSVPipelineBatch.EvidenceQC.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, + "GATKSVPipelineBatch.EvidenceQC.run_vcf_qc": "false", 
"GATKSVPipelineBatch.GATKSVPipelinePhase1.unpadded_intervals_file" : {{ reference_resources.unpadded_intervals_file | tojson }}, "GATKSVPipelineBatch.GATKSVPipelinePhase1.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, @@ -105,30 +105,30 @@ "GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }}, "GATKSVPipelineBatch.GATKSVPipelinePhase1.outlier_cutoff_nIQR": "999999", - "GATKSVPipelineBatch.Module04.n_RD_genotype_bins": "100000", - "GATKSVPipelineBatch.Module04.n_per_split": "5000", - "GATKSVPipelineBatch.Module04.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module04.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, - "GATKSVPipelineBatch.Module04.reference_build": "hg38", - "GATKSVPipelineBatch.Module04.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + "GATKSVPipelineBatch.GenotypeBatch.n_RD_genotype_bins": "100000", + "GATKSVPipelineBatch.GenotypeBatch.n_per_split": "5000", + "GATKSVPipelineBatch.GenotypeBatch.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "GATKSVPipelineBatch.GenotypeBatch.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, + "GATKSVPipelineBatch.GenotypeBatch.reference_build": "hg38", + "GATKSVPipelineBatch.GenotypeBatch.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - "GATKSVPipelineBatch.Module04b.n_RdTest_bins": "100000", - "GATKSVPipelineBatch.Module04b.n_per_split": "5000", + "GATKSVPipelineBatch.RegenotypeCNVs.n_RdTest_bins": "100000", + "GATKSVPipelineBatch.RegenotypeCNVs.n_per_split": "5000", - "GATKSVPipelineBatch.Module0506.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - "GATKSVPipelineBatch.Module0506.empty_file" : {{ reference_resources.empty_file | tojson }}, - "GATKSVPipelineBatch.Module0506.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, - "GATKSVPipelineBatch.Module0506.collins_2017_tarball": {{ reference_resources.collins_2017_tarball | tojson }}, - "GATKSVPipelineBatch.Module0506.werling_2018_tarball": {{ reference_resources.werling_2018_tarball | tojson }}, - "GATKSVPipelineBatch.Module0506.cytobands": {{ reference_resources.cytobands | tojson }}, - "GATKSVPipelineBatch.Module0506.mei_bed": {{ reference_resources.mei_bed | tojson }}, - "GATKSVPipelineBatch.Module0506.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module0506.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, - "GATKSVPipelineBatch.Module0506.min_sr_background_fail_batches": 0.5, - "GATKSVPipelineBatch.Module0506.max_shards_per_chrom_clean_vcf_step1": 200, - "GATKSVPipelineBatch.Module0506.min_records_per_shard_clean_vcf_step1": 5000, - "GATKSVPipelineBatch.Module0506.samples_per_clean_vcf_step2_shard": 100, - "GATKSVPipelineBatch.Module0506.random_seed": 0, - "GATKSVPipelineBatch.Module0506.max_shards_per_chrom": 100, - "GATKSVPipelineBatch.Module0506.min_variants_per_shard": 30 + "GATKSVPipelineBatch.MakeCohortVcf.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.empty_file" : {{ reference_resources.empty_file | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.collins_2017_tarball": {{ reference_resources.collins_2017_tarball | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.werling_2018_tarball": {{ 
reference_resources.werling_2018_tarball | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.cytobands": {{ reference_resources.cytobands | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.mei_bed": {{ reference_resources.mei_bed | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, + "GATKSVPipelineBatch.MakeCohortVcf.min_sr_background_fail_batches": 0.5, + "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom_clean_vcf_step1": 200, + "GATKSVPipelineBatch.MakeCohortVcf.min_records_per_shard_clean_vcf_step1": 5000, + "GATKSVPipelineBatch.MakeCohortVcf.samples_per_clean_vcf_step2_shard": 100, + "GATKSVPipelineBatch.MakeCohortVcf.random_seed": 0, + "GATKSVPipelineBatch.MakeCohortVcf.max_shards_per_chrom": 100, + "GATKSVPipelineBatch.MakeCohortVcf.min_variants_per_shard": 30 } diff --git a/test_input_templates/module00a/Module00aBatch.json.tmpl b/test_input_templates/module00a/Module00aBatch.json.tmpl deleted file mode 100644 index a2f02388d..000000000 --- a/test_input_templates/module00a/Module00aBatch.json.tmpl +++ /dev/null @@ -1,38 +0,0 @@ -{ - "Module00aBatch.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module00aBatch.reference_fasta": {{ reference_resources.reference_fasta | tojson }}, - "Module00aBatch.reference_index": {{ reference_resources.reference_index | tojson }}, - "Module00aBatch.reference_dict": {{ reference_resources.reference_dict | tojson }}, - "Module00aBatch.reference_version": {{ reference_resources.reference_version | tojson }}, - - "Module00aBatch.collect_coverage": "true", - "Module00aBatch.collect_pesr": "true", - - "Module00aBatch.preprocessed_intervals": {{ reference_resources.preprocessed_intervals | tojson }}, - - "Module00aBatch.delly_exclude_intervals_file": {{ reference_resources.delly_exclude_intervals_file | tojson }}, - "Module00aBatch.manta_region_bed": {{ reference_resources.manta_region_bed | tojson }}, - "Module00aBatch.melt_standard_vcf_header": {{ reference_resources.melt_std_vcf_header | tojson }}, - - "Module00aBatch.wham_include_list_bed_file": {{ reference_resources.wham_include_list_bed_file | tojson }}, - - "Module00aBatch.samtools_cloud_docker": {{ dockers.samtools_cloud_docker | tojson }}, - "Module00aBatch.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module00aBatch.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module00aBatch.delly_docker": {{ dockers.delly_docker | tojson }}, - "Module00aBatch.manta_docker": {{ dockers.manta_docker | tojson }}, - "Module00aBatch.melt_docker" : {{ dockers.melt_docker | tojson }}, - "Module00aBatch.wham_docker": {{ dockers.wham_docker | tojson }}, - "Module00aBatch.genomes_in_the_cloud_docker" : {{ dockers.genomes_in_the_cloud_docker | tojson }}, - "Module00aBatch.gatk_docker": {{ dockers.gatk_docker | tojson }}, - "Module00aBatch.gatk_docker_pesr_override" : {{ dockers.gatk_docker | tojson }}, - "Module00aBatch.cloud_sdk_docker": {{ dockers.cloud_sdk_docker | tojson }}, - - "Module00aBatch.batch": {{ test_batch.batch_name | tojson }}, - "Module00aBatch.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, - "Module00aBatch.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - "Module00aBatch.linux_docker": {{ dockers.linux_docker | tojson }}, - - "Module00aBatch.bam_or_cram_files": {{ 
test_batch.bam_or_cram_files | tojson }}, - "Module00aBatch.sample_ids": {{ test_batch.samples | tojson }} -} diff --git a/test_input_templates/module00b/Module00b.json.tmpl b/test_input_templates/module00b/Module00b.json.tmpl deleted file mode 100644 index eae3068d1..000000000 --- a/test_input_templates/module00b/Module00b.json.tmpl +++ /dev/null @@ -1,16 +0,0 @@ -{ - "Module00b.run_vcf_qc" : "true", - "Module00b.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module00b.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module00b.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, - "Module00b.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, - "Module00b.wgd_scoring_mask": {{ reference_resources.wgd_scoring_mask | tojson }}, - "Module00b.genome_file": {{ reference_resources.genome_file | tojson }}, - - "Module00b.batch": {{ test_batch.batch_name | tojson }}, - "Module00b.counts": {{ test_batch.counts | tojson }}, - "Module00b.manta_vcfs": {{ test_batch.manta_vcfs | tojson }}, - "Module00b.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, - "Module00b.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, - "Module00b.samples": {{ test_batch.samples | tojson }} -} diff --git a/test_input_templates/module00c/Module00c.baf_from_vcf.json.tmpl b/test_input_templates/module00c/Module00c.baf_from_vcf.json.tmpl deleted file mode 100644 index eab68836c..000000000 --- a/test_input_templates/module00c/Module00c.baf_from_vcf.json.tmpl +++ /dev/null @@ -1,64 +0,0 @@ -{ - "Module00c.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module00c.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, - "Module00c.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module00c.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, - "Module00c.cnmops_docker": {{ dockers.cnmops_docker | tojson }}, - "Module00c.linux_docker" : {{ dockers.linux_docker | tojson }}, - "Module00c.gatk_docker": {{ dockers.gatk_docker | tojson }}, - "Module00c.gcnv_gatk_docker" : {{ dockers.gatk_docker | tojson }}, - "Module00c.condense_counts_docker" : {{ dockers.condense_counts_docker | tojson }}, - "Module00c.cytoband":{{ reference_resources.cytobands | tojson }}, - "Module00c.mei_bed":{{ reference_resources.mei_bed | tojson }}, - "Module00c.cnmops_allo_file": {{ reference_resources.allosome_file | tojson }}, - "Module00c.cnmops_exclude_list": {{ reference_resources.cnmops_exclude_list | tojson }}, - "Module00c.cnmops_chrom_file": {{ reference_resources.autosome_file | tojson }}, - "Module00c.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, - "Module00c.genome_file": {{ reference_resources.genome_file | tojson }}, - "Module00c.inclusion_bed": {{ reference_resources.inclusion_bed | tojson }}, - "Module00c.matrix_qc_distance": "1000000", - "Module00c.min_svsize": "50", - "Module00c.run_matrix_qc": "true", - - "Module00c.unpadded_intervals_file" : {{ reference_resources.unpadded_intervals_file | tojson }}, - "Module00c.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, - "Module00c.ref_fasta": {{ reference_resources.reference_fasta | tojson }}, - "Module00c.ref_fasta_index": {{ reference_resources.reference_index | tojson }}, - "Module00c.ref_dict": {{ reference_resources.reference_dict | tojson }}, - - "Module00c.ploidy_sample_psi_scale": "0.001", - "Module00c.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }}, - "Module00c.gcnv_learning_rate" : 0.03, - 
"Module00c.gcnv_num_thermal_advi_iters" : 250, - "Module00c.gcnv_max_advi_iter_first_epoch" : 1000, - "Module00c.gcnv_max_advi_iter_subsequent_epochs" : 200, - "Module00c.gcnv_max_training_epochs" : 5, - "Module00c.gcnv_min_training_epochs" : 1, - "Module00c.gcnv_convergence_snr_averaging_window" : 100, - "Module00c.gcnv_convergence_snr_countdown_window" : 10, - "Module00c.gcnv_cnv_coherence_length" : 1000, - "Module00c.gcnv_copy_number_posterior_expectation_mode" : "EXACT", - "Module00c.gcnv_log_emission_sampling_rounds" : 20, - "Module00c.gcnv_p_alt" : 0.000001, - "Module00c.gcnv_sample_psi_scale" : 0.000001, - "Module00c.ref_copy_number_autosomal_contigs": {{ reference_resources.copy_number_autosomal_contigs | tojson }}, - "Module00c.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, - "Module00c.gcnv_caller_internal_admixing_rate": "0.5", - "Module00c.gcnv_caller_update_convergence_threshold": "0.000001", - "Module00c.gcnv_convergence_snr_trigger_threshold": "0.2", - "Module00c.gcnv_depth_correction_tau": "10000", - "Module00c.gcnv_log_emission_sampling_median_rel_error": "0.001", - "Module00c.gcnv_qs_cutoff": "30", - - "Module00c.batch": {{ test_batch.batch_name | tojson }}, - "Module00c.ped_file": {{ test_batch.ped_file | tojson }}, - "Module00c.gcnv_model_tars" : {{ test_batch.gcnv_model_tars | tojson }}, - "Module00c.PE_files": {{ test_batch.PE_files | tojson }}, - "Module00c.SR_files": {{ test_batch.SR_files | tojson }}, - "Module00c.counts": {{ test_batch.counts | tojson }}, - "Module00c.manta_vcfs": {{ test_batch.manta_vcfs | tojson }}, - "Module00c.samples": {{ test_batch.samples | tojson }}, - "Module00c.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, - "Module00c.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, - "Module00c.snp_vcfs": {{ test_batch.snp_vcfs | tojson }} -} diff --git a/test_input_templates/module00c/Module00c.json.tmpl b/test_input_templates/module00c/Module00c.json.tmpl deleted file mode 100644 index cc983c7b1..000000000 --- a/test_input_templates/module00c/Module00c.json.tmpl +++ /dev/null @@ -1,64 +0,0 @@ -{ - "Module00c.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module00c.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, - "Module00c.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module00c.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, - "Module00c.cnmops_docker": {{ dockers.cnmops_docker | tojson }}, - "Module00c.linux_docker" : {{ dockers.linux_docker | tojson }}, - "Module00c.gatk_docker": {{ dockers.gatk_docker | tojson }}, - "Module00c.gcnv_gatk_docker" : {{ dockers.gatk_docker | tojson }}, - "Module00c.condense_counts_docker" : {{ dockers.condense_counts_docker | tojson }}, - "Module00c.cytoband":{{ reference_resources.cytobands | tojson }}, - "Module00c.mei_bed":{{ reference_resources.mei_bed | tojson }}, - "Module00c.cnmops_allo_file": {{ reference_resources.allosome_file | tojson }}, - "Module00c.cnmops_exclude_list": {{ reference_resources.cnmops_exclude_list | tojson }}, - "Module00c.cnmops_chrom_file": {{ reference_resources.autosome_file | tojson }}, - "Module00c.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, - "Module00c.genome_file": {{ reference_resources.genome_file | tojson }}, - "Module00c.inclusion_bed": {{ reference_resources.inclusion_bed | tojson }}, - "Module00c.matrix_qc_distance": "1000000", - "Module00c.min_svsize": "50", - "Module00c.run_matrix_qc": "true", - - "Module00c.unpadded_intervals_file" : {{ 
reference_resources.unpadded_intervals_file | tojson }}, - "Module00c.dbsnp_vcf" : {{ reference_resources.dbsnp_vcf | tojson }}, - "Module00c.ref_fasta": {{ reference_resources.reference_fasta | tojson }}, - "Module00c.ref_fasta_index": {{ reference_resources.reference_index | tojson }}, - "Module00c.ref_dict": {{ reference_resources.reference_dict | tojson }}, - - "Module00c.ploidy_sample_psi_scale": "0.001", - "Module00c.contig_ploidy_model_tar" : {{ test_batch.contig_ploidy_model_tar | tojson }}, - "Module00c.gcnv_learning_rate" : 0.03, - "Module00c.gcnv_num_thermal_advi_iters" : 250, - "Module00c.gcnv_max_advi_iter_first_epoch" : 1000, - "Module00c.gcnv_max_advi_iter_subsequent_epochs" : 200, - "Module00c.gcnv_max_training_epochs" : 5, - "Module00c.gcnv_min_training_epochs" : 1, - "Module00c.gcnv_convergence_snr_averaging_window" : 100, - "Module00c.gcnv_convergence_snr_countdown_window" : 10, - "Module00c.gcnv_cnv_coherence_length" : 1000, - "Module00c.gcnv_copy_number_posterior_expectation_mode" : "EXACT", - "Module00c.gcnv_log_emission_sampling_rounds" : 20, - "Module00c.gcnv_p_alt" : 0.000001, - "Module00c.gcnv_sample_psi_scale" : 0.000001, - "Module00c.ref_copy_number_autosomal_contigs": {{ reference_resources.copy_number_autosomal_contigs | tojson }}, - "Module00c.allosomal_contigs": {{ reference_resources.allosomal_contigs | tojson }}, - "Module00c.gcnv_caller_internal_admixing_rate": "0.5", - "Module00c.gcnv_caller_update_convergence_threshold": "0.000001", - "Module00c.gcnv_convergence_snr_trigger_threshold": "0.2", - "Module00c.gcnv_depth_correction_tau": "10000", - "Module00c.gcnv_log_emission_sampling_median_rel_error": "0.001", - "Module00c.gcnv_qs_cutoff": "30", - - "Module00c.batch": {{ test_batch.batch_name | tojson }}, - "Module00c.ped_file": {{ test_batch.ped_file | tojson }}, - "Module00c.gcnv_model_tars" : {{ test_batch.gcnv_model_tars | tojson }}, - "Module00c.PE_files": {{ test_batch.PE_files | tojson }}, - "Module00c.SR_files": {{ test_batch.SR_files | tojson }}, - "Module00c.counts": {{ test_batch.counts | tojson }}, - "Module00c.manta_vcfs": {{ test_batch.manta_vcfs | tojson }}, - "Module00c.samples": {{ test_batch.samples | tojson }}, - "Module00c.melt_vcfs": {{ test_batch.melt_vcfs | tojson }}, - "Module00c.wham_vcfs": {{ test_batch.wham_vcfs | tojson }}, - "Module00c.gvcfs": {{ test_batch.gvcfs | tojson }} -} diff --git a/test_input_templates/module01/Module01.json.tmpl b/test_input_templates/module01/Module01.json.tmpl deleted file mode 100644 index fa40af191..000000000 --- a/test_input_templates/module01/Module01.json.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -{ - "Module01.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module01.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - - "Module01.contigs": {{ reference_resources.primary_contigs_fai | tojson }}, - "Module01.depth_flags": "--merge-coordinates", - "Module01.depth_frac": "0.8", - "Module01.pesr_svsize": "0", - "Module01.pesr_frac": "0.1", - "Module01.pesr_flags": "--preserve-ids", - "Module01.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "Module01.pesr_distance": "300", - "Module01.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, - "Module01.depth_exclude_list_frac_max": "0.5", - - "Module01.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module01.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - "Module01.linux_docker": {{ dockers.linux_docker | tojson 
}}, - - "Module01.batch": {{ test_batch.batch_name | tojson }}, - "Module01.del_bed": {{ test_batch.del_bed| tojson }}, - "Module01.dup_bed": {{ test_batch.dup_bed | tojson }}, - "Module01.wham_vcfs": {{ test_batch.std_wham_vcfs | tojson }}, - "Module01.manta_vcfs": {{ test_batch.std_manta_vcfs | tojson }}, - "Module01.melt_vcfs": {{ test_batch.std_melt_vcfs | tojson }} -} diff --git a/test_input_templates/module02/Module02.json.tmpl b/test_input_templates/module02/Module02.json.tmpl deleted file mode 100644 index da31775c3..000000000 --- a/test_input_templates/module02/Module02.json.tmpl +++ /dev/null @@ -1,33 +0,0 @@ -{ - "Module02.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module02.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, - "Module02.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module02.sv_base_docker": {{ dockers.sv_base_docker | tojson }}, - "Module02.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - "Module02.linux_docker" : {{ dockers.linux_docker | tojson }}, - - "Module02.BAF_split_size": "10000", - "Module02.RD_split_size": "10000", - "Module02.PE_split_size": "10000", - "Module02.SR_split_size": "1000", - "Module02.common_cnv_size_cutoff": "5000", - "Module02.allosome_contigs": {{ reference_resources.allosome_file | tojson }}, - "Module02.autosome_contigs": {{ reference_resources.autosome_file | tojson }}, - "Module02.rmsk": {{ reference_resources.rmsk | tojson }}, - "Module02.segdups": {{ reference_resources.segdups | tojson }}, - "Module02.ref_dict": {{ reference_resources.reference_dict | tojson }}, - - "Module02.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - - "Module02.batch": {{ test_batch.batch_name | tojson }}, - "Module02.ped_file": {{ test_batch.ped_file | tojson }}, - "Module02.discfile": {{ test_batch.merged_disc_file | tojson }}, - "Module02.baf_metrics": {{ test_batch.merged_baf_file | tojson }}, - "Module02.coveragefile": {{ test_batch.merged_coverage_file | tojson }}, - "Module02.splitfile": {{ test_batch.merged_split_file | tojson }}, - "Module02.medianfile": {{ test_batch.medianfile | tojson }}, - "Module02.depth_vcf" : {{ test_batch.merged_depth_vcf | tojson }}, - "Module02.manta_vcf" : {{ test_batch.merged_manta_vcf | tojson }}, - "Module02.wham_vcf" : {{ test_batch.merged_wham_vcf | tojson }}, - "Module02.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }} -} diff --git a/test_input_templates/module03/Module03.json.tmpl b/test_input_templates/module03/Module03.json.tmpl deleted file mode 100644 index d62f86c6d..000000000 --- a/test_input_templates/module03/Module03.json.tmpl +++ /dev/null @@ -1,20 +0,0 @@ -{ - "Module03.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module03.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module03.linux_docker" : {{ dockers.linux_docker | tojson }}, - - "Module03.outlier_cutoff_nIQR": "10000", - - "Module03.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module03.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - "Module03.ped_file": {{ test_batch.ped_file | tojson }}, - - "Module03.batch": {{ test_batch.batch_name | tojson }}, - "Module03.outlier_cutoff_table" : {{ test_batch.outlier_cutoff_table | tojson }}, - "Module03.depth_vcf" : {{ test_batch.merged_depth_vcf | tojson }}, - "Module03.manta_vcf" : {{ test_batch.merged_manta_vcf | tojson }}, - "Module03.wham_vcf" : {{ 
test_batch.merged_wham_vcf | tojson }}, - "Module03.melt_vcf" : {{ test_batch.merged_melt_vcf | tojson }}, - "Module03.evidence_metrics": {{ test_batch.evidence_metrics | tojson }}, - "Module03.evidence_metrics_common": {{ test_batch.evidence_metrics_common | tojson }} -} diff --git a/test_input_templates/module03/Module03Qc.json.tmpl b/test_input_templates/module03/Module03Qc.json.tmpl deleted file mode 100644 index f606c0ba1..000000000 --- a/test_input_templates/module03/Module03Qc.json.tmpl +++ /dev/null @@ -1,23 +0,0 @@ -{ - "Module03Qc.contig_list": {{ reference_resources.primary_contigs_fai | tojson }}, - - "Module03Qc.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, - "Module03Qc.hgsv_tarballs": {{ reference_resources.hgsv_tarballs | tojson }}, - "Module03Qc.asc_tarballs": {{ reference_resources.asc_tarballs | tojson }}, - "Module03Qc.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, - "Module03Qc.collins_2017_tarball": {{ reference_resources.collins_2017_tarball | tojson }}, - "Module03Qc.werling_2018_tarball": {{ reference_resources.werling_2018_tarball | tojson }}, - - "Module03Qc.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module03Qc.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module03Qc.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, - - "Module03Qc.batch": {{ test_batch.batch_name | tojson }}, - - "Module03Qc.ped_file": {{ test_batch.ped_file | tojson }}, - "Module03Qc.depth_vcf_noOutliers": {{ test_batch.filtered_depth_vcf | tojson }}, - "Module03Qc.merged_pesr_vcf": {{ test_batch.filtered_pesr_vcf | tojson }} -} - - - diff --git a/test_input_templates/module04/MergeCohortVcfs.test.json.tmpl b/test_input_templates/module04/MergeCohortVcfs.test.json.tmpl deleted file mode 100644 index 82941c865..000000000 --- a/test_input_templates/module04/MergeCohortVcfs.test.json.tmpl +++ /dev/null @@ -1,6 +0,0 @@ -{ - "MergeCohortVcfs.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "MergeCohortVcfs.pesr_vcfs": [{{ test_batch.filtered_pesr_vcf | tojson }}], - "MergeCohortVcfs.depth_vcfs": [{{ test_batch.filtered_depth_vcf | tojson }}], - "MergeCohortVcfs.cohort": {{ test_batch.batch_name | tojson }} -} diff --git a/test_input_templates/module04/Module04.json.tmpl b/test_input_templates/module04/Module04.json.tmpl deleted file mode 100644 index 2458277ab..000000000 --- a/test_input_templates/module04/Module04.json.tmpl +++ /dev/null @@ -1,29 +0,0 @@ -{ - "Module04.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module04.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module04.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, - "Module04.linux_docker" : {{ dockers.linux_docker | tojson }}, - - "Module04.n_RD_genotype_bins": "100000", - "Module04.n_per_split": "5000", - "Module04.pesr_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "Module04.seed_cutoffs": {{ reference_resources.seed_cutoffs | tojson }}, - "Module04.reference_build": {{ reference_resources.reference_build | tojson }}, - "Module04.ref_dict": {{ reference_resources.reference_dict | tojson }}, - - "Module04.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module04.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - - "Module04.batch": {{ test_batch.batch_name | tojson }}, - "Module04.rf_cutoffs": {{ test_batch.cutoffs | tojson 
}}, - "Module04.batch_depth_vcf": {{ test_batch.filtered_depth_vcf | tojson }}, - "Module04.batch_pesr_vcf": {{ test_batch.filtered_pesr_vcf | tojson }}, - "Module04.ped_file": {{ test_batch.ped_file | tojson }}, - "Module04.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - "Module04.discfile": {{ test_batch.merged_disc_file | tojson }}, - "Module04.coveragefile": {{ test_batch.merged_coverage_file | tojson }}, - "Module04.splitfile": {{ test_batch.merged_split_file | tojson }}, - "Module04.medianfile": {{ test_batch.medianfile | tojson }}, - "Module04.cohort_depth_vcf": {{ test_batch.cohort_depth_vcf | tojson }}, - "Module04.cohort_pesr_vcf": {{ test_batch.cohort_pesr_vcf | tojson }} -} diff --git a/test_input_templates/module04b/Module04b.test.json.tmpl b/test_input_templates/module04b/Module04b.test.json.tmpl deleted file mode 100644 index f1280c9aa..000000000 --- a/test_input_templates/module04b/Module04b.test.json.tmpl +++ /dev/null @@ -1,25 +0,0 @@ -{ - "Module04b.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, - "Module04b.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - "Module04b.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module04b.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module04b.n_RdTest_bins": "100000", - "Module04b.n_per_split": "5000", - - "Module04b.cohort": {{ test_batch.batch_name | tojson }}, - "Module04b.contig_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module04b.regeno_coverage_medians": {{ test_batch.regeno_coverage_medians | tojson }}, - - "Module04b.RD_depth_sepcutoffs": [{{ test_batch.depth_gt_rd_sep_file | tojson }}], - - "Module04b.cohort_depth_vcf": {{ test_batch.cohort_depth_vcf | tojson }}, - - "Module04b.ped_file": {{ test_batch.ped_file | tojson }}, - "Module04b.batch_depth_vcfs": [{{ test_batch.filtered_depth_vcf | tojson }}], - - "Module04b.depth_vcfs": [{{ test_batch.genotyped_depth_vcf | tojson }}], - "Module04b.coveragefiles": [{{ test_batch.merged_coverage_file | tojson }}], - "Module04b.coveragefile_idxs": [{{ test_batch.merged_coverage_file_idx| tojson }}], - "Module04b.medianfiles": [{{ test_batch.medianfile | tojson }}], - "Module04b.batches": [{{ test_batch.batch_name | tojson }}] -} diff --git a/test_input_templates/module0506/Module0506.json.tmpl b/test_input_templates/module0506/Module0506.json.tmpl deleted file mode 100644 index 12beeebc8..000000000 --- a/test_input_templates/module0506/Module0506.json.tmpl +++ /dev/null @@ -1,68 +0,0 @@ -{ - "Module0506.bin_exclude": {{ reference_resources.bin_exclude | tojson }}, - "Module0506.contig_list": {{ reference_resources.primary_contigs_fai | tojson }}, - "Module0506.allosome_fai": {{ reference_resources.allosome_file | tojson }}, - "Module0506.cytobands": {{ reference_resources.cytobands | tojson }}, - "Module0506.mei_bed": {{ reference_resources.mei_bed | tojson }}, - "Module0506.pe_exclude_list": {{ reference_resources.pesr_exclude_list | tojson }}, - "Module0506.depth_exclude_list": {{ reference_resources.depth_exclude_list | tojson }}, - "Module0506.empty_file" : {{ reference_resources.empty_file | tojson }}, - "Module0506.ref_dict": {{ reference_resources.reference_dict | tojson }}, - - "Module0506.thousand_genomes_tarballs": {{ reference_resources.thousand_genomes_tarballs | tojson }}, - "Module0506.hgsv_tarballs": {{ reference_resources.hgsv_tarballs | tojson }}, - "Module0506.asc_tarballs": {{ reference_resources.asc_tarballs | tojson }}, - 
"Module0506.sanders_2015_tarball": {{ reference_resources.sanders_2015_tarball | tojson }}, - "Module0506.collins_2017_tarball": {{ reference_resources.collins_2017_tarball | tojson }}, - "Module0506.werling_2018_tarball": {{ reference_resources.werling_2018_tarball | tojson }}, - - "Module0506.min_sr_background_fail_batches": 0.5, - "Module0506.max_shards_per_chrom_clean_vcf_step1": 200, - "Module0506.min_records_per_shard_clean_vcf_step1": 5000, - "Module0506.samples_per_clean_vcf_step2_shard": 100, - "Module0506.random_seed": 0, - "Module0506.max_shards_per_chrom": 100, - "Module0506.min_variants_per_shard": 30, - - "Module0506.primary_contigs_list": {{ reference_resources.primary_contigs_list | tojson }}, - "Module0506.sv_pipeline_base_docker": {{ dockers.sv_pipeline_base_docker | tojson }}, - - "Module0506.linux_docker": {{ dockers.linux_docker | tojson }}, - "Module0506.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}, - "Module0506.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module0506.sv_pipeline_rdtest_docker": {{ dockers.sv_pipeline_rdtest_docker | tojson }}, - "Module0506.sv_pipeline_qc_docker": {{ dockers.sv_pipeline_qc_docker | tojson }}, - - "Module0506.cohort_name": {{ test_batch.batch_name | tojson }}, - "Module0506.batches": [ - {{ test_batch.batch_name | tojson }} - ], - "Module0506.ped_file": {{ test_batch.ped_file | tojson }}, - "Module0506.disc_files": [ - {{ test_batch.merged_disc_file | tojson }} - ], - "Module0506.bincov_files": [ - {{ test_batch.merged_coverage_file | tojson }} - ], - "Module0506.median_coverage_files": [ - {{ test_batch.medianfile | tojson }} - ], - "Module0506.rf_cutoff_files": [ - {{ test_batch.cutoffs | tojson }} - ], - "Module0506.pesr_vcfs": [ - {{ test_batch.genotyped_pesr_vcf| tojson }} - ], - "Module0506.depth_vcfs": [ - {{ test_batch.genotyped_depth_vcf | tojson }} - ], - "Module0506.depth_gt_rd_sep_files": [ - {{ test_batch.depth_gt_rd_sep_file | tojson }} - ], - "Module0506.raw_sr_bothside_pass_files": [ - {{ test_batch.raw_sr_bothside_pass_files | tojson }} - ], - "Module0506.raw_sr_background_fail_files": [ - {{ test_batch.raw_sr_background_fail_files | tojson }} - ] -} diff --git a/test_input_templates/module08/Module08Annotation.test.json.tmpl b/test_input_templates/module08/Module08Annotation.test.json.tmpl deleted file mode 100644 index 106aa80a6..000000000 --- a/test_input_templates/module08/Module08Annotation.test.json.tmpl +++ /dev/null @@ -1,24 +0,0 @@ -{ - "Module08Annotation.vcf" : "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/61a7ce7c-3b3c-4716-977a-ffb6e34464b6/minGQ_filter_workflow_v2/d841eeb6-90c3-4ff2-8b99-7a793c85cfea/call-combine_vcfs/Talkowski_SV_PCR-free_WGS_144.minGQ_filtered.vcf.gz", - "Module08Annotation.vcf_idx" : "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/61a7ce7c-3b3c-4716-977a-ffb6e34464b6/minGQ_filter_workflow_v2/d841eeb6-90c3-4ff2-8b99-7a793c85cfea/call-combine_vcfs/Talkowski_SV_PCR-free_WGS_144.minGQ_filtered.vcf.gz.tbi", - - "Module08Annotation.protein_coding_gtf" : {{ reference_resources.protein_coding_gtf | tojson }}, - "Module08Annotation.linc_rna_gtf" : {{ reference_resources.linc_rna_gtf | tojson }}, - "Module08Annotation.promoter_bed" : {{ reference_resources.promoter_bed | tojson }}, - "Module08Annotation.noncoding_bed" : {{ reference_resources.noncoding_bed | tojson }}, - "Module08Annotation.ref_bed" : {{ reference_resources.external_af_ref_bed | tojson }}, - "Module08Annotation.ref_prefix" : {{ reference_resources.external_af_ref_bed_prefix | tojson }}, - 
"Module08Annotation.population" : {{ reference_resources.external_af_population | tojson }}, - - - "Module08Annotation.contig_list" : {{ reference_resources.primary_contigs_list | tojson }}, - "Module08Annotation.ped_file": "gs://fc-fae972fb-9dbf-41c7-926f-f419a767a1ab/FINAL_full_prenatal_dosage_sex.ped", - "Module08Annotation.sv_per_shard" : "5000", - "Module08Annotation.max_shards_per_chrom_step1" : 200, - "Module08Annotation.min_records_per_shard_step1" : 5000, - - "Module08Annotation.prefix" : {{ test_batch.batch_name | tojson }}, - - "Module08Annotation.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module08Annotation.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} -} diff --git a/test_input_templates/module08/Module08Preprocessing.wdl.example.json.tmpl b/test_input_templates/module08/Module08Preprocessing.wdl.example.json.tmpl deleted file mode 100644 index de8dc4ad9..000000000 --- a/test_input_templates/module08/Module08Preprocessing.wdl.example.json.tmpl +++ /dev/null @@ -1,12 +0,0 @@ -{ - "Module08Preprocessing.gencode_annotation_gtf": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.annotation.gtf.gz", - "Module08Preprocessing.gencode_pc_transcripts_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_transcripts.fa.gz", - "Module08Preprocessing.gencode_pc_translations_fa": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.pc_translations.fa.gz", - "Module08Preprocessing.gencode_transcript_source": "gs://broad-sv-dev-data/module_tests/07/inputs/prepare/gencode.v29.metadata.Transcript_source", - - "Module08Preprocessing.promoter_window": 1000, - - "Module08Preprocessing.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }}, - "Module08Preprocessing.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }} -} - diff --git a/wdl/AnnotateExternalAF.wdl b/wdl/AnnotateExternalAF.wdl index 5177c7495..18427c94b 100644 --- a/wdl/AnnotateExternalAF.wdl +++ b/wdl/AnnotateExternalAF.wdl @@ -3,7 +3,7 @@ version 1.0 # Author: Xuefang Zhao import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "AnnotateExternalAFperContig.wdl" as AnnotateExternalAFperContig workflow AnnotateExternalAF { diff --git a/wdl/AnnotateExternalAFperContig.wdl b/wdl/AnnotateExternalAFperContig.wdl index d9ae1f565..63fc5707e 100644 --- a/wdl/AnnotateExternalAFperContig.wdl +++ b/wdl/AnnotateExternalAFperContig.wdl @@ -3,7 +3,7 @@ version 1.0 # Author: Xuefang Zhao import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow AnnotateExternalAFperContig { input{ diff --git a/wdl/AnnotateILFeatures.wdl b/wdl/AnnotateILFeatures.wdl index ffa9c6fed..dbc05cbbe 100644 --- a/wdl/AnnotateILFeatures.wdl +++ b/wdl/AnnotateILFeatures.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as tasks0506 +import "TasksMakeCohortVcf.wdl" as MiniTasks import "TasksBenchmark.wdl" as tasks10 import "VaPoR.wdl" as vapor @@ -222,7 +222,7 @@ workflow AnnoILFeatures { } - call tasks0506.ConcatVcfs as ConcatVcfsIL{ + call MiniTasks.ConcatVcfs as ConcatVcfsIL{ input: vcfs=Bcf2VcfIL.vcf, merge_sort=true, @@ -231,7 +231,7 @@ workflow AnnoILFeatures { runtime_attr_override=runtime_attr_ConcatVcfs } - call tasks0506.ConcatVcfs as ConcatVcfsIL_le_flank{ + call MiniTasks.ConcatVcfs as ConcatVcfsIL_le_flank{ input: vcfs=Bcf2VcfIL_le_flank.vcf, merge_sort=true, @@ -240,7 +240,7 @@ workflow AnnoILFeatures { 
runtime_attr_override=runtime_attr_ConcatVcfs } - call tasks0506.ConcatVcfs as ConcatVcfsIL_ri_flank{ + call MiniTasks.ConcatVcfs as ConcatVcfsIL_ri_flank{ input: vcfs=Bcf2VcfIL_ri_flank.vcf, merge_sort=true, diff --git a/wdl/AnnotateVcf.wdl b/wdl/AnnotateVcf.wdl index 01ddd77c0..dc1c37fb0 100644 --- a/wdl/AnnotateVcf.wdl +++ b/wdl/AnnotateVcf.wdl @@ -1,24 +1,34 @@ -# Workflow to parallelize VCF annotation by chromosome - version 1.0 -import "Tasks0506.wdl" as tasks0506 -import "AnnotateChromosome.wdl" as annotate_by_chrom +import "ScatterAnnotateVcfByChrom.wdl" as ann +import "PruneAndAddVafs.wdl" as pav +import "AnnotateExternalAF.wdl" as eaf -# Scatter VCF and apply prepraed annotations workflow AnnotateVcf { input { - File vcf - String prefix File vcf_idx File contig_list + String prefix + File protein_coding_gtf File linc_rna_gtf File promoter_bed File noncoding_bed + Int max_shards_per_chrom_step1 + Int min_records_per_shard_step1 + + File? sample_pop_assignments # Two-column file with sample ID & pop assignment. "." for pop will ignore sample + File? prune_list # List of samples to be excluded from the output vcf + File? ped_file # Used for M/F AF calculations + Int sv_per_shard + + File? ref_bed # File with external allele frequencies + String? ref_prefix # prefix name for external AF call set (required if ref_bed set) + Array[String]? population # populations to annotate external AF for (required if ref_bed set) + String sv_base_mini_docker String sv_pipeline_docker @@ -26,115 +36,74 @@ workflow AnnotateVcf { RuntimeAttr? runtime_attr_merge_annotations RuntimeAttr? runtime_attr_subset_vcf RuntimeAttr? runtime_attr_concat_vcfs + RuntimeAttr? runtime_attr_prune_vcf + RuntimeAttr? runtime_attr_shard_vcf + RuntimeAttr? runtime_attr_compute_AFs + RuntimeAttr? runtime_attr_combine_vcfs + RuntimeAttr? runtime_attr_modify_vcf + RuntimeAttr? runtime_override_combine_vcfs + RuntimeAttr? 
runtime_override_split_vcf } - Array[Array[String]] contigs = read_tsv(contig_list) - - # Annotate, scattered by chromosome - scatter (contig in contigs) { - # Remote tabix each chromosome - call SubsetVcf { - input: - vcf = vcf, - vcf_idx = vcf_idx, - contig = contig[0], - prefix = "${prefix}.${contig[0]}", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_attr_subset_vcf - } - - # Annotate per chromosome - call annotate_by_chrom.AnnotateChromosome as AnnotateChromosome { - input: - vcf = SubsetVcf.subsetted_vcf, - prefix = "${prefix}.${contig[0]}", - protein_coding_gtf = protein_coding_gtf, - linc_rna_gtf = linc_rna_gtf, - promoter_bed = promoter_bed, - noncoding_bed = noncoding_bed, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, - runtime_attr_merge_annotations = runtime_attr_merge_annotations - } - } - - # Merge integrated vcfs across chromosomes - call tasks0506.ConcatVcfs as ConcatVcfs { + call ann.ScatterAnnotateVcfByChrom as ScatterAnnotateVcfByChrom { input: - vcfs = AnnotateChromosome.annotated_vcf, - vcfs_idx = AnnotateChromosome.annotated_vcf_idx, - outfile_prefix = "${prefix}.annotated", - sv_base_mini_docker = sv_base_mini_docker, - runtime_attr_override = runtime_attr_concat_vcfs - } - - output { - File annotated_vcf = ConcatVcfs.concat_vcf - File annotated_vcf_idx = ConcatVcfs.concat_vcf_idx + vcf = vcf, + vcf_idx = vcf_idx, + prefix = prefix, + contig_list = contig_list, + protein_coding_gtf = protein_coding_gtf, + linc_rna_gtf = linc_rna_gtf, + promoter_bed = promoter_bed, + noncoding_bed = noncoding_bed, + sv_base_mini_docker = sv_base_mini_docker, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, + runtime_attr_merge_annotations = runtime_attr_merge_annotations, + runtime_attr_subset_vcf = runtime_attr_subset_vcf, + runtime_attr_concat_vcfs = runtime_attr_concat_vcfs } -} - -# Scatter VCF by chromosome -task SubsetVcf { - - input { - File vcf - File vcf_idx - String contig - String prefix - - String sv_pipeline_docker - - RuntimeAttr? 
runtime_attr_override + call pav.PruneAndAddVafs as PruneAndAddVafs { + input: + vcf = ScatterAnnotateVcfByChrom.annotated_vcf, + vcf_idx = ScatterAnnotateVcfByChrom.annotated_vcf_idx, + prefix = prefix, + sample_pop_assignments = sample_pop_assignments, + prune_list = prune_list, + ped_file = ped_file, + sv_per_shard = sv_per_shard, + contig_list = contig_list, + sv_base_mini_docker = sv_base_mini_docker, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_prune_vcf = runtime_attr_prune_vcf, + runtime_attr_shard_vcf = runtime_attr_shard_vcf, + runtime_attr_compute_AFs = runtime_attr_compute_AFs, + runtime_attr_combine_vcfs = runtime_attr_combine_vcfs, + runtime_attr_concat_vcfs = runtime_attr_concat_vcfs } - parameter_meta { - vcf: { - localization_optional: true - } - vcf_idx: { - localization_optional: true + if (defined(ref_bed)) { + call eaf.AnnotateExternalAF as AnnotateExternalAF { + input: + vcf = PruneAndAddVafs.output_vcf, + vcf_idx = PruneAndAddVafs.output_vcf_idx, + ref_bed = select_first([ref_bed]), + population = select_first([population]), + ref_prefix = select_first([ref_prefix]), + prefix = prefix, + contigs = read_lines(contig_list), + max_shards_per_chrom_step1 = max_shards_per_chrom_step1, + min_records_per_shard_step1 = min_records_per_shard_step1, + sv_base_mini_docker = sv_base_mini_docker, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_modify_vcf = runtime_attr_modify_vcf, + runtime_override_split_vcf = runtime_override_split_vcf, + runtime_override_combine_vcfs = runtime_override_combine_vcfs } } output { - File subsetted_vcf = "${prefix}.${contig}.vcf.gz" - File subsetted_vcf_idx = "${prefix}.${contig}.vcf.gz.tbi" - } - - ######################### - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: 50, - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 0 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) - Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) - - command <<< - - set -euo pipefail - - java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} SelectVariants \ - -V "~{vcf}" \ - -L "~{contig}" \ - -O ~{prefix}.~{contig}.vcf.gz - - >>> - - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: mem_gb + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - docker: sv_pipeline_docker + File output_vcf = select_first([AnnotateExternalAF.annotated_vcf, PruneAndAddVafs.output_vcf]) + File output_vcf_idx = select_first([AnnotateExternalAF.annotated_vcf_tbi, PruneAndAddVafs.output_vcf_idx]) } } diff --git a/wdl/BAFTest.wdl b/wdl/BAFTest.wdl index 9510afb80..864b65155 100644 --- a/wdl/BAFTest.wdl +++ b/wdl/BAFTest.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics import "BAFTestChromosome.wdl" as bafc workflow BAFTest { @@ -44,7 +44,7 @@ workflow BAFTest { } } - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = BAFTestAutosome.stats, prefix = "${batch}.${algorithm}", diff --git a/wdl/BatchEvidenceMerging.wdl b/wdl/BatchEvidenceMerging.wdl index 
d99e8f928..2d9117c7a 100644 --- a/wdl/BatchEvidenceMerging.wdl +++ b/wdl/BatchEvidenceMerging.wdl @@ -39,7 +39,7 @@ workflow EvidenceMerging { input: file = select_first([BAF_files_value[i]]), sample_id = samples[i], - output_name = "BAF00c.~{samples[i]}.txt.gz", + output_name = "BAF_GatherBatchEvidence.~{samples[i]}.txt.gz", sample_column_index = 4, sv_base_mini_docker = sv_base_mini_docker, runtime_attr_override = runtime_attr_set_sample @@ -77,7 +77,7 @@ workflow EvidenceMerging { input: file = SR_files[i], sample_id = samples[i], - output_name = "SR00c.~{samples[i]}.txt.gz", + output_name = "SR_GatherBatchEvidence.~{samples[i]}.txt.gz", sample_column_index = 5, sv_base_mini_docker = sv_base_mini_docker, runtime_attr_override = runtime_attr_set_sample @@ -87,7 +87,7 @@ workflow EvidenceMerging { input: file = PE_files[i], sample_id = samples[i], - output_name = "PE00c.~{samples[i]}.txt.gz", + output_name = "PE_GatherBatchEvidence.~{samples[i]}.txt.gz", sample_column_index = 7, sv_base_mini_docker = sv_base_mini_docker, runtime_attr_override = runtime_attr_set_sample diff --git a/wdl/CleanVcf.wdl b/wdl/CleanVcf.wdl index 94c3e4c44..598e93a6b 100644 --- a/wdl/CleanVcf.wdl +++ b/wdl/CleanVcf.wdl @@ -1,26 +1,33 @@ version 1.0 -import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "CleanVcfChromosome.wdl" as CleanVcfContig +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow CleanVcf { input { - File vcf - String contig - File background_list - File ped_file + String cohort_name + + Array[File] complex_genotype_vcfs + Array[File] complex_resolve_bothside_pass_lists + Array[File] complex_resolve_background_fail_lists + File merged_ped_file + + File contig_list File allosome_fai - String prefix - Int max_shards_per_chrom_step1 - File bothsides_pass_list - Int min_records_per_shard_step1 - Int samples_per_step2_shard + Int max_shards_per_chrom + Int max_shards_per_chrom_clean_vcf_step1 + Int min_records_per_shard_clean_vcf_step1 + Int samples_per_clean_vcf_step2_shard + File? outlier_samples_list String sv_base_mini_docker String sv_pipeline_docker - # overrides for local tasks + # overrides for mini tasks + RuntimeAttr? runtime_override_concat_cleaned_vcfs + + # overrides for CleanVcfContig RuntimeAttr? runtime_override_clean_vcf_1a RuntimeAttr? runtime_override_clean_vcf_1b RuntimeAttr? runtime_override_clean_vcf_2 @@ -30,8 +37,6 @@ workflow CleanVcf { RuntimeAttr? runtime_override_drop_redundant_cnvs RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup - - # overrides for MiniTasks RuntimeAttr? runtime_override_split_vcf_to_clean RuntimeAttr? runtime_override_combine_step_1_vcfs RuntimeAttr? runtime_override_combine_step_1_sex_chr_revisions @@ -39,639 +44,59 @@ workflow CleanVcf { RuntimeAttr? runtime_override_combine_clean_vcf_2 RuntimeAttr? runtime_override_combine_revised_4 RuntimeAttr? 
runtime_override_combine_multi_ids_4 - } - call MiniTasks.SplitVcf as SplitVcfToClean { - input: - vcf=vcf, - contig=contig, - prefix="~{prefix}.~{contig}.shard_", - n_shards=max_shards_per_chrom_step1, - min_vars_per_shard=min_records_per_shard_step1, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_split_vcf_to_clean - } + #Scatter per chromosome + Array[String] contigs = transpose(read_tsv(contig_list))[0] + scatter ( i in range(length(contigs)) ) { + String contig = contigs[i] - scatter ( vcf_shard in SplitVcfToClean.vcf_shards ) { - call CleanVcf1a { + call CleanVcfContig.CleanVcfChromosome as CleanContigVcf { input: - vcf=vcf_shard, - background_list=background_list, - ped_file=ped_file, - sv_pipeline_docker=sv_pipeline_docker, - bothsides_pass_list=bothsides_pass_list, + vcf=complex_genotype_vcfs[i], + contig=contig, + background_list=complex_resolve_background_fail_lists[i], + ped_file=merged_ped_file, + bothsides_pass_list=complex_resolve_bothside_pass_lists[i], allosome_fai=allosome_fai, - runtime_attr_override=runtime_override_clean_vcf_1a - } - } - - call MiniTasks.ConcatVcfs as CombineStep1Vcfs { - input: - vcfs=CleanVcf1a.intermediate_vcf, - vcfs_idx=CleanVcf1a.intermediate_vcf_idx, - outfile_prefix=prefix + ".cleanVCF_step1.intermediate_vcf.merged", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_step_1_vcfs - } - - call MiniTasks.CatUncompressedFiles as CombineStep1SexChrRevisions { - input: - shards=CleanVcf1a.sex, - outfile_name=prefix + ".cleanVCF_step1.sexchr_revise.merged.txt", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_step_1_sex_chr_revisions - } - - call CleanVcf1b { - input: - intermediate_vcf=CombineStep1Vcfs.concat_vcf, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_1b - } - - call MiniTasks.SplitUncompressed as SplitIncludeList { - input: - whole_file=CleanVcf1a.include_list[0], - lines_per_shard=samples_per_step2_shard, - shard_prefix="includeexclude.", - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_split_include_list - } - - scatter ( included_interval in SplitIncludeList.shards ){ - call CleanVcf2{ - input: - normal_revise_vcf=CleanVcf1b.normal, - include_list=included_interval, - multi_cnvs=CleanVcf1b.multi, - vcftools_idx=CleanVcf1b.vcftools_idx, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_2 - } - } - - call MiniTasks.CatUncompressedFiles as CombineCleanVcf2 { - input: - shards=CleanVcf2.out, - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_clean_vcf_2 - } - - call CleanVcf3 { - input: - rd_cn_revise=CombineCleanVcf2.outfile, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_3 - } - - scatter ( rd_cn_revise in CleanVcf3.shards ){ - call CleanVcf4 { - input: - rd_cn_revise=rd_cn_revise, - normal_revise_vcf=CleanVcf1b.normal, + prefix=cohort_name, + max_shards_per_chrom_step1=max_shards_per_chrom_clean_vcf_step1, + min_records_per_shard_step1=min_records_per_shard_clean_vcf_step1, + samples_per_step2_shard=samples_per_clean_vcf_step2_shard, + outlier_samples_list=outlier_samples_list, + sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_4 + runtime_override_clean_vcf_1a=runtime_override_clean_vcf_1a, + 
runtime_override_clean_vcf_1b=runtime_override_clean_vcf_1b, + runtime_override_clean_vcf_2=runtime_override_clean_vcf_2, + runtime_override_clean_vcf_3=runtime_override_clean_vcf_3, + runtime_override_clean_vcf_4=runtime_override_clean_vcf_4, + runtime_override_clean_vcf_5=runtime_override_clean_vcf_5, + runtime_override_drop_redundant_cnvs=runtime_override_drop_redundant_cnvs, + runtime_override_stitch_fragmented_cnvs=runtime_override_stitch_fragmented_cnvs, + runtime_override_final_cleanup=runtime_override_final_cleanup, + runtime_override_split_vcf_to_clean=runtime_override_split_vcf_to_clean, + runtime_override_combine_step_1_vcfs=runtime_override_combine_step_1_vcfs, + runtime_override_combine_step_1_sex_chr_revisions=runtime_override_combine_step_1_sex_chr_revisions, + runtime_override_split_include_list=runtime_override_split_include_list, + runtime_override_combine_clean_vcf_2=runtime_override_combine_clean_vcf_2, + runtime_override_combine_revised_4=runtime_override_combine_revised_4, + runtime_override_combine_multi_ids_4=runtime_override_combine_multi_ids_4 } } - call MiniTasks.CatUncompressedFiles as CombineRevised4 { + call MiniTasks.ConcatVcfs as ConcatCleanedVcfs { input: - shards=CleanVcf4.out, - outfile_name="revise.vcf.lines.txt.gz", + vcfs=CleanContigVcf.out, + vcfs_idx=CleanContigVcf.out_idx, + merge_sort=true, + outfile_prefix="~{cohort_name}.cleaned", sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_revised_4 - } - - call MiniTasks.CatUncompressedFiles as CombineMultiIds4 { - input: - shards=CleanVcf4.multi_ids, - outfile_name="multi.geno.ids.txt.gz", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_combine_multi_ids_4 - } - - call CleanVcf5 { - input: - revise_vcf_lines=CombineRevised4.outfile, - normal_revise_vcf=CleanVcf1b.normal, - ped_file=ped_file, - sex_chr_revise=CombineStep1SexChrRevisions.outfile, - multi_ids=CombineMultiIds4.outfile, - outlier_samples_list=outlier_samples_list, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_clean_vcf_5 - } - - call DropRedundantCnvs { - input: - vcf=CleanVcf5.polished, - contig=contig, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_drop_redundant_cnvs - } - - call StitchFragmentedCnvs { - input: - vcf=DropRedundantCnvs.cleaned_vcf_shard, - contig=contig, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_stitch_fragmented_cnvs - } - - call FinalCleanup { - input: - vcf=StitchFragmentedCnvs.stitched_vcf_shard, - contig=contig, - prefix=prefix, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_override_final_cleanup - - } - - output { - File out=FinalCleanup.final_cleaned_shard - File out_idx=FinalCleanup.final_cleaned_shard_idx - } -} - - -#CleanVCF 1a is sharded -task CleanVcf1a { - input { - File vcf - File background_list - File ped_file - String sv_pipeline_docker - File bothsides_pass_list - File allosome_fai - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float shard_size = size([vcf, background_list, ped_file], "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + shard_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + shard_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1.sh ~{vcf} ~{background_list} ~{ped_file} ~{allosome_fai} - /opt/sv-pipeline/04_variant_resolution/scripts/add_bothsides_support_filter.py \ - --bgzip \ - --outfile int.w_bothsides.vcf.gz \ - int.vcf.gz \ - ~{bothsides_pass_list} - tabix int.w_bothsides.vcf.gz - >>> - - output { - File include_list="includelist.txt" - File sex="sexchr.revise.txt" - File intermediate_vcf="int.w_bothsides.vcf.gz" - File intermediate_vcf_idx="int.w_bothsides.vcf.gz.tbi" - } -} - - -task CleanVcf1b { - input { - File intermediate_vcf - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(intermediate_vcf, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -euo pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b.sh ~{intermediate_vcf} - >>> - - output { - File multi="multi.cnvs.txt" - File normal="normal.revise.vcf.gz" - File vcftools_idx = "normal.revise.vcf.gz.csi" - } -} - - -task CleanVcf2 { - input { - File normal_revise_vcf - File include_list - File multi_cnvs - File vcftools_idx - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size([normal_revise_vcf, include_list, multi_cnvs, vcftools_idx], "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 4.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh \ - ~{normal_revise_vcf} \ - ~{include_list} \ - ~{multi_cnvs} \ - "output.txt" - >>> - - output { - File out="output.txt" - } -} - - -task CleanVcf3{ - input { - File rd_cn_revise - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(rd_cn_revise, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part3.sh ~{rd_cn_revise} - - # Ensure there is at least one shard - touch shards/out.0_0.txt - >>> - - output { - Array[File] shards = glob("shards/*") - } -} - - -task CleanVcf4 { - input { - File rd_cn_revise - File normal_revise_vcf - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size([rd_cn_revise, normal_revise_vcf], "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part4.sh ~{rd_cn_revise} ~{normal_revise_vcf} - >>> - - output { - File out="revise.vcf.lines.txt.gz" - File multi_ids="multi.geno.ids.txt.gz" - } -} - - -task CleanVcf5 { - input { - File revise_vcf_lines - File normal_revise_vcf - File ped_file - File sex_chr_revise - File multi_ids - File? outlier_samples_list - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size( - select_all([revise_vcf_lines, normal_revise_vcf, ped_file, sex_chr_revise, multi_ids, outlier_samples_list]), - "GB" - ) - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - ~{if defined(outlier_samples_list) then "ln ~{outlier_samples_list} outliers.txt" else "touch outliers.txt"} - - /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5.sh \ - ~{revise_vcf_lines} \ - ~{normal_revise_vcf} \ - ~{ped_file} \ - ~{sex_chr_revise} \ - ~{multi_ids} \ - outliers.txt - >>> - - output { - File polished="polished.vcf.gz" + runtime_attr_override=runtime_override_concat_cleaned_vcfs } -} - - -task DropRedundantCnvs { - input { - File vcf - String contig - String sv_pipeline_docker - RuntimeAttr? 
runtime_attr_override - } - - String outfile_name = contig + ".shard.no_CNV_redundancies.vcf.gz" - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(vcf, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/resolve_CPX_CNV_redundancies.sh \ - ~{vcf} \ - ~{outfile_name} - >>> - - output { - File cleaned_vcf_shard = outfile_name - } -} - - -# Stitch fragmented RD-only calls found in 100% of the same samples -task StitchFragmentedCnvs { - input { - File vcf - String contig - String prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - String stitched_vcf_name = contig + ".shard.fragmented_CNVs_stitched.vcf.gz" - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(vcf, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_CNVs.sh \ - ~{vcf} \ - "tmp_~{stitched_vcf_name}" - - /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_CNVs.sh \ - "tmp_~{stitched_vcf_name}" \ - "~{stitched_vcf_name}" - >>> - - output { - File stitched_vcf_shard = stitched_vcf_name - } -} - - -# Final VCF cleanup -task FinalCleanup { - input { - File vcf - String contig - String prefix - String 
sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - String cleaned_shard_name = prefix + "." + contig + ".final_cleanup.vcf.gz" - - # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed - # generally assume working memory is ~3 * inputs - Float input_size = size(vcf, "GB") - Float base_disk_gb = 10.0 - Float base_mem_gb = 2.0 - Float input_mem_scale = 3.0 - Float input_disk_scale = 5.0 - RuntimeAttr runtime_default = object { - mem_gb: base_mem_gb + input_size * input_mem_scale, - disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), - cpu_cores: 1, - preemptible_tries: 3, - max_retries: 1, - boot_disk_gb: 10 - } - RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) - runtime { - memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" - disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" - cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) - preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) - maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) - docker: sv_pipeline_docker - bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) - } - - command <<< - set -eu -o pipefail - - /opt/sv-pipeline/04_variant_resolution/scripts/rename_after_vcfcluster.py \ - --chrom ~{contig} \ - --prefix ~{prefix} \ - ~{vcf} stdout \ - | fgrep -v "##INFO=<ID=MEMBERS" \ - | bgzip -c \ - > "~{cleaned_shard_name}" - tabix ~{cleaned_shard_name} - >>> - - output { - File final_cleaned_shard = cleaned_shard_name - File final_cleaned_shard_idx = cleaned_shard_name + ".tbi" + File cleaned_vcf = ConcatCleanedVcfs.concat_vcf + File cleaned_vcf_index = ConcatCleanedVcfs.concat_vcf_idx } } diff --git a/wdl/CleanVcfChromosome.wdl b/wdl/CleanVcfChromosome.wdl new file mode 100644 index 000000000..5cdc53ea1 --- /dev/null +++ b/wdl/CleanVcfChromosome.wdl @@ -0,0 +1,677 @@ +version 1.0 + +import "Structs.wdl" +import "TasksMakeCohortVcf.wdl" as MiniTasks + +workflow CleanVcfChromosome { + input { + File vcf + String contig + File background_list + File ped_file + File allosome_fai + String prefix + Int max_shards_per_chrom_step1 + File bothsides_pass_list + Int min_records_per_shard_step1 + Int samples_per_step2_shard + File? outlier_samples_list + + String sv_base_mini_docker + String sv_pipeline_docker + + # overrides for local tasks + RuntimeAttr? runtime_override_clean_vcf_1a + RuntimeAttr? runtime_override_clean_vcf_1b + RuntimeAttr? runtime_override_clean_vcf_2 + RuntimeAttr? runtime_override_clean_vcf_3 + RuntimeAttr? runtime_override_clean_vcf_4 + RuntimeAttr? runtime_override_clean_vcf_5 + RuntimeAttr? runtime_override_drop_redundant_cnvs + RuntimeAttr? runtime_override_stitch_fragmented_cnvs + RuntimeAttr? runtime_override_final_cleanup + + # overrides for MiniTasks + RuntimeAttr? runtime_override_split_vcf_to_clean + RuntimeAttr? runtime_override_combine_step_1_vcfs + RuntimeAttr? runtime_override_combine_step_1_sex_chr_revisions + RuntimeAttr? runtime_override_split_include_list + RuntimeAttr? runtime_override_combine_clean_vcf_2 + RuntimeAttr? runtime_override_combine_revised_4
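+ # Every runtime_override_* above feeds the same two-step fallback used by the tasks in this file; + # a minimal sketch with hypothetical numbers (not lines from this PR): + #   RuntimeAttr runtime_default = object { mem_gb: 2.0, disk_gb: 10 } + #   RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + #   memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + # so an override that sets only mem_gb still inherits the default disk_gb of 10. + RuntimeAttr?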
runtime_override_combine_multi_ids_4 + + } + + call MiniTasks.SplitVcf as SplitVcfToClean { + input: + vcf=vcf, + contig=contig, + prefix="~{prefix}.~{contig}.shard_", + n_shards=max_shards_per_chrom_step1, + min_vars_per_shard=min_records_per_shard_step1, + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_split_vcf_to_clean + } + + scatter ( vcf_shard in SplitVcfToClean.vcf_shards ) { + call CleanVcf1a { + input: + vcf=vcf_shard, + background_list=background_list, + ped_file=ped_file, + sv_pipeline_docker=sv_pipeline_docker, + bothsides_pass_list=bothsides_pass_list, + allosome_fai=allosome_fai, + runtime_attr_override=runtime_override_clean_vcf_1a + } + } + + call MiniTasks.ConcatVcfs as CombineStep1Vcfs { + input: + vcfs=CleanVcf1a.intermediate_vcf, + vcfs_idx=CleanVcf1a.intermediate_vcf_idx, + outfile_prefix=prefix + ".cleanVCF_step1.intermediate_vcf.merged", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_combine_step_1_vcfs + } + + call MiniTasks.CatUncompressedFiles as CombineStep1SexChrRevisions { + input: + shards=CleanVcf1a.sex, + outfile_name=prefix + ".cleanVCF_step1.sexchr_revise.merged.txt", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_combine_step_1_sex_chr_revisions + } + + call CleanVcf1b { + input: + intermediate_vcf=CombineStep1Vcfs.concat_vcf, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_clean_vcf_1b + } + + call MiniTasks.SplitUncompressed as SplitIncludeList { + input: + whole_file=CleanVcf1a.include_list[0], + lines_per_shard=samples_per_step2_shard, + shard_prefix="includeexclude.", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_split_include_list + } + + scatter ( included_interval in SplitIncludeList.shards ){ + call CleanVcf2{ + input: + normal_revise_vcf=CleanVcf1b.normal, + include_list=included_interval, + multi_cnvs=CleanVcf1b.multi, + vcftools_idx=CleanVcf1b.vcftools_idx, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_clean_vcf_2 + } + } + + call MiniTasks.CatUncompressedFiles as CombineCleanVcf2 { + input: + shards=CleanVcf2.out, + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_combine_clean_vcf_2 + } + + call CleanVcf3 { + input: + rd_cn_revise=CombineCleanVcf2.outfile, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_clean_vcf_3 + } + + scatter ( rd_cn_revise in CleanVcf3.shards ){ + call CleanVcf4 { + input: + rd_cn_revise=rd_cn_revise, + normal_revise_vcf=CleanVcf1b.normal, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_clean_vcf_4 + } + } + + call MiniTasks.CatUncompressedFiles as CombineRevised4 { + input: + shards=CleanVcf4.out, + outfile_name="revise.vcf.lines.txt.gz", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_combine_revised_4 + } + + call MiniTasks.CatUncompressedFiles as CombineMultiIds4 { + input: + shards=CleanVcf4.multi_ids, + outfile_name="multi.geno.ids.txt.gz", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_combine_multi_ids_4 + } + + call CleanVcf5 { + input: + revise_vcf_lines=CombineRevised4.outfile, + normal_revise_vcf=CleanVcf1b.normal, + ped_file=ped_file, + sex_chr_revise=CombineStep1SexChrRevisions.outfile, + multi_ids=CombineMultiIds4.outfile, + outlier_samples_list=outlier_samples_list, + sv_pipeline_docker=sv_pipeline_docker, + 
runtime_attr_override=runtime_override_clean_vcf_5 + } + + call DropRedundantCnvs { + input: + vcf=CleanVcf5.polished, + contig=contig, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_drop_redundant_cnvs + } + + call StitchFragmentedCnvs { + input: + vcf=DropRedundantCnvs.cleaned_vcf_shard, + contig=contig, + prefix=prefix, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_stitch_fragmented_cnvs + } + + call FinalCleanup { + input: + vcf=StitchFragmentedCnvs.stitched_vcf_shard, + contig=contig, + prefix=prefix, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_override_final_cleanup + + } + + output { + File out=FinalCleanup.final_cleaned_shard + File out_idx=FinalCleanup.final_cleaned_shard_idx + } +} + + +#CleanVCF 1a is sharded +task CleanVcf1a { + input { + File vcf + File background_list + File ped_file + String sv_pipeline_docker + File bothsides_pass_list + File allosome_fai + RuntimeAttr? runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float shard_size = size([vcf, background_list, ped_file], "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + shard_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + shard_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1.sh ~{vcf} ~{background_list} ~{ped_file} ~{allosome_fai} + /opt/sv-pipeline/04_variant_resolution/scripts/add_bothsides_support_filter.py \ + --bgzip \ + --outfile int.w_bothsides.vcf.gz \ + int.vcf.gz \ + ~{bothsides_pass_list} + tabix int.w_bothsides.vcf.gz + >>> + + output { + File include_list="includelist.txt" + File sex="sexchr.revise.txt" + File intermediate_vcf="int.w_bothsides.vcf.gz" + File intermediate_vcf_idx="int.w_bothsides.vcf.gz.tbi" + } +} + + +task CleanVcf1b { + input { + File intermediate_vcf + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(intermediate_vcf, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -euo pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part1b.sh ~{intermediate_vcf} + >>> + + output { + File multi="multi.cnvs.txt" + File normal="normal.revise.vcf.gz" + File vcftools_idx = "normal.revise.vcf.gz.csi" + } +} + + +task CleanVcf2 { + input { + File normal_revise_vcf + File include_list + File multi_cnvs + File vcftools_idx + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size([normal_revise_vcf, include_list, multi_cnvs, vcftools_idx], "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 4.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part2.sh \ + ~{normal_revise_vcf} \ + ~{include_list} \ + ~{multi_cnvs} \ + "output.txt" + >>> + + output { + File out="output.txt" + } +} + + +task CleanVcf3{ + input { + File rd_cn_revise + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(rd_cn_revise, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part3.sh ~{rd_cn_revise} + + # Ensure there is at least one shard + touch shards/out.0_0.txt + >>> + + output { + Array[File] shards = glob("shards/*") + } +} + + +task CleanVcf4 { + input { + File rd_cn_revise + File normal_revise_vcf + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size([rd_cn_revise, normal_revise_vcf], "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part4.sh ~{rd_cn_revise} ~{normal_revise_vcf} + >>> + + output { + File out="revise.vcf.lines.txt.gz" + File multi_ids="multi.geno.ids.txt.gz" + } +} + + +task CleanVcf5 { + input { + File revise_vcf_lines + File normal_revise_vcf + File ped_file + File sex_chr_revise + File multi_ids + File? outlier_samples_list + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size( + select_all([revise_vcf_lines, normal_revise_vcf, ped_file, sex_chr_revise, multi_ids, outlier_samples_list]), + "GB" + ) + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + ~{if defined(outlier_samples_list) then "ln ~{outlier_samples_list} outliers.txt" else "touch outliers.txt"} + + /opt/sv-pipeline/04_variant_resolution/scripts/clean_vcf_part5.sh \ + ~{revise_vcf_lines} \ + ~{normal_revise_vcf} \ + ~{ped_file} \ + ~{sex_chr_revise} \ + ~{multi_ids} \ + outliers.txt + >>> + + output { + File polished="polished.vcf.gz" + } +} + + +task DropRedundantCnvs { + input { + File vcf + String contig + String sv_pipeline_docker + RuntimeAttr? 
runtime_attr_override + } + + String outfile_name = contig + ".shard.no_CNV_redundancies.vcf.gz" + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(vcf, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/resolve_CPX_CNV_redundancies.sh \ + ~{vcf} \ + ~{outfile_name} + >>> + + output { + File cleaned_vcf_shard = outfile_name + } +} + + +# Stitch fragmented RD-only calls found in 100% of the same samples +task StitchFragmentedCnvs { + input { + File vcf + String contig + String prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + String stitched_vcf_name = contig + ".shard.fragmented_CNVs_stitched.vcf.gz" + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(vcf, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_CNVs.sh \ + ~{vcf} \ + "tmp_~{stitched_vcf_name}" + + /opt/sv-pipeline/04_variant_resolution/scripts/stitch_fragmented_CNVs.sh \ + "tmp_~{stitched_vcf_name}" \ + "~{stitched_vcf_name}" + >>> + + output { + File stitched_vcf_shard = stitched_vcf_name + } +} + + +# Final VCF cleanup +task FinalCleanup { + input { + File vcf + String contig + String prefix + String 
sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + String cleaned_shard_name = prefix + "." + contig + ".final_cleanup.vcf.gz" + + # generally assume working disk size is ~2 * inputs, and outputs are ~2 *inputs, and inputs are not removed + # generally assume working memory is ~3 * inputs + Float input_size = size(vcf, "GB") + Float base_disk_gb = 10.0 + Float base_mem_gb = 2.0 + Float input_mem_scale = 3.0 + Float input_disk_scale = 5.0 + RuntimeAttr runtime_default = object { + mem_gb: base_mem_gb + input_size * input_mem_scale, + disk_gb: ceil(base_disk_gb + input_size * input_disk_scale), + cpu_cores: 1, + preemptible_tries: 3, + max_retries: 1, + boot_disk_gb: 10 + } + RuntimeAttr runtime_override = select_first([runtime_attr_override, runtime_default]) + runtime { + memory: "~{select_first([runtime_override.mem_gb, runtime_default.mem_gb])} GB" + disks: "local-disk ~{select_first([runtime_override.disk_gb, runtime_default.disk_gb])} HDD" + cpu: select_first([runtime_override.cpu_cores, runtime_default.cpu_cores]) + preemptible: select_first([runtime_override.preemptible_tries, runtime_default.preemptible_tries]) + maxRetries: select_first([runtime_override.max_retries, runtime_default.max_retries]) + docker: sv_pipeline_docker + bootDiskSizeGb: select_first([runtime_override.boot_disk_gb, runtime_default.boot_disk_gb]) + } + + command <<< + set -eu -o pipefail + + /opt/sv-pipeline/04_variant_resolution/scripts/rename_after_vcfcluster.py \ + --chrom ~{contig} \ + --prefix ~{prefix} \ + ~{vcf} stdout \ + | fgrep -v "##INFO=<ID=MEMBERS" \ + | bgzip -c \ + > "~{cleaned_shard_name}" + tabix ~{cleaned_shard_name} + >>> + + output { + File final_cleaned_shard = cleaned_shard_name + File final_cleaned_shard_idx = cleaned_shard_name + ".tbi" + } +} diff --git a/wdl/Module01.wdl b/wdl/ClusterBatch.wdl similarity index 97% rename from wdl/Module01.wdl rename to wdl/ClusterBatch.wdl index cdf04f58f..f1d1ce9ce 100644 --- a/wdl/Module01.wdl +++ b/wdl/ClusterBatch.wdl @@ -2,9 +2,9 @@ version 1.0 import "PESRClustering.wdl" as pesr import "DepthClustering.wdl" as depth -import "Module01Metrics.wdl" as metrics +import "ClusterBatchMetrics.wdl" as metrics -workflow Module01 { +workflow ClusterBatch { input { Array[File]? manta_vcfs Array[File]? delly_vcfs @@ -148,7 +148,7 @@ workflow Module01 { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call metrics.Module01Metrics { + call metrics.ClusterBatchMetrics { input: name = batch, depth_vcf = ClusterDepth.clustered_vcf, @@ -175,6 +175,6 @@ workflow Module01 { File? wham_vcf = ClusterPESR_wham.clustered_vcf File? melt_vcf = ClusterPESR_melt.clustered_vcf - File? metrics_file_01 = Module01Metrics.metrics_file + File? metrics_file_clusterbatch = ClusterBatchMetrics.metrics_file } } diff --git a/wdl/Module01Metrics.wdl b/wdl/ClusterBatchMetrics.wdl similarity index 97% rename from wdl/Module01Metrics.wdl rename to wdl/ClusterBatchMetrics.wdl index 6f929e0d7..28376153c 100644 --- a/wdl/Module01Metrics.wdl +++ b/wdl/ClusterBatchMetrics.wdl @@ -3,7 +3,7 @@ version 1.0 import "TestUtils.wdl" as tu import "Utils.wdl" as util -workflow Module01Metrics { +workflow ClusterBatchMetrics { input { Array[String]? samples String name @@ -95,7 +95,7 @@ workflow Module01Metrics { call tu.CatMetrics { input: - prefix = "module01." + name,
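+ # CatMetrics prepends this prefix to every metric name, so with name = "batch1" the + # concatenated metrics are keyed "ClusterBatch.batch1" where they were "module01.batch1"; + # baseline metrics files generated under the old module names may need regenerating to match. + prefix = "ClusterBatch."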
+ name, metric_files = select_all([depth_metrics.out, delly_metrics.out, manta_metrics.out, melt_metrics.out, wham_metrics.out]), linux_docker = linux_docker } diff --git a/wdl/ClusterSingleChromosome.wdl b/wdl/ClusterSingleChromosome.wdl index 9141300fa..f1b1bf9d2 100644 --- a/wdl/ClusterSingleChromosome.wdl +++ b/wdl/ClusterSingleChromosome.wdl @@ -2,7 +2,7 @@ version 1.0 # Author: Ryan Collins -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "ShardedCluster.wdl" as ShardedCluster # Workflow to perform sharding & clustering of a vcf for a single chromosome diff --git a/wdl/CollectQcPerSample.wdl b/wdl/CollectQcPerSample.wdl index d195d6bb8..3252c9a1b 100644 --- a/wdl/CollectQcPerSample.wdl +++ b/wdl/CollectQcPerSample.wdl @@ -2,7 +2,7 @@ version 1.0 # Author: Ryan Collins -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Workflow to gather lists of variant IDs per sample from an SV VCF workflow CollectQcPerSample { diff --git a/wdl/Module0506Cluster.wdl b/wdl/CombineBatches.wdl similarity index 98% rename from wdl/Module0506Cluster.wdl rename to wdl/CombineBatches.wdl index 6163e1d41..26619df85 100644 --- a/wdl/Module0506Cluster.wdl +++ b/wdl/CombineBatches.wdl @@ -1,9 +1,9 @@ version 1.0 import "VcfClusterSingleChromsome.wdl" as VcfClusterContig -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks -workflow Module0506Cluster { +workflow CombineBatches { input { String cohort_name Array[String] batches @@ -196,7 +196,7 @@ workflow Module0506Cluster { vcfs=MergePesrDepth.merged_vcf, vcfs_idx=MergePesrDepth.merged_vcf_idx, merge_sort=true, - outfile_prefix="~{cohort_name}.0506_clustered", + outfile_prefix="~{cohort_name}.CombineBatches_clustered", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_override_concat } diff --git a/wdl/Duphold.wdl b/wdl/Duphold.wdl index 8cedfd26c..0439d3161 100644 --- a/wdl/Duphold.wdl +++ b/wdl/Duphold.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as tasks0506 +import "TasksMakeCohortVcf.wdl" as MiniTasks import "TasksBenchmark.wdl" as tasks10 workflow Duphold { @@ -72,7 +72,7 @@ workflow Duphold { } } - call tasks0506.ConcatVcfs as ConcatVcfs{ + call MiniTasks.ConcatVcfs as ConcatVcfs{ input: vcfs=Bcf2Vcf.vcf, merge_sort=true, diff --git a/wdl/Module00b.wdl b/wdl/EvidenceQC.wdl similarity index 99% rename from wdl/Module00b.wdl rename to wdl/EvidenceQC.wdl index e7dda2cc4..54c1431ff 100644 --- a/wdl/Module00b.wdl +++ b/wdl/EvidenceQC.wdl @@ -12,7 +12,7 @@ import "MedianCov.wdl" as mc # - Dosage scoring # - QC for raw SV calls (optional) -workflow Module00b { +workflow EvidenceQC { input { # Batch info String batch diff --git a/wdl/Module03.wdl b/wdl/FilterBatch.wdl similarity index 98% rename from wdl/Module03.wdl rename to wdl/FilterBatch.wdl index da701e78e..5bee48f76 100644 --- a/wdl/Module03.wdl +++ b/wdl/FilterBatch.wdl @@ -2,9 +2,9 @@ version 1.0 import "FilterOutliers.wdl" as filter_outliers import "Utils.wdl" as util -import "Module03Metrics.wdl" as metrics +import "FilterBatchMetrics.wdl" as metrics -workflow Module03 { +workflow FilterBatch { input { String batch File? 
manta_vcf @@ -117,7 +117,7 @@ workflow Module03 { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call metrics.Module03Metrics { + call metrics.FilterBatchMetrics { input: name = batch, samples = GetSampleIdsFromVcf.out_array, @@ -151,7 +151,7 @@ workflow Module03 { File outlier_samples_excluded_file = FilterOutlierSamples.outlier_samples_excluded_file File batch_samples_postOutlierExclusion_file = FilterOutlierSamples.filtered_batch_samples_file - File? metrics_file_03 = Module03Metrics.metrics_file + File? metrics_file_filterbatch = FilterBatchMetrics.metrics_file } } diff --git a/wdl/Module03Metrics.wdl b/wdl/FilterBatchMetrics.wdl similarity index 96% rename from wdl/Module03Metrics.wdl rename to wdl/FilterBatchMetrics.wdl index 6f428bb41..b17d1b4c7 100644 --- a/wdl/Module03Metrics.wdl +++ b/wdl/FilterBatchMetrics.wdl @@ -4,7 +4,7 @@ import "TestUtils.wdl" as tu import "Utils.wdl" as util import "Structs.wdl" -workflow Module03Metrics { +workflow FilterBatchMetrics { input { Array[String] samples String name @@ -71,7 +71,7 @@ workflow Module03Metrics { call tu.CatMetrics { input: - prefix = "module03." + name, + prefix = "FilterBatch." + name, metric_files = [PESR_VCF_Metrics.out, Depth_VCF_Metrics.out, CutoffAndOutlierMetrics.out], linux_docker = linux_docker } diff --git a/wdl/Module03Qc.wdl b/wdl/FilterBatchQc.wdl similarity index 98% rename from wdl/Module03Qc.wdl rename to wdl/FilterBatchQc.wdl index 4c0ba51b9..97397e134 100644 --- a/wdl/Module03Qc.wdl +++ b/wdl/FilterBatchQc.wdl @@ -3,7 +3,7 @@ version 1.0 import "MasterVcfQc.wdl" as vcf_qc import "Utils.wdl" as util -workflow Module03Qc { +workflow FilterBatchQc { input { File? manta_vcf_noOutliers File? 
delly_vcf_noOutliers @@ -95,7 +95,7 @@ workflow Module03Qc { input: vcf = select_first([vcfs_array[i]]), ped_file=SubsetPedFile.ped_subset_file, - prefix="${batch}.${algorithms[i]}_03_filtered_vcf", + prefix="${batch}.${algorithms[i]}_FilterBatch_filtered_vcf", sv_per_shard=10000, samples_per_shard=100, thousand_genomes_tarballs=thousand_genomes_tarballs, diff --git a/wdl/FilterCleanupQualRecalibration.wdl b/wdl/FilterCleanupQualRecalibration.wdl index e79ef7ff5..1a853896f 100644 --- a/wdl/FilterCleanupQualRecalibration.wdl +++ b/wdl/FilterCleanupQualRecalibration.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow FilterCleanupQualRecalibration { diff --git a/wdl/GATKSVPipelineBatch.wdl b/wdl/GATKSVPipelineBatch.wdl index 4be19f80e..cab4db17d 100644 --- a/wdl/GATKSVPipelineBatch.wdl +++ b/wdl/GATKSVPipelineBatch.wdl @@ -1,17 +1,18 @@ version 1.0 -import "Module00aBatch.wdl" as m00a -import "Module00b.wdl" as m00b +import "GatherSampleEvidenceBatch.wdl" as sampleevidence +import "EvidenceQC.wdl" as evidenceqc import "GATKSVPipelinePhase1.wdl" as phase1 -import "Module04.wdl" as m04 -import "Module04b.wdl" as m04b -import "Module0506.wdl" as m0506 +import "GenotypeBatch.wdl" as genotypebatch +import "RegenotypeCNVs.wdl" as regenocnvs +import "MakeCohortVcf.wdl" as makecohortvcf import "Utils.wdl" as utils import "Structs.wdl" import "TestUtils.wdl" as tu # GATK SV Pipeline batch mode -# Runs modules 00abc, 01, 02, 03, 04, 0506 +# Runs GatherSampleEvidence, EvidenceQC, GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch, GenotypeBatch, RegenotypeCNVs, +# and MakeCohortVcf (CombineBatches, ResolveComplexVariants, GenotypeComplexVariants, and CleanVcf) workflow GATKSVPipelineBatch { input { @@ -49,10 +50,10 @@ workflow GATKSVPipelineBatch { Array[File]? snp_vcfs File? snp_vcf_header # Required only if VCFs are unheadered - # Merge contig vcfs at each stage of Module 0506 for QC - Boolean module0506_merge_cluster_vcfs = false - Boolean module0506_merge_complex_resolve_vcfs = false - Boolean module0506_merge_complex_genotype_vcfs = false + # Merge contig vcfs at each stage of MakeCohortVcf for QC + Boolean makecohortvcf_merge_cluster_vcfs = false + Boolean makecohortvcf_merge_complex_resolve_vcfs = false + Boolean makecohortvcf_merge_complex_genotype_vcfs = false # Global files File ped_file @@ -66,21 +67,21 @@ workflow GATKSVPipelineBatch { File allosome_file # fai of allosomal contigs # Run module metrics - all modules on by default for batch WDL - Boolean? run_00a_metrics - Boolean? run_00c_metrics = true # 00c metrics is off by default standalone but on for batch WDL - Boolean? run_01_metrics - Boolean? run_02_metrics - Boolean? run_03_metrics - Boolean? run_04_metrics - Boolean? run_0506_metrics - - File? baseline_00a_metrics - File? baseline_00c_metrics - File? baseline_01_metrics - File? baseline_02_metrics - File? baseline_03_metrics - File? baseline_04_metrics - File? baseline_0506_metrics + Boolean? run_sampleevidence_metrics + Boolean? run_batchevidence_metrics = true # GatherBatchEvidenceMetrics is off by default standalone but on for batch WDL + Boolean? run_clusterbatch_metrics + Boolean? run_batchmetrics_metrics + Boolean? run_filterbatch_metrics + Boolean? run_genotypebatch_metrics + Boolean? run_makecohortvcf_metrics + + File? baseline_sampleevidence_metrics + File? baseline_batchevidence_metrics + File? baseline_clusterbatch_metrics
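+ # Unset run_*_metrics flags default to on through the idiom visible in the ClusterBatch + # and FilterBatch hunks above: + #   Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true + # so each metrics workflow runs unless its flag is explicitly false; when baseline_* files + # are supplied they are concatenated and compared against the freshly generated batch metrics. + File?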
baseline_batchmetrics_metrics + File? baseline_filterbatch_metrics + File? baseline_genotypebatch_metrics + File? baseline_makecohortvcf_metrics String sv_base_mini_docker String sv_base_docker @@ -115,10 +116,10 @@ workflow GATKSVPipelineBatch { String? melt_docker_ = if (!defined(melt_vcfs) && use_melt) then melt_docker else NONE_STRING_ String? wham_docker_ = if (!defined(wham_vcfs) && use_wham) then wham_docker else NONE_STRING_ - Boolean run_module00a = collect_coverage_ || collect_pesr_ || defined(delly_docker_) || defined(manta_docker_) || defined(melt_docker_) || defined(wham_docker_) + Boolean run_sampleevidence = collect_coverage_ || collect_pesr_ || defined(delly_docker_) || defined(manta_docker_) || defined(melt_docker_) || defined(wham_docker_) - if (run_module00a) { - call m00a.Module00aBatch { + if (run_sampleevidence) { + call sampleevidence.GatherSampleEvidenceBatch { input: bam_or_cram_files=select_first([bam_or_cram_files]), bam_or_cram_indexes=bam_or_cram_indexes, @@ -129,7 +130,7 @@ workflow GATKSVPipelineBatch { reference_fasta=reference_fasta, reference_index=reference_index, reference_dict=reference_dict, - run_module_metrics = run_00a_metrics, + run_module_metrics = run_sampleevidence_metrics, primary_contigs_fai = primary_contigs_fai, batch = batch, sv_pipeline_base_docker = sv_pipeline_base_docker, @@ -148,24 +149,24 @@ workflow GATKSVPipelineBatch { } } - Array[File] counts_files_ = if collect_coverage_ then select_all(select_first([Module00aBatch.coverage_counts])) else select_first([counts_files]) - Array[File] pe_files_ = if collect_pesr_ then select_all(select_first([Module00aBatch.pesr_disc])) else select_first([pe_files]) - Array[File] sr_files_ = if collect_pesr_ then select_all(select_first([Module00aBatch.pesr_split])) else select_first([sr_files]) + Array[File] counts_files_ = if collect_coverage_ then select_all(select_first([GatherSampleEvidenceBatch.coverage_counts])) else select_first([counts_files]) + Array[File] pe_files_ = if collect_pesr_ then select_all(select_first([GatherSampleEvidenceBatch.pesr_disc])) else select_first([pe_files]) + Array[File] sr_files_ = if collect_pesr_ then select_all(select_first([GatherSampleEvidenceBatch.pesr_split])) else select_first([sr_files]) if (use_delly) { - Array[File] delly_vcfs_ = if defined(delly_vcfs) then select_first([delly_vcfs]) else select_all(select_first([Module00aBatch.delly_vcf])) + Array[File] delly_vcfs_ = if defined(delly_vcfs) then select_first([delly_vcfs]) else select_all(select_first([GatherSampleEvidenceBatch.delly_vcf])) } if (use_manta) { - Array[File] manta_vcfs_ = if defined(manta_vcfs) then select_first([manta_vcfs]) else select_all(select_first([Module00aBatch.manta_vcf])) + Array[File] manta_vcfs_ = if defined(manta_vcfs) then select_first([manta_vcfs]) else select_all(select_first([GatherSampleEvidenceBatch.manta_vcf])) } if (use_melt) { - Array[File] melt_vcfs_ = if defined(melt_vcfs) then select_first([melt_vcfs]) else select_all(select_first([Module00aBatch.melt_vcf])) + Array[File] melt_vcfs_ = if defined(melt_vcfs) then select_first([melt_vcfs]) else select_all(select_first([GatherSampleEvidenceBatch.melt_vcf])) } if (use_wham) { - Array[File] wham_vcfs_ = if defined(wham_vcfs) then select_first([wham_vcfs]) else select_all(select_first([Module00aBatch.wham_vcf])) + Array[File] wham_vcfs_ = if defined(wham_vcfs) then select_first([wham_vcfs]) else select_all(select_first([GatherSampleEvidenceBatch.wham_vcf])) } - call m00b.Module00b as Module00b { + call 
evidenceqc.EvidenceQC as EvidenceQC { input: batch=batch, samples=sample_ids, @@ -190,8 +191,8 @@ workflow GATKSVPipelineBatch { reference_dict=reference_dict, BAF_files=baf_files, counts=counts_files_, - bincov_matrix=Module00b.bincov_matrix, - bincov_matrix_index=Module00b.bincov_matrix_index, + bincov_matrix=EvidenceQC.bincov_matrix, + bincov_matrix_index=EvidenceQC.bincov_matrix_index, PE_files=pe_files_, SR_files=sr_files_, delly_vcfs=delly_vcfs_, @@ -205,10 +206,10 @@ workflow GATKSVPipelineBatch { cnmops_allo_file=allosome_file, allosome_contigs=allosome_file, autosome_contigs=autosome_file, - run_00c_metrics = run_00c_metrics, - run_01_metrics = run_01_metrics, - run_02_metrics = run_02_metrics, - run_03_metrics = run_03_metrics, + run_batchevidence_metrics = run_batchevidence_metrics, + run_clusterbatch_metrics = run_clusterbatch_metrics, + run_batchmetrics_metrics = run_batchmetrics_metrics, + run_filterbatch_metrics = run_filterbatch_metrics, primary_contigs_list = primary_contigs_list, sv_base_mini_docker=sv_base_mini_docker, sv_base_docker=sv_base_docker, @@ -223,7 +224,7 @@ workflow GATKSVPipelineBatch { condense_counts_docker=condense_counts_docker } - call m04.Module04 as Module04 { + call genotypebatch.GenotypeBatch as GenotypeBatch { input: batch_pesr_vcf=GATKSVPipelinePhase1.filtered_pesr_vcf, batch_depth_vcf=select_first([GATKSVPipelinePhase1.filtered_depth_vcf]), @@ -240,7 +241,7 @@ workflow GATKSVPipelineBatch { splitfile_index=GATKSVPipelinePhase1.merged_SR_index, ped_file=ped_file, ref_dict=reference_dict, - run_module_metrics = run_04_metrics, + run_module_metrics = run_genotypebatch_metrics, primary_contigs_list = primary_contigs_list, sv_pipeline_base_docker = sv_pipeline_base_docker, sv_base_mini_docker=sv_base_mini_docker, @@ -249,9 +250,9 @@ workflow GATKSVPipelineBatch { linux_docker=linux_docker } - call m04b.Module04b as Module04b { + call regenocnvs.RegenotypeCNVs as RegenotypeCNVs { input: - depth_vcfs=[Module04.genotyped_depth_vcf], + depth_vcfs=[GenotypeBatch.genotyped_depth_vcf], batch_depth_vcfs=[select_first([GATKSVPipelinePhase1.filtered_depth_vcf])], cohort_depth_vcf=select_first([GATKSVPipelinePhase1.filtered_depth_vcf]), batches=[batch], @@ -260,9 +261,9 @@ workflow GATKSVPipelineBatch { coveragefiles=[GATKSVPipelinePhase1.merged_bincov], coveragefile_idxs=[GATKSVPipelinePhase1.merged_bincov_index], ped_file=ped_file, - RD_depth_sepcutoffs=[select_first([Module04.trained_genotype_depth_depth_sepcutoff])], + RD_depth_sepcutoffs=[select_first([GenotypeBatch.trained_genotype_depth_depth_sepcutoff])], contig_list=primary_contigs_list, - regeno_coverage_medians=[Module04.regeno_coverage_medians], + regeno_coverage_medians=[GenotypeBatch.regeno_coverage_medians], sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, sv_pipeline_rdtest_docker=sv_pipeline_rdtest_docker, @@ -270,16 +271,16 @@ workflow GATKSVPipelineBatch { } - call m0506.Module0506 as Module0506 { + call makecohortvcf.MakeCohortVcf as MakeCohortVcf { input: - merge_cluster_vcfs = module0506_merge_cluster_vcfs, - merge_complex_resolve_vcfs = module0506_merge_complex_resolve_vcfs, - merge_complex_genotype_vcfs = module0506_merge_complex_genotype_vcfs, - raw_sr_bothside_pass_files=[Module04.sr_bothside_pass], - raw_sr_background_fail_files=[Module04.sr_background_fail], + merge_cluster_vcfs = makecohortvcf_merge_cluster_vcfs, + merge_complex_resolve_vcfs = makecohortvcf_merge_complex_resolve_vcfs, + merge_complex_genotype_vcfs = 
makecohortvcf_merge_complex_genotype_vcfs, + raw_sr_bothside_pass_files=[GenotypeBatch.sr_bothside_pass], + raw_sr_background_fail_files=[GenotypeBatch.sr_background_fail], ped_file=ped_file, - pesr_vcfs=[Module04.genotyped_pesr_vcf], - depth_vcfs=Module04b.regenotyped_depth_vcfs, + pesr_vcfs=[GenotypeBatch.genotyped_pesr_vcf], + depth_vcfs=RegenotypeCNVs.regenotyped_depth_vcfs, contig_list=primary_contigs_fai, allosome_fai=allosome_file, ref_dict=reference_dict, @@ -289,9 +290,9 @@ workflow GATKSVPipelineBatch { cohort_name=batch, rf_cutoff_files=[GATKSVPipelinePhase1.cutoffs], batches=[batch], - depth_gt_rd_sep_files=[select_first([Module04.trained_genotype_depth_depth_sepcutoff])], + depth_gt_rd_sep_files=[select_first([GenotypeBatch.trained_genotype_depth_depth_sepcutoff])], median_coverage_files=[GATKSVPipelinePhase1.median_cov], - run_module_metrics = run_0506_metrics, + run_module_metrics = run_makecohortvcf_metrics, primary_contigs_list = primary_contigs_list, sv_pipeline_base_docker = sv_pipeline_base_docker, linux_docker=linux_docker, @@ -304,11 +305,11 @@ workflow GATKSVPipelineBatch { call tu.CatMetrics as CatBatchMetrics { input: prefix = "batch_sv." + batch, - metric_files = select_all([Module00aBatch.metrics_file_00a, GATKSVPipelinePhase1.metrics_file_00c, GATKSVPipelinePhase1.metrics_file_01, GATKSVPipelinePhase1.metrics_file_02, GATKSVPipelinePhase1.metrics_file_03, Module04.metrics_file_04, Module0506.metrics_file_0506]), + metric_files = select_all([GatherSampleEvidenceBatch.metrics_file_sampleevidence, GATKSVPipelinePhase1.metrics_file_batchevidence, GATKSVPipelinePhase1.metrics_file_clusterbatch, GATKSVPipelinePhase1.metrics_file_batchmetrics, GATKSVPipelinePhase1.metrics_file_filterbatch, GenotypeBatch.metrics_file_genotypebatch, MakeCohortVcf.metrics_file_makecohortvcf]), linux_docker = linux_docker } - Array[File] defined_baseline_metrics = select_all([baseline_00a_metrics, baseline_00c_metrics, baseline_01_metrics, baseline_02_metrics, baseline_03_metrics, baseline_04_metrics, baseline_0506_metrics]) + Array[File] defined_baseline_metrics = select_all([baseline_sampleevidence_metrics, baseline_batchevidence_metrics, baseline_clusterbatch_metrics, baseline_batchmetrics_metrics, baseline_filterbatch_metrics, baseline_genotypebatch_metrics, baseline_makecohortvcf_metrics]) if (length(defined_baseline_metrics) > 0) { call tu.CatMetrics as CatBaselineMetrics { input: @@ -334,8 +335,8 @@ workflow GATKSVPipelineBatch { } output { - File vcf = Module0506.vcf - File vcf_index = Module0506.vcf_index + File vcf = MakeCohortVcf.vcf + File vcf_index = MakeCohortVcf.vcf_index File metrics_file_batch = CatBatchMetrics.out File qc_file = BatchQC.out @@ -354,12 +355,12 @@ workflow GATKSVPipelineBatch { File final_sample_outlier_list = GATKSVPipelinePhase1.outlier_samples_excluded_file File cutoffs = GATKSVPipelinePhase1.cutoffs - File genotype_pesr_pesr_sepcutoff = select_first([Module04.trained_genotype_pesr_pesr_sepcutoff]) - File genotype_pesr_depth_sepcutoff = select_first([Module04.trained_genotype_pesr_depth_sepcutoff]) - File genotype_depth_pesr_sepcutoff = select_first([Module04.trained_genotype_depth_pesr_sepcutoff]) - File genotype_depth_depth_sepcutoff = select_first([Module04.trained_genotype_depth_depth_sepcutoff]) - File PE_metrics = select_first([Module04.trained_PE_metrics]) - File SR_metrics = select_first([Module04.trained_SR_metrics]) + File genotype_pesr_pesr_sepcutoff = select_first([GenotypeBatch.trained_genotype_pesr_pesr_sepcutoff]) + File 
genotype_pesr_depth_sepcutoff = select_first([GenotypeBatch.trained_genotype_pesr_depth_sepcutoff]) + File genotype_depth_pesr_sepcutoff = select_first([GenotypeBatch.trained_genotype_depth_pesr_sepcutoff]) + File genotype_depth_depth_sepcutoff = select_first([GenotypeBatch.trained_genotype_depth_depth_sepcutoff]) + File PE_metrics = select_first([GenotypeBatch.trained_PE_metrics]) + File SR_metrics = select_first([GenotypeBatch.trained_SR_metrics]) } } diff --git a/wdl/GATKSVPipelineBatchMetrics.wdl b/wdl/GATKSVPipelineBatchMetrics.wdl deleted file mode 100644 index 0216515f5..000000000 --- a/wdl/GATKSVPipelineBatchMetrics.wdl +++ /dev/null @@ -1,281 +0,0 @@ -version 1.0 - -import "Module00aBatchMetrics.wdl" as m00a -import "Module00cMetrics.wdl" as m00c -import "Module01Metrics.wdl" as m01 -import "Module02Metrics.wdl" as m02 -import "Module03Metrics.wdl" as m03 -import "Module04Metrics.wdl" as m04 -import "Module0506Metrics.wdl" as m0506 -import "TestUtils.wdl" as utils - -workflow BatchMetrics { - input { - String name - Array[String] samples - File contig_list - File contig_index - String linux_docker - String sv_pipeline_base_docker - String sv_base_mini_docker - - File? baseline_00a_metrics - File? baseline_00c_metrics - File? baseline_01_metrics - File? baseline_02_metrics - File? baseline_03_metrics - File? baseline_04_metrics - File? baseline_0506_metrics - - # 00a - Array[File] coverage_counts - Array[File] pesr_disc - Array[File] pesr_split - Array[File]? delly_vcf - Array[File]? manta_vcf - Array[File]? melt_vcf - Array[File]? wham_vcf - - Array[File]? baseline_delly_vcf - Array[File]? baseline_manta_vcf - Array[File]? baseline_melt_vcf - Array[File]? baseline_wham_vcf - - # 00c - File merged_BAF - File merged_SR - File merged_PE - File merged_bincov - File merged_dels - File merged_dups - File median_cov - Array[File]? std_delly_vcf - Array[File]? std_manta_vcf - Array[File]? std_melt_vcf - Array[File]? std_wham_vcf - - File? baseline_merged_dels - File? baseline_merged_dups - File? baseline_median_cov - Array[File]? baseline_std_delly_vcf - Array[File]? baseline_std_manta_vcf - Array[File]? baseline_std_melt_vcf - Array[File]? baseline_std_wham_vcf - - # 01 - File merged_depth_vcf - File? merged_delly_vcf - File? merged_manta_vcf - File? merged_wham_vcf - File? merged_melt_vcf - - File? baseline_merged_depth_vcf - File? baseline_merged_delly_vcf - File? baseline_merged_manta_vcf - File? baseline_merged_wham_vcf - File? baseline_merged_melt_vcf - - # 02 - File metrics - File metrics_common - - # 03 - File filtered_pesr_vcf - File filtered_depth_vcf - File cutoffs - File outlier_list - File ped_file - File samples_post_filtering_file - - File? baseline_filtered_pesr_vcf - File? baseline_filtered_depth_vcf - - # 04 - File genotyped_pesr_vcf - File genotyped_depth_vcf - File cutoffs_pesr_pesr - File cutoffs_pesr_depth - File cutoffs_depth_pesr - File cutoffs_depth_depth - File sr_bothside_pass - File sr_background_fail - - File? baseline_genotyped_pesr_vcf - File? baseline_genotyped_depth_vcf - - # 0506 - File? module0506_cluster_vcf - File? module0506_complex_resolve_vcf - File? module0506_complex_genotype_vcf - File module0506_cleaned_vcf - - File? baseline_module0506_cluster_vcf - File? baseline_module0506_complex_resolve_vcf - File? baseline_module0506_complex_genotype_vcf - File? 
baseline_module0506_cleaned_vcf - } - - Array[String] samples_post_filter = read_lines(samples_post_filtering_file) - - call m00a.Module00aBatchMetrics { - input: - name = name, - samples = samples, - coverage_counts = coverage_counts, - pesr_disc = pesr_disc, - pesr_split = pesr_split, - delly_vcf = delly_vcf, - manta_vcf = manta_vcf, - melt_vcf = melt_vcf, - wham_vcf = wham_vcf, - baseline_delly_vcf = baseline_delly_vcf, - baseline_manta_vcf = baseline_manta_vcf, - baseline_melt_vcf = baseline_melt_vcf, - baseline_wham_vcf = baseline_wham_vcf, - contig_list = contig_list, - contig_index = contig_index, - sv_pipeline_base_docker = sv_pipeline_base_docker, - linux_docker = linux_docker - } - - call m00c.Module00cMetrics { - input: - name = name, - samples = samples, - merged_BAF = merged_BAF, - merged_SR = merged_SR, - merged_PE = merged_PE, - merged_bincov = merged_bincov, - merged_dels = merged_dels, - merged_dups = merged_dups, - median_cov = median_cov, - std_delly_vcf = std_delly_vcf, - std_manta_vcf = std_manta_vcf, - std_melt_vcf = std_melt_vcf, - std_wham_vcf = std_wham_vcf, - baseline_merged_dels = baseline_merged_dels, - baseline_merged_dups = baseline_merged_dups, - baseline_median_cov = baseline_median_cov, - baseline_std_delly_vcf = baseline_std_delly_vcf, - baseline_std_manta_vcf = baseline_std_manta_vcf, - baseline_std_melt_vcf = baseline_std_melt_vcf, - baseline_std_wham_vcf = baseline_std_wham_vcf, - contig_list = contig_list, - sv_pipeline_base_docker = sv_pipeline_base_docker, - linux_docker = linux_docker - } - - call m01.Module01Metrics { - input: - name = name, - samples = samples, - depth_vcf = merged_depth_vcf, - delly_vcf = merged_delly_vcf, - manta_vcf = merged_manta_vcf, - wham_vcf = merged_wham_vcf, - melt_vcf = merged_melt_vcf, - baseline_depth_vcf = baseline_merged_depth_vcf, - baseline_delly_vcf = baseline_merged_delly_vcf, - baseline_manta_vcf = baseline_merged_manta_vcf, - baseline_wham_vcf = baseline_merged_wham_vcf, - baseline_melt_vcf = baseline_merged_melt_vcf, - contig_list = contig_list, - sv_pipeline_base_docker = sv_pipeline_base_docker, - linux_docker = linux_docker - } - - call m02.Module02Metrics { - input: - name = name, - metrics = metrics, - metrics_common = metrics_common, - contig_list = contig_list, - linux_docker = linux_docker, - sv_pipeline_base_docker = sv_pipeline_base_docker - } - - call m03.Module03Metrics { - input: - name = name, - samples = samples, - filtered_pesr_vcf = filtered_pesr_vcf, - filtered_depth_vcf = filtered_depth_vcf, - cutoffs = cutoffs, - outlier_list = outlier_list, - ped_file = ped_file, - samples_post_filtering_file = samples_post_filtering_file, - baseline_filtered_pesr_vcf = baseline_filtered_pesr_vcf, - baseline_filtered_depth_vcf = baseline_filtered_depth_vcf, - contig_list = contig_list, - linux_docker = linux_docker, - sv_pipeline_base_docker = sv_pipeline_base_docker, - sv_base_mini_docker = sv_base_mini_docker - } - - call m04.Module04Metrics { - input: - name = name, - samples = samples_post_filter, - genotyped_pesr_vcf = genotyped_pesr_vcf, - genotyped_depth_vcf = genotyped_depth_vcf, - cutoffs_pesr_pesr = cutoffs_pesr_pesr, - cutoffs_pesr_depth = cutoffs_pesr_depth, - cutoffs_depth_pesr = cutoffs_depth_pesr, - cutoffs_depth_depth = cutoffs_depth_depth, - sr_bothside_pass = sr_bothside_pass, - sr_background_fail = sr_background_fail, - baseline_genotyped_pesr_vcf = baseline_genotyped_pesr_vcf, - baseline_genotyped_depth_vcf = baseline_genotyped_depth_vcf, - contig_list = contig_list, - linux_docker = 
linux_docker, - sv_pipeline_base_docker = sv_pipeline_base_docker - } - - call m0506.Module0506Metrics { - input: - name = name, - samples = samples_post_filter, - cluster_vcf = module0506_cluster_vcf, - complex_resolve_vcf = module0506_complex_resolve_vcf, - complex_genotype_vcf = module0506_complex_genotype_vcf, - cleaned_vcf = module0506_cleaned_vcf, - baseline_cluster_vcf = baseline_module0506_cluster_vcf, - baseline_complex_resolve_vcf = baseline_module0506_complex_resolve_vcf, - baseline_complex_genotype_vcf = baseline_module0506_complex_genotype_vcf, - baseline_cleaned_vcf = baseline_module0506_cleaned_vcf, - contig_list = contig_list, - linux_docker = linux_docker, - sv_pipeline_base_docker = sv_pipeline_base_docker - } - - call utils.CatMetrics as CatBatchMetrics { - input: - prefix = "batch_sv." + name, - metric_files = [Module00aBatchMetrics.metrics_file, Module00cMetrics.metrics_file, Module01Metrics.metrics_file, Module02Metrics.metrics_file, Module03Metrics.metrics_file, Module04Metrics.metrics_file, Module0506Metrics.metrics_file], - linux_docker = linux_docker - } - - Array[File] defined_baseline_metrics = select_all([baseline_00a_metrics, baseline_00c_metrics, baseline_01_metrics, baseline_02_metrics, baseline_03_metrics, baseline_04_metrics, baseline_0506_metrics]) - if (length(defined_baseline_metrics) > 0) { - call utils.CatMetrics as CatBaselineMetrics { - input: - prefix = "baseline." + name, - metric_files = defined_baseline_metrics, - linux_docker = linux_docker - } - call utils.PlotMetrics { - input: - name = name, - samples = samples, - test_metrics = CatBatchMetrics.out, - base_metrics = CatBaselineMetrics.out, - sv_pipeline_base_docker = sv_pipeline_base_docker - } - } - - output { - File metrics_file = CatBatchMetrics.out - File? metrics_plot_pdf = PlotMetrics.metrics_plot_pdf - File? metrics_plot_tsv = PlotMetrics.metrics_plot_tsv - } -} diff --git a/wdl/GATKSVPipelinePhase1.wdl b/wdl/GATKSVPipelinePhase1.wdl index 80090b26a..6eb5fa181 100644 --- a/wdl/GATKSVPipelinePhase1.wdl +++ b/wdl/GATKSVPipelinePhase1.wdl @@ -1,13 +1,13 @@ version 1.0 -import "Module00c.wdl" as m00c -import "Module01.wdl" as m01 -import "Module02.wdl" as m02 -import "Module03.wdl" as m03 +import "GatherBatchEvidence.wdl" as batchevidence +import "ClusterBatch.wdl" as clusterbatch +import "GenerateBatchMetrics.wdl" as batchmetrics +import "FilterBatch.wdl" as filterbatch import "Structs.wdl" # One mighty WDL to rule them all... -# Runs Modules 00c, 01, 02, and 03 +# Runs GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch workflow GATKSVPipelinePhase1 { input { @@ -37,7 +37,7 @@ workflow GATKSVPipelinePhase1 { String condense_counts_docker ############################################################ - ## Module 00c + ## GatherBatchEvidence ############################################################ # PE/SR/BAF/RD files @@ -161,7 +161,7 @@ workflow GATKSVPipelinePhase1 { RuntimeAttr? runtime_attr_explode ############################################################ - ## Module 01 + ## ClusterBatch ############################################################ Int pesr_svsize @@ -183,7 +183,7 @@ workflow GATKSVPipelinePhase1 { RuntimeAttr? runtime_attr_rdtest_bed ############################################################ - ## Module 02 + ## GenerateBatchMetrics ############################################################ Int BAF_split_size @@ -210,7 +210,7 @@ workflow GATKSVPipelinePhase1 { RuntimeAttr? 
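Worth noting in the deleted BatchMetrics workflow above: the baseline comparison is gated by collecting the optional baseline metrics files with select_all, which drops undefined optionals, and entering the conditional block only when the resulting array is non-empty. A minimal self-contained sketch of that idiom, with hypothetical task and input names:

    version 1.0

    task CatFiles {
      input {
        Array[File] files
      }
      command {
        cat ~{sep=" " files} > combined.txt
      }
      output {
        File out = "combined.txt"
      }
      runtime {
        docker: "ubuntu:18.04"
      }
    }

    workflow BaselineGateSketch {
      input {
        File? baseline_a   # hypothetical optional baseline inputs
        File? baseline_b
      }
      # select_all keeps only the defined optionals
      Array[File] defined_baselines = select_all([baseline_a, baseline_b])
      # skipped entirely when no baselines were provided
      if (length(defined_baselines) > 0) {
        call CatFiles { input: files = defined_baselines }
      }
      output {
        File? combined_baselines = CatFiles.out   # optional, since the call is conditional
      }
    }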
runtime_attr_merge_stats ############################################################ - ## Module 03 + ## FilterBatch ############################################################ File? outlier_cutoff_table @@ -225,19 +225,19 @@ workflow GATKSVPipelinePhase1 { RuntimeAttr? runtime_attr_filter_samples ############################################################ - ## Module metrics parameters for 00c, 01, 02, and 03 metrics + ## Module metrics parameters for GatherBatchEvidence, ClusterBatch, GenerateBatchMetrics, FilterBatch metrics ############################################################ - # Run module metrics workflow at the end - by default on except for Module00c because of runtime/expense - Boolean? run_00c_metrics - Boolean? run_01_metrics - Boolean? run_02_metrics - Boolean? run_03_metrics + # Run module metrics workflow at the end - by default on except for GatherBatchEvidence because of runtime/expense + Boolean? run_batchevidence_metrics + Boolean? run_clusterbatch_metrics + Boolean? run_batchmetrics_metrics + Boolean? run_filterbatch_metrics File? primary_contigs_list # required if run_module_metrics = true } - call m00c.Module00c as Module00c { + call batchevidence.GatherBatchEvidence as GatherBatchEvidence { input: batch = batch, samples = samples, @@ -343,19 +343,19 @@ workflow GATKSVPipelinePhase1 { runtime_attr_bundle = runtime_attr_bundle, runtime_attr_postprocess = runtime_attr_postprocess, runtime_attr_explode = runtime_attr_explode, - run_module_metrics = run_00c_metrics, + run_module_metrics = run_batchevidence_metrics, primary_contigs_list = primary_contigs_list, sv_pipeline_base_docker = sv_pipeline_base_docker } - call m01.Module01 as Module01 { + call clusterbatch.ClusterBatch as ClusterBatch { input: - manta_vcfs=Module00c.std_manta_vcf, - delly_vcfs=Module00c.std_delly_vcf, - wham_vcfs=Module00c.std_wham_vcf, - melt_vcfs=Module00c.std_melt_vcf, - del_bed=Module00c.merged_dels, - dup_bed=Module00c.merged_dups, + manta_vcfs=GatherBatchEvidence.std_manta_vcf, + delly_vcfs=GatherBatchEvidence.std_delly_vcf, + wham_vcfs=GatherBatchEvidence.std_wham_vcf, + melt_vcfs=GatherBatchEvidence.std_melt_vcf, + del_bed=GatherBatchEvidence.merged_dels, + dup_bed=GatherBatchEvidence.merged_dups, batch=batch, pesr_svsize=pesr_svsize, pesr_frac=pesr_frac, @@ -375,25 +375,25 @@ workflow GATKSVPipelinePhase1 { runtime_attr_depth_concat=runtime_attr_depth_concat, runtime_attr_depth_vcf=runtime_attr_depth_vcf, runtime_attr_rdtest_bed=runtime_attr_rdtest_bed, - run_module_metrics = run_01_metrics, + run_module_metrics = run_clusterbatch_metrics, primary_contigs_list = primary_contigs_list, sv_pipeline_base_docker = sv_pipeline_base_docker, linux_docker = linux_docker } - call m02.Module02 as Module02 { + call batchmetrics.GenerateBatchMetrics as GenerateBatchMetrics { input: batch=batch, - depth_vcf=Module01.depth_vcf, - melt_vcf=Module01.melt_vcf, - delly_vcf=Module01.delly_vcf, - wham_vcf=Module01.wham_vcf, - manta_vcf=Module01.manta_vcf, - baf_metrics=select_first([Module00c.merged_BAF]), - discfile=Module00c.merged_PE, - coveragefile=Module00c.merged_bincov, - splitfile=Module00c.merged_SR, - medianfile=Module00c.median_cov, + depth_vcf=ClusterBatch.depth_vcf, + melt_vcf=ClusterBatch.melt_vcf, + delly_vcf=ClusterBatch.delly_vcf, + wham_vcf=ClusterBatch.wham_vcf, + manta_vcf=ClusterBatch.manta_vcf, + baf_metrics=select_first([GatherBatchEvidence.merged_BAF]), + discfile=GatherBatchEvidence.merged_PE, + coveragefile=GatherBatchEvidence.merged_bincov, + 
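The renamed imports in GATKSVPipelinePhase1 above follow the convention used throughout this patch: the import alias tracks the new file name, and the call statement re-aliases the workflow to its own name, so downstream references read as GatherBatchEvidence.merged_PE rather than m00c.merged_PE. A two-file sketch of the pattern, assuming a hypothetical Inner.wdl next to the caller:

    # Inner.wdl (hypothetical)
    version 1.0

    workflow Inner {
      input {
        String batch
      }
      output {
        String label = "processed " + batch
      }
    }

    # Caller.wdl (hypothetical)
    version 1.0

    import "Inner.wdl" as inner_alias

    workflow Caller {
      input {
        String batch
      }
      # <file alias>.<workflow name> selects the workflow; "as" names this call
      call inner_alias.Inner as Inner {
        input:
          batch = batch
      }
      output {
        String label = Inner.label
      }
    }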
splitfile=GatherBatchEvidence.merged_SR, + medianfile=GatherBatchEvidence.median_cov, BAF_split_size=BAF_split_size, RD_split_size=RD_split_size, PE_split_size=PE_split_size, @@ -422,21 +422,21 @@ workflow GATKSVPipelinePhase1 { runtime_attr_merge_allo=runtime_attr_merge_allo, runtime_attr_merge_baf=runtime_attr_merge_baf, runtime_attr_merge_stats=runtime_attr_merge_stats, - run_module_metrics = run_02_metrics, + run_module_metrics = run_batchmetrics_metrics, primary_contigs_list = primary_contigs_list } - call m03.Module03 as Module03 { + call filterbatch.FilterBatch as FilterBatch { input: batch=batch, - manta_vcf=Module01.manta_vcf, - delly_vcf=Module01.delly_vcf, - wham_vcf=Module01.wham_vcf, - melt_vcf=Module01.melt_vcf, - depth_vcf=Module01.depth_vcf, + manta_vcf=ClusterBatch.manta_vcf, + delly_vcf=ClusterBatch.delly_vcf, + wham_vcf=ClusterBatch.wham_vcf, + melt_vcf=ClusterBatch.melt_vcf, + depth_vcf=ClusterBatch.depth_vcf, outlier_cutoff_table=outlier_cutoff_table, - evidence_metrics=Module02.metrics, - evidence_metrics_common=Module02.metrics_common, + evidence_metrics=GenerateBatchMetrics.metrics, + evidence_metrics_common=GenerateBatchMetrics.metrics_common, outlier_cutoff_nIQR=outlier_cutoff_nIQR, sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, @@ -448,7 +448,7 @@ workflow GATKSVPipelinePhase1 { runtime_attr_exclude_outliers=runtime_attr_exclude_outliers, runtime_attr_cat_outliers=runtime_attr_cat_outliers, runtime_attr_filter_samples=runtime_attr_filter_samples, - run_module_metrics = run_03_metrics, + run_module_metrics = run_filterbatch_metrics, primary_contigs_list = primary_contigs_list, sv_pipeline_base_docker = sv_pipeline_base_docker, ped_file = ped_file @@ -456,63 +456,63 @@ workflow GATKSVPipelinePhase1 { output { # Module 00 - File merged_BAF = select_first([Module00c.merged_BAF]) - File merged_BAF_index = select_first([Module00c.merged_BAF_index]) - File merged_SR = Module00c.merged_SR - File merged_SR_index = Module00c.merged_SR_index - File merged_PE = Module00c.merged_PE - File merged_PE_index = Module00c.merged_PE_index - File merged_bincov = Module00c.merged_bincov - File merged_bincov_index = Module00c.merged_bincov_index - - File median_cov = Module00c.median_cov - - File? PE_stats = Module00c.PE_stats - File? RD_stats = Module00c.RD_stats - File? SR_stats = Module00c.SR_stats - File? BAF_stats = Module00c.BAF_stats - File? Matrix_QC_plot=Module00c.Matrix_QC_plot - - File merged_dels = Module00c.merged_dels - File merged_dups = Module00c.merged_dups - - Array[File]? std_manta_vcf = Module00c.std_manta_vcf - Array[File]? std_delly_vcf = Module00c.std_delly_vcf - Array[File]? std_melt_vcf = Module00c.std_melt_vcf - Array[File]? std_wham_vcf = Module00c.std_wham_vcf - - File? metrics_file_00c = Module00c.metrics_file_00c - - # Module 01 - File? depth_vcf = Module01.depth_vcf - File? manta_vcf = Module01.manta_vcf - File? delly_vcf = Module01.delly_vcf - File? wham_vcf = Module01.wham_vcf - File? melt_vcf = Module01.melt_vcf - - File? metrics_file_01 = Module01.metrics_file_01 - - # Module 02 - File evidence_metrics = Module02.metrics - File evidence_metrics_common = Module02.metrics_common - - File? metrics_file_02 = Module02.metrics_file_02 - - # Module 03 - File? filtered_manta_vcf = Module03.filtered_manta_vcf - File? filtered_delly_vcf = Module03.filtered_delly_vcf - File? filtered_wham_vcf = Module03.filtered_wham_vcf - File? filtered_melt_vcf = Module03.filtered_melt_vcf - File? 
filtered_depth_vcf = Module03.filtered_depth_vcf - File filtered_pesr_vcf = Module03.filtered_pesr_vcf - File cutoffs = Module03.cutoffs - File scores = Module03.scores - File RF_intermediate_files = Module03.RF_intermediate_files - Array[String] outlier_samples_excluded = Module03.outlier_samples_excluded - Array[String] batch_samples_postOutlierExclusion = Module03.batch_samples_postOutlierExclusion - File outlier_samples_excluded_file = Module03.outlier_samples_excluded_file - File batch_samples_postOutlierExclusion_file = Module03.batch_samples_postOutlierExclusion_file - - File? metrics_file_03 = Module03.metrics_file_03 + File merged_BAF = select_first([GatherBatchEvidence.merged_BAF]) + File merged_BAF_index = select_first([GatherBatchEvidence.merged_BAF_index]) + File merged_SR = GatherBatchEvidence.merged_SR + File merged_SR_index = GatherBatchEvidence.merged_SR_index + File merged_PE = GatherBatchEvidence.merged_PE + File merged_PE_index = GatherBatchEvidence.merged_PE_index + File merged_bincov = GatherBatchEvidence.merged_bincov + File merged_bincov_index = GatherBatchEvidence.merged_bincov_index + + File median_cov = GatherBatchEvidence.median_cov + + File? PE_stats = GatherBatchEvidence.PE_stats + File? RD_stats = GatherBatchEvidence.RD_stats + File? SR_stats = GatherBatchEvidence.SR_stats + File? BAF_stats = GatherBatchEvidence.BAF_stats + File? Matrix_QC_plot=GatherBatchEvidence.Matrix_QC_plot + + File merged_dels = GatherBatchEvidence.merged_dels + File merged_dups = GatherBatchEvidence.merged_dups + + Array[File]? std_manta_vcf = GatherBatchEvidence.std_manta_vcf + Array[File]? std_delly_vcf = GatherBatchEvidence.std_delly_vcf + Array[File]? std_melt_vcf = GatherBatchEvidence.std_melt_vcf + Array[File]? std_wham_vcf = GatherBatchEvidence.std_wham_vcf + + File? metrics_file_batchevidence = GatherBatchEvidence.metrics_file_batchevidence + + # ClusterBatch + File? depth_vcf = ClusterBatch.depth_vcf + File? manta_vcf = ClusterBatch.manta_vcf + File? delly_vcf = ClusterBatch.delly_vcf + File? wham_vcf = ClusterBatch.wham_vcf + File? melt_vcf = ClusterBatch.melt_vcf + + File? metrics_file_clusterbatch = ClusterBatch.metrics_file_clusterbatch + + # GenerateBatchMetrics + File evidence_metrics = GenerateBatchMetrics.metrics + File evidence_metrics_common = GenerateBatchMetrics.metrics_common + + File? metrics_file_batchmetrics = GenerateBatchMetrics.metrics_file_batchmetrics + + # FilterBatch + File? filtered_manta_vcf = FilterBatch.filtered_manta_vcf + File? filtered_delly_vcf = FilterBatch.filtered_delly_vcf + File? filtered_wham_vcf = FilterBatch.filtered_wham_vcf + File? filtered_melt_vcf = FilterBatch.filtered_melt_vcf + File? filtered_depth_vcf = FilterBatch.filtered_depth_vcf + File filtered_pesr_vcf = FilterBatch.filtered_pesr_vcf + File cutoffs = FilterBatch.cutoffs + File scores = FilterBatch.scores + File RF_intermediate_files = FilterBatch.RF_intermediate_files + Array[String] outlier_samples_excluded = FilterBatch.outlier_samples_excluded + Array[String] batch_samples_postOutlierExclusion = FilterBatch.batch_samples_postOutlierExclusion + File outlier_samples_excluded_file = FilterBatch.outlier_samples_excluded_file + File batch_samples_postOutlierExclusion_file = FilterBatch.batch_samples_postOutlierExclusion_file + + File? 
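Several of the outputs above (merged_BAF, the per-caller VCFs) come from calls or declarations inside conditionals, so they surface as optional types; where a concrete value is required, the workflows unwrap with select_first, which returns the first defined element and errors at runtime if none is. The pipeline typically passes a single-element array, making the value effectively required. A minimal sketch, with hypothetical names:

    version 1.0

    workflow SelectFirstSketch {
      input {
        Boolean make_it = true
      }
      if (make_it) {
        String maybe = "value"   # declared inside a conditional, so optional outside it
      }
      # first defined element wins; runtime error if all are undefined
      String definitely = select_first([maybe, "fallback"])
      output {
        String? optional_view = maybe
        String required_view = definitely
      }
    }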
metrics_file_filterbatch = FilterBatch.metrics_file_filterbatch } } diff --git a/wdl/GATKSVPipelineSingleSample.wdl b/wdl/GATKSVPipelineSingleSample.wdl index 6ca91a2ec..573703009 100644 --- a/wdl/GATKSVPipelineSingleSample.wdl +++ b/wdl/GATKSVPipelineSingleSample.wdl @@ -1,17 +1,17 @@ version 1.0 -import "Module00a.wdl" as m00a -import "Module00b.wdl" as m00b +import "GatherSampleEvidence.wdl" as sampleevidence +import "EvidenceQC.wdl" as evidenceqc import "PloidyEstimation.wdl" as pe -import "Module00c.wdl" as m00c +import "GatherBatchEvidence.wdl" as batchevidence import "DepthPreprocessing.wdl" as dpn -import "Module01.wdl" as m01 -import "Module02.wdl" as m02 +import "ClusterBatch.wdl" as clusterbatch +import "GenerateBatchMetrics.wdl" as batchmetrics import "SRTest.wdl" as SRTest -import "Module03.wdl" as m03 -import "Module04.wdl" as m04 -import "Module0506.wdl" as m0506 -import "Module08Annotation.wdl" as m08 +import "FilterBatch.wdl" as filterbatch +import "GenotypeBatch.wdl" as genotypebatch +import "MakeCohortVcf.wdl" as makecohortvcf +import "AnnotateVcf.wdl" as annotate import "GermlineCNVCase.wdl" as gcnv import "SingleSampleFiltering.wdl" as SingleSampleFiltering import "GATKSVPipelineSingleSampleMetrics.wdl" as SingleSampleMetrics @@ -20,7 +20,8 @@ import "TestUtils.wdl" as tu import "Structs.wdl" # GATK SV Pipeline single sample mode -# Runs Modules 00abc, 01, 03.MergePesrVcfs, 04, 05/06 +# Runs GatherSampleEvidence, EvidenceQC, GatherBatchEvidence, ClusterBatch, FilterBatch.MergePesrVcfs, GenotypeBatch, +# MakeCohortVcf (CombineBatches, ResolveComplexVariants, GenotypeComplexVariants, CleanVcf), and AnnotateVcf workflow GATKSVPipelineSingleSample { meta { @@ -39,7 +40,7 @@ workflow GATKSVPipelineSingleSample { Boolean use_melt = true Boolean use_wham = true - # If Module00a outputs already prepared + # If GatherSampleEvidence outputs already prepared File? case_delly_vcf File? case_manta_vcf File? case_melt_vcf @@ -84,11 +85,11 @@ workflow GATKSVPipelineSingleSample { String? wham_docker ############################################################ - ## Module 00a + ## GatherSampleEvidence ############################################################ - # Required if any 00a outputs need to be generated (vcfs, counts, pe/sr files) - # (When "If Module00a outputs already prepared" section above is used) + # Required if any GatherSampleEvidence outputs need to be generated (vcfs, counts, pe/sr files) + # (When "If GatherSampleEvidence outputs already prepared" section above is used) File? bam_or_cram_file File? bam_or_cram_index @@ -121,8 +122,8 @@ workflow GATKSVPipelineSingleSample { # Wham inputs File wham_include_list_bed_file - # Run Module00a metrics - default is off for single sample pipeline - Boolean? run_00a_metrics = false + # Run GatherSampleEvidence metrics - default is off for single sample pipeline + Boolean? run_sampleevidence_metrics = false # Runtime configuration overrides RuntimeAttr? runtime_attr_baf @@ -137,7 +138,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_wham_include_list ############################################################ - ## Module 00b + ## EvidenceQC ############################################################ # Optional QC tasks @@ -154,7 +155,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr?
wgd_score_runtime_attr ############################################################ - ## Module 00c + ## GatherBatchEvidence ############################################################ # Parameters @@ -216,8 +217,8 @@ workflow GATKSVPipelineSingleSample { # QC files Int matrix_qc_distance - # Run Module00c metrics - default is off for single sample pipeline - Boolean? run_00c_metrics = false + # Run GatherBatchEvidence metrics - default is off for single sample pipeline + Boolean? run_batchevidence_metrics = false RuntimeAttr? median_cov_runtime_attr # Memory ignored, use median_cov_mem_gb_per_sample Float? median_cov_mem_gb_per_sample @@ -250,11 +251,11 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_explode ############################################################ - ## Module 01 + ## ClusterBatch ############################################################ # Depth merging parameters - RuntimeAttr? runtime_attr_depth_merge_pre_01 + RuntimeAttr? runtime_attr_depth_merge_pre_clusterbatch # Reference panel standardized caller VCFs Array[File] ref_std_manta_vcfs @@ -274,8 +275,8 @@ workflow GATKSVPipelineSingleSample { File? Werling_2018_tarball File? Collins_2017_tarball - # Run Module01 metrics - default is off for single sample pipeline - Boolean? run_01_metrics = false + # Run ClusterBatch metrics - default is off for single sample pipeline + Boolean? run_clusterbatch_metrics = false RuntimeAttr? runtime_attr_pesr_cluster RuntimeAttr? runtime_attr_pesr_concat @@ -287,7 +288,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_filter_vcf_by_id ############################################################ - ## Module 02/03 + ## GenerateBatchMetrics/FilterBatch ############################################################ File rmsk @@ -306,7 +307,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_merge_pesr_vcfs ############################################################ - ## Module 04 + ## GenotypeBatch ############################################################ Int genotyping_n_per_split @@ -324,8 +325,8 @@ workflow GATKSVPipelineSingleSample { File bin_exclude - # Run Module04 metrics - default is off for single sample pipeline - Boolean? run_04_metrics = false + # Run GenotypeBatch metrics - default is off for single sample pipeline + Boolean? run_genotypebatch_metrics = false # Common RuntimeAttr? runtime_attr_merge_counts @@ -335,7 +336,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_add_genotypes RuntimeAttr? runtime_attr_genotype_depths_concat_vcfs RuntimeAttr? runtime_attr_genotype_pesr_concat_vcfs - RuntimeAttr? runtime_attr_split_vcf_module04 + RuntimeAttr? runtime_attr_split_vcf_genotypebatch # Master @@ -355,7 +356,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? runtime_attr_integrate_depth_gq ############################################################ - ## Module 0506 + ## MakeCohortVcf ############################################################ Float clean_vcf_min_sr_background_fail_batches @@ -375,8 +376,8 @@ workflow GATKSVPipelineSingleSample { Int? clean_vcf_random_seed - # Run Module0506 metrics - default is off for single sample pipeline - Boolean? run_0506_metrics = false + # Run MakeCohortVcf metrics - default is off for single sample pipeline + Boolean? run_makecohortvcf_metrics = false RuntimeAttr? runtime_override_update_sr_list RuntimeAttr? runtime_override_merge_pesr_depth @@ -389,7 +390,7 @@ workflow GATKSVPipelineSingleSample { RuntimeAttr? 
runtime_override_make_cpx_cnv_input_file ############################################################ - ## Module 08 + ## AnnotateVcf ############################################################ File protein_coding_gtf @@ -439,10 +440,10 @@ workflow GATKSVPipelineSingleSample { Boolean collect_coverage = !defined(case_counts_file) Boolean collect_pesr = !defined(case_pe_file) || !defined(case_sr_file) - Boolean run_00a = defined(delly_docker_) || defined(manta_docker_) || defined(melt_docker_) || defined(wham_docker_) || collect_coverage || collect_pesr + Boolean run_sampleevidence = defined(delly_docker_) || defined(manta_docker_) || defined(melt_docker_) || defined(wham_docker_) || collect_coverage || collect_pesr - if (run_00a) { - call m00a.Module00a as Module00a { + if (run_sampleevidence) { + call sampleevidence.GatherSampleEvidence as GatherSampleEvidence { input: bam_or_cram_file=select_first([bam_or_cram_file]), bam_or_cram_index=bam_or_cram_index, @@ -470,7 +471,7 @@ workflow GATKSVPipelineSingleSample { pct_chimeras=pct_chimeras, total_reads=total_reads, wham_include_list_bed_file=wham_include_list_bed_file, - run_module_metrics = run_00a_metrics, + run_module_metrics = run_sampleevidence_metrics, sv_pipeline_docker=sv_pipeline_docker, sv_base_mini_docker=sv_base_mini_docker, delly_docker=delly_docker_, @@ -493,11 +494,11 @@ workflow GATKSVPipelineSingleSample { } } - File case_counts_file_ = select_first([case_counts_file, Module00a.coverage_counts]) - File case_pe_file_ = select_first([case_pe_file, Module00a.pesr_disc]) - File case_sr_file_ = select_first([case_sr_file, Module00a.pesr_split]) + File case_counts_file_ = select_first([case_counts_file, GatherSampleEvidence.coverage_counts]) + File case_pe_file_ = select_first([case_pe_file, GatherSampleEvidence.pesr_disc]) + File case_sr_file_ = select_first([case_sr_file, GatherSampleEvidence.pesr_split]) - call m00b.Module00b as Module00b { + call evidenceqc.EvidenceQC as EvidenceQC { input: batch=batch, samples=[sample_id], @@ -517,19 +518,19 @@ workflow GATKSVPipelineSingleSample { } if (use_delly) { - Array[File] delly_vcfs_ = [select_first([case_delly_vcf, Module00a.delly_vcf])] + Array[File] delly_vcfs_ = [select_first([case_delly_vcf, GatherSampleEvidence.delly_vcf])] } if (use_manta) { - Array[File] manta_vcfs_ = [select_first([case_manta_vcf, Module00a.manta_vcf])] + Array[File] manta_vcfs_ = [select_first([case_manta_vcf, GatherSampleEvidence.manta_vcf])] } if (use_melt) { - Array[File] melt_vcfs_ = [select_first([case_melt_vcf, Module00a.melt_vcf])] + Array[File] melt_vcfs_ = [select_first([case_melt_vcf, GatherSampleEvidence.melt_vcf])] } if (use_wham) { - Array[File] wham_vcfs_ = [select_first([case_wham_vcf, Module00a.wham_vcf])] + Array[File] wham_vcfs_ = [select_first([case_wham_vcf, GatherSampleEvidence.wham_vcf])] } - call m00c.Module00c as Module00c { + call batchevidence.GatherBatchEvidence as GatherBatchEvidence { input: batch=batch, samples=[sample_id], @@ -543,8 +544,8 @@ workflow GATKSVPipelineSingleSample { ref_dict=reference_dict, counts=[case_counts_file_], ref_panel_bincov_matrix=ref_panel_bincov_matrix, - bincov_matrix=Module00b.bincov_matrix, - bincov_matrix_index=Module00b.bincov_matrix_index, + bincov_matrix=EvidenceQC.bincov_matrix, + bincov_matrix_index=EvidenceQC.bincov_matrix_index, PE_files=[case_pe_file_], cytoband=cytobands, mei_bed=mei_bed, @@ -598,7 +599,7 @@ workflow GATKSVPipelineSingleSample { cnmops_allo_file=allosome_file, cnmops_large_min_size=cnmops_large_min_size, 
matrix_qc_distance=matrix_qc_distance, - run_module_metrics = run_00c_metrics, + run_module_metrics = run_batchevidence_metrics, sv_base_mini_docker=sv_base_mini_docker, sv_base_docker=sv_base_docker, sv_pipeline_docker=sv_pipeline_docker, @@ -637,33 +638,33 @@ workflow GATKSVPipelineSingleSample { runtime_attr_explode = runtime_attr_explode } - File combined_ped_file = select_first([Module00c.combined_ped_file]) + File combined_ped_file = select_first([GatherBatchEvidence.combined_ped_file]) # Merge calls with reference panel - Array[File] merged_manta_vcfs_array = flatten([select_first([Module00c.std_manta_vcf]), ref_std_manta_vcfs]) - Array[File] merged_wham_vcfs_array = flatten([select_first([Module00c.std_wham_vcf]), ref_std_wham_vcfs]) - if (defined(Module00c.std_melt_vcf)) { - Array[File]? merged_melt_vcfs_array = flatten([select_first([Module00c.std_melt_vcf]), select_first([ref_std_melt_vcfs])]) + Array[File] merged_manta_vcfs_array = flatten([select_first([GatherBatchEvidence.std_manta_vcf]), ref_std_manta_vcfs]) + Array[File] merged_wham_vcfs_array = flatten([select_first([GatherBatchEvidence.std_wham_vcf]), ref_std_wham_vcfs]) + if (defined(GatherBatchEvidence.std_melt_vcf)) { + Array[File]? merged_melt_vcfs_array = flatten([select_first([GatherBatchEvidence.std_melt_vcf]), select_first([ref_std_melt_vcfs])]) } call dpn.MergeSet as MergeSetDel { input: - beds=[Module00c.merged_dels, ref_panel_del_bed], + beds=[GatherBatchEvidence.merged_dels, ref_panel_del_bed], svtype="DEL", batch=batch, sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_depth_merge_pre_01 + runtime_attr_override=runtime_attr_depth_merge_pre_clusterbatch } call dpn.MergeSet as MergeSetDup { input: - beds=[Module00c.merged_dups, ref_panel_dup_bed], + beds=[GatherBatchEvidence.merged_dups, ref_panel_dup_bed], svtype="DUP", batch=batch, sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_attr_depth_merge_pre_01 + runtime_attr_override=runtime_attr_depth_merge_pre_clusterbatch } - call m01.Module01 as Module01 { + call clusterbatch.ClusterBatch as ClusterBatch { input: manta_vcfs=merged_manta_vcfs_array, wham_vcfs=merged_wham_vcfs_array, @@ -681,7 +682,7 @@ workflow GATKSVPipelineSingleSample { depth_flags=depth_flags, depth_frac=depth_frac, contigs=primary_contigs_fai, - run_module_metrics = run_01_metrics, + run_module_metrics = run_clusterbatch_metrics, sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, runtime_attr_pesr_cluster=runtime_attr_pesr_cluster, @@ -696,7 +697,7 @@ workflow GATKSVPipelineSingleSample { if (use_manta) { call SingleSampleFiltering.FilterVcfBySampleGenotypeAndAddEvidenceAnnotation as FilterManta { input : - vcf_gz=select_first([Module01.manta_vcf]), + vcf_gz=select_first([ClusterBatch.manta_vcf]), sample_id=sample_id, evidence="RD,PE,SR", sv_base_mini_docker=sv_base_mini_docker, @@ -706,7 +707,7 @@ workflow GATKSVPipelineSingleSample { if (use_wham) { call SingleSampleFiltering.FilterVcfBySampleGenotypeAndAddEvidenceAnnotation as FilterWham { input : - vcf_gz=select_first([Module01.wham_vcf]), + vcf_gz=select_first([ClusterBatch.wham_vcf]), sample_id=sample_id, evidence="RD,PE,SR", sv_base_mini_docker=sv_base_mini_docker, @@ -716,7 +717,7 @@ workflow GATKSVPipelineSingleSample { if (use_melt) { call SingleSampleFiltering.FilterVcfBySampleGenotypeAndAddEvidenceAnnotation as FilterMelt { input : - vcf_gz=select_first([Module01.melt_vcf]), + vcf_gz=select_first([ClusterBatch.melt_vcf]), sample_id=sample_id, 
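In the single-sample workflow above, case calls are merged with the reference panel simply by concatenating the standardized VCF arrays (the merged_*_vcfs_array declarations). A sketch of the flatten idiom, with hypothetical inputs:

    version 1.0

    workflow CasePlusPanelSketch {
      input {
        Array[File]? case_std_vcfs      # produced upstream, possibly undefined
        Array[File] ref_panel_std_vcfs  # shipped with the reference panel
      }
      # flatten concatenates sub-arrays in order: case calls first, then the panel
      Array[File] merged_vcfs = flatten([select_first([case_std_vcfs]), ref_panel_std_vcfs])
      output {
        Array[File] out = merged_vcfs
      }
    }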
evidence="RD,PE,SR", sv_base_mini_docker=sv_base_mini_docker, @@ -726,7 +727,7 @@ workflow GATKSVPipelineSingleSample { if (use_delly) { call SingleSampleFiltering.FilterVcfBySampleGenotypeAndAddEvidenceAnnotation as FilterDelly { input : - vcf_gz=select_first([Module01.delly_vcf]), + vcf_gz=select_first([ClusterBatch.delly_vcf]), sample_id=sample_id, evidence="RD,PE,SR", sv_base_mini_docker=sv_base_mini_docker, @@ -736,14 +737,14 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.FilterVcfBySampleGenotypeAndAddEvidenceAnnotation as FilterDepth { input : - vcf_gz=Module01.depth_vcf, + vcf_gz=ClusterBatch.depth_vcf, sample_id=sample_id, evidence="RD", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_attr_filter_vcf_by_id } - call m03.MergePesrVcfs as MergePesrVcfs { + call filterbatch.MergePesrVcfs as MergePesrVcfs { input: manta_vcf=FilterManta.out, wham_vcf=FilterWham.out, @@ -757,15 +758,15 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.FilterLargePESRCallsWithoutRawDepthSupport as FilterLargePESRCallsWithoutRawDepthSupport { input: pesr_vcf=MergePesrVcfs.merged_pesr_vcf, - raw_dels=Module00c.merged_dels, - raw_dups=Module00c.merged_dups, + raw_dels=GatherBatchEvidence.merged_dels, + raw_dups=GatherBatchEvidence.merged_dups, min_large_pesr_call_size_for_filtering=min_large_pesr_call_size_for_filtering, min_large_pesr_depth_overlap_fraction=min_large_pesr_depth_overlap_fraction, sv_pipeline_docker = sv_pipeline_docker, runtime_attr_override=runtime_attr_filter_large_pesr } - call m02.GetSampleLists as SamplesList { + call batchmetrics.GetSampleLists as SamplesList { input: ped_file = combined_ped_file, samples = flatten([[sample_id], ref_samples]), @@ -774,8 +775,8 @@ workflow GATKSVPipelineSingleSample { call SRTest.SRTest as SRTest { input: - splitfile = Module00c.merged_SR, - medianfile = Module00c.median_cov, + splitfile = GatherBatchEvidence.merged_SR, + medianfile = GatherBatchEvidence.median_cov, ped_file = combined_ped_file, vcf = FilterLargePESRCallsWithoutRawDepthSupport.out, autosome_contigs = autosome_file, @@ -797,7 +798,7 @@ workflow GATKSVPipelineSingleSample { runtime_attr_merge_stats = runtime_attr_merge_stats } - call m02.AggregateTests as AggregateTests { + call batchmetrics.AggregateTests as AggregateTests { input: vcf=FilterLargePESRCallsWithoutRawDepthSupport.out, srtest=SRTest.srtest, @@ -816,7 +817,7 @@ workflow GATKSVPipelineSingleSample { runtime_attr_override = runtime_attr_rewritesrcoords } - call m04.Module04 as Module04 { + call genotypebatch.GenotypeBatch as GenotypeBatch { input: batch_pesr_vcf=RewriteSRCoords.annotated_vcf, batch_depth_vcf=FilterDepth.out, @@ -824,13 +825,13 @@ workflow GATKSVPipelineSingleSample { cohort_depth_vcf=FilterDepth.out, batch=batch, n_per_split=genotyping_n_per_split, - medianfile=Module00c.median_cov, - coveragefile=Module00c.merged_bincov, - coveragefile_index=Module00c.merged_bincov_index, - discfile=Module00c.merged_PE, - discfile_index=Module00c.merged_PE_index, - splitfile=Module00c.merged_SR, - splitfile_index=Module00c.merged_SR_index, + medianfile=GatherBatchEvidence.median_cov, + coveragefile=GatherBatchEvidence.merged_bincov, + coveragefile_index=GatherBatchEvidence.merged_bincov_index, + discfile=GatherBatchEvidence.merged_PE, + discfile_index=GatherBatchEvidence.merged_PE_index, + splitfile=GatherBatchEvidence.merged_SR, + splitfile_index=GatherBatchEvidence.merged_SR_index, ped_file=combined_ped_file, ref_dict=reference_dict, 
n_RD_genotype_bins=n_RD_genotype_bins, @@ -841,12 +842,12 @@ workflow GATKSVPipelineSingleSample { SR_metrics=SR_metrics, PE_metrics=PE_metrics, bin_exclude=bin_exclude, - run_module_metrics = run_04_metrics, + run_module_metrics = run_genotypebatch_metrics, sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker, sv_pipeline_rdtest_docker=sv_pipeline_rdtest_docker, linux_docker=linux_docker, - runtime_attr_split_vcf=runtime_attr_split_vcf_module04, + runtime_attr_split_vcf=runtime_attr_split_vcf_genotypebatch, runtime_attr_merge_counts=runtime_attr_merge_counts, runtime_attr_split_variants=runtime_attr_split_variants, runtime_attr_make_subset_vcf=runtime_attr_make_subset_vcf, @@ -868,21 +869,21 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.ConvertCNVsWithoutDepthSupportToBNDs as ConvertCNVsWithoutDepthSupportToBNDs { input: - genotyped_pesr_vcf=Module04.genotyped_pesr_vcf, + genotyped_pesr_vcf=GenotypeBatch.genotyped_pesr_vcf, allosome_file=allosome_file, merged_famfile=combined_ped_file, case_sample=sample_id, sv_pipeline_docker=sv_pipeline_docker } - call m0506.Module0506 as Module0506 { + call makecohortvcf.MakeCohortVcf as MakeCohortVcf { input: - raw_sr_bothside_pass_files=[Module04.sr_bothside_pass], - raw_sr_background_fail_files=[Module04.sr_background_fail], + raw_sr_bothside_pass_files=[GenotypeBatch.sr_bothside_pass], + raw_sr_background_fail_files=[GenotypeBatch.sr_background_fail], min_sr_background_fail_batches=clean_vcf_min_sr_background_fail_batches, ped_file=combined_ped_file, pesr_vcfs=[ConvertCNVsWithoutDepthSupportToBNDs.out_vcf], - depth_vcfs=[Module04.genotyped_depth_vcf], + depth_vcfs=[GenotypeBatch.genotyped_depth_vcf], contig_list=primary_contigs_fai, allosome_fai=allosome_file, ref_dict=reference_dict, @@ -894,9 +895,9 @@ workflow GATKSVPipelineSingleSample { bin_exclude=bin_exclude, - disc_files=[Module00c.merged_PE], - disc_files_index=[Module00c.merged_PE_index], - bincov_files=[Module00c.merged_bincov], + disc_files=[GatherBatchEvidence.merged_PE], + disc_files_index=[GatherBatchEvidence.merged_PE_index], + bincov_files=[GatherBatchEvidence.merged_bincov], mei_bed=mei_bed, pe_exclude_list=pe_exclude_list, @@ -911,7 +912,7 @@ workflow GATKSVPipelineSingleSample { rf_cutoff_files=[cutoffs], batches=[batch], depth_gt_rd_sep_files=[genotype_depth_depth_sepcutoff], - median_coverage_files=[Module00c.median_cov], + median_coverage_files=[GatherBatchEvidence.median_cov], max_shards_per_chrom_clean_vcf_step1=clean_vcf_max_shards_per_chrom_clean_vcf_step1, min_records_per_shard_clean_vcf_step1=clean_vcf_min_records_per_shard_clean_vcf_step1, @@ -919,7 +920,7 @@ workflow GATKSVPipelineSingleSample { random_seed=clean_vcf_random_seed, - run_module_metrics = run_0506_metrics, + run_module_metrics = run_makecohortvcf_metrics, linux_docker=linux_docker, sv_pipeline_docker=sv_pipeline_docker, @@ -941,7 +942,7 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.FilterVcfForShortDepthCalls as FilterVcfDepthLt5kb { input: - vcf_gz=Module0506.vcf, + vcf_gz=MakeCohortVcf.vcf, min_length=5000, filter_name="DEPTH_LT_5KB", sv_base_mini_docker=sv_base_mini_docker @@ -949,7 +950,7 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.GetUniqueNonGenotypedDepthCalls as GetUniqueNonGenotypedDepthCalls { input: - vcf_gz=select_first([Module0506.complex_genotype_vcf]), + vcf_gz=select_first([MakeCohortVcf.complex_genotype_vcf]), sample_id=sample_id, ref_panel_dels=ref_panel_del_bed, 
ref_panel_dups=ref_panel_dup_bed, @@ -995,7 +996,7 @@ workflow GATKSVPipelineSingleSample { name = batch, ref_samples = ref_samples, case_sample = sample_id, - wgd_scores = Module00b.WGD_scores, + wgd_scores = EvidenceQC.WGD_scores, sample_counts = case_counts_file_, contig_list = primary_contigs_list, linux_docker = linux_docker, @@ -1026,7 +1027,7 @@ workflow GATKSVPipelineSingleSample { sv_pipeline_base_docker=sv_pipeline_base_docker, } - call m08.Module08Annotation { + call annotate.AnnotateVcf { input: vcf = FilterSample.out, vcf_idx = FilterSample.out_idx, @@ -1048,15 +1049,15 @@ workflow GATKSVPipelineSingleSample { call SingleSampleFiltering.VcfToBed as VcfToBed { input: - vcf = Module08Annotation.output_vcf, + vcf = AnnotateVcf.output_vcf, prefix = batch, sv_pipeline_docker = sv_pipeline_docker } call SingleSampleFiltering.FinalVCFCleanup as FinalVCFCleanup { input: - single_sample_vcf=Module08Annotation.output_vcf, - single_sample_vcf_idx=Module08Annotation.output_vcf_idx, + single_sample_vcf=AnnotateVcf.output_vcf, + single_sample_vcf_idx=AnnotateVcf.output_vcf_idx, ref_fasta=reference_fasta, ref_fasta_idx=reference_index, sv_pipeline_docker=sv_pipeline_docker @@ -1067,14 +1068,14 @@ workflow GATKSVPipelineSingleSample { name = batch, ref_samples = ref_samples, case_sample = sample_id, - wgd_scores = Module00b.WGD_scores, + wgd_scores = EvidenceQC.WGD_scores, sample_pe = case_pe_file_, sample_sr = case_sr_file_, sample_counts = case_counts_file_, - cleaned_vcf = Module0506.vcf, + cleaned_vcf = MakeCohortVcf.vcf, final_vcf = FinalVCFCleanup.out, genotyped_pesr_vcf = ConvertCNVsWithoutDepthSupportToBNDs.out_vcf, - genotyped_depth_vcf = Module04.genotyped_depth_vcf, + genotyped_depth_vcf = GenotypeBatch.genotyped_depth_vcf, non_genotyped_unique_depth_calls_vcf = GetUniqueNonGenotypedDepthCalls.out, contig_list = primary_contigs_list, linux_docker = linux_docker, @@ -1098,11 +1099,11 @@ workflow GATKSVPipelineSingleSample { # These files contain events reported in the internal VCF representation # They are less VCF-spec compliant but may be useful if components of the pipeline need to be re-run # on the output. 
- File pre_cleanup_vcf = Module08Annotation.output_vcf - File pre_cleanup_vcf_idx = Module08Annotation.output_vcf_idx + File pre_cleanup_vcf = AnnotateVcf.output_vcf + File pre_cleanup_vcf_idx = AnnotateVcf.output_vcf_idx - File ploidy_matrix = select_first([Module00c.ploidy_matrix]) - File ploidy_plots = select_first([Module00c.ploidy_plots]) + File ploidy_matrix = select_first([GatherBatchEvidence.ploidy_matrix]) + File ploidy_plots = select_first([GatherBatchEvidence.ploidy_plots]) File metrics_file = SingleSampleMetrics.metrics_file File qc_file = SingleSampleQC.out diff --git a/wdl/Module00c.wdl b/wdl/GatherBatchEvidence.wdl similarity index 98% rename from wdl/Module00c.wdl rename to wdl/GatherBatchEvidence.wdl index e1558f3cf..21ff222a3 100644 --- a/wdl/Module00c.wdl +++ b/wdl/GatherBatchEvidence.wdl @@ -10,7 +10,7 @@ import "DepthPreprocessing.wdl" as dpn import "MakeBincovMatrix.wdl" as mbm import "MatrixQC.wdl" as mqc import "MedianCov.wdl" as mc -import "Module00cMetrics.wdl" as metrics +import "GatherBatchEvidenceMetrics.wdl" as metrics import "PESRPreprocessing.wdl" as pp import "GermlineCNVCase.wdl" as gcnv import "PloidyEstimation.wdl" as pe @@ -23,7 +23,7 @@ import "Utils.wdl" as util # - Run gCNV # - Run MedianCoverage -workflow Module00c { +workflow GatherBatchEvidence { input { # Batch info String batch @@ -148,7 +148,7 @@ workflow Module00c { Int matrix_qc_distance # Module metrics parameters - # Run module metrics workflow at the end - off by default for Module00c because of runtime/expense + # Run module metrics workflow at the end - off by default for GatherBatchEvidence because of runtime/expense Boolean? run_module_metrics String? sv_pipeline_base_docker # required if run_module_metrics = true File? primary_contigs_list # required if run_module_metrics = true @@ -514,7 +514,7 @@ workflow Module00c { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else false if (run_module_metrics_) { - call metrics.Module00cMetrics { + call metrics.GatherBatchEvidenceMetrics { input: name = batch, samples = samples, @@ -585,7 +585,7 @@ workflow Module00c { Array[File]? manta_tloc = TinyResolve.tloc_manta_vcf - File? metrics_file_00c = Module00cMetrics.metrics_file + File? metrics_file_batchevidence = GatherBatchEvidenceMetrics.metrics_file } } diff --git a/wdl/Module00cMetrics.wdl b/wdl/GatherBatchEvidenceMetrics.wdl similarity index 98% rename from wdl/Module00cMetrics.wdl rename to wdl/GatherBatchEvidenceMetrics.wdl index ba47dc9ff..aa50016fd 100644 --- a/wdl/Module00cMetrics.wdl +++ b/wdl/GatherBatchEvidenceMetrics.wdl @@ -2,7 +2,7 @@ version 1.0 import "TestUtils.wdl" as tu -workflow Module00cMetrics { +workflow GatherBatchEvidenceMetrics { input { Array[String] samples String name @@ -174,7 +174,7 @@ workflow Module00cMetrics { call tu.CatMetrics { input: - prefix = "module00c." + name, + prefix = "GatherBatchEvidence." 
+ name, metric_files = flatten([sample_metric_files, [BAFMetrics.out, SRMetrics.out, PEMetrics.out, BincovMetrics.out, MedcovMetrics.out, del_metrics, dup_metrics]]), linux_docker = linux_docker } diff --git a/wdl/Module00a.wdl b/wdl/GatherSampleEvidence.wdl similarity index 98% rename from wdl/Module00a.wdl rename to wdl/GatherSampleEvidence.wdl index 93ccb7fe3..9ac514a73 100644 --- a/wdl/Module00a.wdl +++ b/wdl/GatherSampleEvidence.wdl @@ -7,13 +7,13 @@ import "CramToBam.ReviseBase.wdl" as ctb_revise import "Delly.wdl" as delly import "Manta.wdl" as manta import "MELT.wdl" as melt -import "Module00aMetrics.wdl" as metrics +import "GatherSampleEvidenceMetrics.wdl" as metrics import "PESRCollection.wdl" as pesr import "Whamg.wdl" as wham # Runs selected tools on BAM/CRAM files -workflow Module00a { +workflow GatherSampleEvidence { input { File bam_or_cram_file File? bam_or_cram_index @@ -285,7 +285,7 @@ workflow Module00a { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call metrics.Module00aMetrics { + call metrics.GatherSampleEvidenceMetrics { input: sample = sample_id, coverage_counts = CollectCounts.counts, @@ -328,7 +328,7 @@ workflow Module00a { File? wham_vcf = Whamg.vcf File? wham_index = Whamg.index - Array[File]? sample_metrics_files = Module00aMetrics.sample_metrics_files + Array[File]? sample_metrics_files = GatherSampleEvidenceMetrics.sample_metrics_files } } diff --git a/wdl/Module00aBatch.wdl b/wdl/GatherSampleEvidenceBatch.wdl similarity index 84% rename from wdl/Module00aBatch.wdl rename to wdl/GatherSampleEvidenceBatch.wdl index b9e9d7d52..e300e4b74 100644 --- a/wdl/Module00aBatch.wdl +++ b/wdl/GatherSampleEvidenceBatch.wdl @@ -1,9 +1,9 @@ version 1.0 -import "Module00a.wdl" as m00a +import "GatherSampleEvidence.wdl" as sampleevidence import "TestUtils.wdl" as tu -workflow Module00aBatch { +workflow GatherSampleEvidenceBatch { input { Array[File] bam_or_cram_files Array[File]? bam_or_cram_indexes @@ -111,7 +111,7 @@ workflow Module00aBatch { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true scatter (i in range(length(bam_or_cram_files))) { - call m00a.Module00a { + call sampleevidence.GatherSampleEvidence { input: bam_or_cram_file = bam_or_cram_files[i], bam_or_cram_index = if defined(bam_or_cram_indexes) then select_first([bam_or_cram_indexes])[i] else NONE_FILE_, @@ -180,38 +180,38 @@ workflow Module00aBatch { } if (run_module_metrics_) { - Array[Array[File]] sample_metrics_files_ = select_all(Module00a.sample_metrics_files) + Array[Array[File]] sample_metrics_files_ = select_all(GatherSampleEvidence.sample_metrics_files) call tu.CatMetrics { input: - prefix = "module00a." + select_first([batch]), + prefix = "GatherSampleEvidence." + select_first([batch]), metric_files = flatten(sample_metrics_files_), linux_docker = select_first([linux_docker]) } } output { - Array[File?] coverage_counts = Module00a.coverage_counts + Array[File?] coverage_counts = GatherSampleEvidence.coverage_counts - Array[File?] delly_vcf = Module00a.delly_vcf - Array[File?] delly_index = Module00a.delly_index + Array[File?] delly_vcf = GatherSampleEvidence.delly_vcf + Array[File?] delly_index = GatherSampleEvidence.delly_index - Array[File?] manta_vcf = Module00a.manta_vcf - Array[File?] manta_index = Module00a.manta_index + Array[File?] manta_vcf = GatherSampleEvidence.manta_vcf + Array[File?] 
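The run_module_metrics plumbing renamed above follows one pattern in every module: the optional Boolean input is resolved to a concrete default (true for most modules, false for the expensive GatherBatchEvidence), and the metrics sub-workflow runs only inside the resulting conditional, so its output surfaces as optional. A self-contained sketch with a hypothetical metrics task:

    version 1.0

    task EmitMetrics {
      input {
        String name
      }
      command {
        echo "metrics for ~{name}" > metrics.tsv
      }
      output {
        File out = "metrics.tsv"
      }
      runtime {
        docker: "ubuntu:18.04"
      }
    }

    workflow MetricsGateSketch {
      input {
        String batch
        Boolean? run_module_metrics   # left optional so callers can omit it
      }
      # resolve the optional to a concrete default (on, in this sketch)
      Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true
      if (run_module_metrics_) {
        call EmitMetrics { input: name = batch }
      }
      output {
        File? metrics_file = EmitMetrics.out   # optional, since the call is conditional
      }
    }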
manta_index = GatherSampleEvidence.manta_index - Array[File?] melt_vcf = Module00a.melt_vcf - Array[File?] melt_index = Module00a.melt_index - Array[Float?] melt_coverage = Module00a.melt_coverage - Array[Int?] melt_read_length = Module00a.melt_read_length - Array[Float?] melt_insert_size = Module00a.melt_insert_size + Array[File?] melt_vcf = GatherSampleEvidence.melt_vcf + Array[File?] melt_index = GatherSampleEvidence.melt_index + Array[Float?] melt_coverage = GatherSampleEvidence.melt_coverage + Array[Int?] melt_read_length = GatherSampleEvidence.melt_read_length + Array[Float?] melt_insert_size = GatherSampleEvidence.melt_insert_size - Array[File?] pesr_disc = Module00a.pesr_disc - Array[File?] pesr_disc_index = Module00a.pesr_disc_index - Array[File?] pesr_split = Module00a.pesr_split - Array[File?] pesr_split_index = Module00a.pesr_split_index + Array[File?] pesr_disc = GatherSampleEvidence.pesr_disc + Array[File?] pesr_disc_index = GatherSampleEvidence.pesr_disc_index + Array[File?] pesr_split = GatherSampleEvidence.pesr_split + Array[File?] pesr_split_index = GatherSampleEvidence.pesr_split_index - Array[File?] wham_vcf = Module00a.wham_vcf - Array[File?] wham_index = Module00a.wham_index + Array[File?] wham_vcf = GatherSampleEvidence.wham_vcf + Array[File?] wham_index = GatherSampleEvidence.wham_index - File? metrics_file_00a = CatMetrics.out + File? metrics_file_sampleevidence = CatMetrics.out } } diff --git a/wdl/Module00aMetrics.wdl b/wdl/GatherSampleEvidenceMetrics.wdl similarity index 99% rename from wdl/Module00aMetrics.wdl rename to wdl/GatherSampleEvidenceMetrics.wdl index b9f377ce1..3f9629604 100644 --- a/wdl/Module00aMetrics.wdl +++ b/wdl/GatherSampleEvidenceMetrics.wdl @@ -2,7 +2,7 @@ version 1.0 import "TestUtils.wdl" as tu -workflow Module00aMetrics { +workflow GatherSampleEvidenceMetrics { input { String sample File? coverage_counts diff --git a/wdl/Module02.wdl b/wdl/GenerateBatchMetrics.wdl similarity index 97% rename from wdl/Module02.wdl rename to wdl/GenerateBatchMetrics.wdl index fe3904369..cf807099e 100644 --- a/wdl/Module02.wdl +++ b/wdl/GenerateBatchMetrics.wdl @@ -4,11 +4,11 @@ import "PETest.wdl" as pet import "RDTest.wdl" as rdt import "SRTest.wdl" as srt import "BAFTest.wdl" as baft -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics import "Utils.wdl" as util -import "Module02Metrics.wdl" as metrics +import "GenerateBatchMetricsMetrics.wdl" as metrics -workflow Module02 { +workflow GenerateBatchMetrics { input { String batch @@ -214,7 +214,7 @@ workflow Module02 { runtime_attr_override = runtime_attr_aggregate_tests } - call tasks02.GetCommonVCF { + call tasksbatchmetrics.GetCommonVCF { input: vcf = vcf, cnv_size_cutoff = common_cnv_size_cutoff, @@ -255,7 +255,7 @@ workflow Module02 { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call metrics.Module02Metrics { + call metrics.GenerateBatchMetricsMetrics { input: name = batch, metrics = AggregateCallers.metrics, @@ -270,7 +270,7 @@ workflow Module02 { File metrics = AggregateCallers.metrics File metrics_common = AggregateCallersCommon.metrics - File? metrics_file_02 = Module02Metrics.metrics_file + File? 
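GatherSampleEvidenceBatch above scatters over the BAM/CRAM array while the index array stays optional: the i-th index is looked up only when the whole array was supplied, with NONE_FILE_ (an input that is deliberately never set) standing in for a null File, since WDL 1.0 has no null literal. A sketch of that idiom, with hypothetical names:

    version 1.0

    workflow OptionalIndexSketch {
      input {
        Array[File] data_files
        Array[File]? data_indexes   # either supplied in full or omitted
        File? NONE_FILE_            # deliberately never supplied; acts as null
      }
      scatter (i in range(length(data_files))) {
        # take the i-th index only when the caller provided the array
        File? index_i = if defined(data_indexes) then select_first([data_indexes])[i] else NONE_FILE_
      }
      output {
        Array[File?] per_sample_indexes = index_i
      }
    }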
metrics_file_batchmetrics = GenerateBatchMetricsMetrics.metrics_file } } diff --git a/wdl/Module02Metrics.wdl b/wdl/GenerateBatchMetricsMetrics.wdl similarity index 80% rename from wdl/Module02Metrics.wdl rename to wdl/GenerateBatchMetricsMetrics.wdl index b4ebc52d4..863bb8f7a 100644 --- a/wdl/Module02Metrics.wdl +++ b/wdl/GenerateBatchMetricsMetrics.wdl @@ -2,7 +2,7 @@ version 1.0 import "TestUtils.wdl" as tu -workflow Module02Metrics { +workflow GenerateBatchMetricsMetrics { input { String name File metrics @@ -17,7 +17,7 @@ workflow Module02Metrics { metrics_file = metrics, contig_list = contig_list, common = false, - prefix = "module02.non_common." + name, + prefix = "GenerateBatchMetrics.non_common." + name, sv_pipeline_base_docker = sv_pipeline_base_docker } @@ -26,13 +26,13 @@ workflow Module02Metrics { metrics_file = metrics_common, contig_list = contig_list, common = true, - prefix = "module02.common." + name, + prefix = "GenerateBatchMetrics.common." + name, sv_pipeline_base_docker = sv_pipeline_base_docker } call tu.CatMetrics { input: - prefix = "module02." + name, + prefix = "GenerateBatchMetrics." + name, metric_files = [MetricsFileMetrics.out, CommonMetricsFileMetrics.out], linux_docker = linux_docker } diff --git a/wdl/Module08Preprocessing.wdl b/wdl/GenerateFunctionalAnnotationResources.wdl similarity index 94% rename from wdl/Module08Preprocessing.wdl rename to wdl/GenerateFunctionalAnnotationResources.wdl index 4b31181bb..b39d1048d 100644 --- a/wdl/Module08Preprocessing.wdl +++ b/wdl/GenerateFunctionalAnnotationResources.wdl @@ -3,8 +3,8 @@ version 1.0 import "PrepareGencode.wdl" as pg # import "PrepareNoncoding.wdl" as pn -# Workflow for the preprocessing sub-module in Module07, its output will be used for the annotation sub-module -workflow Module08Preprocessing { +# Workflow for preprocessing for functional annotation +workflow GenerateFunctionalAnnotationResources { input { ### args for PrepareGencode diff --git a/wdl/Module04.wdl b/wdl/GenotypeBatch.wdl similarity index 97% rename from wdl/Module04.wdl rename to wdl/GenotypeBatch.wdl index a1340e9db..328c51029 100644 --- a/wdl/Module04.wdl +++ b/wdl/GenotypeBatch.wdl @@ -4,11 +4,11 @@ import "GenotypePESRPart1.wdl" as gp1 import "GenotypePESRPart2.wdl" as gp2 import "GenotypeDepthPart1.wdl" as gd1 import "GenotypeDepthPart2.wdl" as gd2 -import "Module04Metrics.wdl" as metrics -import "Tasks04.wdl" as tasks04 +import "GenotypeBatchMetrics.wdl" as metrics +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch import "Utils.wdl" as util -workflow Module04 { +workflow GenotypeBatch { input { File batch_pesr_vcf File batch_depth_vcf @@ -99,7 +99,7 @@ workflow Module04 { } Boolean single_sample_mode = defined(genotype_pesr_pesr_sepcutoff) && defined(genotype_pesr_depth_sepcutoff) && defined(genotype_depth_depth_sepcutoff) && defined(SR_metrics) && defined(PE_metrics) - call tasks04.AddBatchSamples as AddBatchSamplesPESR { + call tasksgenotypebatch.AddBatchSamples as AddBatchSamplesPESR { input: batch_vcf = batch_pesr_vcf, cohort_vcf = cohort_pesr_vcf, @@ -108,7 +108,7 @@ workflow Module04 { runtime_attr_override = runtime_attr_add_batch } - call tasks04.AddBatchSamples as AddBatchSamplesDepth { + call tasksgenotypebatch.AddBatchSamples as AddBatchSamplesDepth { input: batch_vcf = batch_depth_vcf, cohort_vcf = cohort_depth_vcf, @@ -274,7 +274,7 @@ workflow Module04 { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call 
metrics.Module04Metrics { + call metrics.GenotypeBatchMetrics { input: name = batch, samples = GetSampleIdsFromVcf.out_array, @@ -312,7 +312,7 @@ workflow Module04 { File genotyped_pesr_vcf_index = GenotypePESRPart2.genotyped_vcf_index File regeno_coverage_medians = GenotypeDepthPart2.regeno_coverage_medians - File? metrics_file_04 = Module04Metrics.metrics_file + File? metrics_file_genotypebatch = GenotypeBatchMetrics.metrics_file } } diff --git a/wdl/Module04Metrics.wdl b/wdl/GenotypeBatchMetrics.wdl similarity index 97% rename from wdl/Module04Metrics.wdl rename to wdl/GenotypeBatchMetrics.wdl index 04b3b7305..610ab2739 100644 --- a/wdl/Module04Metrics.wdl +++ b/wdl/GenotypeBatchMetrics.wdl @@ -2,7 +2,7 @@ version 1.0 import "TestUtils.wdl" as tu -workflow Module04Metrics { +workflow GenotypeBatchMetrics { input { Array[String] samples String name @@ -90,7 +90,7 @@ workflow Module04Metrics { call tu.CatMetrics { input: - prefix = "module04." + name, + prefix = "GenotypeBatch." + name, metric_files = [PESR_VCF_Metrics.out, Depth_VCF_Metrics.out, Cutoff_PESR_PESR.out, Cutoff_PESR_Depth.out, Cutoff_Depth_PESR.out, Cutoff_Depth_Depth.out, Background_Fail.out, Bothside_Pass.out], linux_docker = linux_docker } diff --git a/wdl/Module0506ComplexGenotype.wdl b/wdl/GenotypeComplexVariants.wdl similarity index 96% rename from wdl/Module0506ComplexGenotype.wdl rename to wdl/GenotypeComplexVariants.wdl index 0578ad6f4..d2acc3e38 100644 --- a/wdl/Module0506ComplexGenotype.wdl +++ b/wdl/GenotypeComplexVariants.wdl @@ -1,10 +1,10 @@ version 1.0 import "ScatterCpxGenotyping.wdl" as GenotypeComplexContig -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "Utils.wdl" as util -workflow Module0506ComplexGenotype { +workflow GenotypeComplexVariants { input { String cohort_name Array[String] batches @@ -112,7 +112,7 @@ workflow Module0506ComplexGenotype { vcfs=ScatterCpxGenotyping.cpx_depth_gt_resolved_vcf, vcfs_idx=ScatterCpxGenotyping.cpx_depth_gt_resolved_vcf_idx, merge_sort=true, - outfile_prefix="~{cohort_name}.0506_complex", + outfile_prefix="~{cohort_name}.complex_genotype", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_override_concat } diff --git a/wdl/GenotypeCpxCnvs.wdl b/wdl/GenotypeCpxCnvs.wdl index 2cca6df74..5fa4a89f2 100644 --- a/wdl/GenotypeCpxCnvs.wdl +++ b/wdl/GenotypeCpxCnvs.wdl @@ -3,10 +3,10 @@ version 1.0 # Author: Ryan Collins import "GenotypeCpxCnvsPerBatch.wdl" as RunDepthGenotypePerBatch -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Workflow to perform depth-based genotyping for a single vcf shard scattered -# across batches on predicted CPX CNVs from 04b +# across batches on predicted CPX CNVs workflow GenotypeCpxCnvs { input { File bin_exclude diff --git a/wdl/GenotypeCpxCnvsPerBatch.wdl b/wdl/GenotypeCpxCnvsPerBatch.wdl index ca871eaac..49774317a 100644 --- a/wdl/GenotypeCpxCnvsPerBatch.wdl +++ b/wdl/GenotypeCpxCnvsPerBatch.wdl @@ -2,11 +2,11 @@ version 1.0 # Author: Ryan Collins -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "Utils.wdl" as Utils # Workflow to perform depth-based genotyping per batch -# on predicted CPX CNVs from 04b +# on predicted CPX CNVs workflow GenotypeCpxCnvsPerBatch { input { diff --git a/wdl/GenotypeDepthPart2.wdl b/wdl/GenotypeDepthPart2.wdl index 29bb3a476..a1fcf52d7 100644 --- a/wdl/GenotypeDepthPart2.wdl +++ b/wdl/GenotypeDepthPart2.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks04.wdl" 
as tasks04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow GenotypeDepthPart2 { input { @@ -34,7 +34,7 @@ workflow GenotypeDepthPart2 { File bin_exclude_idx = bin_exclude + ".tbi" - call tasks04.SplitVariants as SplitVariants { + call tasksgenotypebatch.SplitVariants as SplitVariants { input: vcf = cohort_vcf, n_per_split = n_per_split, @@ -45,7 +45,7 @@ workflow GenotypeDepthPart2 { scatter (gt5kb_bed in SplitVariants.gt5kb_beds) { - call tasks04.MakeSubsetVcf as MakeSubsetVcfOver5kb { + call tasksgenotypebatch.MakeSubsetVcf as MakeSubsetVcfOver5kb { input: vcf = cohort_vcf, bed = gt5kb_bed, @@ -53,7 +53,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_make_subset_vcf } - call tasks04.RDTestGenotype as RDTestGenotypeOver5kb { + call tasksgenotypebatch.RDTestGenotype as RDTestGenotypeOver5kb { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -72,7 +72,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_rdtest_genotype } - call tasks04.IntegrateDepthGq as IntegrateDepthGqOver5kb { + call tasksgenotypebatch.IntegrateDepthGq as IntegrateDepthGqOver5kb { input: vcf = MakeSubsetVcfOver5kb.subset_vcf, RD_melted_genotypes = RDTestGenotypeOver5kb.melted_genotypes, @@ -81,7 +81,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_integrate_depth_gq } - call tasks04.AddGenotypes as AddGenotypesOver5kb { + call tasksgenotypebatch.AddGenotypes as AddGenotypesOver5kb { input: vcf = MakeSubsetVcfOver5kb.subset_vcf, genotypes = IntegrateDepthGqOver5kb.genotypes, @@ -94,7 +94,7 @@ workflow GenotypeDepthPart2 { scatter (lt5kb_bed in SplitVariants.lt5kb_beds) { - call tasks04.RDTestGenotype as RDTestGenotypeUnder5kb { + call tasksgenotypebatch.RDTestGenotype as RDTestGenotypeUnder5kb { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -113,7 +113,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_rdtest_genotype } - call tasks04.MakeSubsetVcf as MakeSubsetVcfUnder5kb { + call tasksgenotypebatch.MakeSubsetVcf as MakeSubsetVcfUnder5kb { input: vcf = cohort_vcf, bed = lt5kb_bed, @@ -121,7 +121,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_make_subset_vcf } - call tasks04.IntegrateDepthGq as IntegrateDepthGqUnder5kb { + call tasksgenotypebatch.IntegrateDepthGq as IntegrateDepthGqUnder5kb { input: vcf = MakeSubsetVcfUnder5kb.subset_vcf, RD_melted_genotypes = RDTestGenotypeUnder5kb.melted_genotypes, @@ -129,7 +129,7 @@ workflow GenotypeDepthPart2 { sv_pipeline_docker = sv_pipeline_docker, runtime_attr_override = runtime_attr_integrate_depth_gq } - call tasks04.AddGenotypes as AddGenotypesUnder5kb { + call tasksgenotypebatch.AddGenotypes as AddGenotypesUnder5kb { input: vcf = MakeSubsetVcfUnder5kb.subset_vcf, genotypes = IntegrateDepthGqUnder5kb.genotypes, @@ -148,7 +148,7 @@ workflow GenotypeDepthPart2 { runtime_attr_override = runtime_attr_merge_regeno_cov_med } - call tasks04.ConcatGenotypedVcfs as ConcatGenotypedVcfs { + call tasksgenotypebatch.ConcatGenotypedVcfs as ConcatGenotypedVcfs { input: lt5kb_vcfs = AddGenotypesUnder5kb.genotyped_vcf, gt5kb_vcfs = AddGenotypesOver5kb.genotyped_vcf, diff --git a/wdl/GenotypePESRPart2.wdl b/wdl/GenotypePESRPart2.wdl index 72a4a76fa..07f5c2215 100644 --- a/wdl/GenotypePESRPart2.wdl +++ b/wdl/GenotypePESRPart2.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks04.wdl" as tasks04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow GenotypePESRPart2 { input { @@ -47,7 +47,7 @@ workflow 
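GenotypeDepthPart2 above derives the tabix index path by string concatenation (File bin_exclude_idx = bin_exclude + ".tbi"), relying on WDL's String-to-File coercion and on the .tbi sitting beside the data file. A minimal sketch with a hypothetical input:

    version 1.0

    workflow SiblingIndexSketch {
      input {
        File bgzipped_bed   # hypothetical bgzipped, tabix-indexed file
      }
      # concatenation yields a String, coerced back to File on assignment;
      # the .tbi must actually exist next to the data file
      File bgzipped_bed_idx = bgzipped_bed + ".tbi"
      output {
        File idx = bgzipped_bed_idx
      }
    }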
GenotypePESRPart2 { File bin_exclude_idx = bin_exclude + ".tbi" - call tasks04.SplitVariants as SplitVariants { + call tasksgenotypebatch.SplitVariants as SplitVariants { input: vcf = cohort_vcf, n_per_split = n_per_split, @@ -57,7 +57,7 @@ workflow GenotypePESRPart2 { } scatter (lt5kb_bed in SplitVariants.lt5kb_beds) { - call tasks04.MakeSubsetVcf as MakeSubsetVcfUnder5kb { + call tasksgenotypebatch.MakeSubsetVcf as MakeSubsetVcfUnder5kb { input: vcf = cohort_vcf, bed = lt5kb_bed, @@ -65,7 +65,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_make_subset_vcf } - call tasks04.CountPE as CountPEUnder5kb { + call tasksgenotypebatch.CountPE as CountPEUnder5kb { input: vcf = MakeSubsetVcfUnder5kb.subset_vcf, discfile = discfile, @@ -85,7 +85,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_genotype_pe } - call tasks04.CountSR as CountSRUnder5kb { + call tasksgenotypebatch.CountSR as CountSRUnder5kb { input: vcf = MakeSubsetVcfUnder5kb.subset_vcf, splitfile = splitfile, @@ -107,7 +107,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_genotype_sr } - call tasks04.RDTestGenotype as RDTestGenotypeUnder5kb { + call tasksgenotypebatch.RDTestGenotype as RDTestGenotypeUnder5kb { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -139,7 +139,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_integrate_gq } - call tasks04.AddGenotypes as AddGenotypesUnder5kb { + call tasksgenotypebatch.AddGenotypes as AddGenotypesUnder5kb { input: vcf = MakeSubsetVcfUnder5kb.subset_vcf, genotypes = IntegrateGQUnder5kb.genotypes, @@ -151,7 +151,7 @@ workflow GenotypePESRPart2 { } scatter (gt5kb_bed in SplitVariants.gt5kb_beds) { - call tasks04.MakeSubsetVcf as MakeSubsetVcfOver5kb { + call tasksgenotypebatch.MakeSubsetVcf as MakeSubsetVcfOver5kb { input: vcf = cohort_vcf, bed = gt5kb_bed, @@ -159,7 +159,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_make_subset_vcf } - call tasks04.CountPE as CountPEOver5kb { + call tasksgenotypebatch.CountPE as CountPEOver5kb { input: vcf = MakeSubsetVcfOver5kb.subset_vcf, discfile = discfile, @@ -179,7 +179,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_genotype_pe } - call tasks04.CountSR as CountSROver5kb { + call tasksgenotypebatch.CountSR as CountSROver5kb { input: vcf = MakeSubsetVcfOver5kb.subset_vcf, splitfile = splitfile, @@ -201,7 +201,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_genotype_sr } - call tasks04.RDTestGenotype as RDTestGenotypeOver5kb { + call tasksgenotypebatch.RDTestGenotype as RDTestGenotypeOver5kb { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -233,7 +233,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_integrate_gq } - call tasks04.AddGenotypes as AddGenotypesOver5kb { + call tasksgenotypebatch.AddGenotypes as AddGenotypesOver5kb { input: vcf = MakeSubsetVcfOver5kb.subset_vcf, genotypes = IntegrateGQOver5kb.genotypes, @@ -245,7 +245,7 @@ workflow GenotypePESRPart2 { } scatter (bca_bed in SplitVariants.bca_beds) { - call tasks04.MakeSubsetVcf as MakeSubsetVcfBca { + call tasksgenotypebatch.MakeSubsetVcf as MakeSubsetVcfBca { input: vcf = cohort_vcf, bed = bca_bed, @@ -253,7 +253,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_make_subset_vcf } - call tasks04.CountPE as CountPEBca { + call tasksgenotypebatch.CountPE as CountPEBca { input: vcf = MakeSubsetVcfBca.subset_vcf, discfile = discfile, @@ -273,7 +273,7 @@ workflow 
GenotypePESRPart2 { runtime_attr_override = runtime_attr_genotype_pe } - call tasks04.CountSR as CountSRBca { + call tasksgenotypebatch.CountSR as CountSRBca { input: vcf = MakeSubsetVcfBca.subset_vcf, splitfile = splitfile, @@ -306,7 +306,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_integrate_pesr_gq } - call tasks04.AddGenotypes as AddGenotypesBca { + call tasksgenotypebatch.AddGenotypes as AddGenotypesBca { input: vcf = MakeSubsetVcfBca.subset_vcf, genotypes = IntegratePesrGQBca.genotypes, @@ -337,7 +337,7 @@ workflow GenotypePESRPart2 { runtime_attr_override = runtime_attr_triple_stream_cat } - call tasks04.ConcatGenotypedVcfs as ConcatGenotypedVcfs { + call tasksgenotypebatch.ConcatGenotypedVcfs as ConcatGenotypedVcfs { input: lt5kb_vcfs = AddGenotypesUnder5kb.genotyped_vcf, gt5kb_vcfs = AddGenotypesOver5kb.genotyped_vcf, diff --git a/wdl/Genotype_2.wdl b/wdl/Genotype_2.wdl index ddc4361f8..5a255d8df 100644 --- a/wdl/Genotype_2.wdl +++ b/wdl/Genotype_2.wdl @@ -1,5 +1,5 @@ version 1.0 -import "Tasks04.wdl" as task04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow Regenotype { input { @@ -30,7 +30,7 @@ workflow Regenotype { RuntimeAttr? runtime_attr_add_genotypes RuntimeAttr? runtime_attr_concat_regenotyped_vcfs_g2 } - call task04.AddBatchSamples as AddBatchSamplesDepth { + call tasksgenotypebatch.AddBatchSamples as AddBatchSamplesDepth { input: batch_vcf=batch_depth_vcf, cohort_vcf=cohort_depth_vcf, @@ -54,7 +54,7 @@ workflow Regenotype { sv_pipeline_docker=sv_pipeline_docker } scatter (regeno in SplitBeds_regeno.regeno_beds) { - call task04.MakeSubsetVcf as make_subset_vcf_regeno { + call tasksgenotypebatch.MakeSubsetVcf as make_subset_vcf_regeno { input: vcf=AddBatchSamplesDepth.updated_vcf, bed=regeno, @@ -75,7 +75,7 @@ workflow Regenotype { runtime_attr_override = runtime_attr_rd_test_gt_regeno, sv_pipeline_rdtest_docker=sv_pipeline_rdtest_docker } - call task04.IntegrateDepthGq as IntegrateGQRegeno { + call tasksgenotypebatch.IntegrateDepthGq as IntegrateGQRegeno { input: vcf=make_subset_vcf_regeno.subset_vcf, RD_melted_genotypes=RdTestGenotypeRegeno.melted_genotypes, @@ -83,7 +83,7 @@ workflow Regenotype { runtime_attr_override = runtime_attr_integrate_depth_gq, sv_pipeline_docker=sv_pipeline_docker } - call task04.AddGenotypes as AddGenotypesRegeno { + call tasksgenotypebatch.AddGenotypes as AddGenotypesRegeno { input: vcf=make_subset_vcf_regeno.subset_vcf, genotypes=IntegrateGQRegeno.genotypes, diff --git a/wdl/GermlineCNVCohort.wdl b/wdl/GermlineCNVCohort.wdl index c7cf84839..be80a5075 100644 --- a/wdl/GermlineCNVCohort.wdl +++ b/wdl/GermlineCNVCohort.wdl @@ -1,6 +1,6 @@ # Standalone gCNV cohort WDL # - To be run on reference batches only -# - Most samples should be run in case mode using Module00a +# - Most samples should be run in case mode using GatherBatchEvidence version 1.0 diff --git a/wdl/Module0506.wdl b/wdl/MakeCohortVcf.wdl similarity index 88% rename from wdl/Module0506.wdl rename to wdl/MakeCohortVcf.wdl index 7dfdc4d45..e60fbab9a 100644 --- a/wdl/Module0506.wdl +++ b/wdl/MakeCohortVcf.wdl @@ -1,13 +1,13 @@ version 1.0 -import "Module0506Cluster.wdl" as Cluster -import "Module0506ComplexResolve.wdl" as ComplexResolve -import "Module0506ComplexGenotype.wdl" as ComplexGenotype -import "Module0506Clean.wdl" as Clean +import "CombineBatches.wdl" as Cluster +import "ResolveComplexVariants.wdl" as ComplexResolve +import "GenotypeComplexVariants.wdl" as ComplexGenotype +import "CleanVcf.wdl" as Clean import "MasterVcfQc.wdl" as 
VcfQc -import "Module0506Metrics.wdl" as metrics +import "MakeCohortVcfMetrics.wdl" as metrics -workflow Module0506 { +workflow MakeCohortVcf { input { String cohort_name Array[String] batches @@ -173,7 +173,7 @@ workflow Module0506 { RuntimeAttr? runtime_override_merge_and_tar_shard_benchmarks } - call Cluster.Module0506Cluster { + call Cluster.CombineBatches { input: cohort_name=cohort_name, batches=batches, @@ -207,13 +207,13 @@ workflow Module0506 { runtime_override_concat_shards=runtime_override_concat_shards } - call ComplexResolve.Module0506ComplexResolve { + call ComplexResolve.ResolveComplexVariants { input: cohort_name=cohort_name, merge_vcfs=merge_complex_resolve_vcfs, - cluster_vcfs=Module0506Cluster.vcfs, - cluster_bothside_pass_lists=Module0506Cluster.cluster_bothside_pass_lists, - cluster_background_fail_lists=Module0506Cluster.cluster_background_fail_lists, + cluster_vcfs=CombineBatches.vcfs, + cluster_bothside_pass_lists=CombineBatches.cluster_bothside_pass_lists, + cluster_background_fail_lists=CombineBatches.cluster_background_fail_lists, disc_files=disc_files, disc_files_index=disc_files_index, rf_cutoff_files=rf_cutoff_files, @@ -240,13 +240,13 @@ workflow Module0506 { runtime_override_concat_resolved_per_shard=runtime_override_concat_resolved_per_shard } - call ComplexGenotype.Module0506ComplexGenotype { + call ComplexGenotype.GenotypeComplexVariants { input: cohort_name=cohort_name, batches=batches, merge_vcfs=merge_complex_genotype_vcfs, - complex_resolve_vcfs=Module0506ComplexResolve.complex_resolve_vcfs, - complex_resolve_vcf_indexes=Module0506ComplexResolve.complex_resolve_vcf_indexes, + complex_resolve_vcfs=ResolveComplexVariants.complex_resolve_vcfs, + complex_resolve_vcf_indexes=ResolveComplexVariants.complex_resolve_vcf_indexes, depth_vcfs=depth_vcfs, merged_ped_file=ped_file, bincov_files=bincov_files, @@ -274,12 +274,12 @@ workflow Module0506 { runtime_attr_subset_ped=runtime_attr_subset_ped } - call Clean.Module0506Clean { + call Clean.CleanVcf { input: cohort_name=cohort_name, - complex_genotype_vcfs=Module0506ComplexGenotype.complex_genotype_vcfs, - complex_resolve_bothside_pass_lists=Module0506ComplexResolve.complex_resolve_bothside_pass_lists, - complex_resolve_background_fail_lists=Module0506ComplexResolve.complex_resolve_background_fail_lists, + complex_genotype_vcfs=GenotypeComplexVariants.complex_genotype_vcfs, + complex_resolve_bothside_pass_lists=ResolveComplexVariants.complex_resolve_bothside_pass_lists, + complex_resolve_background_fail_lists=ResolveComplexVariants.complex_resolve_background_fail_lists, merged_ped_file=ped_file, contig_list=contig_list, allosome_fai=allosome_fai, @@ -312,8 +312,8 @@ workflow Module0506 { Array[String] contigs = transpose(read_tsv(contig_list))[0] call VcfQc.MasterVcfQc { input: - vcf=Module0506Clean.cleaned_vcf, - vcf_idx=Module0506Clean.cleaned_vcf_index, + vcf=CleanVcf.cleaned_vcf, + vcf_idx=CleanVcf.cleaned_vcf_index, ped_file=ped_file, prefix="~{cohort_name}.cleaned", sv_per_shard=10000, @@ -334,13 +334,13 @@ workflow Module0506 { Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true if (run_module_metrics_) { - call metrics.Module0506Metrics { + call metrics.MakeCohortVcfMetrics { input: name = cohort_name, - cluster_vcf = Module0506Cluster.merged_vcf, - complex_resolve_vcf = Module0506ComplexResolve.merged_vcf, - complex_genotype_vcf = Module0506ComplexGenotype.merged_vcf, - cleaned_vcf = Module0506Clean.cleaned_vcf, + cluster_vcf = 
CombineBatches.merged_vcf, + complex_resolve_vcf = ResolveComplexVariants.merged_vcf, + complex_genotype_vcf = GenotypeComplexVariants.merged_vcf, + cleaned_vcf = CleanVcf.cleaned_vcf, baseline_cluster_vcf = baseline_cluster_vcf, baseline_complex_resolve_vcf = baseline_complex_resolve_vcf, baseline_complex_genotype_vcf = baseline_complex_genotype_vcf, @@ -353,18 +353,18 @@ workflow Module0506 { } output { - File vcf = Module0506Clean.cleaned_vcf - File vcf_index = Module0506Clean.cleaned_vcf_index + File vcf = CleanVcf.cleaned_vcf + File vcf_index = CleanVcf.cleaned_vcf_index File vcf_qc = MasterVcfQc.sv_vcf_qc_output # If merge_intermediate_vcfs enabled - File? cluster_vcf = Module0506Cluster.merged_vcf - File? cluster_vcf_index = Module0506Cluster.merged_vcf_index - File? complex_resolve_vcf = Module0506ComplexResolve.merged_vcf - File? complex_resolve_vcf_index = Module0506ComplexResolve.merged_vcf_index - File? complex_genotype_vcf = Module0506ComplexGenotype.merged_vcf - File? complex_genotype_vcf_index = Module0506ComplexGenotype.merged_vcf_index + File? cluster_vcf = CombineBatches.merged_vcf + File? cluster_vcf_index = CombineBatches.merged_vcf_index + File? complex_resolve_vcf = ResolveComplexVariants.merged_vcf + File? complex_resolve_vcf_index = ResolveComplexVariants.merged_vcf_index + File? complex_genotype_vcf = GenotypeComplexVariants.merged_vcf + File? complex_genotype_vcf_index = GenotypeComplexVariants.merged_vcf_index - File? metrics_file_0506 = Module0506Metrics.metrics_file + File? metrics_file_makecohortvcf = MakeCohortVcfMetrics.metrics_file } } diff --git a/wdl/Module0506Metrics.wdl b/wdl/MakeCohortVcfMetrics.wdl similarity index 97% rename from wdl/Module0506Metrics.wdl rename to wdl/MakeCohortVcfMetrics.wdl index a13b5acb4..73912bb50 100644 --- a/wdl/Module0506Metrics.wdl +++ b/wdl/MakeCohortVcfMetrics.wdl @@ -3,7 +3,7 @@ version 1.0 import "TestUtils.wdl" as tu import "Utils.wdl" as util -workflow Module0506Metrics { +workflow MakeCohortVcfMetrics { input { Array[String]? samples String name @@ -87,7 +87,7 @@ workflow Module0506Metrics { call tu.CatMetrics { input: - prefix = "module0506." + name, + prefix = "MakeCohortVcf." 
+ name, metric_files = select_all([ClusterMetrics.out, ComplexResolveMetrics.out, ComplesGenotypeMetrics.out, CleanedMetrics.out]), linux_docker = linux_docker } diff --git a/wdl/MasterVcfQc.wdl b/wdl/MasterVcfQc.wdl index 2c23167da..08893cbc7 100644 --- a/wdl/MasterVcfQc.wdl +++ b/wdl/MasterVcfQc.wdl @@ -6,7 +6,7 @@ import "ShardedQcCollection.wdl" as ShardedQcCollection import "CollectQcPerSample.wdl" as CollectQcPerSample import "PerSampleExternalBenchmark.wdl" as PerSampleExternalBenchmark -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Master workflow to perform comprehensive quality control (QC) on # an SV VCF output by GATK-SV diff --git a/wdl/MergeCohortVcfs.wdl b/wdl/MergeBatchSites.wdl similarity index 99% rename from wdl/MergeCohortVcfs.wdl rename to wdl/MergeBatchSites.wdl index 70bcc4a58..cf187cb3b 100644 --- a/wdl/MergeCohortVcfs.wdl +++ b/wdl/MergeBatchSites.wdl @@ -2,7 +2,7 @@ version 1.0 import "Structs.wdl" -workflow MergeCohortVcfs { +workflow MergeBatchSites { input { Array[File] depth_vcfs # Filtered depth VCFs across batches Array[File] pesr_vcfs # Filtered PESR VCFs across batches diff --git a/wdl/Module0506Clean.wdl b/wdl/Module0506Clean.wdl deleted file mode 100644 index f7d4906c5..000000000 --- a/wdl/Module0506Clean.wdl +++ /dev/null @@ -1,102 +0,0 @@ -version 1.0 - -import "CleanVcf.wdl" as CleanVcfContig -import "Tasks0506.wdl" as MiniTasks - -workflow Module0506Clean { - input { - String cohort_name - - Array[File] complex_genotype_vcfs - Array[File] complex_resolve_bothside_pass_lists - Array[File] complex_resolve_background_fail_lists - File merged_ped_file - - File contig_list - File allosome_fai - Int max_shards_per_chrom - Int max_shards_per_chrom_clean_vcf_step1 - Int min_records_per_shard_clean_vcf_step1 - Int samples_per_clean_vcf_step2_shard - - File? outlier_samples_list - - String sv_base_mini_docker - String sv_pipeline_docker - - # overrides for mini tasks - RuntimeAttr? runtime_override_concat_cleaned_vcfs - - # overrides for CleanVcfContig - RuntimeAttr? runtime_override_clean_vcf_1a - RuntimeAttr? runtime_override_clean_vcf_1b - RuntimeAttr? runtime_override_clean_vcf_2 - RuntimeAttr? runtime_override_clean_vcf_3 - RuntimeAttr? runtime_override_clean_vcf_4 - RuntimeAttr? runtime_override_clean_vcf_5 - RuntimeAttr? runtime_override_drop_redundant_cnvs - RuntimeAttr? runtime_override_stitch_fragmented_cnvs - RuntimeAttr? runtime_override_final_cleanup - RuntimeAttr? runtime_override_split_vcf_to_clean - RuntimeAttr? runtime_override_combine_step_1_vcfs - RuntimeAttr? runtime_override_combine_step_1_sex_chr_revisions - RuntimeAttr? runtime_override_split_include_list - RuntimeAttr? runtime_override_combine_clean_vcf_2 - RuntimeAttr? runtime_override_combine_revised_4 - RuntimeAttr? 
runtime_override_combine_multi_ids_4 - } - - #Scatter per chromosome - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter ( i in range(length(contigs)) ) { - String contig = contigs[i] - - call CleanVcfContig.CleanVcf as CleanContigVcf { - input: - vcf=complex_genotype_vcfs[i], - contig=contig, - background_list=complex_resolve_background_fail_lists[i], - ped_file=merged_ped_file, - bothsides_pass_list=complex_resolve_bothside_pass_lists[i], - allosome_fai=allosome_fai, - prefix=cohort_name, - max_shards_per_chrom_step1=max_shards_per_chrom_clean_vcf_step1, - min_records_per_shard_step1=min_records_per_shard_clean_vcf_step1, - samples_per_step2_shard=samples_per_clean_vcf_step2_shard, - outlier_samples_list=outlier_samples_list, - sv_base_mini_docker=sv_base_mini_docker, - sv_pipeline_docker=sv_pipeline_docker, - runtime_override_clean_vcf_1a=runtime_override_clean_vcf_1a, - runtime_override_clean_vcf_1b=runtime_override_clean_vcf_1b, - runtime_override_clean_vcf_2=runtime_override_clean_vcf_2, - runtime_override_clean_vcf_3=runtime_override_clean_vcf_3, - runtime_override_clean_vcf_4=runtime_override_clean_vcf_4, - runtime_override_clean_vcf_5=runtime_override_clean_vcf_5, - runtime_override_drop_redundant_cnvs=runtime_override_drop_redundant_cnvs, - runtime_override_stitch_fragmented_cnvs=runtime_override_stitch_fragmented_cnvs, - runtime_override_final_cleanup=runtime_override_final_cleanup, - runtime_override_split_vcf_to_clean=runtime_override_split_vcf_to_clean, - runtime_override_combine_step_1_vcfs=runtime_override_combine_step_1_vcfs, - runtime_override_combine_step_1_sex_chr_revisions=runtime_override_combine_step_1_sex_chr_revisions, - runtime_override_split_include_list=runtime_override_split_include_list, - runtime_override_combine_clean_vcf_2=runtime_override_combine_clean_vcf_2, - runtime_override_combine_revised_4=runtime_override_combine_revised_4, - runtime_override_combine_multi_ids_4=runtime_override_combine_multi_ids_4 - } - } - - call MiniTasks.ConcatVcfs as ConcatCleanedVcfs { - input: - vcfs=CleanContigVcf.out, - vcfs_idx=CleanContigVcf.out_idx, - merge_sort=true, - outfile_prefix="~{cohort_name}.cleaned", - sv_base_mini_docker=sv_base_mini_docker, - runtime_attr_override=runtime_override_concat_cleaned_vcfs - } - - output { - File cleaned_vcf = ConcatCleanedVcfs.concat_vcf - File cleaned_vcf_index = ConcatCleanedVcfs.concat_vcf_idx - } -} diff --git a/wdl/Module07FilterCleanupQualRecalibration.wdl b/wdl/Module07FilterCleanupQualRecalibration.wdl index 56f7d7a79..90449e351 100644 --- a/wdl/Module07FilterCleanupQualRecalibration.wdl +++ b/wdl/Module07FilterCleanupQualRecalibration.wdl @@ -5,7 +5,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow FilterCleanupQualRecalibration { diff --git a/wdl/Module07MinGQ.wdl b/wdl/Module07MinGQ.wdl index eb7080aa7..a0ad1ab37 100644 --- a/wdl/Module07MinGQ.wdl +++ b/wdl/Module07MinGQ.wdl @@ -3,7 +3,7 @@ version 1.0 import "MinGQRocOpt.wdl" as roc_opt_sub import "CalcAF.wdl" as calcAF import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "ReviseSVtypeINStoMEI.wdl" as ReviseSVtype diff --git a/wdl/Module07Preprocessing.wdl b/wdl/Module07Preprocessing.wdl deleted file mode 100644 index 122610d27..000000000 --- a/wdl/Module07Preprocessing.wdl +++ /dev/null @@ -1,72 +0,0 @@ -version 1.0 - -import "PrepareGencode.wdl" as pg -# import "PrepareNoncoding.wdl" as pn - -# Workflow for the 
preprocessing sub-module in Module07, its output will be used for the annotation sub-module -workflow Module07Preprocessing { - input { - - ### args for PrepareGencode - File gencode_annotation_gtf # Gencode annotation GTF - File gencode_pc_translations_fa # Gencode protein-coding translation fasta - File gencode_pc_transcripts_fa # Gencode protein-coding transcript fasta - File gencode_transcript_source # Gencode transcript source metadata - Int promoter_window # Window upstream of TSS to consider as promoter region - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_get_canonical_transcripts - RuntimeAttr? runtime_attr_make_canonical_gtf - RuntimeAttr? runtime_attr_make_promoters - RuntimeAttr? runtime_attr_subset_gtf - -# ### args for PrepareNoncoding -# File noncoding_bed_list -# -# RuntimeAttr? runtime_attr_clean_noncoding_bed -# RuntimeAttr? runtime_attr_make_noncoding_bed - } - - call pg.PrepareGencode as PrepareGencode { - input: - - gencode_annotation_gtf = gencode_annotation_gtf, - gencode_pc_translations_fa = gencode_pc_translations_fa, - gencode_pc_transcripts_fa = gencode_pc_transcripts_fa, - gencode_transcript_source = gencode_transcript_source, - promoter_window = promoter_window, - - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - - runtime_attr_get_canonical_transcripts = runtime_attr_get_canonical_transcripts, - runtime_attr_make_canonical_gtf = runtime_attr_make_canonical_gtf, - runtime_attr_make_promoters = runtime_attr_make_promoters, - runtime_attr_subset_gtf = runtime_attr_subset_gtf - } - -# call pn.PrepareNoncoding as PrepareNoncoding { -# input: -# -# noncoding_bed_list = noncoding_bed_list, -# -# sv_base_mini_docker = sv_base_mini_docker, -# -# runtime_attr_clean_noncoding_bed = runtime_attr_clean_noncoding_bed, -# runtime_attr_make_noncoding_bed = runtime_attr_make_noncoding_bed -# } - - output { - - File canonical_gtf = PrepareGencode.canonical_gtf - File canonical_promoters = PrepareGencode.canonical_promoters - File antisense_gtf = PrepareGencode.antisense_gtf - File lincRNA_gtf = PrepareGencode.lincRNA_gtf - File processed_transcript_gtf = PrepareGencode.processed_transcript_gtf - File pseudogene_gtf = PrepareGencode.pseudogene_gtf - - # File noncoding_bed = PrepareNoncoding.noncoding_bed - } -} diff --git a/wdl/Module07XfBatchEffect.wdl b/wdl/Module07XfBatchEffect.wdl index cd867e245..246ba7057 100644 --- a/wdl/Module07XfBatchEffect.wdl +++ b/wdl/Module07XfBatchEffect.wdl @@ -6,7 +6,7 @@ version 1.0 import "prune_add_af.wdl" as calcAF import "batch_effect_helper.wdl" as helper -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow XfBatchEffect { input{ diff --git a/wdl/Module08Annotation.wdl b/wdl/Module08Annotation.wdl deleted file mode 100644 index d2878708c..000000000 --- a/wdl/Module08Annotation.wdl +++ /dev/null @@ -1,109 +0,0 @@ -version 1.0 - -import "AnnotateVcf.wdl" as ann -import "PruneAndAddVafs.wdl" as pav -import "AnnotateExternalAF.wdl" as eaf - -workflow Module08Annotation { - - input { - File vcf - File vcf_idx - File contig_list - String prefix - - File protein_coding_gtf - File linc_rna_gtf - File promoter_bed - File noncoding_bed - - Int max_shards_per_chrom_step1 - Int min_records_per_shard_step1 - - File? sample_pop_assignments # Two-column file with sample ID & pop assignment. "." for pop will ignore sample - File? prune_list # List of samples to be excluded from the output vcf - File? 
ped_file # Used for M/F AF calculations - Int sv_per_shard - - File? ref_bed # File with external allele frequencies - String? ref_prefix # prefix name for external AF call set (required if ref_bed set) - Array[String]? population # populations to annotate external AF for (required if ref_bed set) - - String sv_base_mini_docker - String sv_pipeline_docker - - RuntimeAttr? runtime_attr_annotate_intervals - RuntimeAttr? runtime_attr_merge_annotations - RuntimeAttr? runtime_attr_subset_vcf - RuntimeAttr? runtime_attr_concat_vcfs - RuntimeAttr? runtime_attr_prune_vcf - RuntimeAttr? runtime_attr_shard_vcf - RuntimeAttr? runtime_attr_compute_AFs - RuntimeAttr? runtime_attr_combine_vcfs - RuntimeAttr? runtime_attr_modify_vcf - RuntimeAttr? runtime_override_combine_vcfs - RuntimeAttr? runtime_override_split_vcf - } - - call ann.AnnotateVcf as AnnotateVcf { - input: - vcf = vcf, - vcf_idx = vcf_idx, - prefix = prefix, - contig_list = contig_list, - protein_coding_gtf = protein_coding_gtf, - linc_rna_gtf = linc_rna_gtf, - promoter_bed = promoter_bed, - noncoding_bed = noncoding_bed, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, - runtime_attr_merge_annotations = runtime_attr_merge_annotations, - runtime_attr_subset_vcf = runtime_attr_subset_vcf, - runtime_attr_concat_vcfs = runtime_attr_concat_vcfs - } - - call pav.PruneAndAddVafs as PruneAndAddVafs { - input: - vcf = AnnotateVcf.annotated_vcf, - vcf_idx = AnnotateVcf.annotated_vcf_idx, - prefix = prefix, - sample_pop_assignments = sample_pop_assignments, - prune_list = prune_list, - ped_file = ped_file, - sv_per_shard = sv_per_shard, - contig_list = contig_list, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_prune_vcf = runtime_attr_prune_vcf, - runtime_attr_shard_vcf = runtime_attr_shard_vcf, - runtime_attr_compute_AFs = runtime_attr_compute_AFs, - runtime_attr_combine_vcfs = runtime_attr_combine_vcfs, - runtime_attr_concat_vcfs = runtime_attr_concat_vcfs - } - - if (defined(ref_bed)) { - call eaf.AnnotateExternalAF as AnnotateExternalAF { - input: - vcf = PruneAndAddVafs.output_vcf, - vcf_idx = PruneAndAddVafs.output_vcf_idx, - ref_bed = select_first([ref_bed]), - population = select_first([population]), - ref_prefix = select_first([ref_prefix]), - prefix = prefix, - contigs = read_lines(contig_list), - max_shards_per_chrom_step1 = max_shards_per_chrom_step1, - min_records_per_shard_step1 = min_records_per_shard_step1, - sv_base_mini_docker = sv_base_mini_docker, - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_modify_vcf = runtime_attr_modify_vcf, - runtime_override_split_vcf = runtime_override_split_vcf, - runtime_override_combine_vcfs = runtime_override_combine_vcfs - } - } - - output { - File output_vcf = select_first([AnnotateExternalAF.annotated_vcf, PruneAndAddVafs.output_vcf]) - File output_vcf_idx = select_first([AnnotateExternalAF.annotated_vcf_tbi, PruneAndAddVafs.output_vcf_idx]) - } -} diff --git a/wdl/Module10Benchmark.wdl b/wdl/Module10Benchmark.wdl index c4bbbea09..2fb7f4f4e 100644 --- a/wdl/Module10Benchmark.wdl +++ b/wdl/Module10Benchmark.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as tasks0506 +import "TasksMakeCohortVcf.wdl" as MiniTasks import "TasksBenchmark.wdl" as tasks10 import "VaPoR.wdl" as vapor @@ -171,7 +171,7 @@ workflow BenchmarkAnnotation { } } - call tasks0506.ConcatVcfs as ConcatVcfsPB{ + call 
MiniTasks.ConcatVcfs as ConcatVcfsPB{ input: vcfs=Bcf2VcfPB.vcf, merge_sort=true, @@ -180,7 +180,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatVcfs } - call tasks0506.ConcatVcfs as ConcatVcfsIL{ + call MiniTasks.ConcatVcfs as ConcatVcfsIL{ input: vcfs=Bcf2VcfIL.vcf, merge_sort=true, @@ -189,7 +189,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatVcfs } - call tasks0506.ConcatBeds as ConcatBeds{ + call MiniTasks.ConcatBeds as ConcatBeds{ input: shard_bed_files=RunVaPoR.vapor, prefix=prefix, @@ -197,7 +197,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatPesrAnno{ + call MiniTasks.ConcatBeds as ConcatPesrAnno{ input: shard_bed_files=RunRdPeSrAnnotation.pesr_anno, prefix=prefix, @@ -205,7 +205,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnno{ + call MiniTasks.ConcatBeds as ConcatRdAnno{ input: shard_bed_files=RunRdPeSrAnnotation.cov, prefix=prefix, @@ -213,7 +213,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnnoLeFlank{ + call MiniTasks.ConcatBeds as ConcatRdAnnoLeFlank{ input: shard_bed_files=RunRdPeSrAnnotation.cov_le_flank, prefix=prefix, @@ -221,7 +221,7 @@ workflow BenchmarkAnnotation { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnnoRiFlank{ + call MiniTasks.ConcatBeds as ConcatRdAnnoRiFlank{ input: shard_bed_files=RunRdPeSrAnnotation.cov_ri_flank, prefix=prefix, diff --git a/wdl/Mosaic.wdl b/wdl/Mosaic.wdl index 927dc4dbf..c856765ef 100644 --- a/wdl/Mosaic.wdl +++ b/wdl/Mosaic.wdl @@ -7,7 +7,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "PreRFCohort.wdl" as preRF import "MosaicDepth.wdl" as depth_mosaic import "MosaicPesrPart1.wdl" as mosaic_pesr_part1 diff --git a/wdl/PETest.wdl b/wdl/PETest.wdl index 7d1483c4f..4d146ecb7 100644 --- a/wdl/PETest.wdl +++ b/wdl/PETest.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics import "PETestChromosome.wdl" as pec workflow PETest { @@ -88,7 +88,7 @@ workflow PETest { } } - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = flatten([PETestAutosome.stats, PETestAllosome.stats]), prefix = "${batch}.${algorithm}", @@ -96,7 +96,7 @@ workflow PETest { runtime_attr_override = runtime_attr_merge_stats } - call tasks02.MergeStats as MergeStatsCommon { + call tasksbatchmetrics.MergeStats as MergeStatsCommon { input: stats = select_all(PETestAutosome.stats_common), prefix = "${batch}.${algorithm}.common", diff --git a/wdl/PETestChromosome.wdl b/wdl/PETestChromosome.wdl index a84abf29e..1560b9a52 100644 --- a/wdl/PETestChromosome.wdl +++ b/wdl/PETestChromosome.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics workflow PETestChromosome { input { @@ -31,7 +31,7 @@ workflow PETestChromosome { File discfile_idx = discfile + ".tbi" - call tasks02.SplitVCF as SplitVCF { + call tasksbatchmetrics.SplitVCF as SplitVCF { input: vcf = vcf, batch = batch, @@ -73,7 +73,7 @@ workflow PETestChromosome { runtime_attr_override = runtime_attr_petest } - call tasks02.MergeAllosomes as MergeAllosomes { + call tasksbatchmetrics.MergeAllosomes as MergeAllosomes { input: 
male_test = PETestMale.stats, female_test = PETestFemale.stats, @@ -101,7 +101,7 @@ workflow PETestChromosome { } if (!allosome) { - call tasks02.GetCommonVCF { + call tasksbatchmetrics.GetCommonVCF { input: vcf = vcf, cnv_size_cutoff = common_cnv_size_cutoff, @@ -109,7 +109,7 @@ workflow PETestChromosome { runtime_attr_override = runtime_attr_split_vcf } - call tasks02.SplitVCF as SplitCommonVCF { + call tasksbatchmetrics.SplitVCF as SplitCommonVCF { input: vcf = GetCommonVCF.common_vcf, batch = batch, @@ -141,7 +141,7 @@ workflow PETestChromosome { Array[File] unmerged_stats = if allosome then select_all(MergeAllosomes.merged_test) else select_all(PETestAutosome.stats) Array[File] unmerged_stats_common = select_first([PETestAutosomeCommon.stats, []]) - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = unmerged_stats, prefix = "${batch}.${algorithm}.${chrom}", @@ -150,7 +150,7 @@ workflow PETestChromosome { } if (!allosome) { - call tasks02.MergeStats as MergeStatsCommon { + call tasksbatchmetrics.MergeStats as MergeStatsCommon { input: stats = unmerged_stats_common, prefix = "${batch}.${algorithm}.${chrom}.common", diff --git a/wdl/PerSampleExternalBenchmark.wdl b/wdl/PerSampleExternalBenchmark.wdl index 7ebadcbf6..dda68e1a2 100644 --- a/wdl/PerSampleExternalBenchmark.wdl +++ b/wdl/PerSampleExternalBenchmark.wdl @@ -2,7 +2,7 @@ version 1.0 # Author: Ryan Collins -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Workflow to perform per-sample benchmarking from an SV VCF vs an external dataset workflow PerSampleExternalBenchmark { diff --git a/wdl/PruneAndAddVafs.wdl b/wdl/PruneAndAddVafs.wdl index 9bfa2658a..939744905 100644 --- a/wdl/PruneAndAddVafs.wdl +++ b/wdl/PruneAndAddVafs.wdl @@ -3,7 +3,7 @@ version 1.0 -import "Tasks0506.wdl" as tasks0506 +import "TasksMakeCohortVcf.wdl" as MiniTasks import "ChromosomeAlleleFrequencies.wdl" as calcAF # Prune off samples in annotated VCF, add VAF annotation @@ -68,7 +68,7 @@ workflow PruneAndAddVafs { } # Merge pruned VCFs with allele info - call tasks0506.ConcatVcfs as ConcatVcfs{ + call MiniTasks.ConcatVcfs as ConcatVcfs{ input: vcfs = ChromosomeAlleleFrequencies.vcf_wAFs, vcfs_idx = ChromosomeAlleleFrequencies.vcf_wAFs_idx, diff --git a/wdl/RDTest.wdl b/wdl/RDTest.wdl index 86707f610..f06cc9142 100644 --- a/wdl/RDTest.wdl +++ b/wdl/RDTest.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics import "RDTestChromosome.wdl" as rdc workflow RDTest { @@ -86,7 +86,7 @@ workflow RDTest { } } - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = flatten([RDTestAutosome.out_stats, RDTestAllosome.out_stats]), prefix = "${batch}.${algorithm}", diff --git a/wdl/RDTestChromosome.wdl b/wdl/RDTestChromosome.wdl index 469088c4f..4516b4521 100644 --- a/wdl/RDTestChromosome.wdl +++ b/wdl/RDTestChromosome.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics workflow RDTestChromosome { input { @@ -75,7 +75,7 @@ workflow RDTestChromosome { runtime_attr_override = runtime_attr_rdtest } - call tasks02.MergeAllosomes as MergeAllosomes { + call tasksbatchmetrics.MergeAllosomes as MergeAllosomes { input: male_test = RDTestMale.stats, female_test = RDTestFemale.stats, @@ -106,7 +106,7 @@ workflow RDTestChromosome { Array[File?] 
stats = if allosome then MergeAllosomes.merged_test else RDTestAutosome.stats - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = select_all(stats), prefix = "${batch}.${algorithm}.${chrom}", diff --git a/wdl/RdPeSrAnno.wdl b/wdl/RdPeSrAnno.wdl index e3334ba46..b81d55f49 100644 --- a/wdl/RdPeSrAnno.wdl +++ b/wdl/RdPeSrAnno.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as tasks0506 +import "TasksMakeCohortVcf.wdl" as MiniTasks import "TasksBenchmark.wdl" as tasks10 workflow RdPeSrAnno { @@ -77,7 +77,7 @@ workflow RdPeSrAnno { } } - call tasks0506.ConcatBeds as ConcatPesrAnno{ + call MiniTasks.ConcatBeds as ConcatPesrAnno{ input: shard_bed_files=RunRdPeSrAnnotation.pesr_anno, prefix=prefix, @@ -85,7 +85,7 @@ workflow RdPeSrAnno { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnno{ + call MiniTasks.ConcatBeds as ConcatRdAnno{ input: shard_bed_files=RunRdPeSrAnnotation.cov, prefix=prefix, @@ -93,7 +93,7 @@ workflow RdPeSrAnno { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnnoLeFlank{ + call MiniTasks.ConcatBeds as ConcatRdAnnoLeFlank{ input: shard_bed_files=RunRdPeSrAnnotation.cov_le_flank, prefix=prefix, @@ -101,7 +101,7 @@ workflow RdPeSrAnno { runtime_attr_override=runtime_attr_ConcatBeds } - call tasks0506.ConcatBeds as ConcatRdAnnoRiFlank{ + call MiniTasks.ConcatBeds as ConcatRdAnnoRiFlank{ input: shard_bed_files=RunRdPeSrAnnotation.cov_ri_flank, prefix=prefix, diff --git a/wdl/Module04b.wdl b/wdl/RegenotypeCNVs.wdl similarity index 99% rename from wdl/Module04b.wdl rename to wdl/RegenotypeCNVs.wdl index 2686e1a24..a07a04091 100644 --- a/wdl/Module04b.wdl +++ b/wdl/RegenotypeCNVs.wdl @@ -4,7 +4,7 @@ import "Genotype_2.wdl" as g2 import "CombineReassess.wdl" as creassess import "Utils.wdl" as util -workflow Module04b { +workflow RegenotypeCNVs { input { String sv_base_mini_docker String sv_pipeline_docker @@ -539,8 +539,8 @@ task GetRegenotype { File regeno_sample_counts_lookup File regeno_raw_combined_depth Int n_samples_cohort - Float regeno_max_allele_freq # default = 0.01 set in Module04b.wdl - Int regeno_allele_count_threshold # default = 3 set in Module04b.wdl + Float regeno_max_allele_freq # default = 0.01 set in RegenotypeCNVs.wdl + Int regeno_allele_count_threshold # default = 3 set in RegenotypeCNVs.wdl String Batch String sv_pipeline_docker RuntimeAttr? 
runtime_attr_override diff --git a/wdl/Module0506ComplexResolve.wdl b/wdl/ResolveComplexVariants.wdl similarity index 98% rename from wdl/Module0506ComplexResolve.wdl rename to wdl/ResolveComplexVariants.wdl index ba9572ca0..d38be00dd 100644 --- a/wdl/Module0506ComplexResolve.wdl +++ b/wdl/ResolveComplexVariants.wdl @@ -1,9 +1,9 @@ version 1.0 import "ResolveCpxSv.wdl" as ResolveComplexContig -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks -workflow Module0506ComplexResolve { +workflow ResolveComplexVariants { input { String cohort_name @@ -173,7 +173,7 @@ workflow Module0506ComplexResolve { vcfs=RenameVariants.renamed_vcf, vcfs_idx=RenameVariants.renamed_vcf_index, merge_sort=true, - outfile_prefix="~{cohort_name}.0506_complex", + outfile_prefix="~{cohort_name}.complex_resolve", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_override_concat } diff --git a/wdl/ResolveCpxSv.wdl b/wdl/ResolveCpxSv.wdl index 15578309c..06b6c2359 100644 --- a/wdl/ResolveCpxSv.wdl +++ b/wdl/ResolveCpxSv.wdl @@ -2,7 +2,7 @@ version 1.0 # Author: Ryan Collins -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks #Resolve complex SV for a single chromosome workflow ResolveComplexSv { diff --git a/wdl/ReviseSVtypeINStoMEI.wdl b/wdl/ReviseSVtypeINStoMEI.wdl index ebb3b9bfd..1c22edbc3 100644 --- a/wdl/ReviseSVtypeINStoMEI.wdl +++ b/wdl/ReviseSVtypeINStoMEI.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "ReviseSVtypeINStoMEIperContig.wdl" as ReviseSVtypePerContig workflow ReviseSVtypeINStoMEI { diff --git a/wdl/ReviseSVtypeINStoMEIperContig.wdl b/wdl/ReviseSVtypeINStoMEIperContig.wdl index 058913831..0f0265443 100644 --- a/wdl/ReviseSVtypeINStoMEIperContig.wdl +++ b/wdl/ReviseSVtypeINStoMEIperContig.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow ReviseSVtypeINStoMEIperContig { diff --git a/wdl/SRTest.wdl b/wdl/SRTest.wdl index 5e56f46a4..790ab583d 100644 --- a/wdl/SRTest.wdl +++ b/wdl/SRTest.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics import "SRTestChromosome.wdl" as src workflow SRTest { @@ -93,7 +93,7 @@ workflow SRTest { } # Combine srtest results into single file - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = flatten([SRTestAutosome.stats, SRTestAllosome.stats]), prefix = "${batch}.${algorithm}", @@ -102,7 +102,7 @@ workflow SRTest { } if (run_common) { - call tasks02.MergeStats as MergeStatsCommon { + call tasksbatchmetrics.MergeStats as MergeStatsCommon { input: stats = select_all(SRTestAutosome.stats_common), prefix = "${batch}.${algorithm}.common", diff --git a/wdl/SRTestChromosome.wdl b/wdl/SRTestChromosome.wdl index 82b033292..4491e98e8 100644 --- a/wdl/SRTestChromosome.wdl +++ b/wdl/SRTestChromosome.wdl @@ -1,6 +1,6 @@ version 1.0 -import "Tasks02.wdl" as tasks02 +import "TasksGenerateBatchMetrics.wdl" as tasksbatchmetrics workflow SRTestChromosome { input { @@ -32,7 +32,7 @@ workflow SRTestChromosome { File splitfile_idx = splitfile + ".tbi" - call tasks02.SplitVCF as SplitVCF { + call tasksbatchmetrics.SplitVCF as SplitVCF { input: vcf = vcf, batch = batch, @@ -74,7 +74,7 @@ workflow SRTestChromosome { runtime_attr_override = runtime_attr_srtest } - call tasks02.MergeAllosomes as 
MergeAllosomes { + call tasksbatchmetrics.MergeAllosomes as MergeAllosomes { input: male_test = SRTestMale.stats, female_test = SRTestFemale.stats, @@ -103,7 +103,7 @@ workflow SRTestChromosome { } if (run_common && !allosome) { - call tasks02.GetCommonVCF { + call tasksbatchmetrics.GetCommonVCF { input: vcf = vcf, cnv_size_cutoff = select_first([common_cnv_size_cutoff]), @@ -111,7 +111,7 @@ workflow SRTestChromosome { runtime_attr_override = runtime_attr_split_vcf } - call tasks02.SplitVCF as SplitCommonVCF { + call tasksbatchmetrics.SplitVCF as SplitCommonVCF { input: vcf = GetCommonVCF.common_vcf, batch = batch, @@ -143,7 +143,7 @@ workflow SRTestChromosome { Array[File] unmerged_stats = if allosome then select_all(MergeAllosomes.merged_test) else select_all(SRTestAutosome.stats) Array[File] unmerged_stats_common = select_first([SRTestAutosomeCommon.stats, []]) - call tasks02.MergeStats as MergeStats { + call tasksbatchmetrics.MergeStats as MergeStats { input: stats = unmerged_stats, prefix = "${batch}.${algorithm}.${chrom}", @@ -152,7 +152,7 @@ workflow SRTestChromosome { } if (run_common && !allosome) { - call tasks02.MergeStats as MergeStatsCommon { + call tasksbatchmetrics.MergeStats as MergeStatsCommon { input: stats = unmerged_stats_common, prefix = "${batch}.${algorithm}.${chrom}.common", diff --git a/wdl/ScatterAnnotateVcfByChrom.wdl b/wdl/ScatterAnnotateVcfByChrom.wdl new file mode 100644 index 000000000..00aba2b1c --- /dev/null +++ b/wdl/ScatterAnnotateVcfByChrom.wdl @@ -0,0 +1,140 @@ +# Workflow to parallelize VCF annotation by chromosome + +version 1.0 + +import "TasksMakeCohortVcf.wdl" as MiniTasks +import "AnnotateChromosome.wdl" as annotate_by_chrom + +# Scatter VCF and apply prepared annotations +workflow ScatterAnnotateVcfByChrom { + + input { + + File vcf + String prefix + File vcf_idx + File contig_list + File protein_coding_gtf + File linc_rna_gtf + File promoter_bed + File noncoding_bed + + String sv_base_mini_docker + String sv_pipeline_docker + + RuntimeAttr? runtime_attr_annotate_intervals + RuntimeAttr? runtime_attr_merge_annotations + RuntimeAttr? runtime_attr_subset_vcf + RuntimeAttr? 
runtime_attr_concat_vcfs + } + + Array[Array[String]] contigs = read_tsv(contig_list) + + # Annotate, scattered by chromosome + scatter (contig in contigs) { + # Remote tabix each chromosome + call SubsetVcf { + input: + vcf = vcf, + vcf_idx = vcf_idx, + contig = contig[0], + prefix = "${prefix}.${contig[0]}", + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_override = runtime_attr_subset_vcf + } + + # Annotate per chromosome + call annotate_by_chrom.AnnotateChromosome as AnnotateChromosome { + input: + vcf = SubsetVcf.subsetted_vcf, + prefix = "${prefix}.${contig[0]}", + protein_coding_gtf = protein_coding_gtf, + linc_rna_gtf = linc_rna_gtf, + promoter_bed = promoter_bed, + noncoding_bed = noncoding_bed, + sv_pipeline_docker = sv_pipeline_docker, + runtime_attr_annotate_intervals = runtime_attr_annotate_intervals, + runtime_attr_merge_annotations = runtime_attr_merge_annotations + } + } + + # Merge integrated vcfs across chromosomes + call MiniTasks.ConcatVcfs as ConcatVcfs { + input: + vcfs = AnnotateChromosome.annotated_vcf, + vcfs_idx = AnnotateChromosome.annotated_vcf_idx, + outfile_prefix = "${prefix}.annotated", + sv_base_mini_docker = sv_base_mini_docker, + runtime_attr_override = runtime_attr_concat_vcfs + } + + output { + File annotated_vcf = ConcatVcfs.concat_vcf + File annotated_vcf_idx = ConcatVcfs.concat_vcf_idx + } +} + +# Scatter VCF by chromosome +task SubsetVcf { + + input { + + File vcf + File vcf_idx + String contig + String prefix + + String sv_pipeline_docker + + RuntimeAttr? runtime_attr_override + } + + parameter_meta { + vcf: { + localization_optional: true + } + vcf_idx: { + localization_optional: true + } + } + + output { + File subsetted_vcf = "${prefix}.${contig}.vcf.gz" + File subsetted_vcf_idx = "${prefix}.${contig}.vcf.gz.tbi" + } + + ######################### + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: 50, + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 0 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + Float mem_gb = select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + Int java_mem_mb = ceil(mem_gb * 1000 * 0.8) + + command <<< + + set -euo pipefail + + java -Xmx~{java_mem_mb}M -jar ${GATK_JAR} SelectVariants \ + -V "~{vcf}" \ + -L "~{contig}" \ + -O ~{prefix}.~{contig}.vcf.gz + + >>> + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: mem_gb + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + docker: sv_pipeline_docker + } +} diff --git a/wdl/ScatterCpxGenotyping.wdl b/wdl/ScatterCpxGenotyping.wdl index daff14ee6..9c85ea7cb 100644 --- a/wdl/ScatterCpxGenotyping.wdl +++ b/wdl/ScatterCpxGenotyping.wdl @@ -3,10 +3,10 @@ version 1.0 # Author: Ryan Collins import "GenotypeCpxCnvs.wdl" as GenotypeCpx -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Workflow to perform depth-based genotyping for a single vcf shard scattered -# across batches on predicted CPX CNVs from 04b +# across batches on predicted CPX CNVs workflow ScatterCpxGenotyping { input { File bin_exclude diff --git a/wdl/ShardedCluster.wdl b/wdl/ShardedCluster.wdl index 683387991..f31a67760 100644 --- 
a/wdl/ShardedCluster.wdl +++ b/wdl/ShardedCluster.wdl @@ -3,7 +3,7 @@ version 1.0 # Author: Ryan Collins import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks # Workflow to shard a filtered vcf & run vcfcluster (sub-sub-sub workflow) workflow ShardedCluster { diff --git a/wdl/ShardedQcCollection.wdl b/wdl/ShardedQcCollection.wdl index ffd227bdc..451dc93a4 100644 --- a/wdl/ShardedQcCollection.wdl +++ b/wdl/ShardedQcCollection.wdl @@ -4,7 +4,7 @@ version 1.0 # Workflow to gather SV VCF summary stats for an input VCF -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow ShardedQcCollection { input { diff --git a/wdl/Tasks02.wdl b/wdl/TasksGenerateBatchMetrics.wdl similarity index 100% rename from wdl/Tasks02.wdl rename to wdl/TasksGenerateBatchMetrics.wdl diff --git a/wdl/Tasks04.wdl b/wdl/TasksGenotypeBatch.wdl similarity index 100% rename from wdl/Tasks04.wdl rename to wdl/TasksGenotypeBatch.wdl diff --git a/wdl/Tasks0506.wdl b/wdl/TasksMakeCohortVcf.wdl similarity index 100% rename from wdl/Tasks0506.wdl rename to wdl/TasksMakeCohortVcf.wdl diff --git a/wdl/TrainPEGenotyping.wdl b/wdl/TrainPEGenotyping.wdl index 24676af8c..b1a631466 100644 --- a/wdl/TrainPEGenotyping.wdl +++ b/wdl/TrainPEGenotyping.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks04.wdl" as tasks04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow TrainPEGenotyping { input { @@ -28,7 +28,7 @@ workflow TrainPEGenotyping { RuntimeAttr? runtime_attr_genotype } - call tasks04.SplitVcf as SplitVcf { + call tasksgenotypebatch.SplitVcf as SplitVcf { input: vcf = batch_vcf, n_per_split = n_per_split, @@ -47,7 +47,7 @@ workflow TrainPEGenotyping { } scatter (vcf in SplitVcf.vcfs) { - call tasks04.CountPE as CountPE { + call tasksgenotypebatch.CountPE as CountPE { input: vcf = vcf, discfile = discfile, @@ -60,7 +60,7 @@ workflow TrainPEGenotyping { } } - call tasks04.MergePESRCounts as MergePECounts { + call tasksgenotypebatch.MergePESRCounts as MergePECounts { input: count_list = CountPE.pe_counts, sum_list = [], diff --git a/wdl/TrainRDGenotyping.wdl b/wdl/TrainRDGenotyping.wdl index 91e4f2577..1e85a4ed8 100644 --- a/wdl/TrainRDGenotyping.wdl +++ b/wdl/TrainRDGenotyping.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks04.wdl" as tasks04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow TrainRDGenotyping { input { @@ -42,7 +42,7 @@ workflow TrainRDGenotyping { runtime_attr_override = runtime_attr_training_bed } - call tasks04.RDTestGenotype as GenotypeTrain { + call tasksgenotypebatch.RDTestGenotype as GenotypeTrain { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -79,7 +79,7 @@ workflow TrainRDGenotyping { runtime_attr_override = runtime_attr_update_cutoff } - call tasks04.SplitVariants as SplitVariants { + call tasksgenotypebatch.SplitVariants as SplitVariants { input: vcf = vcf, n_per_split = n_per_split, @@ -89,7 +89,7 @@ workflow TrainRDGenotyping { } scatter (pesr_bed in SplitVariants.lt5kb_beds) { - call tasks04.RDTestGenotype as GenotypePESR { + call tasksgenotypebatch.RDTestGenotype as GenotypePESR { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, @@ -110,7 +110,7 @@ workflow TrainRDGenotyping { } scatter (gt5kb_bed in SplitVariants.gt5kb_beds) { - call tasks04.RDTestGenotype as GenotypeOver5kb { + call tasksgenotypebatch.RDTestGenotype as GenotypeOver5kb { input: bin_exclude=bin_exclude, bin_exclude_idx=bin_exclude_idx, diff --git 
a/wdl/TrainSRGenotyping.wdl b/wdl/TrainSRGenotyping.wdl index 4d8168bd8..d2df64b71 100644 --- a/wdl/TrainSRGenotyping.wdl +++ b/wdl/TrainSRGenotyping.wdl @@ -1,7 +1,7 @@ version 1.0 import "Structs.wdl" -import "Tasks04.wdl" as tasks04 +import "TasksGenotypeBatch.wdl" as tasksgenotypebatch workflow TrainSRGenotyping { input { @@ -27,7 +27,7 @@ workflow TrainSRGenotyping { RuntimeAttr? runtime_attr_genotype } - call tasks04.SplitVcf as SplitVcf { + call tasksgenotypebatch.SplitVcf as SplitVcf { input: vcf = batch_vcf, n_per_split = n_per_split, @@ -38,7 +38,7 @@ workflow TrainSRGenotyping { } scatter (vcf in SplitVcf.vcfs) { - call tasks04.CountSR as CountSR { + call tasksgenotypebatch.CountSR as CountSR { input: vcf = vcf, splitfile = splitfile, @@ -51,7 +51,7 @@ workflow TrainSRGenotyping { } } - call tasks04.MergePESRCounts as MergeSRCounts { + call tasksgenotypebatch.MergePESRCounts as MergeSRCounts { input: count_list = CountSR.sr_counts, sum_list = CountSR.sr_sum, diff --git a/wdl/VaPoRVcf.wdl b/wdl/VaPoRVcf.wdl index f4156db71..45e76eca5 100644 --- a/wdl/VaPoRVcf.wdl +++ b/wdl/VaPoRVcf.wdl @@ -1,7 +1,6 @@ version 1.0 import "Structs.wdl" -import "Tasks0506.wdl" as tasks0506 import "TasksBenchmark.wdl" as tasks10 workflow VaPoRVcf { diff --git a/wdl/VcfClusterSingleChromsome.wdl b/wdl/VcfClusterSingleChromsome.wdl index be4f5f8cb..4be481295 100644 --- a/wdl/VcfClusterSingleChromsome.wdl +++ b/wdl/VcfClusterSingleChromsome.wdl @@ -3,7 +3,7 @@ version 1.0 # Author: Ryan Collins import "Structs.wdl" -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks import "ClusterSingleChromosome.wdl" as VcfClusterTasks # Workflow to run parallelized vcf clustering for a single chromosome diff --git a/wdl/XfBatchEffect.wdl b/wdl/XfBatchEffect.wdl index 824daf429..9aa08e692 100644 --- a/wdl/XfBatchEffect.wdl +++ b/wdl/XfBatchEffect.wdl @@ -2,7 +2,7 @@ version 1.0 import "prune_add_af.wdl" as calcAF import "batch_effect_helper.wdl" as helper -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow XfBatchEffect { input{ diff --git a/wdl/prune_add_af.wdl b/wdl/prune_add_af.wdl index 0c906c9dc..60c9addb7 100644 --- a/wdl/prune_add_af.wdl +++ b/wdl/prune_add_af.wdl @@ -2,7 +2,7 @@ version 1.0 import "CalcAF.wdl" as calcAF -import "Tasks0506.wdl" as MiniTasks +import "TasksMakeCohortVcf.wdl" as MiniTasks workflow prune_and_add_vafs { input {
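Note on the rename pattern applied throughout this patch: the bulk of the changes are mechanical. Each shared task library loses its numbered name (Tasks02, Tasks04, Tasks0506) in favor of the module that owns it (TasksGenerateBatchMetrics, TasksGenotypeBatch, TasksMakeCohortVcf), and every call site is updated to the new import alias; the task names, inputs, and outputs themselves are unchanged. A minimal downstream sketch of the post-rename convention follows (hypothetical workflow and input names, assuming TasksMakeCohortVcf.wdl is importable from the same directory; ConcatVcfs and its inputs are taken from the call sites shown in this diff):

version 1.0

import "TasksMakeCohortVcf.wdl" as MiniTasks  # was: import "Tasks0506.wdl" as MiniTasks

workflow ConcatExample {
  input {
    Array[File] vcfs
    Array[File] vcfs_idx
    String prefix
    String sv_base_mini_docker
  }

  # Same task and same inputs as before the rename; only the alias changed.
  call MiniTasks.ConcatVcfs {
    input:
      vcfs = vcfs,
      vcfs_idx = vcfs_idx,
      merge_sort = true,
      outfile_prefix = "~{prefix}.merged",
      sv_base_mini_docker = sv_base_mini_docker
  }

  output {
    File merged_vcf = ConcatVcfs.concat_vcf
    File merged_vcf_index = ConcatVcfs.concat_vcf_idx
  }
}

Because workflow names also changed (Module0506 is now MakeCohortVcf, Module04 is now GenotypeBatch, and so on), fully qualified input and output keys change with them; for example, the optional metrics output is now metrics_file_makecohortvcf rather than metrics_file_0506, so any saved input JSONs or output references keyed to the old names need the corresponding update.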
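Note on the metrics toggle carried through the renamed workflows: MakeCohortVcf, like GenotypeBatch above, gates its metrics sub-workflow on an optional Boolean that defaults to true when unset. A self-contained sketch of that idiom (hypothetical workflow and task names; the echoed file stands in for a real metrics task):

version 1.0

workflow ToggleExample {
  input {
    Boolean? run_module_metrics
  }

  # Unset => true; callers pass false to skip the metrics call entirely.
  Boolean run_module_metrics_ = if defined(run_module_metrics) then select_first([run_module_metrics]) else true

  if (run_module_metrics_) {
    call StubMetrics
  }

  output {
    File? metrics_file = StubMetrics.out
  }
}

task StubMetrics {
  command <<<
    echo "metrics enabled" > metrics.txt
  >>>
  output {
    File out = "metrics.txt"
  }
  runtime {
    docker: "ubuntu:18.04"
  }
}

Outputs of a conditionally executed call are optional (File?), which is why the renamed workflows declare metrics_file_genotypebatch and metrics_file_makecohortvcf as File? in their output blocks.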