diff --git a/wdl/GatherBatchEvidence.wdl b/wdl/GatherBatchEvidence.wdl index f3f01a7e1..1e77ec426 100644 --- a/wdl/GatherBatchEvidence.wdl +++ b/wdl/GatherBatchEvidence.wdl @@ -505,7 +505,8 @@ workflow GatherBatchEvidence { File? BAF_stats = MatrixQC.BAF_stats File? Matrix_QC_plot = MatrixQC.QC_plot - Array[File]? manta_tloc = TinyResolve.tloc_manta_vcf + File? manta_tloc_tar = TinyResolve.manta_tloc_tar + File? manta_unresolved_tar = TinyResolve.manta_unresolved_tar File? metrics_file_batchevidence = GatherBatchEvidenceMetrics.metrics_file } diff --git a/wdl/TinyResolve.wdl b/wdl/TinyResolve.wdl index d4ba9773b..c33ba6fe2 100644 --- a/wdl/TinyResolve.wdl +++ b/wdl/TinyResolve.wdl @@ -15,8 +15,10 @@ workflow TinyResolve { Int samples_per_shard = 25 String sv_pipeline_docker String linux_docker + Boolean rm_cpx_type = true RuntimeAttr? runtime_attr_resolve RuntimeAttr? runtime_attr_untar + RuntimeAttr? runtime_attr_concattars } scatter (disc in discfile) { @@ -73,9 +75,11 @@ workflow TinyResolve { input: raw_vcfs=GetShardVcfs.shard_items, samples=GetShardSamples.shard_items, + shard_number = i, sv_pipeline_docker = sv_pipeline_docker, cytoband=cytoband, cytoband_idx=cytoband_idx, + rm_cpx_type = rm_cpx_type, discfile=GetShardDiscfiles.shard_items, discfile_idx=GetShardDiscfileIndexes.shard_items, mei_bed=mei_bed, @@ -83,8 +87,17 @@ workflow TinyResolve { } } + call ConcatTars { + input: + manta_tloc_tars=ResolveManta.manta_tloc_tar, + manta_unresolved_tars=ResolveManta.manta_unresolved_tar, + linux_docker=linux_docker, + runtime_attr_override=runtime_attr_concattars + } + output { - Array[File] tloc_manta_vcf = flatten(ResolveManta.tloc_vcf) + File manta_tloc_tar = ConcatTars.manta_tloc_tar + File manta_unresolved_tar = ConcatTars.manta_unresolved_tar } } @@ -94,11 +107,13 @@ task ResolveManta { Array[File] raw_vcfs Array[String] samples File cytoband_idx + Int shard_number Array[File] discfile Array[File] discfile_idx File cytoband File mei_bed String sv_pipeline_docker + Boolean rm_cpx_type RuntimeAttr? runtime_attr_override } @@ -127,12 +142,36 @@ task ResolveManta { pe=${discfiles[$i]} sample_no=`printf %03d $i` bash /opt/sv-pipeline/00_preprocessing/scripts/mantatloccheck.sh $vcf $pe ${sample_id} ~{mei_bed} ~{cytoband} - mv ${sample_id}.manta.complex.vcf.gz tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz + bcftools sort --output manta.unresolved.vcf.tmp --output-type v manta.unresolved.vcf + mv manta.unresolved.vcf.tmp manta.unresolved.vcf + if [[ ~{true='true' false='false' rm_cpx_type} = 'true' ]]; then + bcftools annotate --include 'INFO/SVTYPE != "CPX"' --keep-sites \ + --remove 'INFO/CPX_TYPE' \ + --output "tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz" \ + --output-type z \ + "${sample_id}.manta.complex.vcf.gz" + rm "${sample_id}.manta.complex.vcf.gz" + bcftools annotate --include 'INFO/SVTYPE != "CPX"' --keep-sites \ + --remove 'INFO/CPX_TYPE' \ + --output "${sample_no}.${sample_id}.manta.unresolved.vcf.gz" \ + --output-type z \ + manta.unresolved.vcf + rm manta.unresolved.vcf + else + mv ${sample_id}.manta.complex.vcf.gz tloc_${sample_no}.${sample_id}.manta.complex.vcf.gz + bgzip manta.unresolved.vcf + mv manta.unresolved.vcf.gz ${sample_no}.${sample_id}.manta.unresolved.vcf.gz + fi done + find . -type f -name '*.complex.vcf.gz' \ + | tar --create --file='manta_tloc_~{shard_number}.tar' --files-from=- + find . -type f -name '*.unresolved.vcf.gz' \ + | tar --create --file='manta_unresolved_~{shard_number}.tar' --files-from=- >>> output { - Array[File] tloc_vcf = glob("tloc_*.vcf.gz") + File manta_tloc_tar = 'manta_tloc_${shard_number}.tar' + File manta_unresolved_tar = 'manta_unresolved_${shard_number}.tar' } runtime { @@ -145,3 +184,64 @@ task ResolveManta { maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) } } + +task ConcatTars { + input { + Array[File] manta_tloc_tars + Array[File] manta_unresolved_tars + String linux_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 1.0, + disk_gb: ceil(10 + 3 * (size(manta_tloc_tars, "GB") + size(manta_unresolved_tars, "GB"))), + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + command <<< + set -o errexit + set -o pipefail + set -o nounset + + mkdir manta_tloc + while read -r tl; do + tar --extract --file "${tl}" --directory manta_tloc + done < '~{write_lines(manta_tloc_tars)}' + pushd manta_tloc + find . -type f -name '*.vcf.gz' > manifest.list + popd + tar --create --gzip --file manta_tloc.tar.gz --directory manta_tloc \ + --files-from manta_tloc/manifest.list + rm -rf manta_tloc + + mkdir manta_unresolved + while read -r ur; do + tar --extract --file "${ur}" --directory manta_unresolved + done < '~{write_lines(manta_unresolved_tars)}' + pushd manta_unresolved + find . -type f -name '*.vcf.gz' > manifest.list + popd + tar --create --gzip --file manta_unresolved.tar.gz \ + --directory manta_unresolved --files-from manta_unresolved/manifest.list + >>> + + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: linux_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } + + output { + File manta_tloc_tar = 'manta_tloc.tar.gz' + File manta_unresolved_tar = 'manta_unresolved.tar.gz' + } +}