Skip to content

Commit

Permalink
WDL and docker file for ExpansionHunter (#210)
Browse files Browse the repository at this point in the history
* Add STR Dockerfile.

* Draft ExpansionHunter WDL.

* Add output files.

* Pin the versions of dependencies to their current latest.

* Gather multiple runs in a single run.

* Add a comment listing the available tools.

* Add no-install-recommends to decrease image size.

* Build bedtools from source.
  • Loading branch information
VJalili authored Aug 28, 2021
1 parent 73d4e29 commit 20684c7
Show file tree
Hide file tree
Showing 2 changed files with 173 additions and 0 deletions.
63 changes: 63 additions & 0 deletions dockerfiles/str/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# This Docker image contains the following
# tools and their dependencies:
# - GangSTR
# - TRTools
# - ExpansionHunter
# It also installs samtools and bedtools as supporting utilities.

FROM ubuntu:20.04

# Install the system packages used to build and run the tools below.
# --no-install-recommends keeps the package set minimal, and the apt package
# lists are deleted in the same layer so they do not persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND="noninteractive" apt-get install --no-install-recommends -qqy \
        python3-dev \
        python3-pip \
        python \
        python-dev \
        awscli \
        build-essential \
        git \
        libbz2-dev \
        liblzma-dev \
        make \
        pkg-config \
        wget \
        unzip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies, pinned to exact versions.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir pybedtools==0.8.2 pyvcf==0.6.8 scipy==1.7.1 numpy==1.21.1

# Install samtools (needed to index reference fasta files).
# The release tarball is deleted right after unpacking so it does not add to
# the layer. No trailing `cd ..` is needed: each RUN starts in the image's
# working directory again.
RUN wget -O samtools-1.9.tar.bz2 https://github.com/samtools/samtools/releases/download/1.9/samtools-1.9.tar.bz2 \
    && tar -xjf samtools-1.9.tar.bz2 \
    && rm samtools-1.9.tar.bz2 \
    && cd samtools-1.9 \
    && ./configure --without-curses \
    && make \
    && make install

# Install bedtools (needed for DumpSTR)
## Option 1: install from source
# Download, unpack, and build in a single RUN, using `cd` instead of WORKDIR:
# the image's working directory is left untouched for the steps that follow,
# and the tarball is removed in the same layer that fetched it.
RUN wget -O bedtools-2.27.1.tar.gz https://github.com/arq5x/bedtools2/releases/download/v2.27.1/bedtools-2.27.1.tar.gz \
    && tar -xzvf bedtools-2.27.1.tar.gz \
    && rm bedtools-2.27.1.tar.gz \
    && cd bedtools2 \
    && make \
    && make install
## Option 2: install from apt
#RUN apt-get update && apt-get install -y --no-install-recommends bedtools

# GangSTR: fetch the v2.4 release tarball, unpack it, and run the installer
# script shipped inside it; ldconfig then refreshes the shared-library cache.
RUN wget -O GangSTR-2.4.tar.gz https://github.com/gymreklab/GangSTR/releases/download/v2.4/GangSTR-2.4.tar.gz && \
    tar -xzvf GangSTR-2.4.tar.gz && \
    cd GangSTR-2.4 && \
    ./install-gangstr.sh && \
    ldconfig

# TRTools: clone the upstream repository and install it via its setup.py.
# NOTE(review): the clone is not pinned to a tag or commit, so a rebuild may
# pick up a different TRTools revision — consider pinning.
RUN git clone https://github.com/gymreklab/TRTools && \
    cd TRTools && \
    python3 setup.py install

# ExpansionHunter: download the prebuilt linux_x86_64 release selected by
# EH_VERSION, unpack it, discard the archive, and rename the unpacked
# directory to the version-independent path /ExpansionHunter.
# The `mv` uses absolute paths, so it relies on the archive having been
# unpacked in "/" (the working directory at this point in the build).
ENV EH_VERSION=v4.0.2
RUN wget https://github.com/Illumina/ExpansionHunter/releases/download/${EH_VERSION}/ExpansionHunter-${EH_VERSION}-linux_x86_64.tar.gz && \
    tar xzf ExpansionHunter-${EH_VERSION}-linux_x86_64.tar.gz && \
    rm ExpansionHunter-${EH_VERSION}-linux_x86_64.tar.gz && \
    mv /ExpansionHunter-${EH_VERSION}-linux_x86_64 /ExpansionHunter
# Make the ExpansionHunter binaries available on PATH.
ENV PATH="/ExpansionHunter/bin/:$PATH"
110 changes: 110 additions & 0 deletions wdl/ExpansionHunter.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
## This WDL implements a workflow that runs ExpansionHunter on a single BAM or CRAM file.
version 1.0

import "Structs.wdl"

# Bundle of filename postfixes plus the length of the profile postfix.
# NOTE(review): this struct is not referenced by the workflow or task visible
# in this file — confirm whether it is still needed.
struct FilenamePostfixes {
  String locus
  String motif
  String profile
  String merged_profile
  Int profile_len
}

# Runs ExpansionHunter on one BAM or CRAM file.
#
# Inputs:
#   bam_or_cram            Aligned reads, BAM or CRAM.
#   bam_or_cram_index      Optional index for the reads. When omitted it defaults
#                          to bam_or_cram + ".bai" if the file name ends in
#                          ".bam", and bam_or_cram + ".crai" otherwise.
#   reference_fasta        Reference FASTA.
#   reference_fasta_index  Optional FASTA index. When omitted it defaults to
#                          reference_fasta + ".fai".
#   variant_catalog        Variant catalog passed to ExpansionHunter.
#   docker_file            Value used as the task's `docker` runtime attribute.
#   runtime_attr           Optional override of the task's default runtime
#                          attributes.
#
# Outputs:
#   json, vcf, overlapping_reads — forwarded unchanged from RunExpansionHunter.
workflow ExpansionHunter {

  input {
    File bam_or_cram
    File? bam_or_cram_index
    File reference_fasta
    File? reference_fasta_index
    File variant_catalog
    String docker_file
    RuntimeAttr? runtime_attr
  }

  # True exactly when the reads file name ends with ".bam": stripping that
  # suffix and re-appending it must reproduce the original base name.
  Boolean is_bam = (basename(bam_or_cram, ".bam") + ".bam") == basename(bam_or_cram)

  # Resolve both indexes the same way: use the caller-supplied file if there is
  # one, otherwise assume the index sits next to its data file under the
  # conventional extension.
  String default_reads_index_ext = if is_bam then ".bai" else ".crai"
  File bam_or_cram_index_ = select_first([
    bam_or_cram_index,
    bam_or_cram + default_reads_index_ext])
  File reference_fasta_index_ = select_first([
    reference_fasta_index,
    reference_fasta + ".fai"])

  call RunExpansionHunter {
    input:
      bam_or_cram = bam_or_cram,
      bam_or_cram_index = bam_or_cram_index_,
      reference_fasta = reference_fasta,
      reference_fasta_index = reference_fasta_index_,
      variant_catalog = variant_catalog,
      docker_file = docker_file,
      runtime_attr_override = runtime_attr
  }

  output {
    File json = RunExpansionHunter.json
    File vcf = RunExpansionHunter.vcf
    File overlapping_reads = RunExpansionHunter.overlapping_reads
  }
}

# Runs the ExpansionHunter executable on a single reads file.
#
# Inputs:
#   bam_or_cram, reference_fasta, variant_catalog
#       Passed to ExpansionHunter as --reads, --reference and --variant-catalog.
#   bam_or_cram_index, reference_fasta_index
#       Declared as File inputs so they are localized with the task; they are
#       not passed on the command line. Presumably ExpansionHunter looks for
#       them next to the corresponding data files — TODO confirm.
#   docker_file
#       Value used as the `docker` runtime attribute.
#   runtime_attr_override
#       Optional replacement for the default runtime attributes below.
#
# Outputs (all named from the fixed prefix "output"):
#   json               output.json
#   vcf                output.vcf
#   overlapping_reads  output_realigned.bam
task RunExpansionHunter {
  input {
    File bam_or_cram
    File bam_or_cram_index
    File reference_fasta
    File reference_fasta_index
    File variant_catalog
    String docker_file
    RuntimeAttr? runtime_attr_override
  }

  String output_prefix = "output"

  # Default resources. The disk request is 10 GB of headroom plus the combined
  # size of every localized input file, rounded up.
  RuntimeAttr runtime_default = object {
    cpu_cores: 1,
    mem_gb: 4,
    boot_disk_gb: 10,
    preemptible_tries: 3,
    max_retries: 1,
    disk_gb: 10 + ceil(size([
      bam_or_cram,
      bam_or_cram_index,
      reference_fasta,
      reference_fasta_index,
      variant_catalog], "GiB"))
  }
  RuntimeAttr runtime_attr = select_first([
    runtime_attr_override,
    runtime_default])

  command <<<
    set -euxo pipefail

    ExpansionHunter \
      --reads ~{bam_or_cram} \
      --reference ~{reference_fasta} \
      --variant-catalog ~{variant_catalog} \
      --output-prefix ~{output_prefix}
  >>>

  output {
    File json = "${output_prefix}.json"
    File vcf = "${output_prefix}.vcf"
    File overlapping_reads = "${output_prefix}_realigned.bam"
  }

  # Each attribute falls back to its default individually, so an override that
  # sets only some fields still yields a complete runtime block.
  # NOTE(review): this assumes RuntimeAttr (declared outside this file) may have
  # optional fields — confirm against its declaration.
  runtime {
    docker: docker_file
    cpu: select_first([runtime_attr.cpu_cores, runtime_default.cpu_cores])
    memory: select_first([runtime_attr.mem_gb, runtime_default.mem_gb]) + " GiB"
    disks: "local-disk " + select_first([runtime_attr.disk_gb, runtime_default.disk_gb]) + " HDD"
    bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, runtime_default.boot_disk_gb])
    preemptible: select_first([runtime_attr.preemptible_tries, runtime_default.preemptible_tries])
    maxRetries: select_first([runtime_attr.max_retries, runtime_default.max_retries])
  }
}

0 comments on commit 20684c7

Please sign in to comment.