Skip to content

Commit

Permalink
Merge pull request #583 from atrigila/fusioninspector
Browse files Browse the repository at this point in the history
Add nf-test to `STARFUSION_BUILD` and refactor module
  • Loading branch information
atrigila authored Dec 19, 2024
2 parents 1ede9b4 + fb64ca6 commit 34121d1
Show file tree
Hide file tree
Showing 11 changed files with 525 additions and 23 deletions.
3 changes: 2 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ process {
]
}

withName: 'STARFUSION_BUILD' {
withName: 'NFCORE_RNAFUSION:BUILD_REFERENCES:STARFUSION_BUILD' {
cpus = { 24 * task.attempt }
memory = { 100.GB * task.attempt }
time = { 2.d * task.attempt }
Expand All @@ -329,6 +329,7 @@ process {
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
ext.args = "--max_readlength ${params.read_length} --human_gencode_filter"
}

withName: 'STARFUSION_DOWNLOAD' {
Expand Down
3 changes: 3 additions & 0 deletions conf/test_build.config
Original file line number Diff line number Diff line change
Expand Up @@ -21,4 +21,7 @@ params {
all = true

skip_salmon_index = true
starfusion_build = true
fusion_annot_lib = 'https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz'

}
111 changes: 92 additions & 19 deletions modules/local/starfusion/build/main.nf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
process STARFUSION_BUILD {
tag 'star-fusion'
tag "$meta.id"
label 'process_high'

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
Expand All @@ -9,30 +10,22 @@ process STARFUSION_BUILD {
input:
tuple val(meta), path(fasta)
tuple val(meta2), path(gtf)
path fusion_annot_lib
val dfam_species

output:
path "*" , emit: reference
tuple val(meta), path("ctat_genome_lib_build_dir"), emit: reference

script:
def args = task.ext.args ?: ''
"""
export TMPDIR=/tmp
wget http://ftp.ebi.ac.uk/pub/databases/Pfam/releases/Pfam34.0/Pfam-A.hmm.gz --no-check-certificate
wget https://github.com/FusionAnnotator/CTAT_HumanFusionLib/releases/download/v0.3.0/fusion_lib.Mar2021.dat.gz -O CTAT_HumanFusionLib_Mar2021.dat.gz --no-check-certificate
wget https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/AnnotFilterRule.pm -O AnnotFilterRule.pm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3f --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3i --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3m --no-check-certificate
wget https://www.dfam.org/releases/Dfam_3.4/infrastructure/dfamscan/homo_sapiens_dfam.hmm.h3p --no-check-certificate
gunzip Pfam-A.hmm.gz && hmmpress Pfam-A.hmm
prep_genome_lib.pl \\
--genome_fa $fasta \\
--gtf $gtf \\
--annot_filter_rule AnnotFilterRule.pm \\
--fusion_annot_lib CTAT_HumanFusionLib_Mar2021.dat.gz \\
--pfam_db Pfam-A.hmm \\
--dfam_db homo_sapiens_dfam.hmm \\
--max_readlength $params.read_length \\
--dfam_db ${dfam_species} \\
--pfam_db current \\
--fusion_annot_lib $fusion_annot_lib \\
${args} \\
--CPU $task.cpus
cat <<-END_VERSIONS > versions.yml
Expand All @@ -43,8 +36,88 @@ process STARFUSION_BUILD {

stub:
"""
mkdir ctat_genome_lib_build_dir
touch ref_annot.cdna.fa
mkdir -p ctat_genome_lib_build_dir
touch ctat_genome_lib_build_dir/AnnotFilterRule.pm
gzip -c /dev/null > ctat_genome_lib_build_dir/blast_pairs.dat.gz
touch ctat_genome_lib_build_dir/blast_pairs.idx
mkdir -p ctat_genome_lib_build_dir/__chkpts
touch ctat_genome_lib_build_dir/__chkpts/annotfiltrule_cp.ok
touch ctat_genome_lib_build_dir/__chkpts/blast_pairs.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_gene_blast_pairs.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_pfam_dat.ok
touch ctat_genome_lib_build_dir/__chkpts/cp_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/fusion_annot_lib.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/_fusion_annot_lib.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/index_pfam_hits.ok
touch ctat_genome_lib_build_dir/__chkpts/index_ref_annot_cdna.ok
touch ctat_genome_lib_build_dir/__chkpts/makeblastdb.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2_genome_idx.ok
touch ctat_genome_lib_build_dir/__chkpts/mm2.splice_bed.ok
touch ctat_genome_lib_build_dir/__chkpts/_prot_info_db.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.gene_spans.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.mini.sortu.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_annot.gtf.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome_fai.ok
touch ctat_genome_lib_build_dir/__chkpts/ref_genome.fa.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.cp.ok
touch ctat_genome_lib_build_dir/__chkpts/trans.blast.dat.index.ok
touch ctat_genome_lib_build_dir/__chkpts/validate_ctat_genome_lib.ok
gzip -c /dev/null > ctat_genome_lib_build_dir/fusion_annot_lib.gz
touch ctat_genome_lib_build_dir/fusion_annot_lib.idx
touch ctat_genome_lib_build_dir/pfam_domains.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/PFAM.domtblout.dat.gz
touch ctat_genome_lib_build_dir/ref_annot.cdna.fa
touch ctat_genome_lib_build_dir/ref_annot.cdna.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.cds
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa
touch ctat_genome_lib_build_dir/ref_annot.cdsplus.fa.idx
touch ctat_genome_lib_build_dir/ref_annot.gtf
touch ctat_genome_lib_build_dir/ref_annot.gtf.gene_spans
touch ctat_genome_lib_build_dir/ref_annot.gtf.mini.sortu
touch ctat_genome_lib_build_dir/ref_annot.gtf.mm2.splice.bed
touch ctat_genome_lib_build_dir/ref_annot.pep
touch ctat_genome_lib_build_dir/ref_annot.prot_info.dbm
touch ctat_genome_lib_build_dir/ref_genome.fa
touch ctat_genome_lib_build_dir/ref_genome.fa.fai
touch ctat_genome_lib_build_dir/ref_genome.fa.mm2
touch ctat_genome_lib_build_dir/ref_genome.fa.ndb
touch ctat_genome_lib_build_dir/ref_genome.fa.nhr
touch ctat_genome_lib_build_dir/ref_genome.fa.nin
touch ctat_genome_lib_build_dir/ref_genome.fa.njs
touch ctat_genome_lib_build_dir/ref_genome.fa.not
touch ctat_genome_lib_build_dir/ref_genome.fa.nsq
touch ctat_genome_lib_build_dir/ref_genome.fa.ntf
touch ctat_genome_lib_build_dir/ref_genome.fa.nto
mkdir -p ctat_genome_lib_build_dir/ref_genome.fa.star.idx
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/build.ok
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrNameLength.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrName.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/chrStart.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonGeTrInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/exonInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/geneInfo.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Genome
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/genomeParameters.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/Log.out
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SA
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/SAindex
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbInfo.txt
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/sjdbList.out.tab
touch ctat_genome_lib_build_dir/ref_genome.fa.star.idx/transcriptInfo.tab
touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dat
touch ctat_genome_lib_build_dir/trans.blast.align_coords.align_coords.dbm
gzip -c /dev/null > ctat_genome_lib_build_dir/trans.blast.dat.gz
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
11 changes: 9 additions & 2 deletions modules/local/starfusion/build/meta.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name: starfusion_downloadgenome
name: starfusion_build
description: Build the STAR-Fusion genome resource library (CTAT genome lib) required to run the STAR-Fusion caller
keywords:
- downoad
- download
tools:
- star-fusion:
description: Fusion calling algorithm for RNAseq data
Expand All @@ -20,6 +20,13 @@ input:
type: file
description: genome gtf file
pattern: "*.{gtf}"
- fusion_annot_lib:
type: file
description: Fusion annotation library (key/val pairs, tab-delimited).
pattern: "*.dat.gz"
- dfam_species:
type: string
description: Species name selecting the DNA transposable element database (Dfam.hmm) required for repeat masking. Only 'human' or 'mouse' are accepted; the matching resources are pulled automatically from Dfam.

output:
- reference:
Expand Down
138 changes: 138 additions & 0 deletions modules/local/starfusion/build/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// nf-test suite for the STARFUSION_BUILD process defined in ../main.nf.
// Contains two tests that use the same inputs: a real run whose output directory
// is snapshotted file by file, and a stub run whose whole output is snapshotted.
nextflow_process {

name "Test Process STARFUSION_BUILD"
script "../main.nf"
process "STARFUSION_BUILD"

// Real (non-stub) run on the STAR-Fusion tutorial minigenome.
test("STARFUSION_BUILD - human - minigenome") {

when {
// Inputs, in order: [meta, fasta], [meta2, gtf], fusion annotation library
// file, and the Dfam species string ("human").
process {
"""
input[0] = [
[ id:'minigenome fasta' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
]
input[1] = [
[ id:'minigenome gtf' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
]
input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
input [3] = "human"
"""
}
}

then {
// process.out.reference[0][1] is the path element of the first emitted
// `reference` tuple; every entry below is resolved relative to it.
// Entries ending in .exists() contribute only a boolean to the snapshot;
// the other entries are passed as path objects.
// NOTE(review): nf-test is expected to snapshot path objects by content
// checksum, and the .exists()-only files are presumably those whose content
// is not stable between runs - confirm before changing any entry.
// The argument order must stay in sync with the stored snapshot file.
assert snapshot(
path(process.out.reference[0][1]).resolve("AnnotFilterRule.pm"),
path(process.out.reference[0][1]).resolve("blast_pairs.dat.gz").exists(),
path(process.out.reference[0][1]).resolve("blast_pairs.idx").exists(),
// Files under the __chkpts sub-directory (names all end in .ok).
path(process.out.reference[0][1]).resolve("__chkpts/annotfiltrule_cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/blast_pairs.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_gene_blast_pairs.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_pfam_dat.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/cp_ref_annot_cdna.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/fusion_annot_lib.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/_fusion_annot_lib.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/index_pfam_hits.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/index_ref_annot_cdna.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/makeblastdb.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/mm2_genome_idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/mm2.splice_bed.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/_prot_info_db.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.cdsplus.dfam_masked.fa.idx.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.gene_spans.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.mini.sortu.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_annot.gtf.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_genome_fai.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/ref_genome.fa.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.cp.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/trans.blast.dat.index.ok"),
path(process.out.reference[0][1]).resolve("__chkpts/validate_ctat_genome_lib.ok"),
// Fusion annotation, Pfam and ref_annot.* files at the top level of the directory.
path(process.out.reference[0][1]).resolve("fusion_annot_lib.gz"),
path(process.out.reference[0][1]).resolve("fusion_annot_lib.idx").exists(),
path(process.out.reference[0][1]).resolve("pfam_domains.dbm").exists(),
path(process.out.reference[0][1]).resolve("PFAM.domtblout.dat.gz").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdna.fa.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cds").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.cdsplus.fa.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.gtf"),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.gene_spans").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mini.sortu"),
path(process.out.reference[0][1]).resolve("ref_annot.gtf.mm2.splice.bed"),
path(process.out.reference[0][1]).resolve("ref_annot.pep").exists(),
path(process.out.reference[0][1]).resolve("ref_annot.prot_info.dbm").exists(),
// ref_genome.fa and the files that share its name as a prefix.
path(process.out.reference[0][1]).resolve("ref_genome.fa"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.fai"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.mm2"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.ndb"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nhr"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nin").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.njs").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.not"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nsq"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.ntf"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.nto"),
// The ref_genome.fa.star.idx sub-directory and the files inside it.
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/build.ok"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrLength.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrNameLength.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrName.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/chrStart.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonGeTrInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/exonInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/geneInfo.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Genome"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/genomeParameters.txt").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/Log.out").exists(),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SA"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/SAindex"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbInfo.txt"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.fromGTF.out.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/sjdbList.out.tab"),
path(process.out.reference[0][1]).resolve("ref_genome.fa.star.idx/transcriptInfo.tab"),
// trans.blast.* files at the top level of the directory.
path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dat"),
path(process.out.reference[0][1]).resolve("trans.blast.align_coords.align_coords.dbm").exists(),
path(process.out.reference[0][1]).resolve("trans.blast.dat.gz"),
// The versions output channel is the last snapshot entry.
process.out.versions
).match()
}

}

// Same inputs as the test above, run with the "-stub" option.
test("STARFUSION_BUILD - human - minigenome - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'minigenome fasta' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.fa")
]
input[1] = [
[ id:'minigenome gtf' ],
file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/minigenome.gtf")
]
input [2] = file("https://github.com/STAR-Fusion/STAR-Fusion-Tutorial/raw/master/CTAT_HumanFusionLib.mini.dat.gz")
input [3] = "human"
"""
}
}

then {
// The whole process output is snapshotted in one call, rather than file by file.
assert snapshot(process.out).match()
}

}

}
Loading

0 comments on commit 34121d1

Please sign in to comment.