diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..8c1a491 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,42 @@ +name: nf-test CI +# This workflow runs the nf-test test suite +on: + pull_request: + release: + types: [published] + +env: + NXF_ANSI_LOG: false + +jobs: + test: + name: Run ${{ matrix.profile }} pipeline test + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "latest-stable" + profile: ["docker"] # TODO , "singularity", "conda"] + steps: + - name: Check out pipeline code + uses: actions/checkout@v4 + + - uses: actions/cache@v4 + with: + path: /usr/local/bin/nextflow + key: ${{ runner.os }}-nextflow-${{ matrix.NXF_VER }} + restore-keys: | + ${{ runner.os }}-nextflow- + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Install nf-test + run: | + wget -qO- https://code.askimed.com/install/nf-test | bash + sudo mv nf-test /usr/local/bin/ + + - name: Run nf-test + run: nf-test test --profile=${{ matrix.profile }} tests/*nf.test diff --git a/.gitignore b/.gitignore index 3e5e3db..7b8bfed 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,8 @@ testing/ testing* *.pyc test_data/kraken_viral_db/ +.nf-test +nf-test +.nf-test.log +test.nf +site diff --git a/CITATIONS.md b/CITATIONS.md index 58cd6aa..2a47d88 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,5 +1,9 @@ # dalmolingroup/euryale: Citations +## [EURYALE](https://ieeexplore.ieee.org/document/10702116) + +> J. V. F. Cavalcante, I. Dantas de Souza, D. A. A. Morais and R. J. S. Dalmolin, "EURYALE: A versatile Nextflow pipeline for taxonomic classification and functional annotation of metagenomics data," 2024 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB), Natal, Brazil, 2024, pp. 1-7, doi: 10.1109/CIBCB58642.2024.10702116.
+ ## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) > Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. diff --git a/README.md b/README.md index b5905b1..fc9a46d 100644 --- a/README.md +++ b/README.md @@ -149,10 +149,9 @@ We thank the following people for their extensive assistance in the development ## Citations -> Morais DAA, Cavalcante JVF, Monteiro SS, Pasquali MAB and Dalmolin RJS (2022) -> MEDUSA: A Pipeline for Sensitive Taxonomic Classification and Flexible Functional Annotation -> of Metagenomic Shotgun Sequences. -> Front. Genet. 13:814437. doi: 10.3389/fgene.2022.814437 +> J. V. F. Cavalcante, I. Dantas de Souza, D. A. A. Morais and R. J. S. Dalmolin, "EURYALE: A versatile Nextflow pipeline for taxonomic classification and functional annotation of metagenomics data," +> 2024 IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB), Natal, Brazil, 2024, pp. 1-7, +> doi: 10.1109/CIBCB58642.2024.10702116. This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE). 
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 3bdbbf4..7f3393a 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -9,9 +9,8 @@ class WorkflowMain { // public static String citation(workflow) { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // TODO nf-core: Add Zenodo DOI for pipeline after first release - //"* The pipeline\n" + - //" https://doi.org/10.5281/zenodo.XXXXXXX\n\n" + + "* The pipeline\n" + + " https://doi.org/10.1109/CIBCB58642.2024.10702116\n\n" + "* The nf-core framework\n" + " https://doi.org/10.1038/s41587-020-0439-x\n\n" + "* Software dependencies\n" + diff --git a/nextflow.config b/nextflow.config index d98ac4c..caadf3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,6 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 0000000..46133e4 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,5 @@ +config { + testsDir "tests" + workDir ".nf-test" + profile "test,docker" +} diff --git a/test_data/MEGAHIT-test_minigut.contigs.fa.gz b/test_data/MEGAHIT-test_minigut.contigs.fa.gz new file mode 100644 index 0000000..83185a0 Binary files /dev/null and b/test_data/MEGAHIT-test_minigut.contigs.fa.gz differ diff --git a/test_data/test_minigut_sample2.txt.gz b/test_data/test_minigut_sample2.txt.gz new file mode 100644 index 0000000..3000939 Binary files /dev/null and b/test_data/test_minigut_sample2.txt.gz differ diff --git a/test_data/testdb-kraken2/hash.k2d b/test_data/testdb-kraken2/hash.k2d new file mode 100644 index 0000000..3683deb Binary files /dev/null and b/test_data/testdb-kraken2/hash.k2d differ diff --git a/test_data/testdb-kraken2/opts.k2d b/test_data/testdb-kraken2/opts.k2d new file mode 100644 index 0000000..8385b76 Binary files /dev/null and b/test_data/testdb-kraken2/opts.k2d differ diff --git 
a/test_data/testdb-kraken2/taxo.k2d b/test_data/testdb-kraken2/taxo.k2d new file mode 100644 index 0000000..f4e7e5c Binary files /dev/null and b/test_data/testdb-kraken2/taxo.k2d differ diff --git a/tests/alignment.nf.test b/tests/alignment.nf.test new file mode 100644 index 0000000..159399f --- /dev/null +++ b/tests/alignment.nf.test @@ -0,0 +1,57 @@ +nextflow_workflow { + + name "Test ALIGNMENT workflow" + script "subworkflows/local/alignment.nf" + workflow "ALIGNMENT" + tag "ALIGNMENT" + tag "subworkflows" + + test("Should run alignment with DIAMOND_MAKEDB and DIAMOND_BLASTX") { + + when { + workflow { + """ + input[0] = [[id:'MEGAHIT-test_minigut'], "$baseDir/test_data/MEGAHIT-test_minigut.contigs.fa.gz"] + input[1] = Channel.fromPath("$baseDir/test_data/protein.faa.gz") + input[2] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.trace.succeeded().size() == 2 }, + { assert workflow.out.alignments }, + { assert workflow.out.multiqc_files }, + { assert workflow.out.versions }, + { assert path(workflow.out.alignments[0][1]).exists() }, + ) + + } + } + + test("Should run alignment with pre-built DIAMOND database") { + + when { + workflow { + """ + input[0] = [[id:'minigut'], '$baseDir/test_data/MEGAHIT-test_minigut.contigs.fa.gz'] + input[1] = Channel.fromPath("$baseDir/test_data/protein.faa.gz") + input[2] = Channel.fromPath("$baseDir/test_data/protein.faa.gz.dmnd") + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.trace.succeeded().size() == 1 }, + { assert workflow.out.alignments }, + { assert workflow.out.multiqc_files }, + { assert workflow.out.versions }, + { assert path(workflow.out.alignments[0][1]).exists() }, + ) + } + } +} diff --git a/tests/functional.nf.test b/tests/functional.nf.test new file mode 100644 index 0000000..76b57bc --- /dev/null +++ b/tests/functional.nf.test @@ -0,0 +1,54 @@ +nextflow_workflow { + + name "Test FUNCTIONAL workflow" + script 
"subworkflows/local/functional.nf" + workflow "FUNCTIONAL" + tag "FUNCTIONAL" + tag "subworkflows" + + test("Should run functional annotation workflow") { + + when { + workflow { + """ + input[0] = [ + [ id:'test_sample' ], + file("$baseDir/test_data/test_minigut_sample2.txt.gz") + ] + input[1] = file("$baseDir/test_data/idmapping_selected.tab.example.gz") + """ + } + params { + minimum_bitscore = 50 + minimum_pident = 90 + minimum_alen = 100 + maximum_evalue = 1e-5 + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.annotated }, + ) + + // Check GUNZIP output + with(workflow.out.annotated) { + assert size() == 1 + with(get(0)) { + assert get(0).id == "test_sample" + } + } + + // Check ANNOTATE output + with(workflow.out.annotated) { + assert size() == 1 + with(get(0)) { + assert get(0).id == "test_sample" + assert get(1).readLines().size() > 0 + // You might want to add more specific checks on the content of the annotated file + } + } + } + } +} diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 0000000..7b16634 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,38 @@ +/* +======================================================================================== + Nextflow config file for running tests +======================================================================================== +*/ + +params { + // Limit resources so that this can run on GitHub Actions + cpus = 2 + memory = 4.GB + time = 4.h +} + +process { + withLabel:process_single { + cpus = 1 + memory = 6.GB + time = 4.h + } + withLabel:process_low { + cpus = 2 + memory = 4.GB + time = 4.h + } + withLabel:process_medium { + cpus = 2 + memory = 4.GB + time = 4.h + } + withLabel:process_high { + cpus = 2 + memory = 4.GB + time = 4.h + } + withLabel:process_high_memory { + memory = 4.GB + } +} diff --git a/tests/preprocess.nf.test b/tests/preprocess.nf.test new file mode 100644 index 0000000..ffa6df2 --- /dev/null +++ 
b/tests/preprocess.nf.test @@ -0,0 +1,60 @@ +nextflow_workflow { + + name "Test PREPROCESS workflow" + script "subworkflows/local/preprocess.nf" + workflow "PREPROCESS" + tag "PREPROCESS" + tag "subworkflows" + + test("Should run preprocessing with paired-end reads") { + + when { + workflow { + """ + input[0] = [ + [ id:'test_sample', single_end:false ], // meta map + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/test_minigut_R1.fastq.gz", + "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/test_minigut_R2.fastq.gz" + ] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.reads }, + { assert workflow.out.merged_reads }, + { assert workflow.out.multiqc_files }, + { assert workflow.out.versions } + ) + + // Check FASTP output + with(workflow.out.reads) { + assert size() == 1 + with(get(0)) { + assert get(0).id == "test_sample" + assert get(0).single_end == false + assert get(1).size() == 2 // Two fastq files for paired-end + } + } + + // Check merged reads + with(workflow.out.merged_reads) { + assert size() == 1 + with(get(0)) { + assert get(0).id == "test_sample" + assert get(0).single_end == true + } + } + + // Check versions + with(workflow.out.versions) { + assert size() == 3 + assert path(get(0)).text.contains("FASTP") + } + } + } +} diff --git a/tests/taxonomy.nf.test b/tests/taxonomy.nf.test new file mode 100644 index 0000000..0ad7b69 --- /dev/null +++ b/tests/taxonomy.nf.test @@ -0,0 +1,47 @@ +nextflow_workflow { + + name "Test TAXONOMY workflow" + script "subworkflows/local/taxonomy.nf" + workflow "TAXONOMY" + tag "TAXONOMY" + tag "subworkflows" + + test("Should run taxonomy workflow with Kraken2") { + + when { + workflow { + """ + input[0] = [ + [ id:'test_sample', single_end:false ], // meta map + [ + "https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/test_minigut_R1.fastq.gz", + 
"https://raw.githubusercontent.com/nf-core/test-datasets/mag/test_data/test_minigut_R2.fastq.gz" + ] + + ] + input[1] = [] + input[2] = file("$baseDir/test_data/testdb-kraken2") + """ + } + params { + run_kaiju = false + run_kraken2 = true + skip_microview = true + } + } + + then { + assertAll( + { assert workflow.success }, + { assert workflow.out.tax_report }, + { assert workflow.out.krona_report }, + { assert workflow.out.versions } + ) + + // Check Krona output + with(workflow.out.krona_report) { + assert size() == 1 + } + } + } +}