diff --git a/conf/base.config b/conf/base.config index 0158901..d62b5e1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { check_max( 1 * task.attempt, 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } @@ -19,13 +18,6 @@ process { maxRetries = 1 maxErrors = '-1' - // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. - // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. - // If possible, it would be nice to keep the same label naming convention when - // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { check_max( 1 , 'cpus' ) } memory = { check_max( 6.GB * task.attempt, 'memory' ) } @@ -59,6 +51,10 @@ process { errorStrategy = 'retry' maxRetries = 2 } + withLabel:error_retry_delay { + errorStrategy = { sleep(Math.pow(2, task.attempt) * 200 as long); return 'retry' } + maxRetries = 3 + } withName:CUSTOM_DUMPSOFTWAREVERSIONS { cache = false } diff --git a/conf/modules.config b/conf/modules.config index 6bf4697..be1fe2d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,37 @@ process { ] } + // DOWNLOAD ENTRY + withName: DOWNLOAD_FUNCTIONAL_DB { + publishDir = [ + path: { "${params.outdir}/references/functional" }, + mode: "move", + ] + } + withName: DOWNLOAD_FUNCTIONAL_DICT { + publishDir = [ + path: { "${params.outdir}/references/functional" }, + mode: "move", + ] + } + withName: DOWNLOAD_KAIJU { + publishDir = [ + path: { "${params.outdir}/references/kaiju" }, + mode: "move", + ] + } + withName: DOWNLOAD_KRAKEN { + publishDir = [ + path: { "${params.outdir}/references/kraken2" }, + mode: "move", + ] + } + withName: 
DOWNLOAD_HOST { + publishDir = [ + path: { "${params.outdir}/references/host" }, + mode: "move", + ] + } // Host removal withName: BOWTIE2_ALIGN { ext.args = "--sensitive" diff --git a/conf/test.config b/conf/test.config index c2ffbe1..dccd906 100644 --- a/conf/test.config +++ b/conf/test.config @@ -26,6 +26,13 @@ params { id_mapping = "$projectDir/test_data/idmapping_selected.tab.example.gz" reference_fasta = "$projectDir/test_data/protein.faa.gz" + // download entry + functional_db = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/protein.faa.gz' + functional_dictionary = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/idmapping_selected.tab.example.gz' + kaiju_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kaiju/kaiju.tar.gz' + kraken2_db_url = 'https://github.com/nf-core/test-datasets/raw/taxprofiler/data/database/kraken2/testdb-kraken2.tar.gz' + host_url = 'https://github.com/dalmolingroup/euryale/raw/main/test_data/GCA_002596845.1_ASM259684v1_genomic.fna.gz' + // Annotation params minimum_bitscore = 30 minimum_pident = 30 diff --git a/docs/params.md b/docs/params.md index a4e6f84..f27a5e2 100644 --- a/docs/params.md +++ b/docs/params.md @@ -87,6 +87,22 @@ Reference genome related files and options required for the workflow. | `igenomes_ignore` | Do not load the iGenomes reference config.
HelpDo not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`.
| `boolean` | | | True | | `fasta` | | `string` | | | | +## Download Entry + + + +| Parameter | Description | Type | Default | Required | Hidden | +|-----------|-----------|-----------|-----------|-----------|-----------| +| `download_functional` | Whether to download functional references | `boolean` | True | | | +| `download_kaiju` | Whether to download the Kaiju reference db | `boolean` | True | | | +| `download_kraken` | Whether to download the Kraken2 reference db | `boolean` | | | | +| `download_host` | Whether to download the host reference genome | `boolean` | | | | +| `functional_db` | Functional reference URL (download entry) | `string` | https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz | | | +| `functional_dictionary` | Functional dictionary URL (download entry) | `string` | https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz | | | +| `kaiju_db_url` | Kaiju reference URL (download entry) | `string` | https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz | | | +| `kraken2_db_url` | Kraken2 reference URL (download entry) | `string` | https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz | | | +| `host_url` | Host FASTA reference URL (download entry) | `string` | http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz | | | + ## Max job request options Set the top limit for requested resources for any single job. diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 5bd1677..3bdbbf4 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -78,12 +78,6 @@ class WorkflowMain { // Check AWS batch settings NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'" - System.exit(1) - } } // // Get attribute from genome config file e.g. 
fasta diff --git a/main.nf b/main.nf index 13ac8fd..5fc23b4 100644 --- a/main.nf +++ b/main.nf @@ -32,12 +32,10 @@ WorkflowMain.initialise(workflow, params, log) */ include { EURYALE } from './workflows/euryale' +include { DOWNLOAD } from './workflows/download' -// -// WORKFLOW: Run main dalmolingroup/euryale analysis pipeline -// -workflow DALMOLINGROUP_EURYALE { - EURYALE () +workflow download { + DOWNLOAD () } /* @@ -51,7 +49,7 @@ workflow DALMOLINGROUP_EURYALE { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { - DALMOLINGROUP_EURYALE () + EURYALE () } /* diff --git a/modules/local/download/main.nf b/modules/local/download/main.nf new file mode 100644 index 0000000..6f12b08 --- /dev/null +++ b/modules/local/download/main.nf @@ -0,0 +1,30 @@ +// Download one remote file with wget; the output is named `id` unless task.ext.prefix overrides it. +process DOWNLOAD { + tag "$id" + + label 'process_single' + label 'error_retry_delay' + + input: + val id + val url + + output: + path "${prefix}", emit: db + + script: + prefix = task.ext.prefix ?: "${id}" + + // Quote both arguments so URLs containing shell metacharacters (e.g. '&' in a query string) reach wget intact. + """ + wget -O "${prefix}" "${url}" + """ + + stub: + // The script: section is not evaluated under -stub-run, so prefix has to be defined here as well. + prefix = task.ext.prefix ?: "${id}" + + """ + touch "${prefix}" + """ +} diff --git a/nextflow.config b/nextflow.config index eb9dcf7..d98ac4c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,18 @@ params { skip_alignment = false skip_microview = false + // Download entry options + download_functional = true + download_kaiju = true + download_kraken = false + download_host = false + + functional_db = 'https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz' + functional_dictionary = 'https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz' + kaiju_db_url = 'https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz' + kraken2_db_url = 'https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz' + host_url = 'http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz' + // MultiQC options multiqc_config = null multiqc_title = null diff --git 
a/nextflow_schema.json b/nextflow_schema.json index 35e15f4..c8f1b68 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,7 +11,6 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "required": [ - "input", "outdir" ], "properties": { @@ -154,9 +153,6 @@ "description": "Run Kraken2 classifier" } }, - "required": [ - "kaiju_db" - ], "fa_icon": "fab fa-pagelines" }, "functional": { @@ -237,6 +233,58 @@ } } }, + "download_entry": { + "title": "Download Entry", + "type": "object", + "description": "", + "default": "", + "properties": { + "download_functional": { + "type": "boolean", + "default": true, + "description": "Whether to download functional references" + }, + "download_kaiju": { + "type": "boolean", + "default": true, + "description": "Whether to download the Kaiju reference db" + }, + "download_kraken": { + "type": "boolean", + "description": "Whether to download the Kraken2 reference db" + }, + "download_host": { + "type": "boolean", + "description": "Whether to download the host reference genome" + }, + "functional_db": { + "type": "string", + "default": "https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz", + "description": "Functional reference URL (download entry)" + }, + "functional_dictionary": { + "type": "string", + "default": "https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/idmapping.dat.gz", + "description": "Functional dictionary URL (download entry)" + }, + "kaiju_db_url": { + "type": "string", + "default": "https://kaiju-idx.s3.eu-central-1.amazonaws.com/2023/kaiju_db_nr_2023-05-10.tgz", + "description": "Kaiju reference URL (download entry)" + }, + "kraken2_db_url": { + "type": "string", + "default": "https://genome-idx.s3.amazonaws.com/kraken/k2_standard_08gb_20240112.tar.gz", + "description": "Kraken2 reference URL (download entry)" + }, + "host_url": { + "type": "string", + "default": 
"http://ftp.ensembl.org/pub/release-112/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz", + "description": "Host FASTA reference URL (download entry)" + } + }, + "fa_icon": "fas fa-database" + }, "max_job_request_options": { "title": "Max job request options", "type": "object", @@ -413,6 +461,9 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/download_entry" + }, { "$ref": "#/definitions/max_job_request_options" }, diff --git a/workflows/download.nf b/workflows/download.nf new file mode 100644 index 0000000..083470d --- /dev/null +++ b/workflows/download.nf @@ -0,0 +1,45 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowEuryale.initialise(params, log) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULES +// + +include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DB } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_FUNCTIONAL_DICT } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_KAIJU } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_KRAKEN } from '../modules/local/download/main' +include { DOWNLOAD as DOWNLOAD_HOST } from '../modules/local/download/main' + +workflow DOWNLOAD { + if (params.download_functional) { + DOWNLOAD_FUNCTIONAL_DB("reference_fasta.fa.gz", params.functional_db) + DOWNLOAD_FUNCTIONAL_DICT("id_mapping.tab.gz", params.functional_dictionary) + } + + if (params.download_kaiju) { + DOWNLOAD_KAIJU("kaiju_db.tar.gz", params.kaiju_db_url) + } + + if (params.download_kraken) { + 
DOWNLOAD_KRAKEN("kraken2_db.tar.gz", params.kraken2_db_url) + } + + if (params.download_host) { + DOWNLOAD_HOST("host_fasta.fa.gz", params.host_url) + } +} diff --git a/workflows/euryale.nf b/workflows/euryale.nf index d5999fb..11a5414 100644 --- a/workflows/euryale.nf +++ b/workflows/euryale.nf @@ -13,9 +13,6 @@ WorkflowEuryale.initialise(params, log) def checkPathParamList = [ params.input, params.multiqc_config, params.kaiju_db ] for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -72,6 +69,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoft def multiqc_report = [] workflow EURYALE { + // Check mandatory parameters + if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } if (params.reference_fasta == null && params.diamond_db == null && params.skip_alignment == false) { exit 1, 'A reference fasta (--reference_fasta) or a DIAMOND db (--diamond_db) must be specified' } if (params.run_kaiju == true && params.kaiju_db == null && params.skip_classification == false) {exit 1, 'A Kaiju tar.gz database must be specified with --kaiju_db'} if (params.run_kraken2 == true && params.kraken2_db == null && params.skip_classification == false) {exit 1, 'A Kraken2 database must be specified with --kraken2_db'}