From 25c1cc520dff900c3b34318e003a9774cb90c1f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Vitor?= Date: Thu, 28 Sep 2023 09:18:28 -0300 Subject: [PATCH] refactor: Gzip DIAMOND outs (#26) * refactor: Gzip DIAMOND outs * refactor: Adapt annotate for bytecode text --- bin/annotate.py | 23 ++++++++++--------- conf/modules.config | 2 +- .../diamond/blastx/diamond-blastx.diff | 9 ++++++++ modules/nf-core/diamond/blastx/main.nf | 2 +- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/bin/annotate.py b/bin/annotate.py index 9727971..a315ab0 100755 --- a/bin/annotate.py +++ b/bin/annotate.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +import gzip import plyvel import os import time @@ -42,7 +43,7 @@ def getAll( ): check = True query = None - with open(input, "r") as f: + with gzip.open(input, "rb") as f: write(out, "Query", "Annotation") for line in f: ls = line.split(sep) @@ -73,15 +74,15 @@ def getAll( query = ls[queryCol] if not checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol): if unknown: - write(out, query, "Unknown") + write(out, query.decode(), "Unknown") continue - result = db.get(ls[subjectCol].strip().encode()) + result = db.get(ls[subjectCol].strip()) if result == None: if unknown: - write(out, query, "Unknown") + write(out, query.decode(), "Unknown") continue result = result.decode() - write(out, query, result) + write(out, query.decode(), result) def getBestHits( @@ -105,7 +106,7 @@ def getBestHits( match = False check = True query = None - with open(input, "r") as f: + with gzip.open(input, "rb") as f: write(out, "Query", "Annotation") for line in f: ls = line.split(sep) @@ -145,19 +146,19 @@ def getBestHits( else: if query != ls[queryCol]: if unknown: - write(out, query, "Unknown") + write(out, query.decode(), "Unknown") query = ls[queryCol] if not checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol): continue - result = db.get(ls[subjectCol].strip().encode()) + result = db.get(ls[subjectCol].strip()) if result == None: continue result = result.decode() - write(out, query, result) + write(out, query.decode(), result) match = True if not match: if unknown: - write(out, query, "Unknown") + write(out, query.decode(), "Unknown") def checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol): @@ -269,7 +270,7 @@ def createLevelDB(input, key, value, sep, header, db): type=str2bool, default=True, ) -idmapping_parser.add_argument("--sep", help="The separator between columns (default: \\t)", default="\t") +idmapping_parser.add_argument("--sep", help="The separator between columns (default: \\t)", default=b"\t") fixplyvel = subparsers.add_parser("fixplyvel", description="Fix plyvel undefined symbol error by reinstalling it") args = parser.parse_args() diff --git a/conf/modules.config b/conf/modules.config index e0eb17d..41faf9b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -78,7 +78,7 @@ process { // Alignment withName: DIAMOND_BLASTX { - ext.args = '--more-sensitive --top 3' + ext.args = '--more-sensitive --top 3 --compress 1' publishDir = [ path: { "${params.outdir}/alignment/${meta.id}" }, mode: params.publish_dir_mode, diff --git a/modules/nf-core/diamond/blastx/diamond-blastx.diff b/modules/nf-core/diamond/blastx/diamond-blastx.diff index 6135e9e..2655857 100644 --- a/modules/nf-core/diamond/blastx/diamond-blastx.diff +++ b/modules/nf-core/diamond/blastx/diamond-blastx.diff @@ -17,5 +17,14 @@ Changes in module 'nf-core/diamond/blastx' input: tuple val(meta), path(fasta) +@@ -16,7 +18,7 @@ + output: + tuple val(meta), path('*.blast'), optional: true, emit: blast + tuple val(meta), path('*.xml') , optional: true, emit: xml +- tuple val(meta), path('*.txt') , optional: true, emit: txt ++ tuple val(meta), path('*.txt.gz') , optional: true, emit: txt + tuple val(meta), path('*.daa') , optional: true, emit: daa + tuple val(meta), path('*.sam') , optional: true, emit: sam + tuple val(meta), path('*.tsv') , optional: true, emit: tsv ************************************************************ diff --git a/modules/nf-core/diamond/blastx/main.nf b/modules/nf-core/diamond/blastx/main.nf index ccbf8b4..50cf24a 100644 --- a/modules/nf-core/diamond/blastx/main.nf +++ b/modules/nf-core/diamond/blastx/main.nf @@ -18,7 +18,7 @@ process DIAMOND_BLASTX { output: tuple val(meta), path('*.blast'), optional: true, emit: blast tuple val(meta), path('*.xml') , optional: true, emit: xml - tuple val(meta), path('*.txt') , optional: true, emit: txt + tuple val(meta), path('*.txt.gz') , optional: true, emit: txt tuple val(meta), path('*.daa') , optional: true, emit: daa tuple val(meta), path('*.sam') , optional: true, emit: sam tuple val(meta), path('*.tsv') , optional: true, emit: tsv