Skip to content

Commit

Permalink
refactor: Gzip DIAMOND outs (#26)
Browse files (browse the repository at this point in the history)
* refactor: Gzip DIAMOND outs

* refactor: Adapt annotate to handle bytes (binary-mode) lines instead of text
  • Loading branch information
jvfe authored Sep 28, 2023
1 parent 361c766 commit 25c1cc5
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 13 deletions.
23 changes: 12 additions & 11 deletions bin/annotate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env python
import gzip
import plyvel
import os
import time
Expand Down Expand Up @@ -42,7 +43,7 @@ def getAll(
):
check = True
query = None
with open(input, "r") as f:
with gzip.open(input, "rb") as f:
write(out, "Query", "Annotation")
for line in f:
ls = line.split(sep)
Expand Down Expand Up @@ -73,15 +74,15 @@ def getAll(
query = ls[queryCol]
if not checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol):
if unknown:
write(out, query, "Unknown")
write(out, query.decode(), "Unknown")
continue
result = db.get(ls[subjectCol].strip().encode())
result = db.get(ls[subjectCol].strip())
if result == None:
if unknown:
write(out, query, "Unknown")
write(out, query.decode(), "Unknown")
continue
result = result.decode()
write(out, query, result)
write(out, query.decode(), result)


def getBestHits(
Expand All @@ -105,7 +106,7 @@ def getBestHits(
match = False
check = True
query = None
with open(input, "r") as f:
with gzip.open(input, "rb") as f:
write(out, "Query", "Annotation")
for line in f:
ls = line.split(sep)
Expand Down Expand Up @@ -145,19 +146,19 @@ def getBestHits(
else:
if query != ls[queryCol]:
if unknown:
write(out, query, "Unknown")
write(out, query.decode(), "Unknown")
query = ls[queryCol]
if not checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol):
continue
result = db.get(ls[subjectCol].strip().encode())
result = db.get(ls[subjectCol].strip())
if result == None:
continue
result = result.decode()
write(out, query, result)
write(out, query.decode(), result)
match = True
if not match:
if unknown:
write(out, query, "Unknown")
write(out, query.decode(), "Unknown")


def checkHit(ls, alen, evalue, bitscore, identity, alenCol, evalueCol, bitscoreCol, pidentCol):
Expand Down Expand Up @@ -269,7 +270,7 @@ def createLevelDB(input, key, value, sep, header, db):
type=str2bool,
default=True,
)
idmapping_parser.add_argument("--sep", help="The separator between columns (default: \\t)", default="\t")
idmapping_parser.add_argument("--sep", help="The separator between columns (default: \\t)", default=b"\t")

fixplyvel = subparsers.add_parser("fixplyvel", description="Fix plyvel undefined symbol error by reinstalling it")
args = parser.parse_args()
Expand Down
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ process {

// Alignment
withName: DIAMOND_BLASTX {
ext.args = '--more-sensitive --top 3'
ext.args = '--more-sensitive --top 3 --compress 1'
publishDir = [
path: { "${params.outdir}/alignment/${meta.id}" },
mode: params.publish_dir_mode,
Expand Down
9 changes: 9 additions & 0 deletions modules/nf-core/diamond/blastx/diamond-blastx.diff

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion modules/nf-core/diamond/blastx/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 25c1cc5

Please sign in to comment.