forked from shpakoo/YAP
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SplitFastX.py
executable file
·84 lines (68 loc) · 2.66 KB
/
SplitFastX.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
########################################################################################
## This file is a part of YAP package of scripts. https://github.com/shpakoo/YAP
## Distributed under the MIT license: http://www.opensource.org/licenses/mit-license.php
## Copyright (c) 2011-2013 Sebastian Szpakowski
########################################################################################
#################################################
## Split FastX file into chunks of specified size
#################################################
import os
import sys
from optparse import OptionParser

from Bio import SeqIO
_author="Sebastian Szpakowski"
_date="2012/03/29"
_version="Version 1"
#################################################
## Classes
##
#################################################
## Functions
##
################################################
### Read in a file and return a list of lines
###
def loadLines(x):
    """Read the text file at path *x* and return its lines.

    Returns a list with one string per line (line endings are kept, as
    produced by ``readlines``). If the file cannot be opened or read, an
    empty list is returned instead of raising, so callers can always
    iterate over or take ``len()`` of the result.

    Only I/O failures are treated as "no content"; other exceptions
    (including KeyboardInterrupt) propagate to the caller.
    """
    try:
        # The context manager closes the handle even if readlines() fails.
        with open(x, "r") as fp:
            return fp.readlines()
    except (IOError, OSError):
        # Best effort: a missing or unreadable file yields no lines.
        return []
#################################################
## Arguments
##
# Command-line interface: one input file, the number of records per chunk,
# and the sequence format name handed to Bio.SeqIO for reading and writing.
parser = OptionParser()
parser.add_option("-i", "--input", dest="fn_input",
                  help="FASTA/FASTQ file to split", metavar="FILE")
parser.add_option("-c", "--chunk", dest="chunk_size", type=int,
                  help="put N sequences per output file", metavar="N")
parser.add_option("-f", "--format", dest="file_format", default="fasta",
                  help="sequence format name passed to Bio.SeqIO, "
                       "e.g. fasta or fastq [default: %default]",
                  metavar="FORMAT")
(options, args) = parser.parse_args()

# Neither -i nor -c has a default; fail with a usage message here rather than
# with a TypeError later when the values are used.
if not options.fn_input:
    parser.error("an input file is required (-i FILE)")
if options.chunk_size is None or options.chunk_size < 1:
    parser.error("a positive chunk size is required (-c N)")
#################################################
## Begin
##
# Chunk files are named "<input path without extension>.<index>.chunk.<format>".
# os.path.splitext only inspects the last path component, so an input without
# an extension keeps its name and dots in directory names are left alone.
chunk_prefix = os.path.splitext(options.fn_input.strip())[0]


def _write_chunk(records, index):
    """Write *records* to chunk file number *index* in the requested format."""
    chunk_name = "%s.%s.chunk.%s" % (chunk_prefix, index, options.file_format)
    with open(chunk_name, "w") as file_out:
        SeqIO.write(records, file_out, options.file_format)


counter_F = 0
outputs = list()
with open(options.fn_input, "r") as file_in:
    for record in SeqIO.parse(file_in, options.file_format):
        outputs.append(record)
        # A full chunk is written out immediately so at most chunk_size
        # records are held in memory at a time.
        if len(outputs) >= options.chunk_size:
            _write_chunk(outputs, counter_F)
            outputs = list()
            counter_F += 1
    # Flush the final, partially filled chunk (if any records remain).
    if outputs:
        _write_chunk(outputs, counter_F)
#################################################
## Finish
#################################################