-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
10 changed files
with
188 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
test: | ||
python3.3 tests/test_all.py | ||
python3 tests/test_all.py | ||
|
||
coverage: | ||
coverage3 run tests/test_all.py | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,7 +9,7 @@ __author__ = "Konrad Foerstner <[email protected]>" | |
__copyright__ = "2011-2014 by Konrad Foerstner <[email protected]>" | ||
__license__ = "ISC license" | ||
__email__ = "[email protected]" | ||
__version__ = "0.2.6" | ||
__version__ = "0.2.8" | ||
|
||
def main(): | ||
parser = argparse.ArgumentParser() | ||
|
@@ -80,6 +80,14 @@ def main(): | |
read_aligning_parser.add_argument( | ||
"--progress", "-g", default=False, action="store_true", | ||
help="Show progress of the segemehl mapping.") | ||
read_aligning_parser.add_argument( | ||
"--crossalign_cleaning", "-x", default=None, | ||
dest="crossalign_cleaning_str", metavar="CROSSALIGN_CLEANING_STRING", | ||
help="Remove reads that are cross-mapped to replicons of different " | ||
"species. To associated species and replicons give a string in the " | ||
"following format: '<ORG_NAME_1>:<org_1_repl1>,<org_1_repl2>,..," | ||
"<org_1_repl_n>;<ORG_NAME_2>:<org_2_repl1>,<org_2_repl2>,..," | ||
"<org_2_repl_n>'") | ||
read_aligning_parser.set_defaults(func=align_reads) | ||
|
||
# Parameters for coverage file building | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
import pysam | ||
import sys | ||
|
||
class CrossAlignFilter(object): | ||
|
||
def __init__(self, input_bam, output_bam, output_crossmapped_reads, | ||
orgs_and_replicon_ids): | ||
self._input_bam = input_bam | ||
self._output_bam = output_bam | ||
self._output_crossmapped_reads = output_crossmapped_reads | ||
self._orgs_and_replicon_ids = orgs_and_replicon_ids | ||
self._crossmapped_reads = set() | ||
self.no_of_crossmapped_reads = None | ||
|
||
def determine_crossmapped_reads(self): | ||
"""Find reads that are mapped to sequences of different species. | ||
The comparison is performed replicon wise in order to reduce | ||
the memory footprint. | ||
""" | ||
self._check_replicon_existance() | ||
done_replicon_comparison = [] | ||
with pysam.Samfile(self._input_bam) as bam: | ||
for org, replicon_ids in self._orgs_and_replicon_ids.items(): | ||
for replicon_id in replicon_ids: | ||
self._read_ids = set() | ||
# First, collect the ids of the aligned reads of | ||
# this replicon | ||
for alignment in bam.fetch(reference=replicon_id): | ||
self._read_ids.add(alignment.qname) | ||
# Then compare them to the alignments of each | ||
# replicon of the other organism(s) | ||
for comp_org, comp_replicon_ids in ( | ||
self._orgs_and_replicon_ids.items()): | ||
# Only compare replicons of different species | ||
if org == comp_org: | ||
continue | ||
for comp_replicon_id in comp_replicon_ids: | ||
comparison = sorted([replicon_id, comp_replicon_id]) | ||
# Check if comparison of the two replicons | ||
# has been done already | ||
if comparison in done_replicon_comparison: | ||
continue | ||
done_replicon_comparison.append(comparison) | ||
# Compare all read ids of the comparison | ||
# replicon to the query replicon read ids | ||
for alignment in bam.fetch( | ||
reference=comp_replicon_id): | ||
if alignment.qname in self._read_ids: | ||
self._crossmapped_reads.add( | ||
alignment.qname) | ||
self.no_of_crossmapped_reads = len(self._crossmapped_reads) | ||
# Write list of cross mapped read to file | ||
with open(self._output_crossmapped_reads, "w") as output_list_fh: | ||
output_list_fh.write("\n".join(self._crossmapped_reads) + "\n") | ||
|
||
def _check_replicon_existance(self): | ||
found_all = True | ||
with pysam.Samfile(self._input_bam) as bam: | ||
for replicon_ids in self._orgs_and_replicon_ids.values(): | ||
for replicon_id in replicon_ids: | ||
if not replicon_id in bam.references: | ||
sys.stderr.write( | ||
"\"%s\" not found in BAM header.\n" % replicon_id) | ||
found_all = False | ||
if not found_all: | ||
raise RepliconIdNotInBam | ||
|
||
def write_crossmapping_free_bam(self): | ||
with pysam.Samfile(self._input_bam) as input_bam: | ||
with pysam.Samfile(self._output_bam, "wb", | ||
header=input_bam.header) as output_bam: | ||
for alignment in input_bam.fetch(): | ||
if not alignment.qname in self._crossmapped_reads: | ||
output_bam.write(alignment) | ||
pysam.index(self._output_bam) | ||
|
||
class RepliconIdNotInBam(BaseException): | ||
pass | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
|
||
setup( | ||
name='READemption', | ||
version='0.2.7', | ||
version='0.2.8', | ||
packages=['reademptionlib', 'tests'], | ||
author='Konrad U. Förstner', | ||
author_email='[email protected]', | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters