Skip to content

Commit

Permalink
Fix for incorrect output of dedup with --paired (CGATOxford#347)
Browse files (browse the repository at this point in the history)
  • Loading branch information
christianbioinf committed Jul 5, 2019
1 parent: e8c2b47 · commit: 26f71dc
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions umi_tools/sam_methods.py
Original file line number Diff line number Diff line change
@@ -574,6 +574,7 @@ def __init__(self, infile, outfile, tags=False):
self.infile = infile
self.outfile = outfile
self.read1s = set()
+ self.read1s_done = set()
self.chrom = None

def write(self, read, unique_id=None, umi=None, unmapped=False):
@@ -590,7 +591,8 @@ def write(self, read, unique_id=None, umi=None, unmapped=False):
self.chrom = read.reference_name

key = read.query_name, read.next_reference_name, read.next_reference_start
- self.read1s.add(key)
+ if key not in self.read1s_done:
+     self.read1s.add(key)

self.outfile.write(read)

@@ -609,6 +611,7 @@ def write_mates(self):
if key in self.read1s:
self.outfile.write(read)
self.read1s.remove(key)
+ self.read1s_done.add(key)

U.debug("%i mates remaining" % len(self.read1s))

@@ -623,7 +626,7 @@ def close(self):
found = 0
for read in self.infile.fetch(until_eof=True, multiple_iterators=True):

- if read.is_unmapped:
+ if any((read.is_unmapped, read.mate_is_unmapped, read.is_read1)):
continue

key = read.query_name, read.reference_name, read.reference_start
Expand Down

0 comments on commit 26f71dc

Please sign in to comment.