From ba12de178ddd3042a06ea45006ad52cb9a6474d3 Mon Sep 17 00:00:00 2001 From: Karan Jaisingh Date: Mon, 13 Jan 2025 16:34:29 -0500 Subject: [PATCH] Updated dragen standardizer --- src/svtk/svtk/standardize/std_dragen.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/svtk/svtk/standardize/std_dragen.py b/src/svtk/svtk/standardize/std_dragen.py index e47e2f37c..83d9366da 100644 --- a/src/svtk/svtk/standardize/std_dragen.py +++ b/src/svtk/svtk/standardize/std_dragen.py @@ -47,7 +47,10 @@ def standardize_records(self): Skip mated events that are not marked with SECONDARY tag. """ + # Track IDs of observed records mate_IDs = deque() + + # Iterate over records for record in self.filter_raw_vcf(): # Filter unmarked SECONDARY on same chromosome if 'MATEID' in record.info: @@ -56,8 +59,7 @@ def standardize_records(self): # Skip records with an observed mate if mate_ID in mate_IDs: continue - - # Track IDs of observed records + mate_IDs.append(record.id) yield self.standardize_record(record) @@ -117,15 +119,9 @@ def standardize_info(self, std_rec, raw_rec): strands = '+-' elif svtype == 'DUP': strands = '-+' - elif svtype == 'INS': # Treat DUPSVLEN as DUP - if 'DUPSVLEN' in raw_rec.info: - svtype = 'DUP' - std_rec.info['SVTYPE'] = svtype - strands = '-+' - else: - strands = '+-' - else: # Default + elif svtype == 'INS': strands = '+-' + if not is_smaller_chrom(std_rec.chrom, std_rec.info['CHR2']): strands = strands[::-1] std_rec.info['STRANDS'] = strands