Merge branch 'release_v0.3.6'

foerstner-lab · Mar 18, 2015 · 69173bd · 69173bd
2 parents 9c389f8 + e8dbda9
commit 69173bd
Show file tree

Hide file tree

Showing 10 changed files with 79 additions and 17 deletions.
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -1,3 +1,5 @@
+v0.3.6 (2015-03-18)
+- Add strand-unspecific read per gene quantification
 v0.3.5 (2015-01-13)
 - Update some pysam-related functions due to API changes in pysam
 - Add further requirements (matplotlib, Biopython, pandas)

diff --git a/LICENSE.txt b/LICENSE.txt
@@ -1,4 +1,4 @@
-Copyright (c) 2011-2014, Konrad Förstner <[email protected]>
+Copyright (c) 2011-2015, Konrad Förstner <[email protected]>
 
 Permission to use, copy, modify, and/or distribute this software for
 any purpose with or without fee is hereby granted, provided that the

diff --git a/bin/reademption b/bin/reademption
@@ -6,10 +6,10 @@ import argparse
 from reademptionlib.controller import Controller
 
 __author__ = "Konrad Foerstner <[email protected]>"
-__copyright__ = "2011-2014 by Konrad Foerstner <[email protected]>"
+__copyright__ = "2011-2015 by Konrad Foerstner <[email protected]>"
 __license__ = "ISC license"
 __email__ = "[email protected]"
-__version__ = "0.3.5"
+__version__ = "0.3.6"
 
 def main():
     parser = argparse.ArgumentParser()
@@ -167,6 +167,10 @@ def main():
         "--skip_antisense", "-a", default=False, action="store_true", 
         help="Do not count anti-sense read-gene-overlaps. By default sense "
         "and anti-sense overlaps are counted and separately reported.")
+    gene_wise_quanti_parser.add_argument(
+        "--non_strand_specific", default=False, action="store_true", 
+        help="Use countings of reads overlapping with a gene on both strands "
+        "and sum them up.")
     gene_wise_quanti_parser.add_argument(
         "--processes", "-p", default=1, type=int,
         help="Number of processes that should be used (default 1).")

diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -41,7 +41,7 @@
 
 # General information about the project.
 project = u'READemption'
-copyright = u'2014, Konrad U. Förstner'
+copyright = u'2015, Konrad U. Förstner'
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -50,7 +50,7 @@
 # The short X.Y version.
 version = '0.3'
 # The full version, including alpha/beta/rc tags.
-release = '0.3.5'
+release = '0.3.6'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

diff --git a/docs/source/license.rst b/docs/source/license.rst
@@ -3,7 +3,7 @@ License
 
 READemption is open source software and available under the ISC license.
 
-Copyright (c) 2011-2014, Konrad Förstner <[email protected]>
+Copyright (c) 2011-2015, Konrad Förstner <[email protected]>
 
 Permission to use, copy, modify, and/or distribute this software for
 any purpose with or without fee is hereby granted, provided that the

diff --git a/docs/source/subcommands.rst b/docs/source/subcommands.rst
@@ -235,6 +235,9 @@ overlaps are counted and separately listed.
     --skip_antisense, -a  Do not count anti-sense read-gene-overlaps. By default
                           sense and anti-sense overlaps are counted and
                           separately reported.
+    --non_strand_specific
+                          Use countings of reads overlapping with a gene on both
+                          strands and sum them up.
     --processes PROCESSES, -p PROCESSES
                           Number of processes that should be used (default 1).
     --features ALLOWED_FEATURES, -t ALLOWED_FEATURES

diff --git a/reademptionlib/controller.py b/reademptionlib/controller.py
@@ -665,9 +665,13 @@ def _quantify_gene_wise(
     def _gene_quanti_create_overview(
             self, annotation_files, annotation_paths, lib_names):
         """Create an overview table of all gene quantification for all libs."""
+        strand_specific = True
+        if self._args.non_strand_specific:
+                    strand_specific = False
         gene_wise_overview = GeneWiseOverview(
             allowed_features_str=self._args.allowed_features,
-            skip_antisense=self._args.skip_antisense)
+            skip_antisense=self._args.skip_antisense,
+            strand_specific=strand_specific)
         path_and_name_combos = {}
         for annotation_file, annotation_path in zip(
                 annotation_files, annotation_paths):

diff --git a/reademptionlib/coveragecalculator.py b/reademptionlib/coveragecalculator.py
@@ -59,8 +59,8 @@ def _open_bam_file(self, bam_file):
         return pysam.Samfile(bam_file)
 
     def _add_whole_alignment_coverage(self, entry, increment, start, end):
-        if ((entry.is_reverse is False and entry.is_read2 == False) or
-            (entry.is_reverse is True and entry.is_read2 == True)):
+        if ((entry.is_reverse is False and entry.is_read2 is False) or
+            (entry.is_reverse is True and entry.is_read2 is True)):
             self._coverages["forward"][start:end] = [
                 coverage + increment for coverage in
                 self._coverages["forward"][start:end]]

diff --git a/reademptionlib/genewisequanti.py b/reademptionlib/genewisequanti.py
@@ -3,6 +3,7 @@
 from reademptionlib.gff3 import Gff3Parser
 import pysam
 
+
 class GeneWiseQuantification(object):
 
     def __init__(self, min_overlap=1, norm_by_alignment_freq=True,
@@ -142,11 +143,14 @@ def _values_to_gene_key(self, seq_id, feature, start, end, strand):
         return ("|".join(
                 [str(val) for val in [seq_id, feature, start, end, strand]]))
 
+
 class GeneWiseOverview(object):
 
-    def __init__(self, allowed_features_str=None, skip_antisense=False):
+    def __init__(self, allowed_features_str=None, skip_antisense=False, 
+                 strand_specific=True):
         self._allowed_features = _allowed_features(allowed_features_str)
         self._skip_antisense = skip_antisense
+        self._strand_specific = strand_specific
 
     def create_overview_raw_countings(
             self, path_and_name_combos, read_files, overview_path):
@@ -170,13 +174,18 @@ def _create_overview(self, path_and_name_combos, read_files, overview_path,
                 ["Orientation of counted reads relative to the strand "
                  "location of the annotation"] + _gff_field_descriptions() 
                 + read_files) + "\n")
-        self._add_to_overview(
-            path_and_name_combos, "sense", 9, output_fh, normalization,
-            libs_and_tnoar)
-        if self._skip_antisense is False:
+        if self._strand_specific:
             self._add_to_overview(
-                path_and_name_combos, "anti-sense", 10, output_fh,
-                normalization, libs_and_tnoar)
+                path_and_name_combos, "sense", 9, output_fh, normalization,
+                libs_and_tnoar)
+            if self._skip_antisense is False:
+                self._add_to_overview(
+                    path_and_name_combos, "anti-sense", 10, output_fh,
+                    normalization, libs_and_tnoar)
+        else:
+            self._add_to_overview_strand_unspecific(
+                path_and_name_combos, "sense_and_antisense", 9, 10,
+                output_fh, normalization, libs_and_tnoar)
 
     def _add_to_overview(
             self, path_and_name_combos, direction, column, output_fh,
@@ -211,6 +220,46 @@ def _add_to_overview(
             for row in table:
                 output_fh.write("\t".join(row) + "\n")
 
+    def _add_to_overview_strand_unspecific(
+            self, path_and_name_combos, direction, column1, column2, output_fh,
+            normalization=None, libs_and_tnoar=None):
+        """Write overview rows with the summed countings of two columns.
+
+        The values in column1 and column2 of each gene quantification file
+        are summed and, if requested, normalized via "RPKM" or "TNOAR".
+        """
+        gff3_parser = Gff3Parser()
+        for annotation_path in sorted(path_and_name_combos.keys()):
+            entries, seq_lengths = [], []
+            # Context managers make sure the input files get closed.
+            with open(annotation_path) as annotation_fh:
+                for entry in gff3_parser.entries(annotation_fh):
+                    if _entry_to_use(entry, self._allowed_features) is False:
+                        continue
+                    entries.append(direction + "\t" + str(entry))
+                    seq_lengths.append(entry.end - entry.start + 1)
+            table_columns = [entries]
+            for read_file, gene_quanti_path in path_and_name_combos[
+                    annotation_path]:
+                with open(gene_quanti_path) as gene_quanti_fh:
+                    reader = csv.reader(gene_quanti_fh, delimiter="\t")
+                    next(reader)  # skip first line
+                    values = [str(float(row[column1]) + float(row[column2]))
+                              for row in reader]
+                if normalization == "RPKM":
+                    values = [
+                        self._rpkm(value, length, libs_and_tnoar[read_file])
+                        for value, length in zip(values, seq_lengths)]
+                elif normalization == "TNOAR":
+                    values = [
+                        self._norm_by_tnoar(value, libs_and_tnoar[read_file])
+                        for value in values]
+                table_columns.append(values)
+            # Generate a table by rotating the column list
+            for row in zip(*table_columns):
+                output_fh.write("\t".join(row) + "\n")
+
+
     def _rpkm(self, counting, length, total_no_of_aligned_reads):
         """
         Formula in Supplemenatary Material S1 of

diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name='READemption',
-    version='0.3.5',
+    version='0.3.6',
     packages=['reademptionlib', 'tests'],
     author='Konrad U. Förstner',
     author_email='[email protected]',