Skip to content

Commit

Permalink
Merge branch 'release_v0.3.9'
Browse files Browse the repository at this point in the history
  • Loading branch information
konrad committed Mar 7, 2016
2 parents 479b397 + 3704017 commit 94d1ad9
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 35 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
*pyc
rapl/doc/build/

# Links for development only
bin/reademptionlib

32 changes: 22 additions & 10 deletions bin/reademption
Original file line number Diff line number Diff line change
Expand Up @@ -137,13 +137,19 @@ def main():
"base.")
coverage_creation_parser.add_argument(
"--coverage_style", "-b", choices=["global", "first_base_only",
"last_base_only"],
default="global", help="Choose for coverage "
"generation if only the first aligned base at the 5' end of each read "
"('first_base_only') or the last aligned base at the 3' end of each "
"read ('last_base_only') is taken into account. By default the "
"coverage is generated using the whole range of each alignment "
"last_base_only", "centered"],
default="global", help="Select for coverage generation if only the "
"first aligned base at the 5' end of each read ('first_base_only') or "
"the last aligned base at the 3' end of each read ('last_base_only') "
"is taken into account. The centered approach ('centered') clips a "
"predefined number of nts from each alignment end and adds to the "
"remaining genomic region a value divided by its length. By default "
"the coverage is generated using the whole range of each alignment "
"('global').")
coverage_creation_parser.add_argument(
"--clip_length", "-cl", type=int, default=11, help="Number of "
"nucleotides that are clipped from each alignment end for centered "
"approach.")
coverage_creation_parser.add_argument(
"--check_for_existing_files", "-f", default=False,
action="store_true", help="Check for existing files (e.g. from a "
Expand All @@ -164,13 +170,19 @@ def main():
help="Minimal read-annotation-overlap (in nt) (default 1).")
gene_wise_quanti_parser.add_argument(
"--read_region", "-b", choices=["global", "first_base_only",
"last_base_only"],
default="global", help="Choose for gene-wise quantification "
"last_base_only", "centered"],
default="global", help="Select for gene-wise quantification "
"if only the first aligned base at the 5' end of each read "
"('first_base_only') or the last aligned base at the 3' end of each "
"read ('last_base_only') is taken into account. By default the "
"overlap is calculated based on the whole range of each alignment "
"read ('last_base_only') is taken into account. The centered approach "
"('centered') clips a predefined number of nts from each alignment end "
"and calculates the overlap based on the remaining region. By default "
"the overlap is calculated based on the whole range of each alignment "
"('global').")
gene_wise_quanti_parser.add_argument(
"--clip_length", "-cl", type=int, default=11, help="Number of "
"nucleotides that are clipped from each alignment end for centered "
"approach.")
gene_wise_quanti_parser.add_argument(
"--no_count_split_by_alignment_no", "-n", default=False,
action="store_true", help="Do not split read countings by the number "
Expand Down
4 changes: 3 additions & 1 deletion reademptionlib/controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,8 +535,9 @@ def _create_coverage_files_for_lib(
read_count_splitting = False
coverage_calculator = CoverageCalculator(
read_count_splitting=read_count_splitting,
uniqueley_aligned_only=self._args.unique_only,
uniquely_aligned_only=self._args.unique_only,
coverage_style=self._args.coverage_style,
clip_length=self._args.clip_length,
non_strand_specific=self._args.non_strand_specific)
(coverage_writers_raw, coverage_writers_tnoar_min_norm,
coverage_writers_tnoar_mil_norm) = self._wiggle_writers(
Expand Down Expand Up @@ -649,6 +650,7 @@ def _quantify_gene_wise(
gene_wise_quantification = GeneWiseQuantification(
min_overlap=self._args.min_overlap,
read_region=self._args.read_region,
clip_length=self._args.clip_length,
norm_by_alignment_freq=norm_by_alignment_freq,
norm_by_overlap_freq=norm_by_overlap_freq,
allowed_features_str=self._args.allowed_features,
Expand Down
50 changes: 31 additions & 19 deletions reademptionlib/coveragecalculator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import numpy as np
import pysam


class CoverageCalculator(object):

def __init__(self, read_count_splitting=True, uniqueley_aligned_only=False,
coverage_style="global", non_strand_specific=False):
def __init__(self, read_count_splitting=True, uniquely_aligned_only=False,
coverage_style="global", clip_length=11,
non_strand_specific=False):
self._read_count_splitting = read_count_splitting
self._uniqueley_aligned_only = uniqueley_aligned_only
self._uniquely_aligned_only = uniquely_aligned_only
self._coverage_style = coverage_style
self._clip_length = clip_length
self._coverage_add_function = self._select_coverage_add_function()
self._coverages = {}
self._non_strand_specific = non_strand_specific
Expand All @@ -30,12 +33,12 @@ def _sum_strand_coverages(self):

def _init_coverage_list(self, length):
for strand in ["forward", "reverse"]:
self._coverages[strand] = [0.0] * length
self._coverages[strand] = np.array([0.0] * length)

def _calc_coverage(self, ref_seq, bam):
for entry in bam.fetch(ref_seq):
number_of_hits = dict(entry.tags)["NH"]
if self._uniqueley_aligned_only is True and number_of_hits != 1:
if self._uniquely_aligned_only is True and number_of_hits != 1:
continue
# Note: No translation from SAMParsers coordinates to python
# list coorindates is needed.
Expand All @@ -54,6 +57,8 @@ def _select_coverage_add_function(self):
return self._add_first_base_coverage
elif self._coverage_style == "last_base_only":
return self._add_last_base_coverage
elif self._coverage_style == "centered":
return self._add_centered_coverage
else:
return self._add_whole_alignment_coverage

Expand All @@ -63,28 +68,35 @@ def _open_bam_file(self, bam_file):
def _add_whole_alignment_coverage(self, entry, increment, start, end):
if ((entry.is_reverse is False and entry.is_read2 is False) or
(entry.is_reverse is True and entry.is_read2 is True)):
self._coverages["forward"][start:end] = [
coverage + increment for coverage in
self._coverages["forward"][start:end]]
self._coverages["forward"][start:end] += increment
else:
self._coverages["reverse"][start:end] = [
coverage - increment for coverage in
self._coverages["reverse"][start:end]]
self._coverages["reverse"][start:end] -= increment

def _add_first_base_coverage(self, entry, increment, start, end):
if ((entry.is_reverse is False and entry.is_read2 is False) or
(entry.is_reverse is True and entry.is_read2 is True)):
self._coverages["forward"][start] = self._coverages[
"forward"][start] + increment
self._coverages["forward"][start] += increment
else:
self._coverages["reverse"][end-1] = self._coverages[
"reverse"][end-1] - increment
self._coverages["reverse"][end-1] -= increment

def _add_last_base_coverage(self, entry, increment, start, end):
if ((entry.is_reverse is False and entry.is_read2 is False) or
(entry.is_reverse is True and entry.is_read2 is True)):
self._coverages["forward"][end-1] = self._coverages[
"forward"][end-1] + increment
self._coverages["forward"][end-1] += increment
else:
self._coverages["reverse"][start] = self._coverages[
"reverse"][start] - increment
self._coverages["reverse"][start] -= increment

def _add_centered_coverage(self, entry, increment, start, end):
center_start = start + self._clip_length
center_end = end - self._clip_length
center_length = float(center_end - center_start)
if center_length < 1.0:
#print(entry)
return
if ((entry.is_reverse is False and entry.is_read2 is False) or
(entry.is_reverse is True and entry.is_read2 is True)):
self._coverages["forward"][center_start:center_end] += (
increment / center_length)
else:
self._coverages["reverse"][center_start:center_end] -= (
increment / center_length)
16 changes: 14 additions & 2 deletions reademptionlib/genewisequanti.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class GeneWiseQuantification(object):

def __init__(self, min_overlap=1, read_region="global",
def __init__(self, min_overlap=1, read_region="global", clip_length=11,
norm_by_alignment_freq=True, norm_by_overlap_freq=True,
allowed_features_str=None, skip_antisense=False,
unique_only=False):
Expand All @@ -18,6 +18,7 @@ def __init__(self, min_overlap=1, read_region="global",
"""
self._min_overlap = min_overlap
self._read_region = read_region
self._clip_length = clip_length
self._norm_by_alignment_freq = norm_by_alignment_freq
self._norm_by_overlap_freq = norm_by_overlap_freq
self._allowed_features = _allowed_features(allowed_features_str)
Expand Down Expand Up @@ -144,6 +145,12 @@ def _overlapping_alignments(self, sam, entry):
if (alignment.is_reverse is True) and (
(start < entry.start) or (start > entry.end)):
continue
elif self._read_region == "centered":
if _get_overlap(start + self._clip_length,
end - self._clip_length,
entry.start,
entry.end) < self._min_overlap:
continue
else:
if alignment.get_overlap(entry.start-1,
entry.end) < self._min_overlap:
Expand Down Expand Up @@ -321,7 +328,12 @@ def _allowed_features(allowed_features_str):
return [
feature.strip() for feature in allowed_features_str.split(",")]


def _gff_field_descriptions():
return ["Sequence name", "Source", "Feature", "Start", "End", "Score",
"Strand", "Frame", "Attributes"]

def _get_overlap(alignment_start, alignment_end, feature_start, feature_end):
return max(0,
min(alignment_end, feature_end) -
max(alignment_start, feature_start) + 1)
9 changes: 7 additions & 2 deletions reademptionlib/vizdeseq.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def create_scatter_plots(self, plot_path):
self._pp_scatterplots.close()

def _create_scatter_plots(self, condition_1, condition_2):
pd.options.display.mpl_style = 'default'
matplotlib.style.use('ggplot')
font = {'family': 'sans-serif', 'size': 7}
matplotlib.rc('font', **font)
deseq_result = pd.read_table(
Expand All @@ -41,9 +41,14 @@ def _create_scatter_plots(self, condition_1, condition_2):
deseq_result.log2FoldChange, ".", alpha=0.3)
significant_deseq_result = deseq_result[
deseq_result.padj < self._p_value_significance_limit]
non_significant_deseq_result = deseq_result[
deseq_result.padj >= self._p_value_significance_limit]
plt.plot(
np.log10(significant_deseq_result.baseMean),
significant_deseq_result.log2FoldChange, ".r", alpha=0.5)
significant_deseq_result.log2FoldChange, ".r", alpha=0.3)
plt.plot(
np.log10(non_significant_deseq_result.baseMean),
non_significant_deseq_result.log2FoldChange, ".k", alpha=0.5)
plt.title("{} vs. {} - MA plot".format(condition_1, condition_2))
y_max = max([abs(log2_fc)
for log2_fc in deseq_result.log2FoldChange])
Expand Down
3 changes: 2 additions & 1 deletion reademptionlib/vizgenequanti.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,8 @@ def plot_annotation_class_quantification(self, plot_path):
matplotlib.rc('font', **font)
plt.title("Number of reads per RNA classes")
color_index = 0
for direction in ["sense", "anti-sense"]:
for direction in self._lib_names_and_class_quanti[
self._lib_names[0]].keys():
for anno_class in all_classes_sorted:
countings = [
self._lib_names_and_class_quanti[lib][direction][
Expand Down

0 comments on commit 94d1ad9

Please sign in to comment.