Skip to content

Commit

Permalink
Merge branch 'develop' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
siebrenf authored Jun 1, 2022
2 parents d69db16 + 61ca482 commit 33ade29
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 44 deletions.
1 change: 1 addition & 0 deletions gimmemotifs/c_metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ double matrix_ed_mean(double matrix1[][4], double matrix2[][4], int length) {

double distance(double col1[], double col2[]) {
// Return the distance between two motifs (Harbison et al.)
// https://www.nature.com/articles/nature02800#Sec2
int n;
double d = 0;

Expand Down
5 changes: 4 additions & 1 deletion gimmemotifs/commands/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from gimmemotifs.background import create_background_file
from gimmemotifs.comparison import MotifComparer, select_nonredundant_motifs
Expand Down Expand Up @@ -255,7 +256,8 @@ def motifs(args):
# At the moment this is not ideal, as scanning is now performed twice
# for this set of non-redundant motifs.
motif_dict = dict([(m.id, m) for m in motifs])
for motif in nr_motifs:
logger.info("creating BED files with scan results")
for motif in tqdm(nr_motifs):
with NamedTemporaryFile(mode="w") as f:
print(motif_dict[motif].to_ppm(), file=f)
f.flush()
Expand All @@ -270,6 +272,7 @@ def motifs(args):
zscore=args.zscore,
gcnorm=args.gc,
bgfile=bgfile,
progress=False,
)

if args.report:
Expand Down
38 changes: 29 additions & 9 deletions gimmemotifs/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from gimmemotifs.config import MotifConfig
from gimmemotifs.c_metrics import pfmscan, score
from gimmemotifs.motif import parse_motifs, read_motifs
from gimmemotifs.utils import pfmfile_location, make_equal_length
from gimmemotifs.utils import pfmfile_location, make_equal_length, ppm_pseudocount

# pool import is at the bottom

Expand Down Expand Up @@ -204,6 +204,7 @@ def chisq(p1, p2):
-------
score : float
"""

return chi2_contingency([p1, p2])[1]


Expand Down Expand Up @@ -440,8 +441,13 @@ def compare_motifs(
combine,
self.max_partial(m1.pwm, m2.pwm, metric, combine),
)
elif metric in ["pcc", "ed", "distance", "wic", "chisq", "ssd"]:
return self.max_partial(m1.pwm, m2.pwm, metric, combine)
elif metric in ["pcc", "ed", "distance", "wic", "chisq", "ssd", "akl"]:
return self.max_partial(
ppm_pseudocount(m1.pwm),
ppm_pseudocount(m2.pwm),
metric,
combine,
)
else:
return self.max_partial(m1.pfm, m2.pfm, metric, combine)

Expand All @@ -455,19 +461,33 @@ def compare_motifs(
combine,
self.max_total(m1.pwm, m2.pwm, metric, combine),
)
elif metric in ["pcc", "akl"]:
# Slightly randomize the weight matrix
elif metric in [
"ed",
"distance",
"wic",
"chisq",
"pcc",
"ssd",
"akl",
"pcc",
]:
return self.max_total(
m1.wiggle_pwm(), m2.wiggle_pwm(), metric, combine
ppm_pseudocount(m1.pwm),
ppm_pseudocount(m2.pwm),
metric,
combine,
)
elif metric in ["ed", "distance", "wic", "chisq", "pcc", "ssd"]:
return self.max_total(m1.pwm, m2.pwm, metric, combine)
else:
return self.max_total(m1.pfm, m2.pfm, metric, combine)

elif match == "subtotal":
if metric in ["pcc", "ed", "distance", "wic", "chisq", "ssd"]:
return self.max_subtotal(m1.pwm, m2.pwm, metric, combine)
return self.max_subtotal(
ppm_pseudocount(m1.pwm),
ppm_pseudocount(m2.pwm),
metric,
combine,
)
else:
return self.max_subtotal(m1.pfm, m2.pfm, metric, combine)
else:
Expand Down
2 changes: 1 addition & 1 deletion gimmemotifs/moap.py
Original file line number Diff line number Diff line change
Expand Up @@ -691,7 +691,7 @@ def moap(
motif_names = [m.id for m in read_motifs(pfmfile)]
scores = []
if method == "classic" or scoring == "count":
logger.info("motif scanning (scores)")
logger.info("motif scanning (counts)")
scores = scan_regionfile_to_table(
inputfile,
genome,
Expand Down
2 changes: 1 addition & 1 deletion gimmemotifs/motif/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(self, pfm=None, ppm=None, places=4):
else:
self.pfm = []
else:
if np.all(np.isclose(np.sum(pfm, 1), 1)) and ppm is None:
if np.all(np.isclose(np.sum(pfm, 1), 1, atol=1e-3)) and ppm is None:
# PFM is specified actually a PPM. We don't mind.
self.ppm = pfm
else:
Expand Down
9 changes: 8 additions & 1 deletion gimmemotifs/motif/_comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,14 @@ def pcc(self, ppm1, ppm2, pos):
ppm1, ppm2 = make_equal_length(ppm1, ppm2, pos, truncate="both")

# Compute pearson correlation between aligned parts of the motif
r = np.array([pearsonr(x, y)[0] for x, y in zip(ppm1, ppm2)])
r = np.array(
[
pearsonr(x, y)[0]
if not (np.all(x == 0.25)) or not (np.all(y == 0.25))
else 0
for x, y in zip(ppm1, ppm2)
]
)
r[np.isnan(r)] = 0

return r.sum()
Expand Down
2 changes: 2 additions & 0 deletions gimmemotifs/orthologs.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,7 @@ def load_orthogroups_in_db(db, genomes, orthofinder_result):
for gene in genes.split(", "):
# for each gene we store its gene_name (if present,
# otherwise .) and gene_id. We load them both in the database.
gene = gene.replace("'", "")
gene_name, gene_id = gene.split("|")
if gene_name != ".":
conn.execute(
Expand All @@ -449,6 +450,7 @@ def load_orthogroups_in_db(db, genomes, orthofinder_result):

# for each gene we store its gene_name (if present,
# otherwise .) and gene_id. We load them both in the database.
gene = gene.replace("'", "")
gene_name, gene_id = gene.split("|")
if gene_name != ".":
conn.execute(
Expand Down
4 changes: 2 additions & 2 deletions gimmemotifs/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ def _current_index(self, subset):
]
return idx_slice

def _translate(self):
def _translate(self, *args, **kwargs):
self._compute_data()
d = super()._translate()
d = super()._translate(*args, **kwargs)
circle_styles = self.circle_styles or []
palette_styles = self.palette_styles or []
col_heading_style = self.col_heading_style or []
Expand Down
Loading

0 comments on commit 33ade29

Please sign in to comment.