Skip to content

Commit

Permalink
feat: diffexp returns two genesets (#2230)
Browse files Browse the repository at this point in the history
* feat: return two lists for diffexp (#2221)

* sp

* split out derive sort order, tests passing

* sp

* return diff exp results in two lists

* update

* copy implementation over to desktop

* add tests for two lists

* small fixes to complete backend implementation

* accept new diffexp response

* map diff exp response to genesets

* delete )

* name diffexp genesets with population names

* take constants out of state and allow width prop to override

* shorten mini-histogram; properly truncate and resize depending on expansion

* prepend new genesets

* rename data within diffexp action

* backend

* move diffexp ttest to common code module, update tests

* update for unit tests

* reference actual var

Co-authored-by: Madison Dunitz <[email protected]>
Co-authored-by: Madison Dunitz <[email protected]>
  • Loading branch information
3 people authored Jun 8, 2021
1 parent 7ed53c0 commit 28b526b
Show file tree
Hide file tree
Showing 25 changed files with 268 additions and 284 deletions.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ def diffexp_ttest(adaptor, maskA, maskB, top_n=8, diffexp_lfc_cutoff=0.01):
:param maskB: observation selection mask for set 2
:param top_n: number of variables to return stats for
:param diffexp_lfc_cutoff: minimum
:return: for top N genes, [ varindex, logfoldchange, pval, pval_adj ]
absolute value returning [ varindex, logfoldchange, pval, pval_adj ] for top N genes
:return: dict of two lists for top N genes, {"positive": [[ varindex, logfoldchange, pval, pval_adj ], ...], "negative": [[ varindex, logfoldchange, pval, pval_adj ], ...]}
"""

dataA = adaptor.get_X_array(maskA, None)
Expand Down Expand Up @@ -66,32 +67,39 @@ def diffexp_ttest_from_mean_var(meanA, varA, nA, meanB, varB, nB, top_n, diffexp
# logfoldchanges: log2(meanA / meanB)
logfoldchanges = np.log2(np.abs((meanA + 1e-9) / (meanB + 1e-9)))

stats_to_sort = tscores
# find all with lfc > cutoff
lfc_above_cutoff_idx = np.nonzero(np.abs(logfoldchanges) > diffexp_lfc_cutoff)[0]
stats_to_sort = np.abs(tscores)

# derive sort order
if lfc_above_cutoff_idx.shape[0] > top_n:
if lfc_above_cutoff_idx.shape[0] > top_n*2:
# partition top N
rel_t_partition = np.argpartition(stats_to_sort[lfc_above_cutoff_idx], -top_n)[-top_n:]
t_partition = lfc_above_cutoff_idx[rel_t_partition]
rel_t_partition = np.argpartition(stats_to_sort[lfc_above_cutoff_idx], (top_n, -top_n))
rel_t_partition_top_n = np.concatenate((rel_t_partition[-top_n:], rel_t_partition[:top_n]))
t_partition = lfc_above_cutoff_idx[rel_t_partition_top_n]
# sort the top N partition
rel_sort_order = np.argsort(stats_to_sort[t_partition])[::-1]
sort_order = t_partition[rel_sort_order]
else:
# partition and sort top N, ignoring lfc cutoff
partition = np.argpartition(stats_to_sort, -top_n)[-top_n:]
rel_sort_order = np.argsort(stats_to_sort[partition])[::-1]
partition = np.argpartition(stats_to_sort, (top_n, -top_n))
partition_top_n = np.concatenate((partition[-top_n:], partition[:top_n]))

rel_sort_order = np.argsort(stats_to_sort[partition_top_n])[::-1]
indices = np.indices(stats_to_sort.shape)[0]
sort_order = indices[partition][rel_sort_order]
sort_order = indices[partition_top_n][rel_sort_order]

# top n slice based upon sort order
logfoldchanges_top_n = logfoldchanges[sort_order]
pvals_top_n = pvals[sort_order]
pvals_adj_top_n = pvals_adj[sort_order]

# varIndex, logfoldchange, pval, pval_adj
result = [[sort_order[i], logfoldchanges_top_n[i], pvals_top_n[i], pvals_adj_top_n[i]] for i in range(top_n)]
result = {"positive": [[sort_order[i], logfoldchanges_top_n[i], pvals_top_n[i], pvals_adj_top_n[i]] for i in
range(top_n)],
"negative": [[sort_order[i], logfoldchanges_top_n[i], pvals_top_n[i], pvals_adj_top_n[i]] for i in
range(-1, -1 - top_n, -1)], }

return result


Expand Down
1 change: 0 additions & 1 deletion backend/czi_hosted/common/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ def diffexp_obs_post(request, data_adaptor):
try:
# TODO: implement varfilter mode
mode = DiffExpMode(args["mode"])

if mode == DiffExpMode.VAR_FILTER or "varFilter" in args:
return abort_and_log(HTTPStatus.NOT_IMPLEMENTED, "varFilter not enabled")

Expand Down
18 changes: 9 additions & 9 deletions backend/czi_hosted/compute/diffexp_cxg.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from numba import jit

from backend.czi_hosted.data_cxg.cxg_util import pack_selector_from_indices
from backend.czi_hosted.compute.diffexp_generic import diffexp_ttest_from_mean_var, mean_var_n
from backend.common.compute.diffexp_generic import diffexp_ttest_from_mean_var, mean_var_n
from backend.common.errors import ComputeError

"""
Expand Down Expand Up @@ -115,14 +115,14 @@ def diffexp_ttest(adaptor, maskA, maskB, top_n=8, diffexp_lfc_cutoff=0.01):
meanB += X_col_shift

r = diffexp_ttest_from_mean_var(
meanA.astype(dtype),
varA.astype(dtype),
nA,
meanB.astype(dtype),
varB.astype(dtype),
nB,
top_n,
diffexp_lfc_cutoff,
meanA=meanA.astype(dtype),
varA=varA.astype(dtype),
nA=nA,
meanB=meanB.astype(dtype),
varB=varB.astype(dtype),
nB=nB,
top_n=top_n,
diffexp_lfc_cutoff=diffexp_lfc_cutoff
)

return r
Expand Down
2 changes: 1 addition & 1 deletion backend/czi_hosted/data_anndata/anndata_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from scipy import sparse
from server_timing import Timing as ServerTiming

import backend.czi_hosted.compute.diffexp_generic as diffexp_generic
import backend.common.compute.diffexp_generic as diffexp_generic
from backend.common.colors import convert_anndata_category_colors_to_cxg_category_colors
from backend.common.constants import Axis, MAX_LAYOUTS
from backend.czi_hosted.common.corpora import corpora_get_props_from_anndata
Expand Down
7 changes: 4 additions & 3 deletions backend/czi_hosted/data_common/data_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _index_filter_to_mask(self, filter, count):
mask = np.zeros((count,), dtype=np.bool)
for i in filter:
if type(i) == list:
mask[i[0] : i[1]] = True
mask[i[0]: i[1]] = True
else:
mask[i] = True
return mask
Expand Down Expand Up @@ -321,11 +321,12 @@ def diffexp_topN(self, obsFilterA, obsFilterB, top_n=None):
top_n = self.dataset_config.diffexp__top_n

if self.server_config.exceeds_limit(
"diffexp_cellcount_max", np.count_nonzero(obs_mask_A) + np.count_nonzero(obs_mask_B)
"diffexp_cellcount_max", np.count_nonzero(obs_mask_A) + np.count_nonzero(obs_mask_B)
):
raise ExceedsLimitError("Diffexp request exceeds max cell count limit")

result = self.compute_diffexp_ttest(obs_mask_A, obs_mask_B, top_n, self.dataset_config.diffexp__lfc_cutoff)
result = self.compute_diffexp_ttest(
maskA=obs_mask_A, maskB=obs_mask_B, top_n=top_n, lfc_cutoff=self.dataset_config.diffexp__lfc_cutoff)

try:
return jsonify_numpy(result)
Expand Down
3 changes: 2 additions & 1 deletion backend/czi_hosted/data_cxg/cxg_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def compute_diffexp_ttest(self, maskA, maskB, top_n=None, lfc_cutoff=None):
top_n = self.dataset_config.diffexp__top_n
if lfc_cutoff is None:
lfc_cutoff = self.dataset_config.diffexp__lfc_cutoff
return diffexp_cxg.diffexp_ttest(self, maskA, maskB, top_n, lfc_cutoff)
return diffexp_cxg.diffexp_ttest(
adaptor=self, maskA=maskA, maskB=maskB, top_n=top_n, diffexp_lfc_cutoff=lfc_cutoff)

def get_colors(self):
if self.cxg_version == "0.0":
Expand Down
134 changes: 0 additions & 134 deletions backend/server/compute/diffexp_generic.py

This file was deleted.

2 changes: 1 addition & 1 deletion backend/server/data_anndata/anndata_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from scipy import sparse
from server_timing import Timing as ServerTiming

import backend.server.compute.diffexp_generic as diffexp_generic
import backend.common.compute.diffexp_generic as diffexp_generic
from backend.common.colors import convert_anndata_category_colors_to_cxg_category_colors
from backend.common.constants import Axis, MAX_LAYOUTS
from backend.server.common.corpora import corpora_get_props_from_anndata
Expand Down
9 changes: 7 additions & 2 deletions backend/server/data_common/data_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def get_embedding_array(self, ename, dims=2):

@abstractmethod
def compute_embedding(self, method, filter):
"""compute a new embedding on the specified obs subset, and return the embedding schema. """
"""compute a new embedding on the specified obs subset, and return the embedding schema."""
pass

@abstractmethod
Expand Down Expand Up @@ -324,7 +324,12 @@ def diffexp_topN(self, obsFilterA, obsFilterB, top_n=None):
):
raise ExceedsLimitError("Diffexp request exceeds max cell count limit")

result = self.compute_diffexp_ttest(obs_mask_A, obs_mask_B, top_n, self.dataset_config.diffexp__lfc_cutoff)
result = self.compute_diffexp_ttest(
maskA=obs_mask_A,
maskB=obs_mask_B,
top_n=top_n,
lfc_cutoff=self.dataset_config.diffexp__lfc_cutoff,
)

try:
return jsonify_numpy(result)
Expand Down
3 changes: 2 additions & 1 deletion backend/test/test_czi_hosted/performance/run_diffexp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import numpy as np

from backend.czi_hosted.common.config.app_config import AppConfig
from backend.czi_hosted.compute import diffexp_generic, diffexp_cxg
from backend.czi_hosted.compute import diffexp_cxg
from backend.common.compute import diffexp_generic
from backend.czi_hosted.data_common.matrix_loader import MatrixDataLoader
from backend.czi_hosted.data_cxg.cxg_adaptor import CxgAdaptor

Expand Down
7 changes: 5 additions & 2 deletions backend/test/test_czi_hosted/unit/common/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@ def test_diff_exp(self):
self.assertEqual(result.status_code, HTTPStatus.OK)
self.assertEqual(result.headers["Content-Type"], "application/json")
result_data = result.json()
self.assertEqual(len(result_data), 7)
self.assertEqual(len(result_data['positive']), 7)
self.assertEqual(len(result_data['negative']), 7)

def test_diff_exp_indices(self):
endpoint = "diffexp/obs"
Expand All @@ -173,7 +174,8 @@ def test_diff_exp_indices(self):
self.assertEqual(result.status_code, HTTPStatus.OK)
self.assertEqual(result.headers["Content-Type"], "application/json")
result_data = result.json()
self.assertEqual(len(result_data), 10)
self.assertEqual(len(result_data['positive']), 10)
self.assertEqual(len(result_data['negative']), 10)

def test_get_annotations_var_fbs(self):
endpoint = "annotations/var"
Expand Down Expand Up @@ -382,6 +384,7 @@ def test_post_summaryvar(self):
query_hash = hashlib.sha1(query.encode()).hexdigest()
url = f"{self.URL_BASE}{endpoint}?key={query_hash}"
result = self.session.post(url, headers=headers, data=query)

self.assertEqual(result.status_code, HTTPStatus.OK)
self.assertEqual(result.headers["Content-Type"], "application/octet-stream")
df = decode_fbs.decode_matrix_FBS(result.content)
Expand Down
Loading

0 comments on commit 28b526b

Please sign in to comment.