Skip to content

Commit

Permalink
Merge pull request #323 from theislab/feature/display_qc_metrics
Browse files Browse the repository at this point in the history
[FEATURE] Display QC metrics of var #239
  • Loading branch information
Zethson authored Feb 28, 2022
2 parents 8e4a3cd + e31d4ca commit f801077
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 20 deletions.
2 changes: 1 addition & 1 deletion docs/tutorials/notebooks
10 changes: 9 additions & 1 deletion docs/usage/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Quality control
.. autosummary::
:toctree: preprocessing

preprocessing.calculate_qc_metrics
preprocessing.qc_metrics

Imputation
++++++++++
Expand Down Expand Up @@ -198,6 +198,14 @@ Generic
plot.ranking
plot.dendrogram

Specific
++++++++

.. autosummary::
:toctree: plot

plot.qc_metrics

Classes
+++++++

Expand Down
12 changes: 12 additions & 0 deletions docs/usage/plot/ehrapy.plot.qc_metrics.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
:github_url: ehrapy.plot.qc_metrics

ehrapy.plot.qc\_metrics
=======================

.. autofunction:: ehrapy.plot.qc_metrics

.. _sphx_glr_backref_ehrapy.plot.qc_metrics:

.. minigallery:: ehrapy.plot.qc_metrics
:add-heading: Gallery
:heading-level: -

This file was deleted.

12 changes: 12 additions & 0 deletions docs/usage/preprocessing/ehrapy.preprocessing.qc_metrics.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
:github_url: ehrapy.preprocessing.qc_metrics

ehrapy.preprocessing.qc\_metrics
================================

.. autofunction:: ehrapy.preprocessing.qc_metrics

.. _sphx_glr_backref_ehrapy.preprocessing.qc_metrics:

.. minigallery:: ehrapy.preprocessing.qc_metrics
:add-heading: Gallery
:heading-level: -
1 change: 1 addition & 0 deletions ehrapy/plot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from ehrapy.plot._plot_util import * # noqa: E402,F403
from ehrapy.plot._scanpy_pl_api import * # noqa: E402,F403
from ehrapy.plot.ehrapy_plot.plot_qc import qc_metrics
51 changes: 51 additions & 0 deletions ehrapy/plot/ehrapy_plot/plot_qc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

from anndata import AnnData
from rich.console import Console
from rich.table import Table


def qc_metrics(adata: AnnData, extra_columns: list[str] | None = None) -> None:
    """Displays the calculated quality control metrics for var of adata as a table in the console.

    Per default this will display the following features:
    ``missing_values_abs``, ``missing_values_pct``, ``mean``, ``median``, ``standard_deviation``, ``min``, ``max``.

    Args:
        adata: Annotated data matrix. Its ``var`` must already contain the default QC columns listed above.
        extra_columns: List of custom (qc) var columns to be displayed additionally.
            Columns that repeat a default column are only shown once.

    Raises:
        QCDisplayError: If at least one of the columns to display is not a column of ``adata.var``
            (QC has not been calculated yet or an entry of ``extra_columns`` does not exist).
    """
    # default qc columns added to var
    fixed_qc_columns = [
        "missing_values_abs",
        "missing_values_pct",
        "mean",
        "median",
        "standard_deviation",
        "min",
        "max",
    ]
    # a single column name passed as a plain string must not be split into characters
    if isinstance(extra_columns, str):
        extra_columns = [extra_columns]
    # accept any iterable of names (not only lists) and drop duplicates while keeping the order,
    # so a column is never rendered twice
    columns_to_display = list(dict.fromkeys([*fixed_qc_columns, *(extra_columns or ())]))

    # check whether all columns exist (qc has been executed before and extra columns are var columns);
    # done before any table is built so that the error can name exactly what is missing
    available_columns = set(adata.var.columns)
    missing_columns = [col for col in columns_to_display if col not in available_columns]
    if missing_columns:
        raise QCDisplayError(
            "Cannot display QC metrics of current AnnData object. Either QC has not been executed before or "
            "some column(s) of the extra_columns parameter are not in var! "
            f"Missing column(s): {', '.join(map(str, missing_columns))}"
        )
    vars_to_display = adata.var[columns_to_display]

    table = Table(title="[bold blue]Ehrapy qc metrics of var")
    # add special column header for the column name
    table.add_column("[bold blue]Column name", justify="right", style="bold green")
    # add column headers
    for col in vars_to_display.columns:
        table.add_column(f"[bold blue]{col}", justify="right", style="bold green")
    # itertuples keeps the dtype of every column; a row-wise ``.iloc`` lookup would build a Series per row
    # and upcast integer columns (e.g. the absolute missing value counts) to float
    rows = vars_to_display.itertuples(index=False, name=None)
    for var_name, row in zip(adata.var_names, rows):
        table.add_row(str(var_name), *map(str, row))

    console = Console()
    console.print(table)


class QCDisplayError(Exception):
    """Raised when the QC metrics of an AnnData object cannot be displayed."""
2 changes: 1 addition & 1 deletion ehrapy/preprocessing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@
norm_scale,
norm_sqrt,
)
from ehrapy.preprocessing._quality_control import calculate_qc_metrics
from ehrapy.preprocessing._quality_control import qc_metrics
from ehrapy.preprocessing._scanpy_pp_api import * # noqa: E402,F403
from ehrapy.preprocessing.encoding._encode import encode, undo_encoding
4 changes: 2 additions & 2 deletions ehrapy/preprocessing/_data_imputation.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,9 +398,9 @@ def _warn_imputation_threshold(adata: AnnData, var_names: list[str] | None, thre
adata.var["missing_values_pct"]
except KeyError:
print("[bold yellow]Quality control metrics missing. Calculating...")
from ehrapy.preprocessing import calculate_qc_metrics
from ehrapy.preprocessing import qc_metrics

calculate_qc_metrics(adata)
qc_metrics(adata)
used_var_names = set(adata.var_names) if var_names is None else set(var_names)

thresholded_var_names = set(adata.var[adata.var["missing_values_pct"] > threshold].index) & set(used_var_names)
Expand Down
2 changes: 1 addition & 1 deletion ehrapy/preprocessing/_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from anndata import AnnData


def calculate_qc_metrics(
def qc_metrics(
adata: AnnData, qc_vars: Collection[str] = (), layer: str = None, inplace: bool = True
) -> pd.DataFrame | None:
"""Calculates various quality control metrics.
Expand Down
4 changes: 2 additions & 2 deletions tests/preprocessing/test_quality_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd
from anndata import AnnData

from ehrapy.preprocessing._quality_control import _obs_qc_metrics, _var_qc_metrics, calculate_qc_metrics
from ehrapy.preprocessing._quality_control import _obs_qc_metrics, _var_qc_metrics, qc_metrics

CURRENT_DIR = Path(__file__).parent
_TEST_PATH = f"{CURRENT_DIR}/test_preprocessing"
Expand Down Expand Up @@ -45,7 +45,7 @@ def test_var_qc_metrics(self):
assert np.allclose(var_metrics["max"].values, np.array([np.nan, np.nan, 41.419998]), equal_nan=True)

def test_calculate_qc_metrics(self):
obs_metrics, var_metrics = calculate_qc_metrics(self.test_adata, inplace=True)
obs_metrics, var_metrics = qc_metrics(self.test_adata, inplace=True)

assert obs_metrics is not None
assert var_metrics is not None
Expand Down

0 comments on commit f801077

Please sign in to comment.