From 8a019f1970f600cebae7b4a4503ca6f9eb166bd5 Mon Sep 17 00:00:00 2001 From: Imipenem Date: Sat, 26 Feb 2022 22:18:57 +0100 Subject: [PATCH 1/6] [FEATURE] Display QC metrics of var #239 - display qc metrics of var - obs might be too big to actually display --- ehrapy/preprocessing/__init__.py | 2 +- ehrapy/preprocessing/_quality_control.py | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/ehrapy/preprocessing/__init__.py b/ehrapy/preprocessing/__init__.py index 4df807f2..e62bcb81 100644 --- a/ehrapy/preprocessing/__init__.py +++ b/ehrapy/preprocessing/__init__.py @@ -9,6 +9,6 @@ norm_scale, norm_sqrt, ) -from ehrapy.preprocessing._quality_control import calculate_qc_metrics +from ehrapy.preprocessing._quality_control import calculate_qc_metrics, display_qc_metrics from ehrapy.preprocessing._scanpy_pp_api import * # noqa: E402,F403 from ehrapy.preprocessing.encoding._encode import encode, undo_encoding diff --git a/ehrapy/preprocessing/_quality_control.py b/ehrapy/preprocessing/_quality_control.py index ce7fae67..643374c2 100644 --- a/ehrapy/preprocessing/_quality_control.py +++ b/ehrapy/preprocessing/_quality_control.py @@ -70,6 +70,29 @@ def calculate_qc_metrics( return obs_metrics, var_metrics +def display_qc_metrics(adata: AnnData) -> None: + """Displays the calculated quality control metrics for var of adata. + + Args: + adata: Annotated data matrix. + """ + from rich.console import Console + from rich.table import Table + + table = Table(title="[bold blue]Ehrapy qc metrics of var") + table.add_column("[bold blue]Column name", justify="right", style="bold green") + var_names = list(adata.var_names) + + for col in adata.var.columns: + table.add_column(f"[bold blue]{col}", justify="right", style="bold green") + + for var in range(len(adata.var)): + table.add_row(var_names[var], *map(str, list(adata.var.iloc[var]))) + + console = Console() + console.print(table) + + def _missing_values( arr: np.ndarray, shape: tuple[int, int] = None, df_type: Literal["obs", "var"] = "obs" ) -> np.ndarray: From ac28d63a40655e25a5265c5a1f7de289130d29c9 Mon Sep 17 00:00:00 2001 From: Imipenem Date: Sun, 27 Feb 2022 12:05:20 +0100 Subject: [PATCH 2/6] [FEATURE] Display QC metrics - display qc metrics of var with extra columns if wanted - updated docs --- docs/usage/api.rst | 1 + ...hrapy.preprocessing.display_qc_metrics.rst | 12 +++++ ehrapy/preprocessing/_quality_control.py | 45 ++++++++++++++----- 3 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst diff --git a/docs/usage/api.rst b/docs/usage/api.rst index c210bd90..7d05c4c0 100644 --- a/docs/usage/api.rst +++ b/docs/usage/api.rst @@ -62,6 +62,7 @@ Quality control :toctree: preprocessing preprocessing.calculate_qc_metrics + preprocessing.display_qc_metrics Imputation ++++++++++ diff --git a/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst b/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst new file mode 100644 index 00000000..8f7cc76c --- /dev/null +++ b/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst @@ -0,0 +1,12 @@ +:github_url: ehrapy.preprocessing.display_qc_metrics + +ehrapy.preprocessing.display\_qc\_metrics +========================================= + +.. autofunction:: ehrapy.preprocessing.display_qc_metrics + +.. _sphx_glr_backref_ehrapy.preprocessing.display_qc_metrics: + +.. minigallery:: ehrapy.preprocessing.display_qc_metrics + :add-heading: Gallery + :heading-level: - \ No newline at end of file diff --git a/ehrapy/preprocessing/_quality_control.py b/ehrapy/preprocessing/_quality_control.py index 643374c2..60efe514 100644 --- a/ehrapy/preprocessing/_quality_control.py +++ b/ehrapy/preprocessing/_quality_control.py @@ -5,6 +5,8 @@ import numpy as np import pandas as pd from anndata import AnnData +from rich.console import Console +from rich.table import Table def calculate_qc_metrics( @@ -70,24 +72,43 @@ def calculate_qc_metrics( return obs_metrics, var_metrics -def display_qc_metrics(adata: AnnData) -> None: - """Displays the calculated quality control metrics for var of adata. +def display_qc_metrics(adata: AnnData, extra_columns: list[str] | None = None) -> None: + """Displays the calculated quality control metrics for var of adata. Per default this will display the following features: + ``missing_values_abs``, ``missing_values_pct``, ``mean``, ``median``, ``standard_deviation``, ``max``, ``min``. Args: adata: Annotated data matrix. - """ - from rich.console import Console - from rich.table import Table + extra_columns: List of custom (qc) var columns to be displayed additionally. + """ table = Table(title="[bold blue]Ehrapy qc metrics of var") + # add special column header for the column name table.add_column("[bold blue]Column name", justify="right", style="bold green") var_names = list(adata.var_names) - - for col in adata.var.columns: + # default qc columns added to var + fixed_qc_columns = [ + "missing_values_abs", + "missing_values_pct", + "mean", + "median", + "standard_deviation", + "min", + "max", + ] + # update columns to display with extra columns (if any) + columns_to_display = fixed_qc_columns if not extra_columns else fixed_qc_columns + extra_columns + # check whether all columns exist (qc has been executed before and extra columns are var columns) + if (set(columns_to_display) & set(adata.var.columns)) != set(columns_to_display): + raise QCDisplayError( + "Cannot display QC metrics of current AnnData object. Either QC has not been executed before or " + "some column(s) of the extra_columns parameter are not in var!" + ) + vars_to_display = adata.var[columns_to_display] + # add column headers + for col in vars_to_display: table.add_column(f"[bold blue]{col}", justify="right", style="bold green") - - for var in range(len(adata.var)): - table.add_row(var_names[var], *map(str, list(adata.var.iloc[var]))) + for var in range(len(vars_to_display)): + table.add_row(var_names[var], *map(str, list(vars_to_display.iloc[var]))) console = Console() console.print(table) @@ -185,3 +206,7 @@ def _var_qc_metrics(adata: AnnData, layer: str = None) -> pd.DataFrame: var_metrics["max"] = np.nan return var_metrics + + +class QCDisplayError(Exception): + pass From e4e0149dcf90642f75f14084bddd7d515f66378a Mon Sep 17 00:00:00 2001 From: Imipenem Date: Mon, 28 Feb 2022 19:29:08 +0100 Subject: [PATCH 3/6] PR feedback + updated qc API --- docs/usage/api.rst | 11 +++- docs/usage/plot/ehrapy.plot.qc_metrics.rst | 12 +++++ ...apy.preprocessing.calculate_qc_metrics.rst | 12 ----- ...hrapy.preprocessing.display_qc_metrics.rst | 12 ----- .../ehrapy.preprocessing.qc_metrics.rst | 12 +++++ ehrapy/plot/__init__.py | 1 + ehrapy/plot/ehrapy_plot/plot_qc.py | 50 +++++++++++++++++++ ehrapy/preprocessing/__init__.py | 2 +- ehrapy/preprocessing/_quality_control.py | 50 +------------------ 9 files changed, 86 insertions(+), 76 deletions(-) create mode 100644 docs/usage/plot/ehrapy.plot.qc_metrics.rst delete mode 100644 docs/usage/preprocessing/ehrapy.preprocessing.calculate_qc_metrics.rst delete mode 100644 docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst create mode 100644 docs/usage/preprocessing/ehrapy.preprocessing.qc_metrics.rst create mode 100644 ehrapy/plot/ehrapy_plot/plot_qc.py diff --git a/docs/usage/api.rst b/docs/usage/api.rst index 7d05c4c0..361f1c4b 100644 --- a/docs/usage/api.rst +++ b/docs/usage/api.rst @@ -61,8 +61,7 @@ Quality control .. autosummary:: :toctree: preprocessing - preprocessing.calculate_qc_metrics - preprocessing.display_qc_metrics + preprocessing.qc_metrics Imputation ++++++++++ @@ -199,6 +198,14 @@ Generic plot.ranking plot.dendrogram +Specific +++++++++ + +.. autosummary:: + :toctree: plot + + plot.qc_metrics + Classes +++++++ diff --git a/docs/usage/plot/ehrapy.plot.qc_metrics.rst b/docs/usage/plot/ehrapy.plot.qc_metrics.rst new file mode 100644 index 00000000..36d376c3 --- /dev/null +++ b/docs/usage/plot/ehrapy.plot.qc_metrics.rst @@ -0,0 +1,12 @@ +:github_url: ehrapy.plot.qc_metrics + +ehrapy.plot.qc\_metrics +======================= + +.. autofunction:: ehrapy.plot.qc_metrics + +.. _sphx_glr_backref_ehrapy.plot.qc_metrics: + +.. minigallery:: ehrapy.plot.qc_metrics + :add-heading: Gallery + :heading-level: - \ No newline at end of file diff --git a/docs/usage/preprocessing/ehrapy.preprocessing.calculate_qc_metrics.rst b/docs/usage/preprocessing/ehrapy.preprocessing.calculate_qc_metrics.rst deleted file mode 100644 index 3f1cadc3..00000000 --- a/docs/usage/preprocessing/ehrapy.preprocessing.calculate_qc_metrics.rst +++ /dev/null @@ -1,12 +0,0 @@ -:github_url: ehrapy.preprocessing.calculate_qc_metrics - -ehrapy.preprocessing.calculate\_qc\_metrics -=========================================== - -.. autofunction:: ehrapy.preprocessing.calculate_qc_metrics - -.. _sphx_glr_backref_ehrapy.preprocessing.calculate_qc_metrics: - -.. minigallery:: ehrapy.preprocessing.calculate_qc_metrics - :add-heading: Gallery - :heading-level: - \ No newline at end of file diff --git a/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst b/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst deleted file mode 100644 index 8f7cc76c..00000000 --- a/docs/usage/preprocessing/ehrapy.preprocessing.display_qc_metrics.rst +++ /dev/null @@ -1,12 +0,0 @@ -:github_url: ehrapy.preprocessing.display_qc_metrics - -ehrapy.preprocessing.display\_qc\_metrics -========================================= - -.. autofunction:: ehrapy.preprocessing.display_qc_metrics - -.. _sphx_glr_backref_ehrapy.preprocessing.display_qc_metrics: - -.. minigallery:: ehrapy.preprocessing.display_qc_metrics - :add-heading: Gallery - :heading-level: - \ No newline at end of file diff --git a/docs/usage/preprocessing/ehrapy.preprocessing.qc_metrics.rst b/docs/usage/preprocessing/ehrapy.preprocessing.qc_metrics.rst new file mode 100644 index 00000000..a67b9f90 --- /dev/null +++ b/docs/usage/preprocessing/ehrapy.preprocessing.qc_metrics.rst @@ -0,0 +1,12 @@ +:github_url: ehrapy.preprocessing.qc_metrics + +ehrapy.preprocessing.qc\_metrics +================================ + +.. autofunction:: ehrapy.preprocessing.qc_metrics + +.. _sphx_glr_backref_ehrapy.preprocessing.qc_metrics: + +.. minigallery:: ehrapy.preprocessing.qc_metrics + :add-heading: Gallery + :heading-level: - \ No newline at end of file diff --git a/ehrapy/plot/__init__.py b/ehrapy/plot/__init__.py index deb6aff5..8fb6c576 100644 --- a/ehrapy/plot/__init__.py +++ b/ehrapy/plot/__init__.py @@ -1,2 +1,3 @@ from ehrapy.plot._plot_util import * # noqa: E402,F403 from ehrapy.plot._scanpy_pl_api import * # noqa: E402,F403 +from ehrapy.plot.ehrapy_plot.plot_qc import qc_metrics diff --git a/ehrapy/plot/ehrapy_plot/plot_qc.py b/ehrapy/plot/ehrapy_plot/plot_qc.py new file mode 100644 index 00000000..c4dafcbd --- /dev/null +++ b/ehrapy/plot/ehrapy_plot/plot_qc.py @@ -0,0 +1,50 @@ +from __future__ import annotations + +from anndata import AnnData +from rich.console import Console +from rich.table import Table + +def qc_metrics(adata: AnnData, extra_columns: list[str] | None = None) -> None: + """Plots the calculated quality control metrics for var of adata. Per default this will display the following features: + ``missing_values_abs``, ``missing_values_pct``, ``mean``, ``median``, ``standard_deviation``, ``max``, ``min``. + + Args: + adata: Annotated data matrix. + extra_columns: List of custom (qc) var columns to be displayed additionally. + + """ + table = Table(title="[bold blue]Ehrapy qc metrics of var") + # add special column header for the column name + table.add_column("[bold blue]Column name", justify="right", style="bold green") + var_names = list(adata.var_names) + # default qc columns added to var + fixed_qc_columns = [ + "missing_values_abs", + "missing_values_pct", + "mean", + "median", + "standard_deviation", + "min", + "max", + ] + # update columns to display with extra columns (if any) + columns_to_display = fixed_qc_columns if not extra_columns else fixed_qc_columns + extra_columns + # check whether all columns exist (qc has been executed before and extra columns are var columns) + if (set(columns_to_display) & set(adata.var.columns)) != set(columns_to_display): + raise QCDisplayError( + "Cannot display QC metrics of current AnnData object. Either QC has not been executed before or " + "some column(s) of the extra_columns parameter are not in var!" + ) + vars_to_display = adata.var[columns_to_display] + # add column headers + for col in vars_to_display: + table.add_column(f"[bold blue]{col}", justify="right", style="bold green") + for var in range(len(vars_to_display)): + table.add_row(var_names[var], *map(str, list(vars_to_display.iloc[var]))) + + console = Console() + console.print(table) + + +class QCDisplayError(Exception): + pass diff --git a/ehrapy/preprocessing/__init__.py b/ehrapy/preprocessing/__init__.py index e62bcb81..104272df 100644 --- a/ehrapy/preprocessing/__init__.py +++ b/ehrapy/preprocessing/__init__.py @@ -9,6 +9,6 @@ norm_scale, norm_sqrt, ) -from ehrapy.preprocessing._quality_control import calculate_qc_metrics, display_qc_metrics +from ehrapy.preprocessing._quality_control import qc_metrics from ehrapy.preprocessing._scanpy_pp_api import * # noqa: E402,F403 from ehrapy.preprocessing.encoding._encode import encode, undo_encoding diff --git a/ehrapy/preprocessing/_quality_control.py b/ehrapy/preprocessing/_quality_control.py index 60efe514..ce369710 100644 --- a/ehrapy/preprocessing/_quality_control.py +++ b/ehrapy/preprocessing/_quality_control.py @@ -5,11 +5,9 @@ import numpy as np import pandas as pd from anndata import AnnData -from rich.console import Console -from rich.table import Table -def calculate_qc_metrics( +def qc_metrics( adata: AnnData, qc_vars: Collection[str] = (), layer: str = None, inplace: bool = True ) -> pd.DataFrame | None: """Calculates various quality control metrics. @@ -72,48 +70,6 @@ def calculate_qc_metrics( return obs_metrics, var_metrics -def display_qc_metrics(adata: AnnData, extra_columns: list[str] | None = None) -> None: - """Displays the calculated quality control metrics for var of adata. Per default this will display the following features: - ``missing_values_abs``, ``missing_values_pct``, ``mean``, ``median``, ``standard_deviation``, ``max``, ``min``. - - Args: - adata: Annotated data matrix. - extra_columns: List of custom (qc) var columns to be displayed additionally. - - """ - table = Table(title="[bold blue]Ehrapy qc metrics of var") - # add special column header for the column name - table.add_column("[bold blue]Column name", justify="right", style="bold green") - var_names = list(adata.var_names) - # default qc columns added to var - fixed_qc_columns = [ - "missing_values_abs", - "missing_values_pct", - "mean", - "median", - "standard_deviation", - "min", - "max", - ] - # update columns to display with extra columns (if any) - columns_to_display = fixed_qc_columns if not extra_columns else fixed_qc_columns + extra_columns - # check whether all columns exist (qc has been executed before and extra columns are var columns) - if (set(columns_to_display) & set(adata.var.columns)) != set(columns_to_display): - raise QCDisplayError( - "Cannot display QC metrics of current AnnData object. Either QC has not been executed before or " - "some column(s) of the extra_columns parameter are not in var!" - ) - vars_to_display = adata.var[columns_to_display] - # add column headers - for col in vars_to_display: - table.add_column(f"[bold blue]{col}", justify="right", style="bold green") - for var in range(len(vars_to_display)): - table.add_row(var_names[var], *map(str, list(vars_to_display.iloc[var]))) - - console = Console() - console.print(table) - - def _missing_values( arr: np.ndarray, shape: tuple[int, int] = None, df_type: Literal["obs", "var"] = "obs" ) -> np.ndarray: @@ -206,7 +162,3 @@ def _var_qc_metrics(adata: AnnData, layer: str = None) -> pd.DataFrame: var_metrics["max"] = np.nan return var_metrics - - -class QCDisplayError(Exception): - pass From 250286f93d0ee8d0a427ef20856cf09e11a9ffcd Mon Sep 17 00:00:00 2001 From: Imipenem Date: Mon, 28 Feb 2022 19:36:58 +0100 Subject: [PATCH 4/6] Fix CI --- ehrapy/plot/ehrapy_plot/plot_qc.py | 1 + ehrapy/preprocessing/_data_imputation.py | 4 ++-- tests/preprocessing/test_quality_control.py | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ehrapy/plot/ehrapy_plot/plot_qc.py b/ehrapy/plot/ehrapy_plot/plot_qc.py index c4dafcbd..ffe030ef 100644 --- a/ehrapy/plot/ehrapy_plot/plot_qc.py +++ b/ehrapy/plot/ehrapy_plot/plot_qc.py @@ -4,6 +4,7 @@ from rich.console import Console from rich.table import Table + def qc_metrics(adata: AnnData, extra_columns: list[str] | None = None) -> None: """Plots the calculated quality control metrics for var of adata. Per default this will display the following features: ``missing_values_abs``, ``missing_values_pct``, ``mean``, ``median``, ``standard_deviation``, ``max``, ``min``. diff --git a/ehrapy/preprocessing/_data_imputation.py b/ehrapy/preprocessing/_data_imputation.py index f35a9b60..c905e7b3 100644 --- a/ehrapy/preprocessing/_data_imputation.py +++ b/ehrapy/preprocessing/_data_imputation.py @@ -398,9 +398,9 @@ def _warn_imputation_threshold(adata: AnnData, var_names: list[str] | None, thre adata.var["missing_values_pct"] except KeyError: print("[bold yellow]Quality control metrics missing. Calculating...") - from ehrapy.preprocessing import calculate_qc_metrics + from ehrapy.preprocessing import qc_metrics - calculate_qc_metrics(adata) + qc_metrics(adata) used_var_names = set(adata.var_names) if var_names is None else set(var_names) thresholded_var_names = set(adata.var[adata.var["missing_values_pct"] > threshold].index) & set(used_var_names) diff --git a/tests/preprocessing/test_quality_control.py b/tests/preprocessing/test_quality_control.py index a0654740..77b6a337 100644 --- a/tests/preprocessing/test_quality_control.py +++ b/tests/preprocessing/test_quality_control.py @@ -4,7 +4,7 @@ import pandas as pd from anndata import AnnData -from ehrapy.preprocessing._quality_control import _obs_qc_metrics, _var_qc_metrics, calculate_qc_metrics +from ehrapy.preprocessing._quality_control import _obs_qc_metrics, _var_qc_metrics, qc_metrics CURRENT_DIR = Path(__file__).parent _TEST_PATH = f"{CURRENT_DIR}/test_preprocessing" @@ -45,7 +45,7 @@ def test_var_qc_metrics(self): assert np.allclose(var_metrics["max"].values, np.array([np.nan, np.nan, 41.419998]), equal_nan=True) def test_calculate_qc_metrics(self): - obs_metrics, var_metrics = calculate_qc_metrics(self.test_adata, inplace=True) + obs_metrics, var_metrics = qc_metrics(self.test_adata, inplace=True) assert obs_metrics is not None assert var_metrics is not None From d3aa2c12fcbcea2e56a353ab401e78438d444d55 Mon Sep 17 00:00:00 2001 From: Imipenem Date: Mon, 28 Feb 2022 19:45:37 +0100 Subject: [PATCH 5/6] Update Notebook --- docs/tutorials/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index 81620655..00b100a7 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit 8162065556f3c67c0d4538af7ac1945c895166cc +Subproject commit 00b100a77fbb5096c2b566692eb31fad85c24b91 From e31d4cac261c79dd89f33199ae45be2853cb264f Mon Sep 17 00:00:00 2001 From: Imipenem Date: Mon, 28 Feb 2022 20:20:59 +0100 Subject: [PATCH 6/6] Update notebook mimic2 intro --- docs/tutorials/notebooks | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/notebooks b/docs/tutorials/notebooks index 00b100a7..4bfe8447 160000 --- a/docs/tutorials/notebooks +++ b/docs/tutorials/notebooks @@ -1 +1 @@ -Subproject commit 00b100a77fbb5096c2b566692eb31fad85c24b91 +Subproject commit 4bfe8447b045b66909f9e9eb2e1de67da526f40a