Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make all imputation methods consistent in regard to encoding requirements #827

Merged
merged 20 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ehrapy/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def __init__(
figdir: str | Path = "./figures/",
cache_compression: str | None = "lzf",
max_memory=15,
n_jobs: int = 1,
Zethson marked this conversation as resolved.
Show resolved Hide resolved
n_jobs: int = -1,
logfile: str | Path | None = None,
categories_to_ignore: Iterable[str] = ("N/A", "dontknow", "no_gate", "?"),
_frameon: bool = True,
Expand Down
6 changes: 3 additions & 3 deletions ehrapy/core/_tool_available.py → ehrapy/_utils_available.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from subprocess import PIPE, Popen


def _check_module_importable(package: str) -> bool: # pragma: no cover
def _check_module_importable(package: str) -> bool:
"""Checks whether a module is installed and can be loaded.

Args:
Expand All @@ -19,7 +19,7 @@ def _check_module_importable(package: str) -> bool: # pragma: no cover
return module_available


def _shell_command_accessible(command: list[str]) -> bool: # pragma: no cover
def _shell_command_accessible(command: list[str]) -> bool:
"""Checks whether the provided command is accessible in the current shell.

Args:
Expand All @@ -29,7 +29,7 @@ def _shell_command_accessible(command: list[str]) -> bool: # pragma: no cover
True if the command is accessible, False otherwise.
"""
command_accessible = Popen(command, stdout=PIPE, stderr=PIPE, universal_newlines=True, shell=True)
(commmand_stdout, command_stderr) = command_accessible.communicate()
command_accessible.communicate()
if command_accessible.returncode != 0:
return False

Expand Down
File renamed without changes.
21 changes: 21 additions & 0 deletions ehrapy/_utils_rendering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import functools

from rich.progress import Progress, SpinnerColumn


def spinner(message: str = "Running task"):
    """Build a decorator that shows a rich spinner while the decorated callable runs.

    Args:
        message: Text displayed next to the spinner; it is prefixed with the ``[blue]`` markup tag.

    Returns:
        A decorator. The callable it produces forwards all positional and keyword arguments
        to the wrapped function and hands back that function's return value unchanged.
    """
    task_description = f"[blue]{message}"

    def decorate(func):
        @functools.wraps(func)
        def run_with_spinner(*args, **kwargs):
            live_display = Progress(
                "[progress.description]{task.description}",
                SpinnerColumn(),
                refresh_per_second=1500,
            )
            # The progress display is only alive for the duration of the wrapped call.
            with live_display as progress:
                progress.add_task(task_description, total=1)
                return func(*args, **kwargs)

        return run_with_spinner

    return decorate
52 changes: 49 additions & 3 deletions ehrapy/anndata/anndata_ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import random
from collections import OrderedDict
from string import ascii_letters
from typing import TYPE_CHECKING, NamedTuple
from typing import TYPE_CHECKING, Any, NamedTuple

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -303,7 +303,7 @@ def move_to_x(adata: AnnData, to_x: list[str] | str) -> AnnData:
return new_adata


def _get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
def get_column_indices(adata: AnnData, col_names: str | Iterable[str]) -> list[int]:
"""Fetches the column indices in X for a given list of column names

Args:
Expand Down Expand Up @@ -383,7 +383,7 @@ def set_numeric_vars(
if copy:
adata = adata.copy()

vars_idx = _get_column_indices(adata, vars)
vars_idx = get_column_indices(adata, vars)

adata.X[:, vars_idx] = values

Expand Down Expand Up @@ -663,3 +663,49 @@ def get_rank_features_df(

class NotEncodedError(AssertionError):
    """AssertionError subclass with no additional behavior.

    NOTE(review): presumably raised when data that must be encoded is not; the raising
    sites are outside this view, so confirm against the callers.
    """


def _are_ndarrays_equal(arr1: np.ndarray, arr2: np.ndarray) -> np.bool_:
    """Tell whether two arrays hold the same values position by position.

    Note: A NaN in one array matches a NaN at the same position in the other.

    Args:
        arr1: First array to compare
        arr2: Second array to compare

    Returns:
        True if every pair of corresponding members is equal (or both are NaN)
    """
    # Object-dtype comparison so that mixed content (strings, None, numbers) is compared per element.
    same_value = np.equal(arr1, arr2, dtype=object)
    # A value that differs from itself is NaN; positions where both sides are NaN count as equal.
    both_nan = (arr1 != arr1) & (arr2 != arr2)
    return np.all(same_value | both_nan)


def _is_val_missing(data: np.ndarray) -> np.ndarray[Any, np.dtype[np.bool_]]:
    """Flag the missing entries of an AnnData matrix.

    An entry counts as missing when it is ``None``, the empty string, or not equal to
    itself (which is how NaN behaves).

    Args:
        data: The AnnData matrix to check

    Returns:
        A boolean array with the same shape as the input, True wherever the value is missing
    """
    is_placeholder = np.isin(data, [None, ""])
    # NaN is the only value that compares unequal to itself.
    is_nan = data != data
    return is_placeholder | is_nan


def _to_dense_matrix(adata: AnnData, layer: str | None = None) -> np.ndarray:  # pragma: no cover
    """Fetch X or a named layer from an AnnData object as a dense matrix.

    Args:
        adata: The AnnData to read the matrix from.
        layer: Name of the layer to read. When None, X is used.

    Returns:
        The selected matrix in dense form. A sparse matrix is converted with ``toarray()``,
        which yields a new array; otherwise the stored matrix itself is returned (a reference,
        not a copy).
    """
    from scipy.sparse import issparse

    matrix = adata.X if layer is None else adata.layers[layer]
    if issparse(matrix):
        return matrix.toarray()
    return matrix
2 changes: 1 addition & 1 deletion ehrapy/plot/_scanpy_pl_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import scanpy as sc
from scanpy.plotting import DotPlot, MatrixPlot, StackedViolin

from ehrapy._doc_util import (
from ehrapy._utils_doc import (
_doc_params,
doc_adata_color_etc,
doc_common_groupby_plot_args,
Expand Down
Loading
Loading