Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update validation for uns['spatial'] #1129

Merged
merged 15 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@

VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
SPATIAL_HIRES_IMAGE_MAX_DIEMSNION_SIZE_VISIUM_11MM = 4000
ejmolinelli marked this conversation as resolved.
Show resolved Hide resolved

ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
Expand Down Expand Up @@ -95,9 +96,11 @@ def _is_supported_spatial_assay(self) -> bool:
"""
if self.is_spatial is None:
try:
self.is_spatial = False
if self.adata.obs.assay_ontology_term_id.isin([ASSAY_VISIUM, ASSAY_SLIDE_SEQV2]).any():
self.is_spatial = True
_spatial = (
self._is_visium_including_descendants()
or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).any()
)
self.is_spatial = bool(_spatial)
except AttributeError:
# specific error reporting will occur downstream in the validation
self.is_spatial = False
Expand Down Expand Up @@ -1693,7 +1696,11 @@ def _check_spatial_uns(self):
self.errors.append("uns['spatial'][library_id]['images'] must contain the key 'hires'.")
# hires is specified: proceed with validation of hires.
else:
self._validate_spatial_image_shape("hires", uns_images["hires"], SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE)
_assay_term = self.adata.obs["assay_ontology_term_id"].values[0]
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
if is_ontological_descendant_of(ONTOLOGY_PARSER, _assay_term, "EFO:0022860", True):
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIEMSNION_SIZE_VISIUM_11MM
self._validate_spatial_image_shape("hires", uns_images["hires"], _max_size)

# fullres is optional.
uns_fullres = uns_images.get("fullres")
Expand Down Expand Up @@ -1802,12 +1809,12 @@ def _is_visium_including_descendants(self) -> bool:
# check if any assay_ontology_term_ids are descendants of VISIUM
includes_and_visium = (
self.adata.obs[_assay_key]
.astype("string")
.apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
.any()
)
self.is_visium = includes_and_visium

# save state and return
self.is_visium = includes_and_visium
return includes_and_visium

def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
Expand Down
12 changes: 9 additions & 3 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import tempfile
import unittest
from copy import deepcopy

import anndata
import fixtures.examples_validate as examples
Expand Down Expand Up @@ -1673,11 +1674,16 @@ def test_should_warn_for_low_gene_count(self, validator_with_adata):
Raise a warning if there are too few genes
"""
validator = validator_with_adata
# NOTE:[EM] changing the schema def here is stateful and results in unpredictable test results.
# Reset after mutating.
_old_schema = deepcopy(validator.schema_def.copy())

validator.schema_def["components"]["var"]["warn_if_less_than_rows"] = 100
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix."
]
validator.schema_def = _old_schema

@pytest.mark.parametrize(
"df,column",
Expand Down Expand Up @@ -2198,7 +2204,7 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
]
assert validator.is_spatial is False
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
# "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
ejmolinelli marked this conversation as resolved.
Show resolved Hide resolved
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2248,7 +2254,7 @@ def test_obsm_values_warn_start_with_X(self, validator_with_adata):
validator.adata.obsm["harmony"] = pd.DataFrame(validator.adata.obsm["X_umap"], index=validator.adata.obs_names)
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
# "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2282,7 +2288,7 @@ def test_obsm_values_key_start_with_number(self, validator_with_adata):
"'pandas.core.frame.DataFrame'>').",
]
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
# "WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' 3D is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down
145 changes: 107 additions & 38 deletions cellxgene_schema_cli/tests/test_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0,
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED,
SPATIAL_HIRES_IMAGE_MAX_DIEMSNION_SIZE_VISIUM_11MM,
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE,
Validator,
validate,
)
Expand Down Expand Up @@ -454,19 +456,37 @@ def test__validate_spatial_forbidden_if_not_visium_or_slide_seqv2(self):
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)." in validator.errors[0]
)

def test__validate_spatial_required_if_visium(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, is_descendant",
[("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
)
def test__validate_spatial_required_if_visium(self, assay_ontology_term_id, is_descendant):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.uns = good_uns.copy()
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id

# Confirm spatial is required for Visium.
validator._check_spatial_uns()
assert len(validator.errors) == 1
assert (
"A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)." in validator.errors[0]
)
if is_descendant:
# check pass if 'spatial' included
validator.adata.uns = good_uns_with_visium_spatial.copy()
validator._check_spatial_uns()
assert len(validator.errors) == 0
validator.reset()

# check fail if 'spatial' not included
validator.adata.uns = good_uns.copy()
validator._check_spatial_uns()
assert validator.errors == [
"A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
]
validator.reset()
else:
# check fail if 'spatial' included
validator.adata.uns = good_uns_with_visium_spatial.copy()
validator._check_spatial_uns()
assert len(validator.errors) == 1
validator.reset()

def test__validate_spatial_required_if_slide_seqV2(self):
validator: Validator = Validator()
Expand Down Expand Up @@ -496,16 +516,26 @@ def test__validate_spatial_allowed_keys_error(self):
"More than two top-level keys detected:" in validator.errors[0]
)

def test__validate_is_single_required_visium_error(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, is_descendant",
[("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
)
def test__validate_is_single_required_visium_error(self, assay_ontology_term_id, is_descendant):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.uns["spatial"].pop("is_single")

# Confirm is_single is identified as required.
validator._check_spatial_uns()
assert validator.errors
assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0]

if is_descendant:
# if spatial, MUST specify `is_single`
assert "uns['spatial'] must contain the key 'is_single'." in validator.errors[0]
else:
# if not spatial, MUST NOT speciffy `is_single`
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: specify

assert validator.errors == [
"uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values 'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
ejmolinelli marked this conversation as resolved.
Show resolved Hide resolved
]

def test__validate_is_single_required_slide_seqV2_error(self):
validator: Validator = Validator()
Expand Down Expand Up @@ -560,19 +590,36 @@ def test__validate_library_id_forbidden_if_visium_or_is_single_false(self):
assert len(validator.errors) == 1
assert f"uns['spatial'][library_id] {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN}." in validator.errors[0]

def test__validate_library_id_required_if_visium(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, is_descendant",
[("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
)
def test__validate_library_id_required_if_visium(self, assay_ontology_term_id, is_descendant):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.uns["spatial"].pop(visium_library_id)

# Confirm library_id is identified as required.
validator._check_spatial_uns()
assert validator.errors
assert (
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
in validator.errors[0]
)
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
if is_descendant:
# if spatial, `library_id` must exist
validator._check_spatial_uns()
assert len(validator.errors) == 0
validator.reset()

# if spatial, but missing from `uns`
validator.adata.uns["spatial"].pop(visium_library_id)
validator._check_spatial_uns()
assert validator.errors == [
f"uns['spatial'] must contain at least one key representing the library_id when {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}."
]
else:
# if not spatial, MUST NOT define `library_id`
validator.adata.uns["spatial"][visium_library_id] = {"images": []}
validator._check_spatial_uns()
# Report the most general top level error
assert validator.errors == [
"uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values 'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
]

@pytest.mark.parametrize("library_id", [None, "invalid", 1, 1.0, True])
def test__validate_library_id_type_error(self, library_id):
Expand Down Expand Up @@ -610,7 +657,11 @@ def test__validate_images_required_error(self):
assert validator.errors
assert "uns['spatial'][library_id] must contain the key 'images'." in validator.errors[0]

def test__validate_images_allowed_keys_error(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, is_descendant",
[("EFO:0010961", True), ("EFO:0022858", True), ("EFO:0030029", False), ("EFO:0002697", False)],
)
def test__validate_images_allowed_keys_error(self, assay_ontology_term_id, is_descendant):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
Expand Down Expand Up @@ -730,33 +781,51 @@ def test__validate_images_image_is_shape_error(self, image_name):
"for example) or 4 (RGBA color model for example) for its last dimension" in validator.errors[0]
)

def test__validate_images_hires_max_dimension_greater_than_error(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, hi_res_size, image_max",
[
("EFO:0022858", 2001, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
("EFO:0022860", 4001, SPATIAL_HIRES_IMAGE_MAX_DIEMSNION_SIZE_VISIUM_11MM),
ejmolinelli marked this conversation as resolved.
Show resolved Hide resolved
],
)
def test__validate_images_hires_max_dimension_greater_than_error(
self, assay_ontology_term_id, hi_res_size, image_max
):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 2001, 3), dtype=np.uint8)
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros(
(1, hi_res_size, 3), dtype=np.uint8
)

# Confirm hires is identified as invalid.
validator._check_spatial_uns()
assert validator.errors
assert (
"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels"
in validator.errors[0]
)
assert validator.errors == [
f"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be {image_max} pixels, it has a largest dimension of {hi_res_size} pixels."
]

def test__validate_images_hires_max_dimension_less_than_error(self):
@pytest.mark.parametrize(
"assay_ontology_term_id, hi_res_size, image_max",
[
("EFO:0022858", 1999, SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE),
("EFO:0022860", 3999, SPATIAL_HIRES_IMAGE_MAX_DIEMSNION_SIZE_VISIUM_11MM),
],
)
def test__validate_images_hires_max_dimension_less_than_error(self, assay_ontology_term_id, hi_res_size, image_max):
validator: Validator = Validator()
validator._set_schema_def()
validator.adata = adata_visium.copy()
validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros((1, 1999, 3), dtype=np.uint8)
validator.adata.obs["assay_ontology_term_id"] = assay_ontology_term_id
validator.adata.uns["spatial"][visium_library_id]["images"]["hires"] = np.zeros(
(1, hi_res_size, 3), dtype=np.uint8
)

# Confirm hires is identified as invalid.
validator._check_spatial_uns()
assert validator.errors
assert (
"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be 2000 pixels"
in validator.errors[0]
)
assert validator.errors == [
f"The largest dimension of uns['spatial'][library_id]['images']['hires'] must be {image_max} pixels, it has a largest dimension of {hi_res_size} pixels."
]

def test__validate_scalefactors_required_error(self):
validator: Validator = Validator()
Expand Down
Loading