Skip to content

Commit

Permalink
feat: update validation for uns['spatial'] (#1129)
Browse files: browse the repository at this point in the history
Co-authored-by: Evan Molinelli <[email protected]>
Co-authored-by: Nayib Gloria <[email protected]>
  • Loading branch information
3 people authored Nov 26, 2024
1 parent 0c9f9af commit 478648e
Show file tree
Hide file tree
Showing 3 changed files with 179 additions and 75 deletions.
42 changes: 23 additions & 19 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,15 @@

VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000

ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
CONDITION_IS_VISIUM = "a descendant of 'EFO:0010961' (Visium Spatial Gene Expression)"
CONDITION_IS_SEQV2 = f"'{ASSAY_SLIDE_SEQV2}' (Slide-seqV2)"


ERROR_SUFFIX_SPATIAL = f"obs['assay_ontology_term_id'] is either {CONDITION_IS_VISIUM} or {CONDITION_IS_SEQV2}"
ERROR_SUFFIX_VISIUM = f"obs['assay_ontology_term_id'] is {CONDITION_IS_VISIUM}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and uns['spatial']['is_single'] is True"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
Expand Down Expand Up @@ -95,9 +102,11 @@ def _is_supported_spatial_assay(self) -> bool:
"""
if self.is_spatial is None:
try:
self.is_spatial = False
if self.adata.obs.assay_ontology_term_id.isin([ASSAY_VISIUM, ASSAY_SLIDE_SEQV2]).any():
self.is_spatial = True
_spatial = (
self._is_visium_including_descendants()
or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).any()
)
self.is_spatial = bool(_spatial)
except AttributeError:
# specific error reporting will occur downstream in the validation
self.is_spatial = False
Expand Down Expand Up @@ -1466,10 +1475,7 @@ def _validate_spatial_assay_ontology_term_id(self):
# Validate assay ontology term ids are identical.
term_count = obs["assay_ontology_term_id"].nunique()
if term_count > 1:
self.errors.append(
"When obs['assay_ontology_term_id'] is either 'EFO:0010961' (Visium Spatial Gene Expression) or "
"'EFO:0030062' (Slide-seqV2), all observations must contain the same value."
)
self.errors.append(f"When {ERROR_SUFFIX_SPATIAL}" ", all observations must contain the same value.")

def _validate_spatial_cell_type_ontology_term_id(self):
"""
Expand Down Expand Up @@ -1599,10 +1605,7 @@ def _check_spatial_uns(self):
uns_spatial = self.adata.uns.get("spatial")
is_supported_spatial_assay = self._is_supported_spatial_assay()
if uns_spatial is not None and not is_supported_spatial_assay:
self.errors.append(
"uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
)
self.errors.append(f"uns['spatial'] is only allowed when {ERROR_SUFFIX_SPATIAL}")
return

# Exit if we aren't dealing with a supported spatial assay as no further checks are necessary.
Expand All @@ -1611,10 +1614,7 @@ def _check_spatial_uns(self):

# spatial is required for supported spatial assays.
if not isinstance(uns_spatial, dict):
self.errors.append(
"A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
)
self.errors.append("A dict in uns['spatial'] is required when " f"{ERROR_SUFFIX_SPATIAL}.")
return

# is_single is required.
Expand Down Expand Up @@ -1693,7 +1693,11 @@ def _check_spatial_uns(self):
self.errors.append("uns['spatial'][library_id]['images'] must contain the key 'hires'.")
# hires is specified: proceed with validation of hires.
else:
self._validate_spatial_image_shape("hires", uns_images["hires"], SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE)
_assay_term = self.adata.obs["assay_ontology_term_id"].values[0]
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
if is_ontological_descendant_of(ONTOLOGY_PARSER, _assay_term, "EFO:0022860", True):
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
self._validate_spatial_image_shape("hires", uns_images["hires"], _max_size)

# fullres is optional.
uns_fullres = uns_images.get("fullres")
Expand Down Expand Up @@ -1802,12 +1806,12 @@ def _is_visium_including_descendants(self) -> bool:
# check if any assay_ontology_term_ids are descendants of VISIUM
includes_and_visium = (
self.adata.obs[_assay_key]
.astype("string")
.apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
.any()
)
self.is_visium = includes_and_visium

# save state and return
self.is_visium = includes_and_visium
return includes_and_visium

def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
Expand Down
11 changes: 7 additions & 4 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import tempfile
import unittest
from copy import deepcopy

import anndata
import fixtures.examples_validate as examples
Expand Down Expand Up @@ -495,7 +496,7 @@ def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_onto
assert validator.errors == []
else:
assert validator.errors == [
"obs['in_tissue'] is only allowed for descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
"obs['in_tissue'] is only allowed for obs['assay_ontology_term_id'] is a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
]

@pytest.mark.parametrize("reserved_column", schema_def["components"]["obs"]["reserved_columns"])
Expand Down Expand Up @@ -1673,11 +1674,16 @@ def test_should_warn_for_low_gene_count(self, validator_with_adata):
Raise a warning if there are too few genes
"""
validator = validator_with_adata
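# NOTE(EM): mutating validator.schema_def here is stateful and can leak into other tests, producing unpredictable results.
# Snapshot the schema definition before mutating it and restore it once the assertions are done.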
_old_schema = deepcopy(validator.schema_def.copy())

validator.schema_def["components"]["var"]["warn_if_less_than_rows"] = 100
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix."
]
validator.schema_def = _old_schema

@pytest.mark.parametrize(
"df,column",
Expand Down Expand Up @@ -2198,7 +2204,6 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
]
assert validator.is_spatial is False
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2248,7 +2253,6 @@ def test_obsm_values_warn_start_with_X(self, validator_with_adata):
validator.adata.obsm["harmony"] = pd.DataFrame(validator.adata.obsm["X_umap"], index=validator.adata.obs_names)
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2282,7 +2286,6 @@ def test_obsm_values_key_start_with_number(self, validator_with_adata):
"'pandas.core.frame.DataFrame'>').",
]
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' 3D is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down
Loading

0 comments on commit 478648e

Please sign in to comment.