Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: update validation for uns['spatial'] #1129

Merged
merged 15 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 23 additions & 19 deletions cellxgene_schema_cli/cellxgene_schema/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,15 @@

# Size constant for Visium datasets with uns['spatial']['is_single'] True.
# NOTE(review): its use is outside this view -- presumably an expected matrix row count; confirm.
VISIUM_AND_IS_SINGLE_TRUE_MATRIX_SIZE = 4992
# Default max dimension passed to _validate_spatial_image_shape for the 'hires' image.
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE = 2000
# Larger 'hires' limit, selected in _check_spatial_uns when the assay term is
# 'EFO:0022860' or one of its descendants.
# NOTE(review): presumably the Visium 11mm capture-area assay, per the name -- confirm.
SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM = 4000

# NOTE(review): this plain-string assignment is rebound by the f-string definition of
# the same name further down, so this value is never the one observed by later code.
# It looks like the removed side of the rendered diff -- confirm before relying on it.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = "descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True"
# Reusable phrases describing the two supported spatial assay conditions; they are
# interpolated into the ERROR_SUFFIX_* messages below.
CONDITION_IS_VISIUM = "a descendant of 'EFO:0010961' (Visium Spatial Gene Expression)"
CONDITION_IS_SEQV2 = f"'{ASSAY_SLIDE_SEQV2}' (Slide-seqV2)"


# Error-message suffixes, each built from the previous one so the wording of the
# assay condition is defined in exactly one place.
ERROR_SUFFIX_SPATIAL = f"obs['assay_ontology_term_id'] is either {CONDITION_IS_VISIUM} or {CONDITION_IS_SEQV2}"
ERROR_SUFFIX_VISIUM = f"obs['assay_ontology_term_id'] is {CONDITION_IS_VISIUM}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE = f"{ERROR_SUFFIX_VISIUM} and uns['spatial']['is_single'] is True"
# NOTE(review): the composed text reads "is only allowed for obs[...] is a descendant of ..."
# (the test expectation in this change shows the same wording) -- grammar is awkward but
# tests pin it, so change both together if rewording.
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_FORBIDDEN = f"is only allowed for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_REQUIRED = f"is required for {ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE}"
ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE_IN_TISSUE_0 = f"{ERROR_SUFFIX_VISIUM_AND_IS_SINGLE_TRUE} and in_tissue is 0"
Expand Down Expand Up @@ -95,9 +102,11 @@ def _is_supported_spatial_assay(self) -> bool:
"""
if self.is_spatial is None:
try:
self.is_spatial = False
if self.adata.obs.assay_ontology_term_id.isin([ASSAY_VISIUM, ASSAY_SLIDE_SEQV2]).any():
self.is_spatial = True
_spatial = (
self._is_visium_including_descendants()
or self.adata.obs.assay_ontology_term_id.isin([ASSAY_SLIDE_SEQV2]).any()
)
self.is_spatial = bool(_spatial)
except AttributeError:
# specific error reporting will occur downstream in the validation
self.is_spatial = False
Expand Down Expand Up @@ -1466,10 +1475,7 @@ def _validate_spatial_assay_ontology_term_id(self):
# Validate assay ontology term ids are identical.
term_count = obs["assay_ontology_term_id"].nunique()
if term_count > 1:
self.errors.append(
"When obs['assay_ontology_term_id'] is either 'EFO:0010961' (Visium Spatial Gene Expression) or "
"'EFO:0030062' (Slide-seqV2), all observations must contain the same value."
)
self.errors.append(f"When {ERROR_SUFFIX_SPATIAL}" ", all observations must contain the same value.")

def _validate_spatial_cell_type_ontology_term_id(self):
"""
Expand Down Expand Up @@ -1599,10 +1605,7 @@ def _check_spatial_uns(self):
uns_spatial = self.adata.uns.get("spatial")
is_supported_spatial_assay = self._is_supported_spatial_assay()
if uns_spatial is not None and not is_supported_spatial_assay:
self.errors.append(
"uns['spatial'] is only allowed for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
)
self.errors.append(f"uns['spatial'] is only allowed when {ERROR_SUFFIX_SPATIAL}")
return

# Exit if we aren't dealing with a supported spatial assay as no further checks are necessary.
Expand All @@ -1611,10 +1614,7 @@ def _check_spatial_uns(self):

# spatial is required for supported spatial assays.
if not isinstance(uns_spatial, dict):
self.errors.append(
"A dict in uns['spatial'] is required for obs['assay_ontology_term_id'] values "
"'EFO:0010961' (Visium Spatial Gene Expression) and 'EFO:0030062' (Slide-seqV2)."
)
self.errors.append("A dict in uns['spatial'] is required when " f"{ERROR_SUFFIX_SPATIAL}.")
return

# is_single is required.
Expand Down Expand Up @@ -1693,7 +1693,11 @@ def _check_spatial_uns(self):
self.errors.append("uns['spatial'][library_id]['images'] must contain the key 'hires'.")
# hires is specified: proceed with validation of hires.
else:
self._validate_spatial_image_shape("hires", uns_images["hires"], SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE)
_assay_term = self.adata.obs["assay_ontology_term_id"].values[0]
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE
if is_ontological_descendant_of(ONTOLOGY_PARSER, _assay_term, "EFO:0022860", True):
_max_size = SPATIAL_HIRES_IMAGE_MAX_DIMENSION_SIZE_VISIUM_11MM
self._validate_spatial_image_shape("hires", uns_images["hires"], _max_size)

# fullres is optional.
uns_fullres = uns_images.get("fullres")
Expand Down Expand Up @@ -1802,12 +1806,12 @@ def _is_visium_including_descendants(self) -> bool:
# check if any assay_ontology_term_ids are descendants of VISIUM
includes_and_visium = (
self.adata.obs[_assay_key]
.astype("string")
.apply(lambda assay: is_ontological_descendant_of(ONTOLOGY_PARSER, assay, ASSAY_VISIUM, True))
.any()
)
self.is_visium = includes_and_visium

# save state and return
self.is_visium = includes_and_visium
return includes_and_visium

def _validate_spatial_image_shape(self, image_name: str, image: np.ndarray, max_dimension: int = None):
Expand Down
11 changes: 7 additions & 4 deletions cellxgene_schema_cli/tests/test_schema_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import tempfile
import unittest
from copy import deepcopy

import anndata
import fixtures.examples_validate as examples
Expand Down Expand Up @@ -495,7 +496,7 @@ def test_column_presence_in_tissue(self, validator_with_visium_assay, assay_onto
assert validator.errors == []
else:
assert validator.errors == [
"obs['in_tissue'] is only allowed for descendants of obs['assay_ontology_term_id'] 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
"obs['in_tissue'] is only allowed for obs['assay_ontology_term_id'] is a descendant of 'EFO:0010961' (Visium Spatial Gene Expression) and uns['spatial']['is_single'] is True."
]

@pytest.mark.parametrize("reserved_column", schema_def["components"]["obs"]["reserved_columns"])
Expand Down Expand Up @@ -1673,11 +1674,16 @@ def test_should_warn_for_low_gene_count(self, validator_with_adata):
Raise a warning if there are too few genes
"""
validator = validator_with_adata
# NOTE:[EM] changing the schema def here is stateful and results in unpredictable test results.
# Reset after mutating.
_old_schema = deepcopy(validator.schema_def.copy())

validator.schema_def["components"]["var"]["warn_if_less_than_rows"] = 100
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix."
]
validator.schema_def = _old_schema

@pytest.mark.parametrize(
"df,column",
Expand Down Expand Up @@ -2198,7 +2204,6 @@ def test_obsm_values_no_X_embedding__non_spatial_dataset(self, validator_with_ad
]
assert validator.is_spatial is False
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2248,7 +2253,6 @@ def test_obsm_values_warn_start_with_X(self, validator_with_adata):
validator.adata.obsm["harmony"] = pd.DataFrame(validator.adata.obsm["X_umap"], index=validator.adata.obs_names)
validator.validate_adata()
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' harmony is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down Expand Up @@ -2282,7 +2286,6 @@ def test_obsm_values_key_start_with_number(self, validator_with_adata):
"'pandas.core.frame.DataFrame'>').",
]
assert validator.warnings == [
"WARNING: Dataframe 'var' only has 4 rows. Features SHOULD NOT be filtered from expression matrix.",
"WARNING: Embedding key in 'adata.obsm' 3D is not 'spatial' nor does it start with 'X_'. "
"Thus, it will not be available in Explorer",
"WARNING: Validation of raw layer was not performed due to current errors, try again after fixing current errors.",
Expand Down
Loading
Loading