chanzuckerberg · kaloster · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024 · Mar 28, 2024
diff --git a/backend/common/constants.py b/backend/common/constants.py
@@ -4,4 +4,6 @@
     "dev": "https://api.cellxgene.dev.single-cell.czi.technology",
 }
 
+# Allow-list of `uns` keys written to the CXG: convert_uns_to_cxg_group skips every key not listed here.
+UNS_META_KEYS = ["spatial"]
+
 DATA_SUBMISSION_POLICY_VERSION = "2.0"
diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py
@@ -1,11 +1,14 @@
 import json
 import logging
+import pickle
 
 import numpy as np
 import pandas as pd
 import tiledb
 
+from backend.common.constants import UNS_META_KEYS
 from backend.common.utils.type_conversion_utils import get_dtype_and_schema_of_array
+from backend.common.utils.uns import filter_spatial_data
 
 
 def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None):
@@ -31,6 +34,29 @@
             metadata_array.meta[key] = value
 
 
+def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None):
+    """
+    Convert uns (unstructured) metadata to CXG output directory specified.
+
+    A one-element dense TileDB array is created at `{cxg_container}/{group_metadata_name}` and used only
+    as a carrier for array metadata. For every key of `metadata_dict` that is listed in UNS_META_KEYS,
+    the per-object contents are collected into a single dictionary, pickled, and stored under that key
+    in the array metadata. Keys that are not listed in UNS_META_KEYS are ignored.
+
+    :param cxg_container: path/URI of the CXG directory in which the array is created
+    :param metadata_dict: mapping of uns key -> mapping of object id -> content
+    :param group_metadata_name: name of the array created inside `cxg_container`
+    :param ctx: optional TileDB context used both to create and to open the array
+    """
+
+    array_name = f"{cxg_container}/{group_metadata_name}"
+
+    # from_numpy hands back an open array; close it immediately so the handle is not leaked.
+    tiledb.from_numpy(array_name, np.zeros((1,)), ctx=ctx).close()
+
+    with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array:
+        for key, value in metadata_dict.items():
+            if key not in UNS_META_KEYS:
+                continue
+
+            # Fresh dict per key, so entries collected for one uns key never leak into another.
+            object_filtered = {}
+            for object_id, content in value.items():
+                if key == "spatial":
+                    # Merge instead of rebinding: rebinding kept only the last library when several exist.
+                    # NOTE(review): every entry under "spatial" is assumed to be a per-library mapping;
+                    # filter_spatial_data subscripts it, so a scalar entry would raise. Confirm against schema.
+                    object_filtered.update(filter_spatial_data(content, object_id))
+                else:
+                    object_filtered[object_id] = content
+
+            # NOTE(review): stored as a pickle, so readers must only unpickle CXGs from a trusted source.
+            metadata_array.meta[key] = pickle.dumps(object_filtered)
+
+
 def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, index_column_name, ctx):
     """
     Saves the contents of the dataframe to the CXG output directory specified.
@@ -52,7 +78,12 @@
     tdb_attrs = []
 
     for column_name, column_values in dataframe.items():
-        dtype, hints = get_dtype_and_schema_of_array(column_values)
+        # Cast 'in_tissue' column values as boolean to make it categorical
+        # https://github.com/chanzuckerberg/single-cell-explorer/issues/841
+        if column_name == "in_tissue":
+            dtype, hints = get_dtype_and_schema_of_array(column_values.astype(bool))
+        else:
+            dtype, hints = get_dtype_and_schema_of_array(column_values)
         if "categories" in hints and len(hints.get("categories", [])) > 0.75 * dataframe.shape[0]:
             hints["type"] = "string"
             del hints["categories"]

diff --git a/backend/common/utils/uns.py b/backend/common/utils/uns.py
@@ -0,0 +1,18 @@
+def filter_spatial_data(content, library_id):
+    """
+    Reduce the "spatial" entry of a single library to the fields that are kept in the CXG.
+
+    From `content["images"]` only "hires" is carried over ("fullres" is always emitted as an empty list),
+    and from `content["scalefactors"]` only "spot_diameter_fullres" and "tissue_hires_scalef" are kept.
+    https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata
+
+    :param content: mapping holding the "images" and "scalefactors" sub-dictionaries of one library
+    :param library_id: key under which the filtered data is returned
+    :return: dictionary with the single key `library_id` mapping to the filtered data
+    """
+    source_images = content["images"]
+    kept_images = {
+        # Omit hires data once deep zooming feature is implemented
+        "hires": source_images["hires"],
+        # fullres data is currently not included, because the deep zooming feature is coming soon
+        "fullres": [],
+    }
+
+    source_scalefactors = content["scalefactors"]
+    kept_scalefactors = {}
+    for factor_name in ("spot_diameter_fullres", "tissue_hires_scalef"):
+        kept_scalefactors[factor_name] = source_scalefactors[factor_name]
+
+    return {library_id: {"images": kept_images, "scalefactors": kept_scalefactors}}
diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py
@@ -18,6 +18,7 @@
     convert_dictionary_to_cxg_group,
     convert_matrices_to_cxg_arrays,
     convert_ndarray_to_cxg_dense_array,
+    convert_uns_to_cxg_group,
 )
 from backend.common.utils.matrix_utils import is_matrix_sparse
 from backend.common.utils.tiledb import consolidation_buffer_size
@@ -79,6 +80,9 @@ def to_cxg(self, output_cxg_directory, sparse_threshold, convert_anndata_colors_
         convert_dataframe_to_cxg_array(output_cxg_directory, "var", self.var, self.var_index_column_name, ctx)
         logging.info("\t...dataset var dataframe saved")
 
+        convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx)
+        logging.info("\t...dataset uns dataframe saved")
+
         self.write_anndata_embeddings_to_cxg(output_cxg_directory, ctx)
         logging.info("\t...dataset embeddings saved")
 

diff --git a/tests/unit/backend/layers/utils/test_cxg_generation_utils.py b/tests/unit/backend/layers/utils/test_cxg_generation_utils.py
@@ -1,4 +1,5 @@
 import json
+import pickle
 import unittest
 from os import mkdir, path
 from shutil import rmtree
@@ -13,6 +14,7 @@
     convert_dictionary_to_cxg_group,
     convert_matrices_to_cxg_arrays,
     convert_ndarray_to_cxg_dense_array,
+    convert_uns_to_cxg_group,
 )
 from tests.unit.backend.fixtures.environment_setup import fixture_file_path
 
@@ -28,7 +30,7 @@ def tearDown(self):
 
     def test__convert_dictionary_to_cxg_group__writes_successfully(self):
         random_dictionary = {"cookies": "chocolate_chip", "brownies": "chocolate", "cake": "double chocolate"}
-        dictionary_name = "favorite_desserts"
+        dictionary_name = "spatial"
         expected_array_directory = f"{self.testing_cxg_temp_directory}/{dictionary_name}"
 
         convert_dictionary_to_cxg_group(
@@ -42,6 +44,33 @@ def test__convert_dictionary_to_cxg_group__writes_successfully(self):
         self.assertTrue(isinstance(array, tiledb.DenseArray))
         self.assertEqual(random_dictionary, actual_stored_metadata)
 
+    def test__convert_uns_to_cxg_group__writes_successfully(self):
+        """Spatial uns data is filtered, pickled and stored as metadata of the created TileDB array."""
+        # The input carries fields the filter is expected to drop ("lowres", a populated "fullres",
+        # "tissue_lowres_scalef") plus a key outside the allow-list, so the assertions below
+        # exercise the filtering instead of a plain round trip.
+        random_dictionary = {
+            "spatial": {
+                "abcd": {
+                    "images": {
+                        "hires": "123",
+                        "lowres": "456",
+                        "fullres": "789",
+                    },
+                    "scalefactors": {
+                        "spot_diameter_fullres": "123",
+                        "tissue_hires_scalef": "123",
+                        "tissue_lowres_scalef": "456",
+                    },
+                }
+            },
+            "not_an_allowed_key": {"abcd": "ignored"},
+        }
+        expected_spatial = {
+            "abcd": {
+                "images": {
+                    "hires": "123",
+                    "fullres": [],
+                },
+                "scalefactors": {
+                    "spot_diameter_fullres": "123",
+                    "tissue_hires_scalef": "123",
+                },
+            }
+        }
+        dictionary_name = "uns"
+        expected_array_directory = f"{self.testing_cxg_temp_directory}/{dictionary_name}"
+
+        convert_uns_to_cxg_group(
+            self.testing_cxg_temp_directory, random_dictionary, group_metadata_name=dictionary_name
+        )
+
+        self.assertTrue(path.isdir(expected_array_directory))
+        # Open through a context manager so the array handle is released before tearDown removes the directory.
+        with tiledb.open(expected_array_directory) as array:
+            self.assertTrue(isinstance(array, tiledb.DenseArray))
+            actual_stored_metadata = dict(array.meta.items())
+
+        self.assertNotIn("not_an_allowed_key", actual_stored_metadata)
+        self.assertEqual(expected_spatial, pickle.loads(actual_stored_metadata["spatial"]))
+
+
     def test__convert_dataframe_to_cxg_array__writes_successfully(self):
         random_int_category = Series(data=[3, 1, 2, 4], dtype=np.int64)
         random_bool_category = Series(data=[True, True, False, True], dtype=np.bool_)