From 02c799f918d4c10513386086371c5f5db753b79a Mon Sep 17 00:00:00 2001 From: kaloster Date: Tue, 13 Feb 2024 13:48:23 -0500 Subject: [PATCH 01/14] chore: changes to add uns metadata for spatial --- backend/common/utils/cxg_generation_utils.py | 14 ++++++++++---- backend/layers/processing/h5ad_data_file.py | 4 ++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index 5cfecf321249c..bf9e67bc40ddd 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -22,13 +22,19 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata array_name = f"{cxg_container}/{group_metadata_name}" # Because TileDB does not allow one to attach metadata directly to a CXG group, we need to have a workaround - # where we create an empty array and attached the metadata onto to this empty array. Below we construct this empty + # where we create an empty array and attach the metadata onto to this empty array. Below we construct this empty # array. tiledb.from_numpy(array_name, np.zeros((1,))) - with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: - for key, value in metadata_dict.items(): - metadata_array.meta[key] = value + def iterate_over_dict(metadata_dict): + with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: + for key, value in metadata_dict.items(): + if isinstance(value, dict): + iterate_over_dict(value) + else: + metadata_array.meta[key] = value + + iterate_over_dict(metadata_dict) def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, index_column_name, ctx): diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py index 26022a372a063..ca86609b59629 100644 --- a/backend/layers/processing/h5ad_data_file.py +++ b/backend/layers/processing/h5ad_data_file.py @@ -79,6 +79,9 @@ def to_cxg(self, output_cxg_directory, sparse_threshold, convert_anndata_colors_ convert_dataframe_to_cxg_array(output_cxg_directory, "var", self.var, self.var_index_column_name, ctx) logging.info("\t...dataset var dataframe saved") + convert_dictionary_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx) + logging.info("\t...dataset uns dataframe saved") + self.write_anndata_embeddings_to_cxg(output_cxg_directory, ctx) logging.info("\t...dataset embeddings saved") @@ -175,6 +178,7 @@ def extract_anndata_elements_from_file(self): self.obs = self.transform_dataframe_index_into_column(self.anndata.obs, "obs", self.obs_index_column_name) self.var = self.transform_dataframe_index_into_column(self.anndata.var, "var", self.var_index_column_name) + def extract_metadata_about_dataset(self): """ Extract metadata information about the dataset that upon conversion will be saved as group metadata with the From b5c643640c966b83f6d1987be3cba79c4ad706b8 Mon Sep 17 00:00:00 2001 From: kaloster Date: Tue, 13 Feb 2024 13:52:10 -0500 Subject: [PATCH 02/14] chore: changes to add uns metadata for spatial --- backend/layers/processing/h5ad_data_file.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py index ca86609b59629..965bac42e9f8a 100644 --- a/backend/layers/processing/h5ad_data_file.py +++ b/backend/layers/processing/h5ad_data_file.py @@ -178,7 +178,6 @@ def extract_anndata_elements_from_file(self): self.obs = self.transform_dataframe_index_into_column(self.anndata.obs, "obs", self.obs_index_column_name) self.var = self.transform_dataframe_index_into_column(self.anndata.var, "var", self.var_index_column_name) - def extract_metadata_about_dataset(self): """ Extract metadata information about the dataset that upon conversion will be saved as group metadata with the From 05c0e285d15dcd093ed115e73e2e3da65b16fd92 Mon Sep 17 00:00:00 2001 From: kaloster Date: Thu, 28 Mar 2024 21:21:30 +0200 Subject: [PATCH 03/14] revised script --- backend/common/utils/cxg_generation_utils.py | 27 +++++++++++++++++--- backend/layers/processing/h5ad_data_file.py | 3 ++- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index bf9e67bc40ddd..6d76e57a1a085 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -1,5 +1,6 @@ import json import logging +import pickle import numpy as np import pandas as pd @@ -22,17 +23,37 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata array_name = f"{cxg_container}/{group_metadata_name}" # Because TileDB does not allow one to attach metadata directly to a CXG group, we need to have a workaround - # where we create an empty array and attach the metadata onto to this empty array. Below we construct this empty + # where we create an empty array and attached the metadata onto to this empty array. Below we construct this empty # array. tiledb.from_numpy(array_name, np.zeros((1,))) + with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: + for key, value in metadata_dict.items(): + metadata_array.meta[key] = value + + +def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None): + + array_name = f"{cxg_container}/{group_metadata_name}" + + tiledb.from_numpy(array_name, np.zeros((1,))) + def iterate_over_dict(metadata_dict): with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: for key, value in metadata_dict.items(): + if not key.startswith("spatial"): + continue + print(f"key: {key}, type:{type(value)}, value: {value}") if isinstance(value, dict): - iterate_over_dict(value) + try: + metadata_array.meta[key] = pickle.dumps(value) + except Exception as e: + logging.error(f"Error adding metadata {key} to {array_name}: {e}") else: - metadata_array.meta[key] = value + try: + metadata_array.meta[key] = value + except Exception as e: + logging.error(f"Error adding metadata {key} to {array_name}: {e}") iterate_over_dict(metadata_dict) diff --git a/backend/layers/processing/h5ad_data_file.py b/backend/layers/processing/h5ad_data_file.py index 965bac42e9f8a..5ed297159862b 100644 --- a/backend/layers/processing/h5ad_data_file.py +++ b/backend/layers/processing/h5ad_data_file.py @@ -18,6 +18,7 @@ convert_dictionary_to_cxg_group, convert_matrices_to_cxg_arrays, convert_ndarray_to_cxg_dense_array, + convert_uns_to_cxg_group, ) from backend.common.utils.matrix_utils import is_matrix_sparse from backend.common.utils.tiledb import consolidation_buffer_size @@ -79,7 +80,7 @@ def to_cxg(self, output_cxg_directory, sparse_threshold, convert_anndata_colors_ convert_dataframe_to_cxg_array(output_cxg_directory, "var", self.var, self.var_index_column_name, ctx) logging.info("\t...dataset var dataframe saved") - convert_dictionary_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx) + convert_uns_to_cxg_group(output_cxg_directory, self.anndata.uns, "uns", ctx) logging.info("\t...dataset uns dataframe saved") self.write_anndata_embeddings_to_cxg(output_cxg_directory, ctx) From 0b05e85b6839ecdcd525b8697158858b86d26325 Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 1 Apr 2024 11:08:35 +0300 Subject: [PATCH 04/14] make cxg notebook --- make_cxg.ipynb | 291 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 make_cxg.ipynb diff --git a/make_cxg.ipynb b/make_cxg.ipynb new file mode 100644 index 0000000000000..a97d2aeef587e --- /dev/null +++ b/make_cxg.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from backend.layers.processing.h5ad_data_file import H5ADDataFile\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", + "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "key: spatial, type:, value: {'WSA_LngSP10193345': {'images': {'fullres': array([[[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]],\n", + "\n", + " [[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]],\n", + "\n", + " [[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]],\n", + "\n", + " ...,\n", + "\n", + " [[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]],\n", + "\n", + " [[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]],\n", + "\n", + " [[244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " ...,\n", + " [244, 240, 240],\n", + " [244, 240, 240],\n", + " [244, 240, 240]]], dtype=uint8), 'hires': array([[[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]],\n", + "\n", + " [[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]],\n", + "\n", + " [[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]],\n", + "\n", + " ...,\n", + "\n", + " [[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]],\n", + "\n", + " [[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]],\n", + "\n", + " [[0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " ...,\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ],\n", + " [0.95686275, 0.9411765 , 0.9411765 ]]], dtype=float32)}, 'metadata': {'chemistry_description': \"Spatial 3' v1\", 'software_version': 'spaceranger-1.1.0'}, 'scalefactors': {'spot_diameter_fullres': 148.37971291260436, 'tissue_hires_scalef': 0.056960583}}}\n" + ] + }, + { + "data": { + "text/plain": [ + "'UXR_0bb15784-1cea-47e1-9a00-57dcd127746c.cxg'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def make_cxg(local_filename):\n", + " \"\"\"\n", + " Convert the uploaded H5AD file to the CXG format servicing the cellxgene Explorer.\n", + " \"\"\"\n", + "\n", + " cxg_output_container = local_filename.replace(\".h5ad\", \".cxg\")\n", + " try:\n", + " h5ad_data_file = H5ADDataFile(local_filename, var_index_column_name=\"feature_name\")\n", + " h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0)\n", + " except Exception as ex:\n", + " # TODO use a specialized exception\n", + " msg = \"CXG conversion failed.\"\n", + "\n", + " raise RuntimeError(msg) from ex\n", + " raise ex\n", + "\n", + " return cxg_output_container\n", + "\n", + "make_cxg(\"UXR_0bb15784-1cea-47e1-9a00-57dcd127746c.h5ad\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From b68d9165d887aca1a17391242722ed4b787e4ff2 Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 1 Apr 2024 11:12:42 +0300 Subject: [PATCH 05/14] make cxg notebook --- make_cxg.ipynb | 236 +------------------------------------------------ 1 file changed, 3 insertions(+), 233 deletions(-) diff --git a/make_cxg.ipynb b/make_cxg.ipynb index a97d2aeef587e..7639cc7625339 100644 --- a/make_cxg.ipynb +++ b/make_cxg.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -11,239 +11,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n", - "WARNING:root:Type float64 will be converted to 32 bit float and may lose precision.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "key: spatial, type:, value: {'WSA_LngSP10193345': {'images': {'fullres': array([[[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]],\n", - "\n", - " [[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]],\n", - "\n", - " [[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]],\n", - "\n", - " ...,\n", - "\n", - " [[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]],\n", - "\n", - " [[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]],\n", - "\n", - " [[244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " ...,\n", - " [244, 240, 240],\n", - " [244, 240, 240],\n", - " [244, 240, 240]]], dtype=uint8), 'hires': array([[[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]],\n", - "\n", - " [[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]],\n", - "\n", - " [[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]],\n", - "\n", - " ...,\n", - "\n", - " [[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]],\n", - "\n", - " [[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]],\n", - "\n", - " [[0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " ...,\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ],\n", - " [0.95686275, 0.9411765 , 0.9411765 ]]], dtype=float32)}, 'metadata': {'chemistry_description': \"Spatial 3' v1\", 'software_version': 'spaceranger-1.1.0'}, 'scalefactors': {'spot_diameter_fullres': 148.37971291260436, 'tissue_hires_scalef': 0.056960583}}}\n" - ] - }, - { - "data": { - "text/plain": [ - "'UXR_0bb15784-1cea-47e1-9a00-57dcd127746c.cxg'" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "def make_cxg(local_filename):\n", " \"\"\"\n", From 1d7e449f8f889fa3c1cebeccb69b628b5c5c6b31 Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 8 Apr 2024 13:01:13 +0300 Subject: [PATCH 06/14] convert uns for spatial key --- backend/common/constants.py | 2 + backend/common/utils/cxg_generation_utils.py | 56 ++++++++++++++------ 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/backend/common/constants.py b/backend/common/constants.py index a4c7574906409..bd6c4c84b6cf8 100644 --- a/backend/common/constants.py +++ b/backend/common/constants.py @@ -3,3 +3,5 @@ "staging": "https://api.cellxgene.staging.single-cell.czi.technology", "dev": "https://api.cellxgene.dev.single-cell.czi.technology", } + +UNS_META_KEYS = ['spatial'] \ No newline at end of file diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index 6d76e57a1a085..d339359005a9d 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -6,6 +6,7 @@ import pandas as pd import tiledb +from backend.common.constants import UNS_META_KEYS from backend.common.utils.type_conversion_utils import get_dtype_and_schema_of_array @@ -33,29 +34,52 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None): + """ + Convert uns (unstructured) metadata to CXG output directory + + Args: + cxg_container (str): The name of the cxg container. + metadata_dict (dict): The dictionary containing the metadata. + group_metadata_name (str, optional): The name of the group metadata. Defaults to "cxg_group_metadata". + ctx (tiledb.Ctx, optional): The TileDB context. + https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata + """ + def filter_spatial_data(content, library_id): + """ + This filters data associated with the "spatial" key in a dictionary, specifically retaining + certain sub-items from "images" and "scalefactors" sub-dictionaries. + """ + spatial_filtered = {} + spatial_filtered[library_id] = { + 'images': { + 'hires': content['images']['hires'], # Omit hires data once deep zooming feature is implemented + 'fullres': [] # Currently not including fullsres data, due to deep zooming feature coming soon + }, + 'scalefactors': { + 'spot_diameter_fullres': content['scalefactors']['spot_diameter_fullres'], + 'tissue_hires_scalef': content['scalefactors']['tissue_hires_scalef'] + } + } + return spatial_filtered + + array_name = f"{cxg_container}/{group_metadata_name}" + object_filtered = {} tiledb.from_numpy(array_name, np.zeros((1,))) - def iterate_over_dict(metadata_dict): - with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: - for key, value in metadata_dict.items(): - if not key.startswith("spatial"): - continue - print(f"key: {key}, type:{type(value)}, value: {value}") - if isinstance(value, dict): - try: - metadata_array.meta[key] = pickle.dumps(value) - except Exception as e: - logging.error(f"Error adding metadata {key} to {array_name}: {e}") + with tiledb.open(array_name, mode="w", ctx=ctx) as metadata_array: + for key, value in metadata_dict.items(): + if key not in UNS_META_KEYS: + continue + for object_id, content in value.items(): + if key == 'spatial': + object_filtered = filter_spatial_data(content, object_id) else: - try: - metadata_array.meta[key] = value - except Exception as e: - logging.error(f"Error adding metadata {key} to {array_name}: {e}") + object_filtered[object_id] = content - iterate_over_dict(metadata_dict) + metadata_array.meta[key] = pickle.dumps(object_filtered) def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, index_column_name, ctx): From 2232e0a0797f292f185e0ef2c6b02c3d54acc70d Mon Sep 17 00:00:00 2001 From: Ronen Date: Mon, 8 Apr 2024 13:03:42 +0300 Subject: [PATCH 07/14] Delete make_cxg.ipynb --- make_cxg.ipynb | 61 -------------------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 make_cxg.ipynb diff --git a/make_cxg.ipynb b/make_cxg.ipynb deleted file mode 100644 index 7639cc7625339..0000000000000 --- a/make_cxg.ipynb +++ /dev/null @@ -1,61 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from backend.layers.processing.h5ad_data_file import H5ADDataFile\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def make_cxg(local_filename):\n", - " \"\"\"\n", - " Convert the uploaded H5AD file to the CXG format servicing the cellxgene Explorer.\n", - " \"\"\"\n", - "\n", - " cxg_output_container = local_filename.replace(\".h5ad\", \".cxg\")\n", - " try:\n", - " h5ad_data_file = H5ADDataFile(local_filename, var_index_column_name=\"feature_name\")\n", - " h5ad_data_file.to_cxg(cxg_output_container, sparse_threshold=25.0)\n", - " except Exception as ex:\n", - " # TODO use a specialized exception\n", - " msg = \"CXG conversion failed.\"\n", - "\n", - " raise RuntimeError(msg) from ex\n", - " raise ex\n", - "\n", - " return cxg_output_container\n", - "\n", - "make_cxg(\"UXR_0bb15784-1cea-47e1-9a00-57dcd127746c.h5ad\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 0b1da805565e1f7e660752fe509e0d16e239f94a Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 8 Apr 2024 13:04:25 +0300 Subject: [PATCH 08/14] convert uns for spatial key --- backend/common/constants.py | 2 +- backend/common/utils/cxg_generation_utils.py | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/backend/common/constants.py b/backend/common/constants.py index bd6c4c84b6cf8..4ab11214859e0 100644 --- a/backend/common/constants.py +++ b/backend/common/constants.py @@ -4,4 +4,4 @@ "dev": "https://api.cellxgene.dev.single-cell.czi.technology", } -UNS_META_KEYS = ['spatial'] \ No newline at end of file +UNS_META_KEYS = ["spatial"] diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index d339359005a9d..9e35e827ffd8b 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -44,7 +44,7 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name=" ctx (tiledb.Ctx, optional): The TileDB context. https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata """ - + def filter_spatial_data(content, library_id): """ This filters data associated with the "spatial" key in a dictionary, specifically retaining @@ -52,18 +52,17 @@ def filter_spatial_data(content, library_id): """ spatial_filtered = {} spatial_filtered[library_id] = { - 'images': { - 'hires': content['images']['hires'], # Omit hires data once deep zooming feature is implemented - 'fullres': [] # Currently not including fullsres data, due to deep zooming feature coming soon + "images": { + "hires": content["images"]["hires"], # Omit hires data once deep zooming feature is implemented + "fullres": [], # Currently not including fullsres data, due to deep zooming feature coming soon + }, + "scalefactors": { + "spot_diameter_fullres": content["scalefactors"]["spot_diameter_fullres"], + "tissue_hires_scalef": content["scalefactors"]["tissue_hires_scalef"], }, - 'scalefactors': { - 'spot_diameter_fullres': content['scalefactors']['spot_diameter_fullres'], - 'tissue_hires_scalef': content['scalefactors']['tissue_hires_scalef'] - } } return spatial_filtered - array_name = f"{cxg_container}/{group_metadata_name}" object_filtered = {} @@ -74,7 +73,7 @@ def filter_spatial_data(content, library_id): if key not in UNS_META_KEYS: continue for object_id, content in value.items(): - if key == 'spatial': + if key == "spatial": object_filtered = filter_spatial_data(content, object_id) else: object_filtered[object_id] = content From 1131d927cbaeb24c0228ba037a15f410087916a7 Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 8 Apr 2024 13:23:19 +0300 Subject: [PATCH 09/14] convert uns for spatial key --- backend/common/utils/cxg_generation_utils.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index 9e35e827ffd8b..babeb4df6dd50 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -35,20 +35,14 @@ def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None): """ - Convert uns (unstructured) metadata to CXG output directory - - Args: - cxg_container (str): The name of the cxg container. - metadata_dict (dict): The dictionary containing the metadata. - group_metadata_name (str, optional): The name of the group metadata. Defaults to "cxg_group_metadata". - ctx (tiledb.Ctx, optional): The TileDB context. - https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata + Convert uns (unstructured) metadata to CXG output directory specified """ def filter_spatial_data(content, library_id): """ This filters data associated with the "spatial" key in a dictionary, specifically retaining certain sub-items from "images" and "scalefactors" sub-dictionaries. + https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata """ spatial_filtered = {} spatial_filtered[library_id] = { From e3f6a3cd06d30ce6974a7309bd0cc41fde5600da Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 8 Apr 2024 13:27:23 +0300 Subject: [PATCH 10/14] convert uns for spatial key --- backend/common/utils/cxg_generation_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index babeb4df6dd50..376ad1c6d8f07 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -40,8 +40,8 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name=" def filter_spatial_data(content, library_id): """ - This filters data associated with the "spatial" key in a dictionary, specifically retaining - certain sub-items from "images" and "scalefactors" sub-dictionaries. + This filters data associated with the "spatial" key in a dictionary, specifically + retaining certain sub-items from "images" and "scalefactors" sub-dictionaries. https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata """ spatial_filtered = {} From 78cf0d4e646a19036cf460262bdef780e5542089 Mon Sep 17 00:00:00 2001 From: kaloster Date: Mon, 8 Apr 2024 19:27:48 +0300 Subject: [PATCH 11/14] fix: spatial test and dataset --- backend/common/utils/cxg_generation_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index 376ad1c6d8f07..ee703b959a469 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -40,7 +40,7 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name=" def filter_spatial_data(content, library_id): """ - This filters data associated with the "spatial" key in a dictionary, specifically + This filters data associated with the "spatial" key in a dictionary, specifically retaining certain sub-items from "images" and "scalefactors" sub-dictionaries. https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata """ From cde30970e080920615d8e507037b32c528db6913 Mon Sep 17 00:00:00 2001 From: kaloster Date: Wed, 10 Apr 2024 13:42:48 +0300 Subject: [PATCH 12/14] cast in_tissue as bool and unit test --- backend/common/utils/cxg_generation_utils.py | 5 ++- .../layers/utils/test_cxg_generation_utils.py | 31 ++++++++++++++++++- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index ee703b959a469..bf53689d71d5e 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -96,7 +96,10 @@ def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, ind tdb_attrs = [] for column_name, column_values in dataframe.items(): - dtype, hints = get_dtype_and_schema_of_array(column_values) + if column_name == "in_tissue": # Cast 'in_tissue' column values as boolean to make it categorical + dtype, hints = get_dtype_and_schema_of_array(column_values.astype(bool)) + else: + dtype, hints = get_dtype_and_schema_of_array(column_values) if "categories" in hints and len(hints.get("categories", [])) > 0.75 * dataframe.shape[0]: hints["type"] = "string" del hints["categories"] diff --git a/tests/unit/backend/layers/utils/test_cxg_generation_utils.py b/tests/unit/backend/layers/utils/test_cxg_generation_utils.py index c470b358ec955..52f982ba61d59 100644 --- a/tests/unit/backend/layers/utils/test_cxg_generation_utils.py +++ b/tests/unit/backend/layers/utils/test_cxg_generation_utils.py @@ -1,4 +1,5 @@ import json +import pickle import unittest from os import mkdir, path from shutil import rmtree @@ -13,6 +14,7 @@ convert_dictionary_to_cxg_group, convert_matrices_to_cxg_arrays, convert_ndarray_to_cxg_dense_array, + convert_uns_to_cxg_group, ) from tests.unit.backend.fixtures.environment_setup import fixture_file_path @@ -28,7 +30,7 @@ def tearDown(self): def test__convert_dictionary_to_cxg_group__writes_successfully(self): random_dictionary = {"cookies": "chocolate_chip", "brownies": "chocolate", "cake": "double chocolate"} - dictionary_name = "favorite_desserts" + dictionary_name = "spatial" expected_array_directory = f"{self.testing_cxg_temp_directory}/{dictionary_name}" convert_dictionary_to_cxg_group( @@ -42,6 +44,33 @@ def test__convert_dictionary_to_cxg_group__writes_successfully(self): self.assertTrue(isinstance(array, tiledb.DenseArray)) self.assertEqual(random_dictionary, actual_stored_metadata) + def test__convert_uns_to_cxg_group__writes_successfully(self): + random_dictionary = { + "spatial": { + "abcd": { + "images": { + "hires": "123", + "fullres": [], + }, + "scalefactors": { + "spot_diameter_fullres": "123", + "tissue_hires_scalef": "123", + }, + } + } + } + dictionary_name = "uns" + expected_array_directory = f"{self.testing_cxg_temp_directory}/{dictionary_name}" + convert_uns_to_cxg_group( + self.testing_cxg_temp_directory, random_dictionary, group_metadata_name=dictionary_name + ) + array = tiledb.open(expected_array_directory) + actual_stored_metadata = dict(array.meta.items()) + + self.assertTrue(path.isdir(expected_array_directory)) + self.assertTrue(isinstance(array, tiledb.DenseArray)) + self.assertEqual(random_dictionary["spatial"], pickle.loads(actual_stored_metadata["spatial"])) + def test__convert_dataframe_to_cxg_array__writes_successfully(self): random_int_category = Series(data=[3, 1, 2, 4], dtype=np.int64) random_bool_category = Series(data=[True, True, False, True], dtype=np.bool_) From b0cee8f327ac26383679d9c439f8b9baa22a6ac5 Mon Sep 17 00:00:00 2001 From: kaloster Date: Wed, 10 Apr 2024 13:49:15 +0300 Subject: [PATCH 13/14] cast in_tissue as bool and unit test --- backend/common/utils/cxg_generation_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index bf53689d71d5e..c917384d047a5 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -96,7 +96,9 @@ def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, ind tdb_attrs = [] for column_name, column_values in dataframe.items(): - if column_name == "in_tissue": # Cast 'in_tissue' column values as boolean to make it categorical + # Cast 'in_tissue' column values as boolean to make it categorical + # https://github.com/chanzuckerberg/single-cell-explorer/issues/841 + if column_name == "in_tissue": dtype, hints = get_dtype_and_schema_of_array(column_values.astype(bool)) else: dtype, hints = get_dtype_and_schema_of_array(column_values) From 0cd25da58ed7fcc75201f71b861a23197b66149c Mon Sep 17 00:00:00 2001 From: kaloster Date: Wed, 10 Apr 2024 14:14:17 +0300 Subject: [PATCH 14/14] move spatial filter to uns utility --- backend/common/utils/cxg_generation_utils.py | 20 +------------------- backend/common/utils/uns.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) create mode 100644 backend/common/utils/uns.py diff --git a/backend/common/utils/cxg_generation_utils.py b/backend/common/utils/cxg_generation_utils.py index c917384d047a5..dae4ecb9a36d1 100644 --- a/backend/common/utils/cxg_generation_utils.py +++ b/backend/common/utils/cxg_generation_utils.py @@ -8,6 +8,7 @@ from backend.common.constants import UNS_META_KEYS from backend.common.utils.type_conversion_utils import get_dtype_and_schema_of_array +from backend.common.utils.uns import filter_spatial_data def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None): @@ -38,25 +39,6 @@ def convert_uns_to_cxg_group(cxg_container, metadata_dict, group_metadata_name=" Convert uns (unstructured) metadata to CXG output directory specified """ - def filter_spatial_data(content, library_id): - """ - This filters data associated with the "spatial" key in a dictionary, specifically - retaining certain sub-items from "images" and "scalefactors" sub-dictionaries. - https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata - """ - spatial_filtered = {} - spatial_filtered[library_id] = { - "images": { - "hires": content["images"]["hires"], # Omit hires data once deep zooming feature is implemented - "fullres": [], # Currently not including fullsres data, due to deep zooming feature coming soon - }, - "scalefactors": { - "spot_diameter_fullres": content["scalefactors"]["spot_diameter_fullres"], - "tissue_hires_scalef": content["scalefactors"]["tissue_hires_scalef"], - }, - } - return spatial_filtered - array_name = f"{cxg_container}/{group_metadata_name}" object_filtered = {} diff --git a/backend/common/utils/uns.py b/backend/common/utils/uns.py new file mode 100644 index 0000000000000..cf7be6fada15a --- /dev/null +++ b/backend/common/utils/uns.py @@ -0,0 +1,18 @@ +def filter_spatial_data(content, library_id): + """ + This filters data associated with the "spatial" key in a dictionary, specifically + retaining certain sub-items from "images" and "scalefactors" sub-dictionaries. + https://github.com/chanzuckerberg/single-cell-curation/blob/main/schema/5.1.0/schema.md#uns-dataset-metadata + """ + spatial_filtered = {} + spatial_filtered[library_id] = { + "images": { + "hires": content["images"]["hires"], # Omit hires data once deep zooming feature is implemented + "fullres": [], # Currently not including fullsres data, due to deep zooming feature coming soon + }, + "scalefactors": { + "spot_diameter_fullres": content["scalefactors"]["spot_diameter_fullres"], + "tissue_hires_scalef": content["scalefactors"]["tissue_hires_scalef"], + }, + } + return spatial_filtered