feat: cxg conversion script updates for uns["spatial"] #7023

Merged · 43 commits · May 21, 2024

Commits (43)
0af8287
adding pyvips
kaloster May 10, 2024
b052c45
unit tests
kaloster May 14, 2024
1ac6e2d
lint
kaloster May 14, 2024
6026e97
add pyvips to processing
kaloster May 14, 2024
457779e
add libvips to processing docker
kaloster May 14, 2024
8e6a183
add libvips to processing docker
kaloster May 14, 2024
9bf18bb
remove libvips from processing docker
kaloster May 14, 2024
499dc4c
remove libvips from processing base docker
kaloster May 15, 2024
59fded4
Merge branch 'main' into kaloster/cxg-conversion
kaloster May 15, 2024
0ef00dc
spatial key
kaloster May 15, 2024
8fca6c6
spatial key fmt
kaloster May 15, 2024
b038508
docstring and error handling
kaloster May 15, 2024
52861a4
add spatial to is_valid
kaloster May 16, 2024
c00b683
more tests
kaloster May 16, 2024
5c20e05
fix constant
kaloster May 17, 2024
c219429
remove old test
kaloster May 17, 2024
b6d8555
revise test data
kaloster May 17, 2024
4252ad5
fmt
kaloster May 17, 2024
b655b4f
added some exceptions
kaloster May 17, 2024
68c1303
removed some exceptions
kaloster May 17, 2024
7647ebc
Merge branch 'main' into kaloster/cxg-conversion
kaloster May 17, 2024
bcc309a
trent f/b
kaloster May 17, 2024
ae5c39f
trent f/b
kaloster May 17, 2024
0a11bf1
trent f/b
kaloster May 17, 2024
35a890f
pr feedback
kaloster May 20, 2024
17fb1a7
Merge branch 'main' into kaloster/cxg-conversion
kaloster May 20, 2024
616c85f
lint
kaloster May 20, 2024
e5f826f
cleanup
kaloster May 20, 2024
8f6316c
cleanup
kaloster May 20, 2024
e637cb1
cleanup
kaloster May 20, 2024
f2bd29d
cleanup
kaloster May 20, 2024
2fb5e28
cleanup
kaloster May 20, 2024
fcdf86d
review feedback
kaloster May 20, 2024
1b7e0c6
review feedback
kaloster May 20, 2024
d40131e
fixture cleanup
kaloster May 20, 2024
7aba39a
parametrize
kaloster May 21, 2024
9ade557
lint...
kaloster May 21, 2024
1d1a2bc
Merge branch 'main' into kaloster/cxg-conversion
kaloster May 21, 2024
aa658dd
update crop test
kaloster May 21, 2024
d02e229
constant for in_tissue
kaloster May 21, 2024
d4c801a
don't strip spatial
kaloster May 21, 2024
c01f19a
remove print...
kaloster May 21, 2024
ffa4666
update constant
kaloster May 21, 2024
Files changed (changes shown from 11 of the 43 commits)
2 changes: 1 addition & 1 deletion Dockerfile.backend
@@ -6,7 +6,7 @@ ENV LC_ALL=C.UTF-8
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
apt-get install -y python3 libhdf5-dev python3-h5py gettext moreutils build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests python3-aiohttp llvm jq libvips && \
apt-get install -y python3 libhdf5-dev python3-h5py gettext moreutils build-essential libxml2-dev python3-dev python3-pip zlib1g-dev python3-requests python3-aiohttp llvm jq && \
rm -rf /var/lib/apt/lists/*

# Don't re-run pip install unless either requirements.txt has changed.
11 changes: 4 additions & 7 deletions backend/layers/processing/h5ad_data_file.py
@@ -13,15 +13,15 @@
)
from backend.common.utils.corpora_constants import CorporaConstants
from backend.common.utils.cxg_constants import CxgConstants
from backend.common.utils.cxg_generation_utils import (
from backend.common.utils.matrix_utils import is_matrix_sparse
from backend.common.utils.tiledb import consolidation_buffer_size
from backend.layers.processing.utils.cxg_generation_utils import (
convert_dataframe_to_cxg_array,
convert_dictionary_to_cxg_group,
convert_matrices_to_cxg_arrays,
convert_ndarray_to_cxg_dense_array,
convert_uns_to_cxg_group,
)
from backend.common.utils.matrix_utils import is_matrix_sparse
from backend.common.utils.tiledb import consolidation_buffer_size


class H5ADDataFile:
@@ -131,10 +131,7 @@ def is_valid_embedding(adata, embedding_name, embedding_array):

for embedding_name, embedding_values in self.anndata.obsm.items():
if is_valid_embedding(self.anndata, embedding_name, embedding_values):
if embedding_name == "spatial": # if spatial no need to strip X_ prefix
embedding_name = f"{embedding_container}/{embedding_name}"
else:
embedding_name = f"{embedding_container}/{embedding_name[2:]}"
embedding_name = f"{embedding_container}/{embedding_name[2:]}"
convert_ndarray_to_cxg_dense_array(embedding_name, embedding_values, ctx)
logging.info(f"\t\t...{embedding_name} embedding created")

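Note on the embedding hunk above: at this point in the PR every valid `obsm` entry is renamed by slicing off its first two characters (the conventional `X_` prefix) before the array is written under the embedding container; a later commit in the list (d4c801a, "don't strip spatial") restores special handling for the bare `spatial` key. A small standalone sketch of the naming rule, with an illustrative container path and key names:

```python
# Minimal sketch of the embedding-name rule shown in the hunk above.
embedding_container = "example.cxg/emb"  # hypothetical container path

for obsm_key in ["X_umap", "X_tsne", "spatial"]:
    # Slicing off the first two characters removes an "X_" prefix, but a bare
    # "spatial" key would lose "sp" as well ("atial"), which is why the
    # spatial case ends up special-cased again later in the PR.
    print(f"{embedding_container}/{obsm_key[2:]}")
```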
2 changes: 1 addition & 1 deletion backend/layers/processing/process_seurat.py
@@ -13,7 +13,7 @@
)
from backend.layers.processing.logger import logit
from backend.layers.processing.process_logic import ProcessingLogic
from backend.layers.processing.utils import rds_citation_from_h5ad
from backend.layers.processing.utils.rds_citation_from_h5ad import rds_citation_from_h5ad
from backend.layers.thirdparty.s3_provider import S3ProviderInterface
from backend.layers.thirdparty.uri_provider import UriProviderInterface

Empty file.
backend/layers/processing/utils/cxg_generation_utils.py
@@ -7,8 +7,8 @@
import tiledb

from backend.common.constants import UNS_SPATIAL_KEY
from backend.common.utils.spatial import SpatialDataProcessor
from backend.common.utils.type_conversion_utils import get_dtype_and_schema_of_array
from backend.layers.processing.utils.spatial import SpatialDataProcessor


def convert_dictionary_to_cxg_group(cxg_container, metadata_dict, group_metadata_name="cxg_group_metadata", ctx=None):
@@ -53,10 +53,8 @@
for object_id, content in value.items():
object_filtered = spatial_processor.filter_spatial_data(content, object_id)
spatial_processor.create_deep_zoom_assets(cxg_container, content)
else:
continue

metadata_array.meta[key] = pickle.dumps(object_filtered)
metadata_array.meta[key] = pickle.dumps(object_filtered)


def convert_dataframe_to_cxg_array(cxg_container, dataframe_name, dataframe, index_column_name, ctx):
@@ -83,7 +81,7 @@
# Cast 'in_tissue' column values as boolean to make it categorical
# https://github.com/chanzuckerberg/single-cell-explorer/issues/841
if column_name == "in_tissue":
dtype, hints = get_dtype_and_schema_of_array(column_values.astype(bool))

(Codecov / codecov/patch: added line 84 in backend/layers/processing/utils/cxg_generation_utils.py was not covered by tests)
else:
dtype, hints = get_dtype_and_schema_of_array(column_values)
if "categories" in hints and len(hints.get("categories", [])) > 0.75 * dataframe.shape[0]:
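On the `in_tissue` branch above: the column arrives as 0/1 integers, and casting it to boolean before the dtype and schema hints are computed is what lets the CXG schema treat it as a two-category (categorical) column rather than a numeric one, per the linked single-cell-explorer issue. A standalone illustration of the idea, without the repo's `get_dtype_and_schema_of_array` helper (values are illustrative):

```python
import numpy as np
import pandas as pd

# Visium-style "in_tissue" flags arrive as 0/1 integers.
in_tissue = np.array([1, 0, 1, 1, 0])

# As integers the column reads as numeric...
print(pd.Series(in_tissue).dtype)  # int64

# ...but cast to bool it has exactly two categories, so downstream schema
# hints can mark it as categorical instead of continuous.
as_bool = in_tissue.astype(bool)
print(pd.Categorical(as_bool).categories.tolist())  # [False, True]
```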
backend/layers/processing/utils/spatial.py
@@ -1,7 +1,7 @@
import io
import logging
import os
import shutil
import tempfile

import numpy as np
import pyvips
@@ -51,7 +51,7 @@
upper = (height - new_dimension) / 2
right = (width + new_dimension) / 2
lower = (height + new_dimension) / 2
return (left, upper, right, lower)
return tuple(map(int, (left, upper, right, lower)))

def _prepare_image(self, content):
"""
@@ -81,30 +81,27 @@
Image.MAX_IMAGE_PIXELS = None # Disable the image size limit
try:
with Image.fromarray(image_array_uint8) as img:
# Convert image to RGB mode if it's not already
if img.mode != "RGB":
img = img.convert("RGB")
cropped_img = img.crop(self._calculate_aspect_ratio_crop(img.size)) # Crop the image
cropped_img.save(io.BytesIO(), format="JPEG", quality=90) # Save or manipulate as needed
cropped_img.save(io.BytesIO(), format="JPEG", quality=100) # Save or manipulate as needed
# Flip the image vertically due to explorer client rendering images upside down
flipped_img = cropped_img.transpose(Image.FLIP_TOP_BOTTOM)
return np.array(flipped_img)
except Exception as e:
logger.error(f"Error processing image: {e}")
raise Exception(f"Error processing image: {e}") from e
except Exception:
logger.exception("Error processing image")
raise

(Codecov / codecov/patch: added lines 89-91 in backend/layers/processing/utils/spatial.py were not covered by tests)

def _generate_deep_zoom_assets(self, image_array, folder_name):
def _generate_deep_zoom_assets(self, image_array, assets_folder):
"""
Generate deep zoom assets from the image array.

Args:
image_array (np.ndarray): The image array.
folder_name (str): The name of the folder to save the assets.
assets_folder (str): The temporary directory to save the assets.
"""
h, w, bands = image_array.shape
linear = image_array.reshape(w * h * bands)
image = pyvips.Image.new_from_memory(linear.data, w, h, bands, "uchar")
image.dzsave(folder_name + "spatial", suffix=".jpeg")
image.dzsave(os.path.join(assets_folder, "spatial"), suffix=".webp")

def _upload_assets(self, assets_folder):
"""
@@ -113,9 +110,8 @@
Args:
assets_folder (str): The folder containing the assets.
"""
s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{assets_folder}"
s3_uri = f"s3://{self.bucket_name}/{self.asset_directory}/{os.path.basename(assets_folder)}"
self.s3_provider.upload_directory(assets_folder, s3_uri)
shutil.rmtree(assets_folder)

def create_deep_zoom_assets(self, container_name, content):
"""
@@ -124,17 +120,19 @@
Args:
container_name (str): The name of the container.
content (dict): The content dictionary containing the image array.

"""
try:
assets_folder = container_name.replace(".cxg", "") + "/"
image_array = self._prepare_image(content)
processed_image = self._process_and_flip_image(image_array)
self._generate_deep_zoom_assets(processed_image, assets_folder)
self._upload_assets(assets_folder)
with tempfile.TemporaryDirectory() as temp_dir:
assets_folder = os.path.join(temp_dir, container_name.replace(".cxg", ""))
os.makedirs(assets_folder)

image_array = self._prepare_image(content)
processed_image = self._process_and_flip_image(image_array)
self._generate_deep_zoom_assets(processed_image, assets_folder)
self._upload_assets(assets_folder)
except Exception as e:
logging.error(f"Failed to create and upload deep zoom assets: {e}")
raise Exception(f"An error occurred while creating and uploading deep zoom assets: {e}") from e
logger.exception(f"Failed to create and upload deep zoom assets: {e}")
raise

def filter_spatial_data(self, content, library_id):
"""
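The last two hunks replace the hand-built assets folder (and its `shutil.rmtree` cleanup) with a `tempfile.TemporaryDirectory`: assets are staged under a per-dataset subfolder, the S3 prefix is derived from `os.path.basename` of that folder, and the whole staging area is removed automatically when the context manager exits. A rough sketch of the pattern with a stand-in uploader (the bucket, prefix, and `upload_directory` function here are hypothetical):

```python
import os
import tempfile


def upload_directory(local_dir: str, s3_uri: str) -> None:
    # Stand-in for an S3 provider's upload_directory(local_dir, s3_uri).
    print(f"would upload {local_dir} -> {s3_uri}")


def create_assets(container_name: str, bucket_name: str, asset_directory: str) -> None:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Stage assets under <temp_dir>/<dataset-id>; the basename becomes the S3 prefix.
        assets_folder = os.path.join(temp_dir, container_name.replace(".cxg", ""))
        os.makedirs(assets_folder)

        # ... generate the Deep Zoom pyramid into assets_folder here ...

        s3_uri = f"s3://{bucket_name}/{asset_directory}/{os.path.basename(assets_folder)}"
        upload_directory(assets_folder, s3_uri)
    # TemporaryDirectory removes temp_dir (and the staged assets) on exit.


create_assets("1234-abcd.cxg", "example-bucket", "spatial-deep-zoom")
```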
1 change: 0 additions & 1 deletion python_dependencies/backend/requirements.txt
@@ -28,7 +28,6 @@ PyMySQL==1.1.0
pydantic>=1.10.7, <2
python-jose[cryptography]>=3.1.0
python-json-logger
pyvips==2.2.2
requests>=2.22.0
rsa>=4.7 # not directly required, pinned by Snyk to avoid a vulnerability
scanpy>=1.9.8, <2
1 change: 1 addition & 0 deletions python_dependencies/common/requirements-dev.txt
@@ -4,5 +4,6 @@ click
coverage
parameterized
pytest
pytest-mock
pytest-subtests
ruff==0.4.3 # Must be kept in sync with ruff version in .pre-commit-config.yaml