Skip to content

Commit

Permalink
feat: dataset metadata endpoint (#64)
Browse files Browse the repository at this point in the history
  • Loading branch information
ebezzi authored Sep 23, 2021
1 parent 0624e07 commit 6536f05
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 47 deletions.
18 changes: 16 additions & 2 deletions server/app/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,13 @@

import server.common.rest as common_rest
from server.common.utils.data_locator import DataLocator
from server.common.errors import DatasetAccessError, RequestException, DatasetNotFoundError, TombstoneError
from server.common.errors import (
DatasetAccessError,
RequestException,
DatasetNotFoundError,
TombstoneError,
DatasetMetadataError
)
from server.common.health import health_check
from server.common.utils.utils import path_join, Float32JSONEncoder
from server.data_common.dataset_metadata import get_dataset_metadata_for_explorer_location
Expand Down Expand Up @@ -134,7 +140,7 @@ def wrapped_function(self, dataset=None):
with get_data_adaptor(self.url_dataroot, dataset) as data_adaptor:
data_adaptor.set_uri_path(f"{self.url_dataroot}/{dataset}")
return func(self, data_adaptor)
except (DatasetAccessError, DatasetNotFoundError) as e:
except (DatasetAccessError, DatasetNotFoundError, DatasetMetadataError) as e:
return common_rest.abort_and_log(
e.status_code, f"Invalid dataset {dataset}: {e.message}", loglevel=logging.INFO, include_exc_info=True
)
Expand Down Expand Up @@ -213,6 +219,13 @@ def get(self, data_adaptor):
return common_rest.schema_get(data_adaptor)


class DatasetMetadataAPI(DatasetResource):
    """
    REST resource exposing dataset and collection metadata for a dataset.

    Only GET is implemented; the work is delegated to
    ``common_rest.dataset_metadata_get``.
    """

    # NOTE(review): the response is marked public with a one-week max age, while the
    # underlying lookup can target collections whose visibility is "PRIVATE" --
    # confirm that publicly cacheable responses are intended for those.
    @cache_control(public=True, max_age=ONE_WEEK)
    @rest_get_data_adaptor
    def get(self, data_adaptor):
        """Return the metadata response for the dataset behind ``data_adaptor``."""
        return common_rest.dataset_metadata_get(current_app.app_config, data_adaptor)


class ConfigAPI(DatasetResource):
@cache_control(public=True, max_age=ONE_WEEK)
@rest_get_data_adaptor
Expand Down Expand Up @@ -310,6 +323,7 @@ def add_resource(resource, url):

# Initialization routes
add_resource(SchemaAPI, "/schema")
add_resource(DatasetMetadataAPI, "/dataset-metadata")
add_resource(ConfigAPI, "/config")
# Data routes
add_resource(AnnotationsObsAPI, "/annotations/obs")
Expand Down
12 changes: 0 additions & 12 deletions server/common/config/client_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from server import display_version as cellxgene_display_version
from server.data_common.dataset_metadata import get_dataset_metadata_for_explorer_location


def get_client_config(app_config, data_adaptor, current_app):
Expand Down Expand Up @@ -79,15 +78,4 @@ def get_client_config(app_config, data_adaptor, current_app):
"column_request_max": server_config.limits__column_request_max,
"diffexp_cellcount_max": server_config.limits__diffexp_cellcount_max,
}
dataset_metadata_manager = current_app.dataset_metadata_cache_manager
with dataset_metadata_manager.get(
cache_key=data_adaptor.uri_path,
create_data_function=get_dataset_metadata_for_explorer_location,
create_data_args={"app_config": app_config},
) as dataset_identifiers:
config["dataset_identification"] = {
"dataset_id": dataset_identifiers["dataset_id"],
"collection_id": dataset_identifiers["collection_id"],
"collection_visibility": dataset_identifiers["collection_visibility"],
}
return client_config
3 changes: 3 additions & 0 deletions server/common/config/server_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,3 +356,6 @@ def get_web_base_url(self):
if self.app__web_base_url.endswith("/"):
return self.app__web_base_url[:-1]
return self.app__web_base_url

def get_data_locator_api_base_url(self):
    """
    Return the configured data locator API base (``data_locator__api_base``).

    The value is returned exactly as configured; no normalization is applied here.
    """
    # NOTE(review): the configured value may lack a URL scheme -- confirm how callers
    # build request URLs from it.
    return self.data_locator__api_base
1 change: 1 addition & 0 deletions server/common/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ def define_tombstone_exception(name, doc, default_status_code=HTTPStatus.FOUND):
define_request_exception(
"DatasetNotFoundError", "Raised when the dataset location cant be found based on the explorer url"
)
define_request_exception("DatasetMetadataError", "Raised when dataset metadata cannot be retrieved")
define_request_exception("DisabledFeatureError", "Raised when an attempt to use a disabled feature occurs")
define_request_exception("AnnotationsError", "Raised when an attempt to use the annotations feature fails")
define_request_exception(
Expand Down
10 changes: 10 additions & 0 deletions server/common/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
from server.common.genesets import summarizeQueryHash
from server.common.fbs.matrix import decode_matrix_fbs

from server.data_common import dataset_metadata


def abort_and_log(code, logmsg, loglevel=logging.DEBUG, include_exc_info=False):
"""
Expand Down Expand Up @@ -120,6 +122,14 @@ def schema_get(data_adaptor):
return make_response(jsonify({"schema": schema}), HTTPStatus.OK)


def dataset_metadata_get(app_config, data_adaptor):
    """
    Build the HTTP response for the dataset metadata endpoint.

    Looks up the dataset/collection metadata for ``data_adaptor.uri_path`` and
    wraps it as ``{"metadata": ...}`` in a 200 JSON response. When the lookup
    yields ``None`` the request is aborted with 404 NOT FOUND.
    """
    found = dataset_metadata.get_dataset_and_collection_metadata(
        data_adaptor.uri_path, app_config, current_app
    )
    # Guard clause: nothing to serve for this dataset.
    if found is None:
        return abort(HTTPStatus.NOT_FOUND)

    payload = jsonify({"metadata": found})
    return make_response(payload, HTTPStatus.OK)


def config_get(app_config, data_adaptor):
    """
    Build the HTTP response for the client configuration endpoint.

    The client configuration is produced by ``get_client_config`` and returned
    as a 200 JSON response.
    """
    return make_response(
        jsonify(get_client_config(app_config, data_adaptor, current_app)),
        HTTPStatus.OK,
    )
Expand Down
42 changes: 41 additions & 1 deletion server/data_common/dataset_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from flask import current_app

from server.common.utils.utils import path_join
from server.common.errors import DatasetNotFoundError, DatasetAccessError, TombstoneError
from server.common.errors import DatasetNotFoundError, DatasetAccessError, DatasetMetadataError, TombstoneError
from server.common.config.app_config import AppConfig
from server.common.config.server_config import ServerConfig

Expand Down Expand Up @@ -99,3 +99,43 @@ def get_dataset_metadata_for_explorer_location(dataset_explorer_location: str, a
raise DatasetNotFoundError(f"Dataset location not found for {dataset_explorer_location}")

return dataset_metadata


def get_dataset_and_collection_metadata(dataset_explorer_location: str, app_config: AppConfig, current_app):
    """
    Build the dataset/collection metadata for the dataset at an explorer location.

    The dataset identifiers (dataset id, collection id, collection visibility) are
    read through ``current_app.dataset_metadata_cache_manager``; the owning collection
    is then fetched from the data locator API and flattened into a single dict.

    :param dataset_explorer_location: explorer location of the dataset; used as the cache key
    :param app_config: application config providing the data locator API base and the web base url
    :param current_app: object exposing ``dataset_metadata_cache_manager``
    :return: dict with the keys ``dataset_name``, ``collection_url``, ``collection_name``,
             ``collection_description``, ``collection_contact_email``,
             ``collection_contact_name``, ``collection_links`` and ``collection_datasets``;
             ``None`` when the dataset has no collection id
    :raises DatasetMetadataError: when anything goes wrong while reading the identifiers,
             calling the API, or reading the response (the original error is chained as
             the cause)
    """
    data_locator_base_url = app_config.server_config.get_data_locator_api_base_url()
    web_base_url = app_config.server_config.get_web_base_url()

    try:
        dataset_metadata_manager = current_app.dataset_metadata_cache_manager
        with dataset_metadata_manager.get(
            cache_key=dataset_explorer_location,
            create_data_function=get_dataset_metadata_for_explorer_location,
            create_data_args={"app_config": app_config},
        ) as base_metadata:

            collection_id = base_metadata.get("collection_id")
            if collection_id is None:
                # No collection is associated with this dataset: nothing to report.
                return None

            dataset_id = base_metadata["dataset_id"]
            collection_visibility = base_metadata["collection_visibility"]

            # Private collections are addressed through a "/private" suffix, both on
            # the API route and on the web url returned to the caller.
            suffix = "/private" if collection_visibility == "PRIVATE" else ""

            # requests refuses URLs without a scheme. The API base may be configured
            # as a bare host/path, so default to http:// in that case; a base that
            # already carries a scheme is used untouched.
            if "://" not in data_locator_base_url:
                data_locator_base_url = f"http://{data_locator_base_url}"

            res = requests.get(f"{data_locator_base_url}/collections/{collection_id}{suffix}").json()

            # First dataset of the collection with a matching id. When none matches,
            # next() raises StopIteration, which is translated below like any other
            # failure.
            dataset_name = next(dataset["name"] for dataset in res["datasets"] if dataset["id"] == dataset_id)

            metadata = {
                "dataset_name": dataset_name,
                "collection_url": f"{web_base_url}/collections/{collection_id}{suffix}",
                "collection_name": res["name"],
                "collection_description": res["description"],
                "collection_contact_email": res["contact_email"],
                "collection_contact_name": res["contact_name"],
                "collection_links": res["links"],
                "collection_datasets": res["datasets"],
            }

            return metadata

    except Exception as err:
        # Deliberately broad: every failure mode is reported to the caller as one
        # error type. Chain the original exception so the root cause stays visible.
        raise DatasetMetadataError("Error retrieving dataset metadata") from err
144 changes: 112 additions & 32 deletions server/tests/unit/common/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ def test_config(self):
result_data = json.loads(result.data)
self.assertIn("library_versions", result_data["config"])
self.assertEqual(result_data["config"]["displayNames"]["dataset"], "pbmc3k")
self.assertEqual(
result_data["config"]["dataset_identification"],
{"collection_id": None, "collection_visibility": None, "dataset_id": None},
)

def test_get_layout_fbs(self):
endpoint = "layout/obs"
Expand Down Expand Up @@ -622,6 +618,7 @@ def test_metadata_api_called_for_new_dataset(self, mock_get):
"http://api.cellxgene.staging.single-cell.czi.technology/dp/v1/datasets/meta?url=https://cellxgene.staging.single-cell.czi.technology.com/e/pbmc3k_v0.cxg/", # noqa E501
)


@patch("server.data_common.dataset_metadata.requests.get")
def test_data_locator_defaults_to_name_based_lookup_if_metadata_api_throws_error(self, mock_get):
self.TEST_DATASET_URL_BASE = "/e/pbmc3k.cxg"
Expand Down Expand Up @@ -718,33 +715,6 @@ def test_dataset_does_not_exist(self):
response = self.client.get(url)
self.assertEqual(response.status_code, 404)

@patch("server.data_common.dataset_metadata.requests.get")
def test_config_with_portal_metadata(self, mock_get):
response_body = json.dumps(
{
"collection_id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
"collection_visibility": "PUBLIC",
"dataset_id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
"s3_uri": f"{FIXTURES_ROOT}/pbmc3k.cxg",
"tombstoned": False,
}
)
mock_get.return_value = MockResponse(body=response_body, status_code=200)
endpoint = "config"
self.TEST_DATASET_URL_BASE = "/e/pbmc3k_v1.cxg"
url = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/{endpoint}"
result = self.client.get(url)
self.assertEqual(result.status_code, HTTPStatus.OK)
self.assertEqual(result.headers["Content-Type"], "application/json")
result_data = json.loads(result.data)
self.assertEqual(
result_data["config"]["dataset_identification"],
{
"collection_id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
"collection_visibility": "PUBLIC",
"dataset_id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
},
)

@patch("server.data_common.dataset_metadata.requests.get")
def test_tombstoned_datasets_redirect_to_data_portal(self, mock_get):
Expand All @@ -764,10 +734,120 @@ def test_tombstoned_datasets_redirect_to_data_portal(self, mock_get):
self.assertEqual(result.headers['Location'], "https://cellxgene.staging.single-cell.czi.technology.com/collections/4f098ff4-4a12-446b-a841-91ba3d8e3fa6?tombstoned_dataset_id=2fa37b10-ab4d-49c9-97a8-b4b3d80bf939") # noqa E501



class TestDatasetMetadata(BaseTest):
    """Tests for the ``dataset-metadata`` endpoint with the data portal calls mocked out."""

    @classmethod
    def setUpClass(cls):
        # The web base url is configured with a trailing slash, while the collection
        # url expected by the tests below has a single slash before "collections".
        cls.data_locator_api_base = "api.cellxgene.staging.single-cell.czi.technology/dp/v1"
        cls.app__web_base_url = "https://cellxgene.staging.single-cell.czi.technology/"
        cls.config = AppConfig()
        cls.config.update_server_config(
            data_locator__api_base=cls.data_locator_api_base,
            app__web_base_url=cls.app__web_base_url,
            multi_dataset__dataroot={"e": {"base_url": "e", "dataroot": FIXTURES_ROOT}},
            app__flask_secret_key="testing",
            app__debug=True,
            data_locator__s3__region_name="us-east-1",
        )
        # Value returned by the mocked request_dataset_metadata_from_data_portal
        # in the tests below.
        cls.meta_response_body = {
            "collection_id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
            "collection_visibility": "PUBLIC",
            "dataset_id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
            "s3_uri": f"{FIXTURES_ROOT}/pbmc3k.cxg",
            "tombstoned": False,
        }
        super().setUpClass(cls.config)

        cls.app.testing = True
        cls.client = cls.app.test_client()

    # Patch decorators are applied bottom-up, so ``mock_get`` (requests.get) is the
    # first mock argument and ``mock_dp`` (the data portal lookup) the second.
    @patch("server.data_common.dataset_metadata.request_dataset_metadata_from_data_portal")
    @patch("server.data_common.dataset_metadata.requests.get")
    def test_dataset_metadata_api_called(self, mock_get, mock_dp):
        # Happy path: the endpoint answers 200 with JSON, performs exactly one
        # requests.get call, and maps the collection response onto the metadata fields.
        self.TEST_DATASET_URL_BASE = "/e/pbmc3k_v0.cxg"
        self.TEST_URL_BASE = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/"

        # Collection response served by the mocked requests.get.
        response_body = {
            "contact_email": "test_email",
            "contact_name": "test_user",
            "datasets": [
                {
                    "collection_visibility": "PUBLIC",
                    "id": "2fa37b10-ab4d-49c9-97a8-b4b3d80bf939",
                    "name": "Test Dataset",
                },
            ],
            "description": "test_description",
            "id": "4f098ff4-4a12-446b-a841-91ba3d8e3fa6",
            "links": [
                "http://test.link",
            ],
            "name": "Test Collection",
            "visibility": "PUBLIC",
        }

        mock_get.return_value = MockResponse(body=json.dumps(response_body), status_code=200)
        mock_dp.return_value = self.meta_response_body

        endpoint = "dataset-metadata"
        url = f"{self.TEST_URL_BASE}{endpoint}"
        result = self.client.get(url)

        self.assertEqual(result.status_code, HTTPStatus.OK)
        self.assertEqual(result.headers["Content-Type"], "application/json")

        self.assertEqual(mock_get.call_count, 1)

        response_obj = json.loads(result.data)["metadata"]

        self.assertEqual(response_obj["dataset_name"], "Test Dataset")

        expected_url = f"https://cellxgene.staging.single-cell.czi.technology/collections/{response_body['id']}"
        self.assertEqual(response_obj["collection_url"], expected_url)
        self.assertEqual(response_obj["collection_name"], response_body["name"])
        self.assertEqual(response_obj["collection_contact_email"], response_body["contact_email"])
        self.assertEqual(response_obj["collection_contact_name"], response_body["contact_name"])
        self.assertEqual(response_obj["collection_description"], response_body["description"])
        self.assertEqual(response_obj["collection_links"], response_body["links"])
        self.assertEqual(response_obj["collection_datasets"], response_body["datasets"])

    @patch("server.data_common.dataset_metadata.request_dataset_metadata_from_data_portal")
    def test_dataset_metadata_api_fails_gracefully_on_dataset_not_found(self, mock_dp):
        # Force a new dataset name, otherwise a cache entry will be found and the mock will not be applied
        self.TEST_DATASET_URL_BASE = "/e/pbmc3k_v0_2.cxg"
        self.TEST_URL_BASE = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/"

        # Simulate the data portal lookup yielding no metadata: the mocked helper returns None
        mock_dp.return_value = None

        endpoint = "dataset-metadata"
        url = f"{self.TEST_URL_BASE}{endpoint}"
        result = self.client.get(url)

        self.assertEqual(result.status_code, HTTPStatus.NOT_FOUND)

    @patch("server.data_common.dataset_metadata.request_dataset_metadata_from_data_portal")
    @patch("server.data_common.dataset_metadata.requests.get")
    def test_dataset_metadata_api_fails_gracefully_on_connection_failure(self, mock_get, mock_dp):
        # A failing requests.get must surface as 400 BAD REQUEST, not as a server error.
        # NOTE(review): this reuses the dataset name of test_dataset_metadata_api_called;
        # given the cache remark in the not-found test, ``mock_dp`` may not be consulted
        # here if an entry is already cached -- confirm the test does not depend on order.
        self.TEST_DATASET_URL_BASE = "/e/pbmc3k_v0.cxg"
        self.TEST_URL_BASE = f"{self.TEST_DATASET_URL_BASE}/api/v0.2/"

        mock_dp.return_value = self.meta_response_body
        mock_get.side_effect = Exception("Cannot connect to the data portal")

        endpoint = "dataset-metadata"
        url = f"{self.TEST_URL_BASE}{endpoint}"
        result = self.client.get(url)

        self.assertEqual(result.status_code, HTTPStatus.BAD_REQUEST)


class MockResponse:
    """
    Minimal stand-in for an HTTP response object, for use with a patched ``requests.get``.

    :param body: response body as a JSON-encoded string (or bytes)
    :param status_code: HTTP status code to report
    """

    def __init__(self, body, status_code):
        self.content = body
        self.status_code = status_code

    def json(self):
        """Return the body decoded from JSON."""
        # Decode from ``content``, the only place the body is stored. The former
        # ``self.json_data`` attribute is never set, so returning it raised
        # AttributeError and left this decode unreachable.
        return json.loads(self.content)

0 comments on commit 6536f05

Please sign in to comment.