Skip to content
This repository has been archived by the owner on Nov 23, 2023. It is now read-only.

Commit

Permalink
refactor: Auto-generate AWS resource names prep (#472)
Browse files Browse the repository at this point in the history
* test: Simplify imports

* refactor: Pull logger into STAC metadata utils

Simplifies some tests and production code.

Co-authored-by: kodiakhq[bot] <49736102+kodiakhq[bot]@users.noreply.github.com>
  • Loading branch information
l0b0 and kodiakhq[bot] authored Mar 29, 2021
1 parent 951ece4 commit 90c9709
Show file tree
Hide file tree
Showing 7 changed files with 205 additions and 213 deletions.
2 changes: 1 addition & 1 deletion backend/check_stac_metadata/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def main() -> int:

hash_key = f"DATASET#{arguments.dataset_id}#VERSION#{arguments.version_id}"
validation_result_factory = ValidationResultFactory(hash_key)
validator = STACDatasetValidator(s3_url_reader, validation_result_factory, LOGGER)
validator = STACDatasetValidator(s3_url_reader, validation_result_factory)

try:
validator.run(arguments.metadata_url)
Expand Down
7 changes: 4 additions & 3 deletions backend/check_stac_metadata/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,13 @@
from jsonschema._utils import URIDict # type: ignore[import]

from ..check import Check
from ..log import set_up_logging
from ..processing_assets_model import ProcessingAssetType, ProcessingAssetsModel
from ..types import JsonObject
from ..validation_results_model import ValidationResult, ValidationResultFactory

LOGGER = set_up_logging(__name__)


class STACSchemaValidator(Draft7Validator):
def __init__(self) -> None:
Expand Down Expand Up @@ -60,11 +63,9 @@ def __init__(
self,
url_reader: Callable[[str], StreamingBody],
validation_result_factory: ValidationResultFactory,
logger: Logger,
):
self.url_reader = url_reader
self.validation_result_factory = validation_result_factory
self.logger = logger

self.traversed_urls: List[str] = []
self.dataset_assets: List[Dict[str, str]] = []
Expand Down Expand Up @@ -108,7 +109,7 @@ def validate(self, url: str) -> None: # pylint: disable=too-complex
self.validate_directory(asset_url, url)

asset_dict = {"url": asset_url, "multihash": asset["checksum:multihash"]}
self.logger.debug(dumps({"asset": asset_dict}))
LOGGER.debug(dumps({"asset": asset_dict}))
self.dataset_assets.append(asset_dict)

for link_object in object_json["links"]:
Expand Down
180 changes: 82 additions & 98 deletions tests/test_check_stac_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import logging
import sys
from copy import deepcopy
from datetime import timedelta
Expand Down Expand Up @@ -44,9 +43,7 @@


@patch("backend.check_stac_metadata.task.STACDatasetValidator.validate")
def should_return_non_zero_exit_code_on_validation_failure(
validate_url_mock: MagicMock,
) -> None:
def should_return_non_zero_exit_code_on_validation_failure(validate_url_mock: MagicMock) -> None:
validate_url_mock.side_effect = ValidationError(any_error_message())
sys.argv = [
any_program_name(),
Expand Down Expand Up @@ -515,115 +512,102 @@ def should_detect_invalid_datetime() -> None:
STACSchemaValidator().validate(stac_object)


def should_validate_metadata_files_recursively() -> None:
    """Validator should follow "child" links and validate linked metadata files too.

    Reconstructed from a garbled diff paste that interleaved the deleted
    class-based test (under ``TestsWithLogger``) with the added module-level
    function; this is the post-commit (added) version.
    """
    base_url = any_s3_url()
    parent_url = f"{base_url}/{any_safe_filename()}"
    child_url = f"{base_url}/{any_safe_filename()}"

    # Parent metadata object links to the child, so validation should recurse.
    stac_object = deepcopy(MINIMAL_VALID_STAC_OBJECT)
    stac_object["links"].append({"href": child_url, "rel": "child"})
    url_reader = MockJSONURLReader(
        {parent_url: stac_object, child_url: deepcopy(MINIMAL_VALID_STAC_OBJECT)}
    )

    STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(parent_url)

    # Both the parent and the linked child must have been fetched, in order.
    assert url_reader.mock_calls == [call(parent_url), call(child_url)]
def should_only_validate_each_file_once() -> None:
    """Validator should deduplicate URLs in a cyclic root→child→leaf link graph.

    Reconstructed from a garbled diff paste that interleaved the deleted
    class-based test with the added module-level function; this is the
    post-commit (added) version.
    """
    base_url = any_s3_url()
    root_url = f"{base_url}/{any_safe_filename()}"
    child_url = f"{base_url}/{any_safe_filename()}"
    leaf_url = f"{base_url}/{any_safe_filename()}"

    # Every object links back to the root and to itself, creating cycles that
    # would loop forever if traversed URLs were not tracked.
    root_stac_object = deepcopy(MINIMAL_VALID_STAC_OBJECT)
    root_stac_object["links"] = [
        {"href": child_url, "rel": "child"},
        {"href": root_url, "rel": "root"},
        {"href": root_url, "rel": "self"},
    ]
    child_stac_object = deepcopy(MINIMAL_VALID_STAC_OBJECT)
    child_stac_object["links"] = [
        {"href": leaf_url, "rel": "child"},
        {"href": root_url, "rel": "root"},
        {"href": child_url, "rel": "self"},
    ]
    leaf_stac_object = deepcopy(MINIMAL_VALID_STAC_OBJECT)
    leaf_stac_object["links"] = [
        {"href": root_url, "rel": "root"},
        {"href": leaf_url, "rel": "self"},
    ]
    # call_limit=3 makes the reader itself fail the test on any extra fetch.
    url_reader = MockJSONURLReader(
        {
            root_url: root_stac_object,
            child_url: child_stac_object,
            leaf_url: leaf_stac_object,
        },
        call_limit=3,
    )

    STACDatasetValidator(url_reader, MockValidationResultFactory()).validate(root_url)

    assert url_reader.mock_calls == [call(root_url), call(child_url), call(leaf_url)]

def should_raise_exception_if_non_s3_url_is_passed() -> None:
    """`run` should reject any URL that is not an s3:// URL.

    Reconstructed from a garbled diff paste that interleaved the deleted
    class-based test with the added module-level function; this is the
    post-commit (added) version.
    """
    https_url = any_https_url()
    url_reader = MockJSONURLReader({})

    with raises(AssertionError, match=f"URL doesn't start with “s3://”: “{https_url}”"):
        STACDatasetValidator(url_reader, MockValidationResultFactory()).run(https_url)

def should_return_assets_from_validated_metadata_files(subtests: SubTests) -> None:
    """Validator should collect asset URLs/multihashes and metadata URLs it visits.

    Reconstructed from a garbled diff paste that interleaved the deleted
    class-based test with the added module-level function; this is the
    post-commit (added) version.
    """
    base_url = any_s3_url()
    metadata_url = f"{base_url}/{any_safe_filename()}"
    stac_object = deepcopy(MINIMAL_VALID_STAC_OBJECT)
    first_asset_url = f"{base_url}/{any_safe_filename()}"
    first_asset_multihash = any_hex_multihash()
    second_asset_url = f"{base_url}/{any_safe_filename()}"
    second_asset_multihash = any_hex_multihash()
    stac_object["assets"] = {
        any_asset_name(): {
            "href": first_asset_url,
            "checksum:multihash": first_asset_multihash,
        },
        any_asset_name(): {
            "href": second_asset_url,
            "checksum:multihash": second_asset_multihash,
        },
    }
    expected_assets = [
        {"multihash": first_asset_multihash, "url": first_asset_url},
        {"multihash": second_asset_multihash, "url": second_asset_url},
    ]
    expected_metadata = [
        {"url": metadata_url},
    ]
    url_reader = MockJSONURLReader({metadata_url: stac_object})

    validator = STACDatasetValidator(url_reader, MockValidationResultFactory())

    validator.validate(metadata_url)

    # Sort both sides: asset collection order is not part of the contract.
    with subtests.test():
        assert _sort_assets(validator.dataset_assets) == _sort_assets(expected_assets)
    with subtests.test():
        assert validator.dataset_metadata == expected_metadata


def _sort_assets(assets: List[Dict[str, str]]) -> List[Dict[str, str]]:
Expand Down
109 changes: 0 additions & 109 deletions tests/test_check_stac_metadata_logging.py

This file was deleted.

Loading

0 comments on commit 90c9709

Please sign in to comment.