diff --git a/pyproject.toml b/pyproject.toml index c6906ad..59bbb72 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,9 @@ dynamic = ["version"] dependencies = [ 'dask', - 'aind-data-transfer-models==0.8.4' + 'aind-data-schema-models==0.4.4', + 'pydantic>=2.0', + 'pydantic-settings>=2.0' ] [project.optional-dependencies] diff --git a/src/aind_data_upload_utils/check_directories_job.py b/src/aind_data_upload_utils/check_directories_job.py index 3e71f1d..57e9eee 100644 --- a/src/aind_data_upload_utils/check_directories_job.py +++ b/src/aind_data_upload_utils/check_directories_job.py @@ -4,11 +4,9 @@ """ import argparse -import json import logging import os import sys -from copy import deepcopy from glob import glob from pathlib import Path from time import time @@ -16,9 +14,8 @@ from aind_data_schema_models.modalities import Modality from aind_data_schema_models.platforms import Platform -from aind_data_transfer_models.core import ModalityConfigs from dask import bag as dask_bag -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel, Field from pydantic_settings import BaseSettings # Set log level from env var @@ -26,6 +23,14 @@ logging.basicConfig(level=LOG_LEVEL) +class ModalityConfigs(BaseModel): + """Modality type and source directories""" + + modality: Modality.ONE_OF + extra_configs: Optional[str] = Field(default=None) + source: str + + class DirectoriesToCheckConfigs(BaseModel): """Basic model needed from BasicUploadConfigs""" @@ -33,34 +38,6 @@ class DirectoriesToCheckConfigs(BaseModel): modalities: List[ModalityConfigs] = [] metadata_dir: Optional[Path] = None - @field_validator("modalities", mode="before") - def parse_json_str( - cls, mod_configs: Union[List[ModalityConfigs], List[dict]] - ) -> List[ModalityConfigs]: - """ - Method to ignore computed fields in serialized model, which might - raise validation errors. - Parameters - ---------- - mod_configs : Union[List[ModalityConfigs], List[dict]] - - Returns - ------- - List[ModalityConfigs] - """ - parsed_configs = [] - for mod_conf in mod_configs: - if isinstance(mod_conf, dict): - json_obj = deepcopy(mod_conf) - if "output_folder_name" in json_obj: - del json_obj["output_folder_name"] - parsed_configs.append( - ModalityConfigs.model_validate_json(json.dumps(json_obj)) - ) - else: - parsed_configs.append(mod_conf) - return parsed_configs - class JobSettings(BaseSettings, extra="allow"): """Job settings for CheckDirectoriesJob""" @@ -131,6 +108,9 @@ def _get_list_of_directories_to_check(self) -> List[Union[Path, str]]: for modality_config in dirs_to_check_configs.modalities: modality = modality_config.modality source_dir = modality_config.source + extra_configs = modality_config.extra_configs + if extra_configs is not None: + self._check_path(extra_configs) # We'll handle SmartSPIM differently and partition 3 levels deep if modality == Modality.SPIM and platform == Platform.SMARTSPIM: # Check top level files @@ -150,7 +130,7 @@ def _get_list_of_directories_to_check(self) -> List[Union[Path, str]]: else: self._check_path(Path(smart_spim_path).as_posix()) else: - directories_to_check.append(source_dir.as_posix()) + directories_to_check.append(Path(source_dir).as_posix()) return directories_to_check def _dask_task_to_process_directory_list( diff --git a/tests/test_check_directories_job.py b/tests/test_check_directories_job.py index fba4a65..f7e2b63 100644 --- a/tests/test_check_directories_job.py +++ b/tests/test_check_directories_job.py @@ -7,12 +7,12 @@ from aind_data_schema_models.modalities import Modality from aind_data_schema_models.platforms import Platform -from aind_data_transfer_models.core import ModalityConfigs from aind_data_upload_utils.check_directories_job import ( CheckDirectoriesJob, DirectoriesToCheckConfigs, JobSettings, + ModalityConfigs, ) RESOURCES_DIR = Path(os.path.dirname(os.path.realpath(__file__))) / "resources" @@ -286,6 +286,34 @@ def test_get_list_of_directories_to_check( self.expected_list_of_directories_to_check, list_of_directories ) + @patch( + "aind_data_upload_utils.check_directories_job.CheckDirectoriesJob." + "_check_path" + ) + def test_get_list_of_directories_to_check_with_extra_configs( + self, mock_check_path: MagicMock + ): + """Tests _get_list_of_directories_to_check with extra_configs""" + configs = DirectoriesToCheckConfigs( + platform=Platform.SMARTSPIM, + modalities=[ + ModalityConfigs( + source=( + RESOURCES_DIR / "example_ephys_data_set" + ).as_posix(), + modality=Modality.ECEPHYS, + extra_configs=(RESOURCES_DIR / "extra_conf").as_posix(), + ), + ], + metadata_dir=(RESOURCES_DIR / "metadata_dir").as_posix(), + ) + example_job = CheckDirectoriesJob( + job_settings=JobSettings(directories_to_check_configs=configs) + ) + list_of_directories = example_job._get_list_of_directories_to_check() + self.assertEqual(1, len(list_of_directories)) + self.assertEqual(2, len(mock_check_path.mock_calls)) + @patch( "aind_data_upload_utils.check_directories_job.CheckDirectoriesJob." "_check_path"