From 67c055018ef5fd9c9254488ac2707142c81c8b9b Mon Sep 17 00:00:00 2001 From: Benjamin Schmidt Date: Tue, 26 Nov 2024 15:21:39 +0100 Subject: [PATCH] refactor(path_handling): move to `pathlib.Path` Transform all path variables to the more modern `pathlib.Path` object. This helps with cross-platform compatibility and makes the code more readable. Some type annotations were also added. Meant to be applied in conjunction with a move to Pydantic for configuration validation. NOTE: If cherry-picked, needs to be revised carefully as the commit is meant to be applied with a move to Pydantic. Fixes #81 --- COSIPY.py | 57 +++++++++++-------- convert_config.py | 26 ++++++--- cosipy/config.py | 20 +++++-- cosipy/constants.py | 2 + cosipy/cpkernel/io.py | 25 ++++---- .../field_plots/plot_cosipy_fields.py | 3 +- .../profile_plots/plot_profiles.py | 8 ++- cosipy/utilities/aws2cosipy/aws2cosipy.py | 39 ++++++++----- .../createStatic/create_static_file.py | 29 ++++------ cosipy/utilities/setup_cosipy/setup_cosipy.py | 45 ++++++++------- cosipy/utilities/wrf2cosipy/wrf2cosipy.py | 7 ++- 11 files changed, 152 insertions(+), 109 deletions(-) diff --git a/COSIPY.py b/COSIPY.py index bf02ce8..8a0fc39 100644 --- a/COSIPY.py +++ b/COSIPY.py @@ -22,11 +22,14 @@ Correspondence: tobias.sauter@fau.de """ -import cProfile + +from __future__ import annotations + import logging -import os from datetime import datetime from itertools import product +from pathlib import Path +from typing import Union import numpy as np import pandas as pd @@ -116,8 +119,8 @@ def main(): #encoding[var] = dict(zlib=True, complevel=compression_level, dtype=dtype, scale_factor=scale_factor, add_offset=add_offset, _FillValue=FillValue) encoding[var] = dict(zlib=True, complevel=Config.compression_level) output_netcdf = set_output_netcdf_path() - output_path = create_data_directory(path='output') - IO.get_result().to_netcdf(os.path.join(output_path,output_netcdf), encoding=encoding, mode='w') + output_path = 
create_data_directory(name="output") + IO.get_result().to_netcdf(output_path / output_netcdf, encoding=encoding, mode="w") encoding = dict() for var in IO.get_restart().data_vars: @@ -129,8 +132,8 @@ def main(): #encoding[var] = dict(zlib=True, complevel=compression_level, dtype=dtype, scale_factor=scale_factor, add_offset=add_offset, _FillValue=FillValue) encoding[var] = dict(zlib=True, complevel=Config.compression_level) - restart_path = create_data_directory(path='restart') - IO.get_restart().to_netcdf(os.path.join(restart_path,f'restart_{timestamp}.nc'), encoding=encoding) + restart_path = create_data_directory(name='restart') + IO.get_restart().to_netcdf(restart_path / f'restart_{timestamp}.nc', encoding=encoding) #----------------------------------------------- # Stop time measurement @@ -326,19 +329,29 @@ def run_cosipy(cluster, IO, DATA, RESULT, RESTART, futures): if Config.stake_evaluation: # Save the statistics and the mass balance simulations at the stakes to files - output_path = create_data_directory(path='output') - df_stat.to_csv(os.path.join(output_path,'stake_statistics.csv'),sep='\t', float_format='%.2f') - df_val.to_csv(os.path.join(output_path,'stake_simulations.csv'),sep='\t', float_format='%.2f') - - -def create_data_directory(path: str) -> str: + output_path = create_data_directory(name="output") + df_stat.to_csv( + output_path / "stake_statistics.csv", + sep="\t", + float_format="%.2f", + ) + df_val.to_csv( + output_path / "stake_simulations.csv", + sep="\t", + float_format="%.2f", + ) + + +def create_data_directory(name: Union[Path, str]) -> Path: """Create a directory in the configured data folder. Returns: Path to the created directory. 
""" - dir_path = os.path.join(Config.data_path, path) - os.makedirs(dir_path, exist_ok=True) + if isinstance(name, Path): + name = name.name + dir_path = Path(Config.data_path) / str(name) + dir_path.mkdir(parents=True, exist_ok=True) return dir_path @@ -355,7 +368,7 @@ def get_timestamp_label(timestamp: str) -> str: return (timestamp[0:10]).replace("-", "") -def set_output_netcdf_path() -> str: +def set_output_netcdf_path() -> Path: """Set the file path for the output netCDF file. Returns: @@ -363,17 +376,15 @@ def set_output_netcdf_path() -> str: """ time_start = get_timestamp_label(timestamp=Config.time_start) time_end = get_timestamp_label(timestamp=Config.time_end) - output_path = f"{Config.output_prefix}_{time_start}-{time_end}.nc" - - return output_path + return Path(f"{Config.output_prefix}_{time_start}-{time_end}.nc") def start_logging(): - """Start the python logging""" - - if os.path.exists('./cosipy.yaml'): - with open('./cosipy.yaml', 'rt') as f: - config = yaml.load(f.read(),Loader=yaml.SafeLoader) + """Start the python logging.""" + log_config_path = Path("./cosipy.yaml") + if log_config_path.exists(): + with log_config_path.open() as f: + config = yaml.load(f.read(), Loader=yaml.SafeLoader) logging.config.dictConfig(config) else: logging.basicConfig(level=logging.INFO) diff --git a/convert_config.py b/convert_config.py index 984e21d..b4eedf5 100644 --- a/convert_config.py +++ b/convert_config.py @@ -19,6 +19,7 @@ import configparser import inspect import sys +from pathlib import Path import config import constants @@ -284,12 +285,12 @@ def get_utilities_params() -> dict: return params -def write_toml(parameters: dict, filename: str): +def write_toml(parameters: dict, filename: Path | str): """Write parameters to .toml file.""" - - with open(f"{filename}.toml", "w") as f: + if isinstance(filename, str): + filename = Path(filename) + with filename.with_suffix(".toml").open("w") as f: toml.dump(parameters, f) - print(f"Generated {filename}.toml") @@ 
-308,15 +309,22 @@ def main(): print_warning() - script_path = inspect.getfile(inspect.currentframe()) - toml_suffix = script_path.split("/")[-2] # avoid overwrite + frame = inspect.currentframe() + if frame is None: + msg = "Could not find the current frame. This is likely due to a bug in the code." + raise RuntimeError(msg) + try: + script_path = Path(inspect.getfile(frame)).resolve() + finally: + del frame + _ = script_path.parent.name # HACK: avoid overwrite (Why is this here?) config_params = get_config_params() - write_toml(parameters=config_params, filename=f"config") + write_toml(parameters=config_params, filename="config") constants_params = get_constants_params() - write_toml(parameters=constants_params, filename=f"constants") + write_toml(parameters=constants_params, filename="constants") utilities_params = get_utilities_params() - write_toml(parameters=utilities_params, filename=f"utilities_config") + write_toml(parameters=utilities_params, filename="utilities_config") if __name__ == "__main__": diff --git a/cosipy/config.py b/cosipy/config.py index 8166291..d8944d2 100644 --- a/cosipy/config.py +++ b/cosipy/config.py @@ -5,12 +5,20 @@ import argparse import sys from importlib.metadata import entry_points +from pathlib import Path if sys.version_info >= (3, 11): import tomllib else: import tomli as tomllib # backwards compatibility +# FIXME: Will this work for all occasions or do we need to use frame? 
+cwd = Path.cwd() +default_path = cwd / "config.toml" +default_slurm_path = cwd / "slurm_config.toml" +default_constants_path = cwd / "constants.toml" +default_utilities_path = cwd / "utilities_config.toml" + def set_parser() -> argparse.ArgumentParser: """Set argument parser for COSIPY.""" @@ -23,9 +31,9 @@ def set_parser() -> argparse.ArgumentParser: parser.add_argument( "-c", "--config", - default="./config.toml", + default=default_path, dest="config_path", - type=str, + type=Path, metavar="", required=False, help="relative path to configuration file", @@ -34,9 +42,9 @@ def set_parser() -> argparse.ArgumentParser: parser.add_argument( "-x", "--constants", - default="./constants.toml", + default=default_constants_path, dest="constants_path", - type=str, + type=Path, metavar="", required=False, help="relative path to constants file", @@ -45,9 +53,9 @@ def set_parser() -> argparse.ArgumentParser: parser.add_argument( "-s", "--slurm", - default="./slurm_config.toml", + default=default_slurm_path, dest="slurm_path", - type=str, + type=Path, metavar="", required=False, help="relative path to Slurm configuration file", diff --git a/cosipy/constants.py b/cosipy/constants.py index 7c624ef..223c1eb 100644 --- a/cosipy/constants.py +++ b/cosipy/constants.py @@ -1,4 +1,6 @@ import sys +from pathlib import Path +from typing import Literal from cosipy.config import Config, TomlLoader, get_user_arguments diff --git a/cosipy/cpkernel/io.py b/cosipy/cpkernel/io.py index 754f0c0..ea9343f 100644 --- a/cosipy/cpkernel/io.py +++ b/cosipy/cpkernel/io.py @@ -1,10 +1,11 @@ -""" - Read the input data (model forcing) and write the output to netCDF file. 
-""" +"""Read the input data (model forcing) and write the output to netCDF file.""" + +from __future__ import annotations import os import warnings from datetime import datetime +from pathlib import Path import numpy as np import xarray as xr @@ -14,8 +15,7 @@ class IOClass: - - def __init__(self, DATA=None): + def __init__(self, data: xr.Dataset | None = None): """Initialise the IO Class. Attributes: @@ -134,11 +134,15 @@ def create_data_file(self) -> xr.Dataset: start_timestamp = self.get_datetime(time_start) end_timestamp = self.get_datetime(time_end) timestamp = start_timestamp.strftime("%Y-%m-%dT%H-%M") - restart_path = os.path.join( - Config.data_path, "restart", f"restart_{timestamp}.nc" - ) + restart_path = Path(Config.data_path) / "restart" / f"restart_{timestamp}.nc" + if not restart_path.is_file(): + msg = f"No restart file available at {restart_path}" + raise FileNotFoundError(msg) + if start_timestamp == end_timestamp: + msg = f"Start date {time_start} equals end date {time_end}" + raise IndexError(msg) try: - if not os.path.isfile(restart_path): + if not restart_path.is_file(): raise FileNotFoundError elif start_timestamp == end_timestamp: raise IndexError @@ -269,9 +273,8 @@ def init_data_dataset(self): :U2: Wind speed (magnitude) [|m s^-1|]. :HGT: Elevation [m]. 
""" - + input_path = Path(Config.data_path) / "input" / Config.input_netcdf try: - input_path = os.path.join(Config.data_path, "input", Config.input_netcdf) self.DATA = xr.open_dataset(input_path) except FileNotFoundError: raise SystemExit(f"Input file not found at: {input_path}") diff --git a/cosipy/postprocessing/field_plots/plot_cosipy_fields.py b/cosipy/postprocessing/field_plots/plot_cosipy_fields.py index aadbfbe..f2c2464 100644 --- a/cosipy/postprocessing/field_plots/plot_cosipy_fields.py +++ b/cosipy/postprocessing/field_plots/plot_cosipy_fields.py @@ -3,6 +3,7 @@ """ import argparse +from pathlib import Path import re import cartopy.crs as ccrs @@ -409,7 +410,7 @@ def parse_arguments() -> argparse.Namespace: dest="file", required=True, default=None, - type=str, + type=Path, metavar="", help="Path to .nc file", ) diff --git a/cosipy/postprocessing/profile_plots/plot_profiles.py b/cosipy/postprocessing/profile_plots/plot_profiles.py index d18473f..d8695c6 100644 --- a/cosipy/postprocessing/profile_plots/plot_profiles.py +++ b/cosipy/postprocessing/profile_plots/plot_profiles.py @@ -1,3 +1,7 @@ +import argparse +import os +from pathlib import Path + import matplotlib # matplotlib.use("TkAgg") import matplotlib.pyplot as plt @@ -242,14 +246,14 @@ def naive_fast(latvar,lonvar,lat0,lon0): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Quick plot of the results file.') - parser.add_argument('-f', '--file', dest='file', help='Path to the result file') + parser.add_argument('-f', '--file', dest='file', type=Path, help='Path to the result file') parser.add_argument('-d', '--date', dest='pdate', help='Date of the profile plot') parser.add_argument('-v', '--var', dest='var', default='RHO', help='Which variable to plot (e.g. 
T, RHO, etc.)') parser.add_argument('-n', '--lat', dest='lat', default=None, help='Latitude value in case of 2D simulation', type=float) parser.add_argument('-m', '--lon', dest='lon', default=None, help='Longitude value in case of 2D simulation', type=float) parser.add_argument('-s', '--start', dest='start', default=None, help='Start date for the time plot') parser.add_argument('-e', '--end', dest='end', default=None, help='End date for the time plot') - parser.add_argument('--stake-file', dest='stake_file', default=None, help='Path to the stake data file') + parser.add_argument('--stake-file', dest='stake_file', default=None, type=Path, help='Path to the stake data file') parser.add_argument('--pit', dest='pit_name', default=None, help='Name of the pit in the stake data file') parser.add_argument('--depth', dest='d', nargs='+', default=None, help='An array with depth values for which the corresponding values are to be displayed', type=float) diff --git a/cosipy/utilities/aws2cosipy/aws2cosipy.py b/cosipy/utilities/aws2cosipy/aws2cosipy.py index e2c4208..6f875da 100644 --- a/cosipy/utilities/aws2cosipy/aws2cosipy.py +++ b/cosipy/utilities/aws2cosipy/aws2cosipy.py @@ -36,8 +36,11 @@ --yu Upper latitude value of the subset. """ +from __future__ import annotations + import argparse from itertools import product +from pathlib import Path import dateutil import netCDF4 as nc @@ -52,7 +55,7 @@ _cfg = None -def read_input_file(input_path: str) -> tuple: +def read_input_file(input_path: Path) -> pd.DataFrame: """Read input data, parse dates, and convert to a dataframe. 
Args: @@ -238,12 +241,18 @@ def get_pressure_bias(data, height): return pressure -def write_netcdf(dataset, output_path): +def write_netcdf(dataset: xr.Dataset, output_path: Path | str) -> None: dataset.to_netcdf(output_path) print(f"{'-' * 43}\nInput file created: {output_path}\n{'-' * 43}") -def create_1D_input(cs_file, cosipy_file, static_file, start_date, end_date): +def create_1D_input( + cs_file: Path, + cosipy_file: Path, + static_file: Path | None, + start_date: str, + end_date: str, +) -> None: """Create an input dataset from a csv file with input point data. Here you need to define how to interpolate the data. @@ -440,15 +449,15 @@ def create_1D_input(cs_file, cosipy_file, static_file, start_date, end_date): def create_2D_input( - cs_file, - cosipy_file, - static_file, - start_date, - end_date, - x0=None, - x1=None, - y0=None, - y1=None, + cs_file: Path, + cosipy_file: Path, + static_file: Path, + start_date: str, + end_date: str, + x0: int | None=None, + x1: int | None=None, + y0: int | None=None, + y1: int | None=None, ): """Create a 2D input dataset from a .csv file. 
@@ -928,7 +937,7 @@ def get_user_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace: "-i", "--input", dest="csv_file", - type=str, + type=Path, metavar="", required=True, help="Path to .csv file with meteorological data", @@ -937,7 +946,7 @@ def get_user_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace: "-o", "--output", dest="cosipy_file", - type=str, + type=Path, metavar="", required=True, help="Path to the resulting COSIPY netCDF file", @@ -945,7 +954,7 @@ def get_user_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace: parser.add_argument( "-s", "--static_file", - type=str, + type=Path, dest="static_file", help="Path to static file with DEM, slope etc.", ) diff --git a/cosipy/utilities/createStatic/create_static_file.py b/cosipy/utilities/createStatic/create_static_file.py index 6ee3823..60c90eb 100644 --- a/cosipy/utilities/createStatic/create_static_file.py +++ b/cosipy/utilities/createStatic/create_static_file.py @@ -19,6 +19,7 @@ import argparse import os from itertools import product +from pathlib import Path import numpy as np import richdem as rd @@ -27,14 +28,6 @@ from cosipy.utilities.config_utils import UtilitiesConfig -def check_folder_path(path: str) -> str: - """Check the folder path includes a forward slash.""" - if not path.endswith("/"): - path = f"{path}/" - - return path - - def check_for_nan(ds,var=None): for y,x in product(range(ds.dims['lat']),range(ds.dims['lon'])): mask = ds.MASK.isel(lat=y, lon=x) @@ -92,16 +85,16 @@ def load_config(module_name: str) -> tuple: def main(): _, _cfg = load_config(module_name="create_static") - static_folder = _cfg.paths["static_folder"] + static_folder = Path(_cfg.paths["static_folder"]) tile = _cfg.coords["tile"] aggregate = _cfg.coords["aggregate"] # input digital elevation model (DEM) - dem_path_tif = f"{static_folder}{_cfg.paths['dem_path']}" + dem_path_tif = static_folder / _cfg.paths['dem_path'] # input shape of glacier or study area, e.g. 
from the Randolph glacier inventory - shape_path = f"{static_folder}{_cfg.paths['shape_path']}" + shape_path = static_folder / _cfg.paths['shape_path'] # path where the static.nc file is saved - output_path = f"{static_folder}{_cfg.paths['output_file']}" + output_path = static_folder / _cfg.paths['output_file'] # to shrink the DEM use the following lat/lon corners longitude_upper_left = str(_cfg.coords["longitude_upper_left"]) @@ -113,12 +106,12 @@ def main(): aggregate_degree = str(_cfg.coords["aggregate_degree"]) # intermediate files, will be removed afterwards - dem_path_tif_temp = f"{static_folder}DEM_temp.tif" - dem_path_tif_temp2 = f"{static_folder}DEM_temp2.tif" - dem_path = f"{static_folder}dem.nc" - aspect_path = f"{static_folder}aspect.nc" - mask_path = f"{static_folder}mask.nc" - slope_path = f"{static_folder}slope.nc" + dem_path_tif_temp = static_folder / "DEM_temp.tif" + dem_path_tif_temp2 = static_folder / "DEM_temp2.tif" + dem_path = static_folder / "dem.nc" + aspect_path = static_folder / "aspect.nc" + mask_path = static_folder / "mask.nc" + slope_path = static_folder / "slope.nc" if tile: os.system( diff --git a/cosipy/utilities/setup_cosipy/setup_cosipy.py b/cosipy/utilities/setup_cosipy/setup_cosipy.py index fb2957e..315639f 100644 --- a/cosipy/utilities/setup_cosipy/setup_cosipy.py +++ b/cosipy/utilities/setup_cosipy/setup_cosipy.py @@ -24,6 +24,7 @@ import inspect import os import shutil +from pathlib import Path def get_user_arguments() -> argparse.Namespace: @@ -63,7 +64,7 @@ def get_user_arguments() -> argparse.Namespace: "--output", default=None, dest="output_path", - type=str, + type=Path, metavar="", required=False, help="relative path to target configuration directory", @@ -74,29 +75,29 @@ def get_user_arguments() -> argparse.Namespace: return arguments -def check_file_exists(file_path): - if not os.path.isfile(file_path): - raise FileNotFoundError(f"{file_path} not found.") - - -def get_sample_directory() -> str: +def 
get_sample_directory() -> Path: """Get the path to the sample directory. Returns: Path to sample configuration directory. """ # Package is not installed in working directory - filename = inspect.getfile(inspect.currentframe()) - filename = filename.rsplit("/", 1) - src_dir = f"{filename[0]}" - - return src_dir + frame = inspect.currentframe() + if frame is None: + msg = "Current frame is None." + raise ValueError(msg) + try: + filename = Path(inspect.getfile(frame)).resolve() + finally: + del frame + return filename.parent def copy_file_to_target( basename: str, - source_dir: str, - target_dir: str, + source_dir: Path, + target_dir: Path, + *, silent_overwrite: bool = False, ): """Copy a file to a target directory. @@ -108,12 +109,11 @@ def copy_file_to_target( silent_overwrite: Silently overwrite existing files in target directory. Default False. """ - - target_path = f"{target_dir}/{basename}" - source_path = f"{source_dir}/{basename}" + target_path = target_dir / basename + source_path = source_dir / basename overwrite = True # otherwise no file created if missing - if not silent_overwrite and os.path.isfile(target_path): + if not silent_overwrite and target_path.exists(follow_symlinks=True): prompt = f"{basename} already exists in {target_dir}/\nReplace target? [y/N] " overwrite = get_user_confirmation(prompt) if overwrite: @@ -141,10 +141,10 @@ def main(): sample_path = get_sample_directory() if not args.output_path: - target_path = os.getcwd() + target_path = Path().cwd() else: target_path = args.output_path - os.makedirs(target_path, exist_ok=True) + target_path.mkdir(parents=True, exist_ok=True) if target_path == sample_path: raise ValueError("The target and source paths cannot be identical.") @@ -155,7 +155,10 @@ def main(): "utilities_config.toml", ] for file in config_files: - check_file_exists(file_path=f"{sample_path}/{file}") + filepath = Path(sample_path) / file + if not filepath.exists(): + msg = f"{filepath} does not exist." 
+ raise FileNotFoundError(msg) copy_file_to_target( basename=file, source_dir=sample_path, diff --git a/cosipy/utilities/wrf2cosipy/wrf2cosipy.py b/cosipy/utilities/wrf2cosipy/wrf2cosipy.py index 67bfef4..8a1efd1 100644 --- a/cosipy/utilities/wrf2cosipy/wrf2cosipy.py +++ b/cosipy/utilities/wrf2cosipy/wrf2cosipy.py @@ -27,6 +27,7 @@ """ import argparse +from pathlib import Path import numpy as np import pandas as pd @@ -40,7 +41,7 @@ _cfg = None -def create_input(wrf_file, cosipy_file, start_date, end_date): +def create_input(wrf_file: Path, cosipy_file: Path, start_date, end_date): """Create an input dataset from WRF data.""" print('-------------------------------------------') @@ -190,7 +191,7 @@ def get_user_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace: "-i", "--input", dest="wrf_file", - type=str, + type=Path, metavar="", required=True, help="Path to WRF file", @@ -199,7 +200,7 @@ def get_user_arguments(parser: argparse.ArgumentParser) -> argparse.Namespace: "-o", "--output", dest="cosipy_file", - type=str, + type=Path, metavar="", required=True, help="Path to the resulting COSIPY file",