From ce317feed72ebb96c9e9f1b5a833067df97e5160 Mon Sep 17 00:00:00 2001 From: Benedikt Volkel Date: Mon, 25 Sep 2023 11:14:52 +0200 Subject: [PATCH] Be strict about storage path --- src/o2tuner/backends.py | 45 ++++++++++++++--------------- src/o2tuner/config.py | 1 + src/o2tuner/optimise.py | 19 ++---------- tests/test_create_load_study.py | 10 +------ tests/test_full/config_full.yaml | 3 +- tests/test_full/config_minimal.yaml | 2 ++ 6 files changed, 28 insertions(+), 52 deletions(-) diff --git a/src/o2tuner/backends.py b/src/o2tuner/backends.py index 1907497..ba8cbe7 100644 --- a/src/o2tuner/backends.py +++ b/src/o2tuner/backends.py @@ -51,6 +51,7 @@ def adjust_storage_url(storage_url, workdir="./"): if storage_url.find(prefix) == 0: if prefix == "sqlite:///": # in case of SQLite, the file could be requested to be stored under an absolute or relative path + # Do nothing for other storage URLs path = storage_url[len(prefix):] if path[0] != "/": # if not an absolute path, put the working directory in between @@ -69,16 +70,12 @@ def get_default_storage_url(study_name="o2tuner_study"): return f"sqlite:///{study_name}.db" -def create_storage(storage, workdir="./"): +def create_storage(storage, workdir): """ Make sure the path is either absolute path or relative to the specified workdir. Take care of storage identifier. Right now check for MySQL and SQLite. """ - if not storage: - # Empty path, cannot know how to deal with it, return None - return None - # default arguments which we will use # for now, use a high timeout so we don't fail if another process is currently using the storage backend engine_kwargs = {"connect_args": {"timeout": 100}} @@ -88,7 +85,10 @@ def create_storage(storage, workdir="./"): url = storage else: # first pop the url... - url = storage.pop("url", get_default_storage_url()) + url = storage.pop("url", None) + if not url: + LOG.error("No storage URL found in configuration") + return None if storage: # ...then check, if there is more in the dictionary; if so, use it engine_kwargs = storage @@ -116,6 +116,7 @@ def load_or_create_study_from_storage(study_name, storage, sampler=None, create_ study = optuna.create_study(study_name=study_name, storage=storage, sampler=sampler) LOG.debug("Creating new study %s at storage %s", study_name, storage.url) return study + LOG.error("Study %s does not exist but was supposed to be loaded.", study_name) return None @@ -128,8 +129,10 @@ def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_ """ file_name = join(workdir, f"{study_name}.pkl") if not exists_file(file_name) and not create_if_not_exists: + LOG.error("Study %s does not exist but was supposed to be loaded.", study_name) return None if exists_file(file_name): + # in this case, load with open(file_name, "rb") as save_file: LOG.debug("Loading existing study %s from file %s", study_name, file_name) return pickle.load(save_file) @@ -139,7 +142,7 @@ def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_ return optuna.create_study(study_name=study_name, sampler=sampler) -def load_or_create_study(study_name=None, storage=None, sampler=None, workdir="./", create_if_not_exists=True): +def load_or_create_study(study_name, storage_config=None, sampler=None, workdir="./", create_if_not_exists=True): """ Helper to load or create a study Returns tuple of whether it can run via storage and the created/loaded optuna.study.Study object. @@ -154,22 +157,23 @@ def load_or_create_study(study_name=None, storage=None, sampler=None, workdir=". file in the given directory with .pkl. If found, tru to load. If also this does not exist, create a new in-memory study. """ - storage = create_storage(storage, workdir) - if study_name and storage: + if storage_config: + storage = create_storage(storage_config, workdir) + if not storage: + LOG.error("Storage for study %s cannot be established, please check the storage configuration.", study_name) + sys.exit(1) # Although optuna would come up with a unique name when study_name is None, # we force a name to be given by the user for those cases study = load_or_create_study_from_storage(study_name, storage, sampler, create_if_not_exists) - if study: - return True, study - - if not study_name: - study_name = "o2tuner_in_memory_study" + if not study: + LOG.error("Study %s cannot be loaded.", study_name) + sys.exit(1) + return True, study study = load_or_create_study_in_memory(study_name, workdir, sampler, create_if_not_exists) - if not study and not create_if_not_exists: - LOG.error("Study was supposed to be loaded, creating was omitted." - "However, the study %s does neither exist for storage path %s or working directory %s", study_name, storage, workdir) + if not study: + LOG.error("Cannot create in-memory study %s", study_name) sys.exit(1) # simple in-memory @@ -188,13 +192,6 @@ def pickle_study(study, workdir="./"): return file_name -def can_do_storage(storage_url): - """ - Basically a dry run to try and create a study for given storage - """ - return create_storage(storage_url) is not None - - class OptunaHandler: """ Handler based on Optuna backend diff --git a/src/o2tuner/config.py b/src/o2tuner/config.py index 3022e43..5dcf21a 100644 --- a/src/o2tuner/config.py +++ b/src/o2tuner/config.py @@ -76,6 +76,7 @@ def setup_optimisation_stage(self, name, value): optuna_config = {k: v for k, v in value.items() if k not in ("entrypoint", "file", "config")} optuna_config["study"] = optuna_config.get("study", {"name": name}) + # by default, set the study name to the name of the optimisation stage optuna_config["study"]["name"] = optuna_config["study"].get("name", name) value["optuna_config"] = optuna_config diff --git a/src/o2tuner/optimise.py b/src/o2tuner/optimise.py index 9dafeca..75a9b70 100644 --- a/src/o2tuner/optimise.py +++ b/src/o2tuner/optimise.py @@ -9,7 +9,7 @@ import functools from o2tuner.io import make_dir, parse_yaml -from o2tuner.backends import OptunaHandler, can_do_storage, get_default_storage_url +from o2tuner.backends import OptunaHandler from o2tuner.sampler import construct_sampler from o2tuner.inspector import O2TunerInspector from o2tuner.exception import O2TunerStopOptimisation @@ -74,25 +74,10 @@ def prepare_optimisation(optuna_config, work_dir="o2tuner_optimise"): # first, see what we got study_name = optuna_storage_config.get("name", "o2tuner_study") storage = optuna_storage_config.get("storage", None) - in_memory = optuna_storage_config.get("in_memory", False) - if in_memory: + if not storage: jobs = 1 - if not storage and not in_memory: - # make a default storage, optimisation via storage should be the way to go - storage = get_default_storage_url(study_name) - - if not in_memory and not can_do_storage(storage): - # no worries - at this point - if optimisation via storage is not possible - optuna_storage_config["storage"] = None - if jobs > 1: - # however, if more than 1 job requested, abort the preparation here - LOG.error("Requested %d jobs but problem to set up storage %s", jobs, storage) - return None, None, None - else: - optuna_storage_config["storage"] = storage - trials_list = floor(trials / jobs) trials_list = [trials_list] * jobs # add the left-over trials as equally as possible diff --git a/tests/test_create_load_study.py b/tests/test_create_load_study.py index f37be32..752bbcd 100644 --- a/tests/test_create_load_study.py +++ b/tests/test_create_load_study.py @@ -5,7 +5,7 @@ import optuna -from o2tuner.backends import load_or_create_study, can_do_storage, pickle_study +from o2tuner.backends import load_or_create_study, pickle_study def test_inmemory_creation_and_pickle(): @@ -21,14 +21,6 @@ def test_inmemory_creation_and_pickle(): assert exists(filename) -def test_storage_unknown(): - """ - Make sure it fails for unknown storage - """ - storage = "xyz:///study" - assert not can_do_storage(storage) - - def test_storage_creation(needs_sqlite): # pylint: disable=unused-argument """ Test storage creation of study diff --git a/tests/test_full/config_full.yaml b/tests/test_full/config_full.yaml index d0bcc6b..f6ca1e7 100644 --- a/tests/test_full/config_full.yaml +++ b/tests/test_full/config_full.yaml @@ -19,9 +19,8 @@ stages_optimisation: objective: objective jobs: 2 # desired number of jobs trials: 200 # desired number of trials - study: # where the study is stored (only give a name and leave out "storage" key if you do not have MySQL working, it will anyway fall back to the serial run if it cannot communicate with MySQL) + study: name: "test_study" storage: sqlite:///opt.db - in_memory: False deps: - hello diff --git a/tests/test_full/config_minimal.yaml b/tests/test_full/config_minimal.yaml index cb0121b..c78dbcb 100644 --- a/tests/test_full/config_minimal.yaml +++ b/tests/test_full/config_minimal.yaml @@ -15,5 +15,7 @@ stages_optimisation: some_key: some_value file: optimise.py objective: objective + study: + storage: sqlite:///opt.db deps: - hello