Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be strict about storage path #58

Merged
merged 1 commit into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 21 additions & 24 deletions src/o2tuner/backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def adjust_storage_url(storage_url, workdir="./"):
if storage_url.find(prefix) == 0:
if prefix == "sqlite:///":
# in case of SQLite, the file could be requested to be stored under an absolute or relative path
# Do nothing for other storage URLs
path = storage_url[len(prefix):]
if path[0] != "/":
# if not an absolute path, put the working directory in between
Expand All @@ -69,16 +70,12 @@ def get_default_storage_url(study_name="o2tuner_study"):
return f"sqlite:///{study_name}.db"


def create_storage(storage, workdir="./"):
def create_storage(storage, workdir):
"""
Make sure the path is either absolute path or relative to the specified workdir.
Take care of storage identifier. Right now check for MySQL and SQLite.
"""

if not storage:
# Empty path, cannot know how to deal with it, return None
return None

# default arguments which we will use
# for now, use a high timeout so we don't fail if another process is currently using the storage backend
engine_kwargs = {"connect_args": {"timeout": 100}}
Expand All @@ -88,7 +85,10 @@ def create_storage(storage, workdir="./"):
url = storage
else:
# first pop the url...
url = storage.pop("url", get_default_storage_url())
url = storage.pop("url", None)
if not url:
LOG.error("No storage URL found in configuration")
return None
if storage:
# ...then check, if there is more in the dictionary; if so, use it
engine_kwargs = storage
Expand Down Expand Up @@ -116,6 +116,7 @@ def load_or_create_study_from_storage(study_name, storage, sampler=None, create_
study = optuna.create_study(study_name=study_name, storage=storage, sampler=sampler)
LOG.debug("Creating new study %s at storage %s", study_name, storage.url)
return study
LOG.error("Study %s does not exist but was supposed to be loaded.", study_name)

return None

Expand All @@ -128,8 +129,10 @@ def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_
"""
file_name = join(workdir, f"{study_name}.pkl")
if not exists_file(file_name) and not create_if_not_exists:
LOG.error("Study %s does not exist but was supposed to be loaded.", study_name)
return None
if exists_file(file_name):
# in this case, load
with open(file_name, "rb") as save_file:
LOG.debug("Loading existing study %s from file %s", study_name, file_name)
return pickle.load(save_file)
Expand All @@ -139,7 +142,7 @@ def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_
return optuna.create_study(study_name=study_name, sampler=sampler)


def load_or_create_study(study_name=None, storage=None, sampler=None, workdir="./", create_if_not_exists=True):
def load_or_create_study(study_name, storage_config=None, sampler=None, workdir="./", create_if_not_exists=True):
"""
Helper to load or create a study
Returns tuple of whether it can run via storage and the created/loaded optuna.study.Study object.
Expand All @@ -154,22 +157,23 @@ def load_or_create_study(study_name=None, storage=None, sampler=None, workdir=".
file in the given directory with <study_name>.pkl. If found, tru to load.
If also this does not exist, create a new in-memory study.
"""
storage = create_storage(storage, workdir)
if study_name and storage:
if storage_config:
storage = create_storage(storage_config, workdir)
if not storage:
LOG.error("Storage for study %s cannot be established, please check the storage configuration.", study_name)
sys.exit(1)
# Although optuna would come up with a unique name when study_name is None,
# we force a name to be given by the user for those cases
study = load_or_create_study_from_storage(study_name, storage, sampler, create_if_not_exists)
if study:
return True, study

if not study_name:
study_name = "o2tuner_in_memory_study"
if not study:
LOG.error("Study %s cannot be loaded.", study_name)
sys.exit(1)
return True, study

study = load_or_create_study_in_memory(study_name, workdir, sampler, create_if_not_exists)

if not study and not create_if_not_exists:
LOG.error("Study was supposed to be loaded, creating was omitted."
"However, the study %s does neither exist for storage path %s or working directory %s", study_name, storage, workdir)
if not study:
LOG.error("Cannot create in-memory study %s", study_name)
sys.exit(1)

# simple in-memory
Expand All @@ -188,13 +192,6 @@ def pickle_study(study, workdir="./"):
return file_name


def can_do_storage(storage_url):
"""
Basically a dry run to try and create a study for given storage
"""
return create_storage(storage_url) is not None


class OptunaHandler:
"""
Handler based on Optuna backend
Expand Down
1 change: 1 addition & 0 deletions src/o2tuner/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def setup_optimisation_stage(self, name, value):

optuna_config = {k: v for k, v in value.items() if k not in ("entrypoint", "file", "config")}
optuna_config["study"] = optuna_config.get("study", {"name": name})
# by default, set the study name to the name of the optimisation stage
optuna_config["study"]["name"] = optuna_config["study"].get("name", name)
value["optuna_config"] = optuna_config

Expand Down
19 changes: 2 additions & 17 deletions src/o2tuner/optimise.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import functools

from o2tuner.io import make_dir, parse_yaml
from o2tuner.backends import OptunaHandler, can_do_storage, get_default_storage_url
from o2tuner.backends import OptunaHandler
from o2tuner.sampler import construct_sampler
from o2tuner.inspector import O2TunerInspector
from o2tuner.exception import O2TunerStopOptimisation
Expand Down Expand Up @@ -74,25 +74,10 @@ def prepare_optimisation(optuna_config, work_dir="o2tuner_optimise"):
# first, see what we got
study_name = optuna_storage_config.get("name", "o2tuner_study")
storage = optuna_storage_config.get("storage", None)
in_memory = optuna_storage_config.get("in_memory", False)

if in_memory:
if not storage:
jobs = 1

if not storage and not in_memory:
# make a default storage, optimisation via storage should be the way to go
storage = get_default_storage_url(study_name)

if not in_memory and not can_do_storage(storage):
# no worries - at this point - if optimisation via storage is not possible
optuna_storage_config["storage"] = None
if jobs > 1:
# however, if more than 1 job requested, abort the preparation here
LOG.error("Requested %d jobs but problem to set up storage %s", jobs, storage)
return None, None, None
else:
optuna_storage_config["storage"] = storage

trials_list = floor(trials / jobs)
trials_list = [trials_list] * jobs
# add the left-over trials as equally as possible
Expand Down
10 changes: 1 addition & 9 deletions tests/test_create_load_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import optuna

from o2tuner.backends import load_or_create_study, can_do_storage, pickle_study
from o2tuner.backends import load_or_create_study, pickle_study


def test_inmemory_creation_and_pickle():
Expand All @@ -21,14 +21,6 @@ def test_inmemory_creation_and_pickle():
assert exists(filename)


def test_storage_unknown():
"""
Make sure it fails for unknown storage
"""
storage = "xyz:///study"
assert not can_do_storage(storage)


def test_storage_creation(needs_sqlite): # pylint: disable=unused-argument
"""
Test storage creation of study
Expand Down
3 changes: 1 addition & 2 deletions tests/test_full/config_full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ stages_optimisation:
objective: objective
jobs: 2 # desired number of jobs
trials: 200 # desired number of trials
study: # where the study is stored (only give a name and leave out "storage" key if you do not have MySQL working, it will anyway fall back to the serial run if it cannot communicate with MySQL)
study:
name: "test_study"
storage: sqlite:///opt.db
in_memory: False
deps:
- hello
2 changes: 2 additions & 0 deletions tests/test_full/config_minimal.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ stages_optimisation:
some_key: some_value
file: optimise.py
objective: objective
study:
storage: sqlite:///opt.db
deps:
- hello
Loading