Enable multi-objective optimisation
An objective function can now return multiple scalars,
while the default remains a single scalar.

In case of multiple objectives (multiple numbers), the
objective function must be decorated with the respective directions:

@directions(['minimize', 'maximize'])
def my_objective(...):
  # do your calculations
  return value_to_minimize, value_to_maximize

If only one number is returned, the decorator can be omitted if that number
should be minimized.

If maximization is desired in single-objective optimisation, the decorator
should be used with
@directions(['maximize'])
def my_objective(...):
  # do your calculations
  return value_to_maximize

All plotting functions of the Inspector now have a keyword argument
objective_number
to indicate for which of the objectives the respective plot should be created.
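
A minimal usage sketch (the Inspector class name, import path and loading arguments are assumptions; only the objective_number keyword is introduced by this commit):

from o2tuner.inspector import O2TunerInspector  # class name assumed

insp = O2TunerInspector()
insp.load(opt_work_dir="o2tuner_optimise")  # directory layout assumed

# importance and slice plots for the second objective (index 1)
fig_imp, _ = insp.plot_importance(objective_number=1)
fig_slices, _ = insp.plot_slices(objective_number=1)
fig_imp.savefig("importance_obj1.png")
fig_slices.savefig("slices_obj1.png")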
Benedikt Volkel committed Jul 12, 2024
1 parent 7b1eb09 commit ce0b2af
Showing 3 changed files with 129 additions and 44 deletions.
20 changes: 12 additions & 8 deletions src/o2tuner/backends.py
@@ -103,7 +103,7 @@ def create_storage(storage, workdir):
return None


def load_or_create_study_from_storage(study_name, storage, sampler=None, create_if_not_exists=True):
def load_or_create_study_from_storage(study_name, storage, sampler=None, create_if_not_exists=True, directions=None):
"""
Load or create from DB
"""
@@ -113,15 +113,15 @@ def load_or_create_study_from_storage(study_name, storage, sampler=None, create_
return study
except KeyError:
if create_if_not_exists:
study = optuna.create_study(study_name=study_name, storage=storage, sampler=sampler)
study = optuna.create_study(study_name=study_name, storage=storage, sampler=sampler, directions=directions)
LOG.debug("Creating new study %s at storage %s", study_name, storage.url)
return study
LOG.error("Study %s does not exist but was supposed to be loaded.", study_name)

return None


def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_not_exists=True):
def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_not_exists=True, directions=None):
"""
Try to see if there is a study saved here
@@ -139,10 +139,10 @@ def load_or_create_study_in_memory(study_name, workdir, sampler=None, create_if_

LOG.debug("Creating new in-memory study %s", study_name)

return optuna.create_study(study_name=study_name, sampler=sampler)
return optuna.create_study(study_name=study_name, sampler=sampler, directions=directions)


def load_or_create_study(study_name, storage_config=None, sampler=None, workdir="./", create_if_not_exists=True):
def load_or_create_study(study_name, storage_config=None, sampler=None, workdir="./", create_if_not_exists=True, directions=None):
"""
Helper to load or create a study
Returns tuple of whether it can run via storage and the created/loaded optuna.study.Study object.
@@ -164,13 +164,13 @@ def load_or_create_study(study_name, storage_config=None, sampler=None, workdir=
sys.exit(1)
# Although optuna would come up with a unique name when study_name is None,
# we force a name to be given by the user for those cases
study = load_or_create_study_from_storage(study_name, storage, sampler, create_if_not_exists)
study = load_or_create_study_from_storage(study_name, storage, sampler, create_if_not_exists, directions)
if not study:
LOG.error("Study %s cannot be loaded.", study_name)
sys.exit(1)
return True, study

study = load_or_create_study_in_memory(study_name, workdir, sampler, create_if_not_exists)
study = load_or_create_study_in_memory(study_name, workdir, sampler, create_if_not_exists, directions)

if not study:
LOG.error("Cannot create in-memory study %s", study_name)
@@ -205,6 +205,8 @@ def __init__(self, db_study_name=None, db_storage=None, workdir=None, user_confi
self._objective = None
# Flag whether we need a dedicated cwd per trial
self._needs_cwd_per_trial = False
# directions in which to optimise
self._directions = None
# chosen sampler (can be None, optuna will use TPE then)
self._sampler = None
# our study object
@@ -249,7 +251,8 @@ def initialise(self, n_trials=100):
Initialise with number of trials to be done
"""
self._n_trials = n_trials
has_db_access, self._study = load_or_create_study(self.db_study_name, self.db_storage, self._sampler, self.workdir)
has_db_access, self._study = load_or_create_study(self.db_study_name, self.db_storage, self._sampler, self.workdir,
directions=self._directions)
# Overwrite in case no DB access but a parallel execution was desired before
self.in_memory = not has_db_access

@@ -291,6 +294,7 @@ def set_objective(self, objective):
n_params = len(sig.parameters)
if hasattr(objective, "needs_cwd"):
self._needs_cwd_per_trial = True
self._directions = getattr(objective, 'directions', None)
if n_params > 2 or not n_params:
LOG.error("Invalid signature of objective function. Need either 1 argument (only trial obj) or 2 arguments (trial object + user_config)")
sys.exit(1)
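For orientation, a short sketch of calling the extended helper for a two-objective study (study name and workdir are illustrative; with no storage configured, the helper falls back to an in-memory study):

from o2tuner.backends import load_or_create_study

has_db_access, study = load_or_create_study(
    "my_study",                 # illustrative name
    storage_config=None,        # no DB configured -> in-memory study
    workdir="./",
    directions=["minimize", "maximize"],
)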
128 changes: 93 additions & 35 deletions src/o2tuner/inspector.py
@@ -43,6 +43,14 @@ def __init__(self):
self._trials_complete = None
# map internal parameter names to something else (optional)
self._parameter_map = None
# flag whether or not this is a multi-objective study
self._is_multi_objective = False
# hold a list of the trial numbers that optuna assigns internally
self._trial_numbers = None
# map the optuna trial number to the Inspector's trial index
self._number_to_index = None
# hold a list of the Inspector's indices of the best trials
self._best_indices = None

def load(self, opt_config=None, opt_work_dir=None):
"""
@@ -63,8 +71,12 @@ def load(self, opt_config=None, opt_work_dir=None):
trials_state = self._study.trials_dataframe(("state",))["state"].values
self._trials_complete = [trial for trial, state in zip(self._study.trials, trials_state) if state == TrialState.COMPLETE.name]
# now we sort the trials according to the order in which they were done
trial_numbers = [trial.number for trial in self._trials_complete]
self._trials_complete = [t for _, t in sorted(zip(trial_numbers, self._trials_complete))]
self._trial_numbers = [trial.number for trial in self._trials_complete]
self._trials_complete = [t for _, t in sorted(zip(self._trial_numbers, self._trials_complete))]
self._number_to_index = {t.number: i for i, t in enumerate(self._trials_complete)}
self._best_indices = [self._number_to_index[t.number] for t in self._study.best_trials]
self._importances = [None] * len(self._study.directions)
self._is_multi_objective = len(self._importances) > 1

return True

@@ -73,15 +85,28 @@ def write_summary(self, filepath="o2tuner_optimisation_summary.yaml"):
Write a short summary to YAML file
"""
LOG.info("Writing optimisation summary to %s", filepath)
best_trial = self._study.best_trial
user_attrs = best_trial.user_attrs
best_trials = self._study.best_trials
cwds = [bt.user_attrs.get("cwd", "./") for bt in best_trials]
numbers = [bt.number for bt in best_trials]
to_write = {"n_trials": len(self._study.trials),
"best_trial_cwd": user_attrs.get("cwd", "./"),
"best_trial_number": best_trial.number,
"best_trial_loss": self._study.best_value,
"best_trial_parameters": self._study.best_params}
"best_trial_cwds": cwds,
"best_trial_numbers": numbers}
dump_yaml(to_write, filepath)

@property
def n_directions(self):
"""
Get the number of directions/objectives
"""
return len(self._study.directions)

@property
def directions(self):
"""
Return the list of directions
"""
return self._study.directions

def get_annotation_per_trial(self, key, accept_missing_annotation=True):
"""
Assemble history of requested annotation
@@ -97,11 +122,42 @@ def get_annotation_per_trial(self, key, accept_missing_annotation=True):
ret_list.append(user_attrs[key])
return ret_list

def get_losses(self):
def get_losses(self, flatten=True):
"""
Return the losses; for a single objective with flatten=True (the default), a flat list is returned,
otherwise one list of losses per objective
"""
return [t.value for t in self._trials_complete]
if not self._is_multi_objective and flatten:
return [t.value for t in self._trials_complete]
directions = self._study.directions
losses = [[] for _ in directions]
for t in self._trials_complete:
for i, v in enumerate(t.values):
losses[i].append(v)
return losses

def get_best_indices(self):
"""
Get indices of best trials.
This list can then be used to get other properties, e.g. the losses of each best trial
when using get_losses()
"""
return self._best_indices

def get_trial_numbers(self):
"""
Get the numbers of the trials which optuna assigns internally
"""
return self._trial_numbers

def get_n_trials(self):
"""
Get the number of completed trials
This number also coincides with the number of returned losses (get_losses())
or annotations (get_annotation_per_trial())
"""
return len(self._trials_complete)

def set_parameter_name_map(self, param_map):
"""
@@ -117,37 +173,37 @@ def map_parameter_names(self, parameter_names_raw):
return parameter_names_raw
return [self._parameter_map[pn] if pn in self._parameter_map else pn for pn in parameter_names_raw]

def get_params_importances(self, n_most_important=None):
def get_params_importances(self, n_most_important=None, objective_number=0):
"""
Get most important parameters
"""
if not self._importances:
importances = get_param_importances(self._study, evaluator=None, params=None, target=None)
self._importances = OrderedDict(reversed(list(importances.items())))
if not self._importances[objective_number]:
importances = get_param_importances(self._study, evaluator=None, params=None, target=lambda t: t.values[objective_number])
self._importances[objective_number] = OrderedDict(reversed(list(importances.items())))

if not n_most_important:
n_most_important = len(self._importances)
n_most_important = len(self._importances[objective_number])

# get importances of parameters
importance_values = list(self._importances.values())
n_most_important = min(n_most_important, len(self._importances))
importance_values = list(self._importances[objective_number].values())
n_most_important = min(n_most_important, len(self._importances[objective_number]))
importance_values = importance_values[-n_most_important:]

# get parameter names
param_names = list(self._importances.keys())
param_names = list(self._importances[objective_number].keys())
param_names = param_names[-n_most_important:]

return param_names[:n_most_important], importance_values[:n_most_important]

def plot_importance(self, *, n_most_important=None):
def plot_importance(self, *, n_most_important=None, objective_number=0):
"""
Plot the importance of parameters
Most of it based on https://optuna.readthedocs.io/en/stable/_modules/optuna/visualization/_param_importances.html#plot_param_importances
However, add some functionality we would like to have here
"""
LOG.debug("Plotting importance")
param_names, importance_values = self.get_params_importances(n_most_important)
param_names, importance_values = self.get_params_importances(n_most_important, objective_number)
param_names = self.map_parameter_names(param_names)

figure, ax = plt.subplots(figsize=(30, 10))
Expand All @@ -160,14 +216,14 @@ def plot_importance(self, *, n_most_important=None):

return figure, ax

def plot_parallel_coordinates(self, *, n_most_important=None):
def plot_parallel_coordinates(self, *, n_most_important=None, objective_number=0):
"""
Plot parallel coordinates. Each horizontal line represents a trial, each vertical line a parameter
"""
LOG.debug("Plotting parallel coordinates")
params, _ = self.get_params_importances(n_most_important)
params, _ = self.get_params_importances(n_most_important, objective_number)

losses = self.get_losses()
losses = self.get_losses(flatten=False)[objective_number]
curves = [[] for _ in losses]
skip_trials = {}

@@ -184,7 +240,8 @@ def plot_parallel_coordinates(self, *, n_most_important=None):
# order trials by loss and prepare colorbar
norm_colors = mplc.Normalize(vmin=min(losses), vmax=max(losses))
# colorbar and sorting of losses reversed if needed
cmap, reverse = (mplcm.get_cmap("Blues_r"), True) if self._study.direction == StudyDirection.MINIMIZE else (mplcm.get_cmap("Blues"), False)
cmap, reverse = (mplcm.get_cmap("Blues_r"), True) \
if self._study.directions[objective_number] == StudyDirection.MINIMIZE else (mplcm.get_cmap("Blues"), False)
curves = [c for _, c in sorted(zip(losses, curves), reverse=reverse)]
# make sure curves of best losses are plotted last and hence on top
losses.sort(reverse=reverse)
@@ -210,25 +267,25 @@

cbar = mplcb.ColorbarBase(axes[-1], cmap="Blues_r", norm=norm_colors, ticks=[min(losses), max(losses)])
cbar.ax.tick_params(labelsize=20)
cbar.ax.set_ylabel("loss", fontsize=20)
cbar.ax.set_ylabel(f"loss {objective_number}", fontsize=20)
figure.subplots_adjust(wspace=0)
figure.suptitle("Parallel coordinates", fontsize=40)

return figure, axes

def plot_slices(self, *, n_most_important=None):
def plot_slices(self, *, n_most_important=None, objective_number=0):
"""
Plot slices
"""
LOG.debug("Plotting slices")
params, _ = self.get_params_importances(n_most_important)
params, _ = self.get_params_importances(n_most_important, objective_number)

n_rows = ceil(sqrt(len(params)))
n_cols = n_rows
if len(params) > n_rows**2:
n_rows += 1

losses = self.get_losses()
losses = self.get_losses(flatten=False)[objective_number]
figure, axes = plt.subplots(n_rows, n_cols, figsize=(50, 50))
axes = axes.flatten()

@@ -258,12 +315,12 @@ def plot_slices(self, *, n_most_important=None):

return figure, axes

def plot_correlations(self, *, n_most_important=None):
def plot_correlations(self, *, n_most_important=None, objective_number=0):
"""
Plot correlation among parameters
"""
LOG.debug("Plotting parameter correlations")
params, _ = self.get_params_importances(n_most_important)
params, _ = self.get_params_importances(n_most_important, objective_number)
params_labels = self.map_parameter_names(params)

param_values = []
@@ -316,20 +373,21 @@ def plot_pairwise_scatter(self, *, n_most_important=None):

return pair_grid.figure, pair_grid

def plot_loss_feature_history(self, *, n_most_important=None):
def plot_loss_feature_history(self, *, n_most_important=None, objective_number=0):
"""
Plot parameter and loss history and add correlation of each parameter and loss
"""
LOG.debug("Plot loss and feature history")
params, _ = self.get_params_importances(n_most_important)
params, _ = self.get_params_importances(n_most_important, objective_number)
params_labels = self.map_parameter_names(params)

# find the trials where the loss got better for the first time
losses = self.get_losses()
losses = self.get_losses(flatten=False)[objective_number]
min_max_factor = -1 if self.directions[objective_number] == StudyDirection.MAXIMIZE else 1
better_iterations = [0]
current_best = losses[0]
for i, loss in enumerate(losses[1:], start=1):
if loss < current_best:
if min_max_factor * loss < min_max_factor * current_best:
current_best = loss
better_iterations.append(i)

@@ -357,7 +415,7 @@ def plot_loss_feature_history(self, *, n_most_important=None):
title = f"{name}, correlation with loss: {corr}"
color = "tab:blue"
if i == len(axes) - 2:
title = "loss"
title = f"loss {objective_number}"
color = "black"

ax.plot(x_axis, values, linewidth=2, color=color)
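As a usage sketch (assuming an inspector instance insp that has already been loaded), the new accessors combine as follows:

# one list of losses per objective/direction
losses = insp.get_losses(flatten=False)

# indices of the Pareto-best trials within the completed-trial order
for idx in insp.get_best_indices():
    print([losses[obj][idx] for obj in range(insp.n_directions)])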
25 changes: 24 additions & 1 deletion src/o2tuner/optimise.py
@@ -62,7 +62,8 @@ def prepare_optimisation(optuna_config, work_dir="o2tuner_optimise"):
optuna_config = parse_yaml(optuna_config)

trials = optuna_config.get("trials", 100)
jobs = optuna_config.get("jobs", 1)
# the number of jobs shall be <= number of trials
jobs = min(optuna_config.get("jobs", 1), trials)

if trials < jobs:
LOG.warning("Attempt to do %d trials, hence reducing the number of jobs from %d to %d", trials, jobs, trials)
@@ -146,3 +147,25 @@ def decorator(*args, **kwargs):
return func(*args, **kwargs)
decorator.needs_cwd = True
return decorator


def directions(directions_list):
"""
Decorator to indicate directions of objectives
For single-objective optimisation, the default is 'minimize'. If the objective should be maximized, the decorator
must be used and the single-element list ['maximize'] must be passed.
For multiple objectives, a list of 'minimize' and 'maximize' entries must be passed to the decorator to indicate the
direction of each objective.
"""
def directions_impl(func):
"""
Attach the given directions to the objective function so they can be passed on to the study
"""
@functools.wraps(func)
def decorator(*args, **kwargs):
return func(*args, **kwargs)
decorator.directions = directions_list
return decorator
return directions_impl
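
A quick check of what the decorator does (the objective body is purely illustrative); set_objective later picks the attribute up via getattr(objective, 'directions', None):

from o2tuner.optimise import directions  # as added in this commit

@directions(['minimize', 'maximize'])
def my_objective(trial):
    x = trial.suggest_float("x", -5.0, 5.0)
    return x**2, -abs(x - 2.0)  # first value minimised, second maximised

print(my_objective.directions)  # ['minimize', 'maximize']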
