Fix Multi-Objective cache #872

Merged
16 commits merged on Jul 7, 2022
8 changes: 8 additions & 0 deletions changelog.md
@@ -1,3 +1,11 @@
# 1.4.0

## Improvements
* Enabled caching for multi-objectives (#872). Costs are now normalized in `get_cost`
or optionally in `average_cost`/`sum_cost`/`min_cost` to return a single float value. Therefore,
the cached cost values no longer need to be updated every time a new entry is added to the runhistory.


# 1.3.4
* Added reference to JMLR paper.
* Fixed typos in the documentation.
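The changelog entry above describes deferring normalization and aggregation to query time. As a purely illustrative sketch (plain numpy, not SMAC's actual implementation), the idea looks roughly like this: raw multi-objective costs stay cached as-is, and only a lookup normalizes them and collapses them to a single float.

# Illustrative sketch only -- not SMAC's implementation. Raw objective vectors
# stay cached; normalization and aggregation happen when a cost is queried.
import numpy as np

def normalize(costs: np.ndarray) -> np.ndarray:
    """Scale each objective column to [0, 1] over the observed runs."""
    lo, hi = costs.min(axis=0), costs.max(axis=0)
    return (costs - lo) / np.where(hi > lo, hi - lo, 1.0)

# One row per run, one column per objective (hypothetical values).
raw_costs = np.array([[0.4, 120.0],
                      [0.9,  60.0],
                      [0.1, 300.0]])

# Query time: normalize, then reduce each run's objective vector to one float.
aggregated = normalize(raw_costs).mean(axis=1)
print(aggregated)  # one scalar cost per run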
5 changes: 4 additions & 1 deletion examples/python/plot_simple_multi_objective.py
@@ -27,7 +27,10 @@ def plot(all_x):
plt.figure()
for x in all_x:
f1, f2 = schaffer(x)
plt.scatter(f1, f2, c="blue", alpha=0.1)
plt.scatter(f1, f2, c="blue", alpha=0.1, zorder=3000)

plt.vlines([1], 0, 4, linestyles="dashed", colors=["red"])
plt.hlines([1], 0, 4, linestyles="dashed", colors=["red"])

plt.show()

2 changes: 1 addition & 1 deletion smac/facade/smac_ac_facade.py
@@ -530,7 +530,7 @@ def __init__(

if scenario.multi_objectives is not None and num_obj > 1: # type: ignore[attr-defined] # noqa F821
# define any defaults here
_multi_objective_kwargs = {"rng": rng, "num_obj": num_obj}
_multi_objective_kwargs = {"rng": rng}

if multi_objective_kwargs is not None:
_multi_objective_kwargs.update(multi_objective_kwargs)
19 changes: 8 additions & 11 deletions smac/intensification/abstract_racer.py
@@ -89,7 +89,6 @@ def __init__(
maxR: int = 2000,
adaptive_capping_slackfactor: float = 1.2,
min_chall: int = 1,
num_obj: int = 1,
):

self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
@@ -129,12 +128,6 @@ def __init__(
# to mark the end of an iteration
self.iteration_done = False

if num_obj > 1:
raise ValueError(
"Intensifiers only support single objective optimization. For multi-objective problems,"
"please refer to multi-objective intensifiers"
)

def get_next_run(
self,
challengers: Optional[List[Configuration]],
@@ -303,9 +296,10 @@ def _adapt_cutoff(self, challenger: Configuration, run_history: RunHistory, inc_
# reasons)
chall_inst_seeds = run_history.get_runs_for_config(challenger, only_max_observed_budget=True)
chal_sum_cost = run_history.sum_cost(
config=challenger,
instance_seed_budget_keys=chall_inst_seeds,
config=challenger, instance_seed_budget_keys=chall_inst_seeds, normalize=True
)
assert type(chal_sum_cost) == float

cutoff = min(curr_cutoff, inc_sum_cost * self.adaptive_capping_slackfactor - chal_sum_cost)
return cutoff

@@ -348,8 +342,11 @@ def _compare_configs(

# performance on challenger runs, the challenger only becomes incumbent
# if it dominates the incumbent
chal_perf = run_history.average_cost(challenger, to_compare_runs)
inc_perf = run_history.average_cost(incumbent, to_compare_runs)
chal_perf = run_history.average_cost(challenger, to_compare_runs, normalize=True)
inc_perf = run_history.average_cost(incumbent, to_compare_runs, normalize=True)

assert type(chal_perf) == float
assert type(inc_perf) == float

# Line 15
if np.any(chal_perf > inc_perf) and len(chall_runs) >= self.minR:
2 changes: 0 additions & 2 deletions smac/intensification/hyperband.py
@@ -98,7 +98,6 @@ def __init__(
min_chall: int = 1,
incumbent_selection: str = "highest_executed_budget",
identifier: int = 0,
num_obj: int = 1,
) -> None:

super().__init__(
@@ -119,7 +118,6 @@
adaptive_capping_slackfactor=adaptive_capping_slackfactor,
min_chall=min_chall,
incumbent_selection=incumbent_selection,
num_obj=num_obj,
)

self.identifier = identifier
10 changes: 3 additions & 7 deletions smac/intensification/intensification.py
@@ -144,7 +144,6 @@ def __init__(
maxR: int = 2000,
adaptive_capping_slackfactor: float = 1.2,
min_chall: int = 2,
num_obj: int = 1,
):
super().__init__(
stats=stats,
@@ -159,7 +158,6 @@
maxR=maxR,
adaptive_capping_slackfactor=adaptive_capping_slackfactor,
min_chall=min_chall,
num_obj=num_obj,
)

self.logger = logging.getLogger(self.__module__ + "." + self.__class__.__name__)
@@ -795,6 +793,7 @@ def _process_racer_results(
"""
chal_runs = run_history.get_runs_for_config(challenger, only_max_observed_budget=True)
chal_perf = run_history.get_cost(challenger)

# if all <instance, seed> have been run, compare challenger performance
if not self.to_run:
new_incumbent = self._compare_configs(
@@ -896,11 +895,8 @@ def _get_instances_to_run(
# because of efficiency computed here
inst_seed_pairs = list(inc_inst_seeds - set(missing_runs))
# cost used by incumbent for going over all runs in inst_seed_pairs
inc_sum_cost = run_history.sum_cost(
config=incumbent,
instance_seed_budget_keys=inst_seed_pairs,
)

inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inst_seed_pairs, normalize=True)
assert type(inc_sum_cost) == float
return to_run, inc_sum_cost

def get_next_challenger(
2 changes: 0 additions & 2 deletions smac/intensification/parallel_scheduling.py
@@ -89,7 +89,6 @@ def __init__(
inst_seed_pairs: Optional[List[Tuple[str, int]]] = None,
min_chall: int = 1,
incumbent_selection: str = "highest_executed_budget",
num_obj: int = 1,
) -> None:

super().__init__(
@@ -103,7 +102,6 @@
run_obj_time=run_obj_time,
adaptive_capping_slackfactor=adaptive_capping_slackfactor,
min_chall=min_chall,
num_obj=num_obj,
)

# We have a pool of instances that yield configurations to run
4 changes: 0 additions & 4 deletions smac/intensification/simple_intensifier.py
@@ -46,7 +46,6 @@ def __init__(
cutoff: Optional[float] = None,
deterministic: bool = False,
run_obj_time: bool = True,
num_obj: int = 1,
**kwargs: Any,
) -> None:

@@ -61,10 +60,7 @@
run_obj_time=run_obj_time,
adaptive_capping_slackfactor=1.0,
min_chall=1,
num_obj=num_obj,
)
# Simple intensifier does not require comparing run results, thus we could simply ignore num_obj here

# We want to control the number of runs that are sent to
# the workers. At any time, we want to make sure that if there
# are just W workers, there should be at max W active runs
8 changes: 4 additions & 4 deletions smac/intensification/successive_halving.py
@@ -137,7 +137,6 @@ def __init__(
min_chall: int = 1,
incumbent_selection: str = "highest_executed_budget",
identifier: int = 0,
num_obj: int = 1,
) -> None:
super().__init__(
stats=stats,
Expand All @@ -150,7 +149,6 @@ def __init__(
run_obj_time=run_obj_time,
adaptive_capping_slackfactor=adaptive_capping_slackfactor,
min_chall=min_chall,
num_obj=num_obj,
)

self.identifier = identifier
@@ -629,14 +627,16 @@ def get_next_run(
# - during the 1st intensify run, the incumbent shouldn't be capped after being compared against itself
if incumbent and incumbent != challenger:
inc_runs = run_history.get_runs_for_config(incumbent, only_max_observed_budget=True)
inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inc_runs)
inc_sum_cost = run_history.sum_cost(config=incumbent, instance_seed_budget_keys=inc_runs, normalize=True)
else:
inc_sum_cost = np.inf
if self.first_run:
self.logger.info("First run, no incumbent provided; challenger is assumed to be the incumbent")
incumbent = challenger

# selecting instance-seed subset for this budget, depending on the kind of budget
assert type(inc_sum_cost) == float

# Selecting instance-seed subset for this budget, depending on the kind of budget
if self.instance_as_budget:
prev_budget = int(self.all_budgets[self.stage - 1]) if self.stage > 0 else 0
curr_insts = self.inst_seed_pairs[int(prev_budget) : int(curr_budget)]
@@ -1,3 +1,5 @@
from __future__ import annotations

from abc import ABC
from typing import Optional

@@ -10,10 +12,8 @@ class AbstractMultiObjectiveAlgorithm(ABC):
It can be applied to rh2epm or epmchooser.
"""

def __init__(self, num_obj: int, rng: Optional[np.random.RandomState] = None):

def __init__(self, rng: Optional[np.random.RandomState] = None):
if rng is None:
rng = np.random.RandomState(0)

self.num_obj = num_obj
self.rng = rng
24 changes: 15 additions & 9 deletions smac/optimizer/multi_objective/aggregation_strategy.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from abc import abstractmethod

import numpy as np
@@ -9,22 +11,24 @@

class AggregationStrategy(AbstractMultiObjectiveAlgorithm):
"""
An abstract class to aggregate multi-objective losses to a single objective losses, which can then be utilized
by the single-objective optimizer.
An abstract class to aggregate multi-objective losses to a single objective loss,
which can then be utilized by the single-objective optimizer.
"""

@abstractmethod
def __call__(self, values: np.ndarray) -> float:
def __call__(self, values: list[float]) -> float:
"""
Transform a multi-objective loss to a single loss.

Parameters
----------
values: np.ndarray[num_evaluations, num_obj].
values : list[float]
Normalized values.

Returns
-------
cost: float.
cost : float
Combined cost.
"""
raise NotImplementedError

@@ -35,16 +39,18 @@ class MeanAggregationStrategy(AggregationStrategy):
which can then be utilized by the single-objective optimizer.
"""

def __call__(self, values: np.ndarray) -> float:
def __call__(self, values: list[float]) -> float:
"""
Transform a multi-objective loss to a single loss.

Parameters
----------
values (np.ndarray): Normalized values.
values : list[float]
Normalized values.

Collaborator: I would argue that the normalization is part of the aggregation strategy and should probably be moved here instead.

Collaborator (author): I don't see a reason why the values should not be normalized here.

Returns
-------
cost (float): Combined cost.
cost : float
Combined cost.
"""
return np.mean(values, axis=1)
return np.mean(values, axis=0)
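For reference, a minimal usage sketch of the MeanAggregationStrategy changed in this diff. The import path is assumed from the file name above, and the input values are assumed to be already normalized; this is not code from the PR.

import numpy as np
from smac.optimizer.multi_objective.aggregation_strategy import MeanAggregationStrategy

aggregator = MeanAggregationStrategy(rng=np.random.RandomState(0))

# A single run's already-normalized objective values.
normalized_values = [0.2, 0.8]

# np.mean over the objective axis collapses them to one float (0.5 here).
cost = aggregator(normalized_values)
print(cost)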
18 changes: 10 additions & 8 deletions smac/optimizer/multi_objective/parego.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import Optional

import numpy as np
@@ -8,32 +10,32 @@
class ParEGO(AggregationStrategy):
def __init__(
self,
num_obj: int,
rng: Optional[np.random.RandomState] = None,
rho: float = 0.05,
):
super(ParEGO, self).__init__(num_obj=num_obj, rng=rng)
super(ParEGO, self).__init__(rng=rng)
self.rho = rho

def __call__(self, values: np.ndarray) -> float:
def __call__(self, values: list[float]) -> float:
"""
Transform a multi-objective loss to a single loss.

Parameters
----------
values (np.ndarray): Normalized values.
values : list[float]
Normalized values.

Returns
-------
cost (float): Combined cost.
cost : float
Combined cost.
"""
# Then we have to compute the weight
theta = self.rng.rand(self.num_obj)
theta = self.rng.rand(len(values))

# Normalize st all theta values sum up to 1
theta = theta / (np.sum(theta) + 1e-10)

# Weight the values
theta_f = theta * values

return np.max(theta_f, axis=1) + self.rho * np.sum(theta_f, axis=1)
return np.max(theta_f, axis=0) + self.rho * np.sum(theta_f, axis=0)
Contributor: I'm somewhat surprised to see that the summation over an axis changes without a respective test changing. Does this mean that there is no test for ParEGO?

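A minimal sketch of what such a ParEGO test could look like, assuming the import path from the diff above; this is not part of the PR.

import numpy as np
from smac.optimizer.multi_objective.parego import ParEGO

def test_parego_returns_scalar():
    parego = ParEGO(rng=np.random.RandomState(0), rho=0.05)
    values = [0.3, 0.7]  # already-normalized objective values for one run

    cost = parego(values)

    # The weighted aggregation should collapse the objective vector to a scalar ...
    assert np.ndim(cost) == 0
    # ... and be reproducible for the same random state.
    assert ParEGO(rng=np.random.RandomState(0), rho=0.05)([0.3, 0.7]) == cost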