Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement callbacks for python solvers #361

Merged
merged 1 commit into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 26 additions & 3 deletions skdecide/hub/solver/ars/ars.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def flatten(c):


class AugmentedRandomSearch(Solver, Policies, Restorable):
"""Augmented Random Search solver."""

T_domain = D

def __init__(
Expand All @@ -85,7 +87,22 @@ def __init__(
learning_rate=0.02,
policy_noise=0.03,
reward_maximization=True,
callback: Callable[[AugmentedRandomSearch], bool] = lambda solver: False,
) -> None:
"""

# Parameters
n_epochs
epoch_size
directions
top_directions
learning_rate
policy_noise
reward_maximization
callback: function called at each solver epoch. If it returns True, the solve process stops.

"""
self.callback = callback
self.env = None
self.n_epochs = n_epochs
self.learning_rate = learning_rate
Expand Down Expand Up @@ -216,10 +233,16 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
self.update_policy(rollouts, sigma_r)

# Printing the final reward of the policy after the update
reward_evaluation = self.explore(normalizer)
print("Step:", step, "Reward:", reward_evaluation, "Policy", self.policy)
self.reward_evaluation = self.explore(normalizer)
print(
"Step:", step, "Reward:", self.reward_evaluation, "Policy", self.policy
)

# Stopping because of user's callback?
if self.callback(self):
break

print("Final Reward:", reward_evaluation, "Policy", self.policy)
print("Final Reward:", self.reward_evaluation, "Policy", self.policy)

def _sample_action(
self, observation: D.T_agent[D.T_observation]
Expand Down
43 changes: 32 additions & 11 deletions skdecide/hub/solver/cgp/cgp.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@
SingleAgent,
UnrestrictedActions,
)
from skdecide.builders.solver import DeterministicPolicies, Restorable
from skdecide.hub.space.gym import GymSpace
from skdecide.builders.solver import DeterministicPolicies

from .pycgp.cgpes import CGP, CGPES, Evaluator
from .pycgp.cgpfunctions import (
Expand Down Expand Up @@ -202,6 +201,8 @@ def denorm(vals, types):


class CGPWrapper(Solver, DeterministicPolicies):
"""Cartesian Genetic Programming solver."""

T_domain = D

def __init__(
Expand All @@ -217,8 +218,26 @@ def __init__(
n_it=1000000,
genome=None,
verbose=True,
callback: Callable[[CGPWrapper], bool] = lambda solver: False,
):
"""

# Parameters
folder_name
library
col
row
nb_ind
mutation_rate_nodes
mutation_rate_outputs
n_cpus
n_it
genome
verbose
callback: function called at each solver iteration. If it returns True, the solve process stops.

"""
self.callback = callback
if library is None:
library = self._get_default_function_lib()

Expand Down Expand Up @@ -296,21 +315,23 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
print(cgpFather.genome)

es = CGPES(
self._nb_ind,
self._mutation_rate_nodes,
self._mutation_rate_outputs,
cgpFather,
evaluator,
self._folder_name,
self._n_cpus,
num_offsprings=self._nb_ind,
mutation_rate_nodes=self._mutation_rate_nodes,
mutation_rate_outputs=self._mutation_rate_outputs,
father=cgpFather,
evaluator=evaluator,
folder=self._folder_name,
num_cpus=self._n_cpus,
verbose=self._verbose,
callback=self.callback,
cgpwrapper=self,
)
es.run(self._n_it)

self._domain = domain
self._es = es
self._evaluator = evaluator

es.run(self._n_it)

def _get_next_action(
self, observation: D.T_agent[D.T_observation]
) -> D.T_agent[D.T_concurrency[D.T_event]]:
Expand Down
13 changes: 13 additions & 0 deletions skdecide/hub/solver/cgp/pycgp/cgpes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations # allow using CGPWrapper in annotations

import os
from typing import TYPE_CHECKING, Callable

import numpy as np
from joblib import Parallel, delayed

from .cgp import CGP
from .evaluator import Evaluator

if TYPE_CHECKING: # avoids circular imports
from ..cgp import CGPWrapper


class CGPES:
def __init__(
Expand All @@ -19,10 +25,14 @@ def __init__(
mutation_rate_outputs,
father,
evaluator,
cgpwrapper: CGPWrapper,
callback: Callable[[CGPWrapper], bool],
folder="genomes",
num_cpus=1,
verbose=True,
):
self.callback = callback
self.cgpwrapper = cgpwrapper
self.num_offsprings = num_offsprings
self.mutation_rate_nodes = mutation_rate_nodes
self.mutation_rate_outputs = mutation_rate_outputs
Expand Down Expand Up @@ -116,3 +126,6 @@ def offspring_eval_task(offspring_id):
+ str(self.current_fitness)
+ ".txt"
)
# Stopping because of user's callback?
if self.callback(self.cgpwrapper):
break
7 changes: 2 additions & 5 deletions skdecide/hub/solver/do_solver/do_solver_scheduling.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def __init__(
policy_method_params: PolicyMethodParams,
method: SolvingMethod = SolvingMethod.PILE,
dict_params: Optional[Dict[Any, Any]] = None,
callback: Optional[Callable[[DOSolver], bool]] = None,
callback: Callable[[DOSolver], bool] = lambda solver: False,
):
self.callback = callback
self.method = method
Expand Down Expand Up @@ -206,10 +206,7 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
self.dict_params[k] = params[k]

# callbacks
if self.callback is None:
callbacks = []
else:
callbacks = [_DOCallback(callback=self.callback, solver=self)]
callbacks = [_DOCallback(callback=self.callback, solver=self)]
copy_dict_params = deepcopy(self.dict_params)
if "callbacks" in copy_dict_params:
callbacks = callbacks + copy_dict_params.pop("callbacks")
Expand Down
34 changes: 24 additions & 10 deletions skdecide/hub/solver/lazy_astar/lazy_astar.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ class D(


class LazyAstar(Solver, DeterministicPolicies, Utilities, FromAnyState):
"""Lazy A* solver."""

T_domain = D

def __init__(
Expand All @@ -48,8 +50,19 @@ def __init__(
weight: float = 1.0,
verbose: bool = False,
render: bool = False,
callback: Callable[[LazyAstar], bool] = lambda solver: False,
) -> None:
"""

# Parameters
heuristic
weight
verbose
render
callback: function called at each solver iteration. If it returns True, the solve process stops.

"""
self.callback = callback
self._heuristic = (
(lambda _, __: Value(cost=0.0)) if heuristic is None else heuristic
)
Expand Down Expand Up @@ -136,18 +149,19 @@ def extender(node, label, explored):
}
# enqueued = {source: min([(0, self._weight * self._heuristic(source, target, initial_label[source]).cost)
# for target in targets], key=lambda x: x[1]) for source in sources}
queue = [
self.queue = [
(enqueued[source][1], next(c), source, 0, None, initial_label[source])
for source in sources
]
# The explored dict is the CLOSED list.
# It maps explored nodes to a pair of parent closest to the source and label of transition from parent.
explored = {}
self.explored = {}
path = []
estim_total = 0.0
while queue:
while self.queue and not self.callback(self):
# Pop the smallest item from queue, i.e. with smallest f-value
estim_total, __, curnode, dist, parent, label = pop(queue)
estim_total, __, curnode, dist, parent, label = pop(self.queue)

if self._render:
self._domain.render(curnode)
if self._verbose:
Expand All @@ -159,16 +173,16 @@ def extender(node, label, explored):
path = [(parent, label), (curnode, None)]
node = parent
while node is not None:
(parent, label) = explored[node]
(parent, label) = self.explored[node]
if parent is not None:
path.insert(0, (parent, label))
node = parent
break # return path, dist, enqueued[curnode][0], len(enqueued)
if curnode in explored:
if curnode in self.explored:
continue
explored[curnode] = (parent, label)
for neighbor, cost, lbl in extender(curnode, label, explored):
if neighbor in explored:
self.explored[curnode] = (parent, label)
for neighbor, cost, lbl in extender(curnode, label, self.explored):
if neighbor in self.explored:
continue
ncost = dist + cost
if neighbor in enqueued:
Expand All @@ -184,7 +198,7 @@ def extender(node, label, explored):
h = self._heuristic(self._domain, neighbor).cost
enqueued[neighbor] = ncost, h
push(
queue,
self.queue,
(
ncost + (self._weight * h),
next(c),
Expand Down
17 changes: 16 additions & 1 deletion skdecide/hub/solver/lrtastar/lrtastar.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ class D(


class LRTAstar(Solver, DeterministicPolicies, Utilities, FromAnyState):
"""Learning Real-Time A* solver."""

T_domain = D

def _get_next_action(
Expand All @@ -60,7 +62,20 @@ def __init__(
verbose: bool = False,
max_iter=5000,
max_depth=200,
callback: Callable[[LRTAstar], bool] = lambda solver: False,
) -> None:
"""

# Parameters
heuristic
weight
verbose
max_iter
max_depth
callback: function called at each solver iteration. If it returns True, the solve process stops.

"""
self.callback = callback
self._heuristic = (
(lambda _, __: Value(cost=0.0)) if heuristic is None else heuristic
)
Expand Down Expand Up @@ -113,7 +128,7 @@ def _solve_from(
iteration = 0
best_cost = float("inf")
# best_path = None
while True:
while not self.callback(self):
print(memory)
dead_end, cumulated_cost, current_roll, list_action = self.doTrial(memory)
if self._verbose:
Expand Down
22 changes: 22 additions & 0 deletions skdecide/hub/solver/maxent_irl/maxent_irl.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class D(RLDomain):


class MaxentIRL(Solver, Policies, Restorable):
"""Maximum Entropy Inverse Reinforcement Learning solver."""

T_domain = D

def __init__(
Expand All @@ -34,7 +36,23 @@ def __init__(
theta_learning_rate=0.05,
n_epochs=20000,
expert_trajectories="maxent_expert_demo.npy",
callback: Callable[[MaxentIRL], bool] = lambda solver: False,
) -> None:
"""

# Parameters
n_states
n_actions
one_feature
gamma
q_learning_rate
theta_learning_rate
n_epochs
expert_trajectories
callback: function called at each solver epoch. If it returns True, the solve process stops.

"""
self.callback = callback
self.n_states = n_states
self.feature_matrix = np.eye(self.n_states)
self.n_actions = n_actions
Expand Down Expand Up @@ -227,6 +245,10 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
arr=self.q_table,
)

# Stopping because of user's callback?
if self.callback(self):
break

self.q_table = np.load(
file=self.expert_trajectories[:-4] + "_maxent_q_table.npy"
)
Expand Down
Loading
Loading