airbus · fteicht · May 17, 2024 · May 14, 2024
diff --git a/skdecide/hub/solver/ars/ars.py b/skdecide/hub/solver/ars/ars.py
@@ -74,6 +74,8 @@ def flatten(c):
 
 
 class AugmentedRandomSearch(Solver, Policies, Restorable):
+    """Augmented Random Search solver."""
+
     T_domain = D
 
     def __init__(
@@ -85,7 +87,22 @@ def __init__(
         learning_rate=0.02,
         policy_noise=0.03,
         reward_maximization=True,
+        callback: Callable[[AugmentedRandomSearch], bool] = lambda solver: False,
     ) -> None:
+        """
+
+        # Parameters
+        n_epochs
+        epoch_size
+        directions
+        top_directions
+        learning_rate
+        policy_noise
+        reward_maximization
+        callback: function called at each solver epoch. If it returns True, the solve process stops.
+
+        """
+        self.callback = callback
         self.env = None
         self.n_epochs = n_epochs
         self.learning_rate = learning_rate
@@ -216,10 +233,16 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
             self.update_policy(rollouts, sigma_r)
 
             # Printing the final reward of the policy after the update
-            reward_evaluation = self.explore(normalizer)
-            print("Step:", step, "Reward:", reward_evaluation, "Policy", self.policy)
+            self.reward_evaluation = self.explore(normalizer)
+            print(
+                "Step:", step, "Reward:", self.reward_evaluation, "Policy", self.policy
+            )
+
+            # Stopping because of user's callback?
+            if self.callback(self):
+                break
 
-        print("Final Reward:", reward_evaluation, "Policy", self.policy)
+        print("Final Reward:", self.reward_evaluation, "Policy", self.policy)
 
     def _sample_action(
         self, observation: D.T_agent[D.T_observation]

diff --git a/skdecide/hub/solver/cgp/cgp.py b/skdecide/hub/solver/cgp/cgp.py
@@ -21,8 +21,7 @@
     SingleAgent,
     UnrestrictedActions,
 )
-from skdecide.builders.solver import DeterministicPolicies, Restorable
-from skdecide.hub.space.gym import GymSpace
+from skdecide.builders.solver import DeterministicPolicies
 
 from .pycgp.cgpes import CGP, CGPES, Evaluator
 from .pycgp.cgpfunctions import (
@@ -202,6 +201,8 @@ def denorm(vals, types):
 
 
 class CGPWrapper(Solver, DeterministicPolicies):
+    """Cartesian Genetic Programming solver."""
+
     T_domain = D
 
     def __init__(
@@ -217,8 +218,26 @@ def __init__(
         n_it=1000000,
         genome=None,
         verbose=True,
+        callback: Callable[[CGPWrapper], bool] = lambda solver: False,
     ):
+        """
 
+        # Parameters
+        folder_name
+        library
+        col
+        row
+        nb_ind
+        mutation_rate_nodes
+        mutation_rate_outputs
+        n_cpus
+        n_it
+        genome
+        verbose
+        callback: function called at each solver iteration. If it returns True, the solve process stops.
+
+        """
+        self.callback = callback
         if library is None:
             library = self._get_default_function_lib()
 
@@ -296,21 +315,23 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
             print(cgpFather.genome)
 
         es = CGPES(
-            self._nb_ind,
-            self._mutation_rate_nodes,
-            self._mutation_rate_outputs,
-            cgpFather,
-            evaluator,
-            self._folder_name,
-            self._n_cpus,
+            num_offsprings=self._nb_ind,
+            mutation_rate_nodes=self._mutation_rate_nodes,
+            mutation_rate_outputs=self._mutation_rate_outputs,
+            father=cgpFather,
+            evaluator=evaluator,
+            folder=self._folder_name,
+            num_cpus=self._n_cpus,
             verbose=self._verbose,
+            callback=self.callback,
+            cgpwrapper=self,
         )
-        es.run(self._n_it)
-
         self._domain = domain
         self._es = es
         self._evaluator = evaluator
 
+        es.run(self._n_it)
+
     def _get_next_action(
         self, observation: D.T_agent[D.T_observation]
     ) -> D.T_agent[D.T_concurrency[D.T_event]]:

diff --git a/skdecide/hub/solver/cgp/pycgp/cgpes.py b/skdecide/hub/solver/cgp/pycgp/cgpes.py
@@ -2,14 +2,20 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 
+from __future__ import annotations  # allow using CGPWrapper in annotations
+
 import os
+from typing import TYPE_CHECKING, Callable
 
 import numpy as np
 from joblib import Parallel, delayed
 
 from .cgp import CGP
 from .evaluator import Evaluator
 
+if TYPE_CHECKING:  # avoids circular imports
+    from ..cgp import CGPWrapper
+
 
 class CGPES:
     def __init__(
@@ -19,10 +25,14 @@ def __init__(
         mutation_rate_outputs,
         father,
         evaluator,
+        cgpwrapper: CGPWrapper,
+        callback: Callable[[CGPWrapper], bool],
         folder="genomes",
         num_cpus=1,
         verbose=True,
     ):
+        self.callback = callback
+        self.cgpwrapper = cgpwrapper
         self.num_offsprings = num_offsprings
         self.mutation_rate_nodes = mutation_rate_nodes
         self.mutation_rate_outputs = mutation_rate_outputs
@@ -116,3 +126,6 @@ def offspring_eval_task(offspring_id):
                     + str(self.current_fitness)
                     + ".txt"
                 )
+            # Stopping because of user's callback?
+            if self.callback(self.cgpwrapper):
+                break
diff --git a/skdecide/hub/solver/do_solver/do_solver_scheduling.py b/skdecide/hub/solver/do_solver/do_solver_scheduling.py
@@ -166,7 +166,7 @@ def __init__(
         policy_method_params: PolicyMethodParams,
         method: SolvingMethod = SolvingMethod.PILE,
         dict_params: Optional[Dict[Any, Any]] = None,
-        callback: Optional[Callable[[DOSolver], bool]] = None,
+        callback: Callable[[DOSolver], bool] = lambda solver: False,
     ):
         self.callback = callback
         self.method = method
@@ -206,10 +206,7 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
                 self.dict_params[k] = params[k]
 
         # callbacks
-        if self.callback is None:
-            callbacks = []
-        else:
-            callbacks = [_DOCallback(callback=self.callback, solver=self)]
+        callbacks = [_DOCallback(callback=self.callback, solver=self)]
         copy_dict_params = deepcopy(self.dict_params)
         if "callbacks" in copy_dict_params:
             callbacks = callbacks + copy_dict_params.pop("callbacks")

diff --git a/skdecide/hub/solver/lazy_astar/lazy_astar.py b/skdecide/hub/solver/lazy_astar/lazy_astar.py
@@ -38,6 +38,8 @@ class D(
 
 
 class LazyAstar(Solver, DeterministicPolicies, Utilities, FromAnyState):
+    """Lazy A* solver."""
+
     T_domain = D
 
     def __init__(
@@ -48,8 +50,19 @@ def __init__(
         weight: float = 1.0,
         verbose: bool = False,
         render: bool = False,
+        callback: Callable[[LazyAstar], bool] = lambda solver: False,
     ) -> None:
+        """
+
+        # Parameters
+        heuristic
+        weight
+        verbose
+        render
+        callback: function called at each solver iteration. If it returns True, the solve process stops.
 
+        """
+        self.callback = callback
         self._heuristic = (
             (lambda _, __: Value(cost=0.0)) if heuristic is None else heuristic
         )
@@ -136,18 +149,19 @@ def extender(node, label, explored):
         }
         # enqueued = {source: min([(0, self._weight * self._heuristic(source, target, initial_label[source]).cost)
         # for target in targets], key=lambda x: x[1]) for source in sources}
-        queue = [
+        self.queue = [
             (enqueued[source][1], next(c), source, 0, None, initial_label[source])
             for source in sources
         ]
         # The explored dict is the CLOSED list.
         # It maps explored nodes to a pair of parent closest to the source and label of transition from parent.
-        explored = {}
+        self.explored = {}
         path = []
         estim_total = 0.0
-        while queue:
+        while self.queue and not self.callback(self):
             # Pop the smallest item from queue, i.e. with smallest f-value
-            estim_total, __, curnode, dist, parent, label = pop(queue)
+            estim_total, __, curnode, dist, parent, label = pop(self.queue)
+
             if self._render:
                 self._domain.render(curnode)
             if self._verbose:
@@ -159,16 +173,16 @@ def extender(node, label, explored):
                 path = [(parent, label), (curnode, None)]
                 node = parent
                 while node is not None:
-                    (parent, label) = explored[node]
+                    (parent, label) = self.explored[node]
                     if parent is not None:
                         path.insert(0, (parent, label))
                     node = parent
                 break  # return path, dist, enqueued[curnode][0], len(enqueued)
-            if curnode in explored:
+            if curnode in self.explored:
                 continue
-            explored[curnode] = (parent, label)
-            for neighbor, cost, lbl in extender(curnode, label, explored):
-                if neighbor in explored:
+            self.explored[curnode] = (parent, label)
+            for neighbor, cost, lbl in extender(curnode, label, self.explored):
+                if neighbor in self.explored:
                     continue
                 ncost = dist + cost
                 if neighbor in enqueued:
@@ -184,7 +198,7 @@ def extender(node, label, explored):
                     h = self._heuristic(self._domain, neighbor).cost
                 enqueued[neighbor] = ncost, h
                 push(
-                    queue,
+                    self.queue,
                     (
                         ncost + (self._weight * h),
                         next(c),

diff --git a/skdecide/hub/solver/lrtastar/lrtastar.py b/skdecide/hub/solver/lrtastar/lrtastar.py
@@ -36,6 +36,8 @@ class D(
 
 
 class LRTAstar(Solver, DeterministicPolicies, Utilities, FromAnyState):
+    """Learning Real-Time A* solver."""
+
     T_domain = D
 
     def _get_next_action(
@@ -60,7 +62,20 @@ def __init__(
         verbose: bool = False,
         max_iter=5000,
         max_depth=200,
+        callback: Callable[[LRTAstar], bool] = lambda solver: False,
     ) -> None:
+        """
+
+        # Parameters
+        heuristic
+        weight
+        verbose
+        max_iter
+        max_depth
+        callback: function called at each solver iteration. If it returns True, the solve process stops.
+
+        """
+        self.callback = callback
         self._heuristic = (
             (lambda _, __: Value(cost=0.0)) if heuristic is None else heuristic
         )
@@ -113,7 +128,7 @@ def _solve_from(
         iteration = 0
         best_cost = float("inf")
         # best_path = None
-        while True:
+        while not self.callback(self):
             print(memory)
             dead_end, cumulated_cost, current_roll, list_action = self.doTrial(memory)
             if self._verbose:

diff --git a/skdecide/hub/solver/maxent_irl/maxent_irl.py b/skdecide/hub/solver/maxent_irl/maxent_irl.py
@@ -22,6 +22,8 @@ class D(RLDomain):
 
 
 class MaxentIRL(Solver, Policies, Restorable):
+    """Maximum Entropy Inverse Reinforcement Learning solver."""
+
     T_domain = D
 
     def __init__(
@@ -34,7 +36,23 @@ def __init__(
         theta_learning_rate=0.05,
         n_epochs=20000,
         expert_trajectories="maxent_expert_demo.npy",
+        callback: Callable[[MaxentIRL], bool] = lambda solver: False,
     ) -> None:
+        """
+
+        # Parameters
+        n_states
+        n_actions
+        one_feature
+        gamma
+        q_learning_rate
+        theta_learning_rate
+        n_epochs
+        expert_trajectories
+        callback: function called at each solver epoch. If it returns True, the solve process stops.
+
+        """
+        self.callback = callback
         self.n_states = n_states
         self.feature_matrix = np.eye(self.n_states)
         self.n_actions = n_actions
@@ -227,6 +245,10 @@ def _solve(self, domain_factory: Callable[[], D]) -> None:
                     arr=self.q_table,
                 )
 
+            # Stopping because of user's callback?
+            if self.callback(self):
+                break
+
         self.q_table = np.load(
             file=self.expert_trajectories[:-4] + "_maxent_q_table.npy"
         )