Merge pull request #216 from thouska/New-design-for-status-tracking

Fix for #212 and further improvements, which lead to a new version (1.5.0). Removes the alt_objfun keyword (the setting is not needed any more; spotpy will always use the user-defined def objectivefunction), changes some minor details in the use of analyser.py, and tracks the algorithm's best objectivefunction value more intuitively.
thouska · May 20, 2019 · ed502ca · ed502ca
2 parents bd8ce10 + 5b47105
commit ed502ca
Show file tree

Hide file tree

Showing 26 changed files with 528 additions and 646 deletions.
diff --git a/docs/getting_started.md b/docs/getting_started.md
@@ -14,7 +14,7 @@ The example comes along with parameter boundaries, the Rosenbrock function, the
 So we can directly start to analyse the Rosenbrock function with one of the algorithms. We start with a simple Monte Carlo sampling:
 
 	# Give Monte Carlo algorithm the example setup and saves results in a RosenMC.csv file
-	sampler = spotpy.algorithms.mc(spotpy_setup(), dbname='RosenMC', dbformat='csv')
+	sampler = spotpy.algorithms.mc(spot_setup(), dbname='RosenMC', dbformat='csv')
 
 Now we can sample with the implemented Monte Carlo algorithm:
 
@@ -35,7 +35,7 @@ We can see that the parameters *x* and *y*, which drive the Rosenbrock funct
 
 If you want to see the best 10% of your samples, which is called the posterior parameter distribution, you have to do something like this:
 
-	posterior=spotpy.analyser.get_posterior(results,percentage=10)
+	posterior=spotpy.analyser.get_posterior(results, percentage=10)
 	spotpy.analyser.plot_parameterInteraction(posterior) 
 
 This should give you a parameter interaction plot of your best 10% samples, which should look like Fig. 2:

diff --git a/spotpy/__init__.py b/spotpy/__init__.py
@@ -39,4 +39,4 @@
 from . import describe            # Contains some helper functions to describe samplers and setups
 from .hydrology import signatures # Quantifies goodness of fit between simulation and evaluation data with hydrological signatures
 
-__version__ = '1.4.6'
+__version__ = '1.5.0'
diff --git a/spotpy/algorithms/_algorithm.py b/spotpy/algorithms/_algorithm.py
@@ -10,12 +10,13 @@
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals
-from spotpy import database, objectivefunctions
+from spotpy import database
 from spotpy import parameter
 import numpy as np
 import time
 import threading
 
+
 try:
     from queue import Queue
 except ImportError:
@@ -36,37 +37,64 @@ class _RunStatistic(object):
     Usage:
     status = _RunStatistic()
     status(rep,like,params)
-
     """
 
-    def __init__(self):
+    def __init__(self, repetitions, algorithm_name, optimization_direction, parnames):
+        self.optimization_direction = optimization_direction #grid, maximize, minimize
+        print('Initializing the ',algorithm_name,' with ',repetitions,' repetitions')
+        if optimization_direction == 'minimize':
+            self.compare = self.minimizer
+            print('The objective function will be minimized')
+        if optimization_direction == 'maximize':
+            self.compare = self.maximizer
+            print('The objective function will be minimized')
+        if optimization_direction == 'grid':
+            self.compare = self.grid    
+
         self.rep = 0
-        self.params = None
-        self.objectivefunction = -1e308
-        self.bestrep = 0
+        self.parnames = parnames
+        self.parameters= len(parnames)
+        self.params_min = [np.nan]*self.parameters
+        self.params_max = [np.nan]*self.parameters
+        self.objectivefunction_min = 1e308
+        self.objectivefunction_max = -1e308
         self.starttime = time.time()
         self.last_print = time.time()
 
-        self.repetitions = None
+        self.repetitions = repetitions
         self.stop = False
+
+    def minimizer(self, objval, params):
+        if objval < self.objectivefunction_min:
+            self.objectivefunction_min = objval 
+            self.params_min = list(params)
 
+    def maximizer(self, objval, params):
+        if objval > self.objectivefunction_max:
+            self.objectivefunction_max = objval
+            self.params_max = list(params)
+
+    def grid(self, objval, params):
+        if objval < self.objectivefunction_min:
+            self.objectivefunction_min = objval
+            self.params_min = list(params)
+        if objval > self.objectivefunction_max:
+            self.objectivefunction_max = objval
+            self.params_max = list(params)
+
+
     def __call__(self, objectivefunction, params, block_print=False):
-        self.curparmeterset = params
         self.rep+=1
-        if type(objectivefunction) == type([]):
-            if objectivefunction[0] > self.objectivefunction:
-                # Show only the first best objectivefunction when working with
-                # more than one objectivefunction
-                self.objectivefunction = objectivefunction[0]
-                self.params = params
-                self.bestrep = self.rep
+        if type(objectivefunction) == type([]): #TODO: change to iterable
+            self.compare(objectivefunction[0], params)
+
         else:
-            if objectivefunction > self.objectivefunction:
-                self.params = params
-                self.objectivefunction = objectivefunction
-                self.bestrep = self.rep
+            self.compare(objectivefunction, params)
+
+
         if self.rep == self.repetitions:
             self.stop = True
+
         if not block_print:
             self.print_status()
 
@@ -77,14 +105,59 @@ def print_status(self):
         if acttime - self.last_print >= 2:
             avg_time_per_run = (acttime - self.starttime) / (self.rep + 1)
             timestr = time.strftime("%H:%M:%S", time.gmtime(round(avg_time_per_run * (self.repetitions - (self.rep + 1)))))
-
-            text = '%i of %i (best like=%g) est. time remaining: %s' % (self.rep, self.repetitions,
-                                                                        self.objectivefunction, timestr)
+            if self.optimization_direction == 'minimize':
+                text = '%i of %i, minimal objective function=%g, time remaining: %s' % (
+                        self.rep, self.repetitions, self.objectivefunction_min, timestr)           
+
+            if self.optimization_direction == 'maximize':
+                text = '%i of %i, maximal objective function=%g, time remaining: %s' % (
+                        self.rep, self.repetitions, self.objectivefunction_max, timestr)  
+
+            if self.optimization_direction == 'grid':
+                text = '%i of %i, min objf=%g, max objf=%g, time remaining: %s' % (
+                        self.rep, self.repetitions, self.objectivefunction_min, self.objectivefunction_max, timestr)
+
             print(text)
             self.last_print = time.time()
+
+    def print_status_final(self):
+        print('\n*** Final SPOTPY summary ***')
+        print('Total Duration: ' + str(round((time.time() - self.starttime), 2)) + ' seconds')
+        print('Total Repetitions:', self.rep)
+
+        if self.optimization_direction == 'minimize':    
+            print('Minimal objective value: %g' % (self.objectivefunction_min))
+            print('Corresponding parameter setting:')
+            for i in range(self.parameters):
+                text = '%s: %g' % (self.parnames[i], self.params_min[i])
+                print(text)
+
+        if self.optimization_direction == 'maximize':
+            print('Maximal objective value: %g' % (self.objectivefunction_max))
+            print('Corresponding parameter setting:')
+            for i in range(self.parameters):
+                text = '%s: %g' % (self.parnames[i], self.params_max[i])
+                print(text)
+
+        if self.optimization_direction == 'grid':
+            print('Minimal objective value: %g' % (self.objectivefunction_min))
+            print('Corresponding parameter setting:')
+            for i in range(self.parameters):
+                text = '%s: %g' % (self.parnames[i], self.params_min[i])
+                print(text)
+
+            print('Maximal objective value: %g' % (self.objectivefunction_max))
+            print('Corresponding parameter setting:')
+            for i in range(self.parameters):
+                text = '%s: %g' % (self.parnames[i], self.params_max[i])
+                print(text)
+
+        print('******************************\n')
+
 
     def __repr__(self):
-        return 'Best objectivefunction: %g' % self.objectivefunction
+        return 'Min objectivefunction: %g \n Max objectivefunction: %g' % (
+                self.objectivefunction_min, self.objectivefunction_max)
 
 
 class _algorithm(object):
@@ -122,12 +195,6 @@ class _algorithm(object):
     db_precision:np.float type
         set np.float16, np.float32 or np.float64 for rounding of floats in the output database
         Default is np.float16
-    alt_objfun: str or None, default: 'rmse'
-        alternative objectivefunction to be used for algorithm
-        * None: the objfun defined in spot_setup.objectivefunction is used
-        * any str: if str is found in spotpy.objectivefunctions, 
-            this objectivefunction is used, else falls back to None 
-            e.g.: 'log_p', 'rmse', 'bias', 'kge' etc.
     sim_timeout: float, int or None, default: None
         the defined model given in the spot_setup class can be controlled to break after 'sim_timeout' seconds if
         sim_timeout is not None.
@@ -139,18 +206,12 @@ class _algorithm(object):
     _unaccepted_parameter_types = (parameter.List, )
 
     def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True,
-                 dbappend=False, parallel='seq', save_sim=True, alt_objfun=None,
-                 breakpoint=None, backup_every_rep=100, save_threshold=-np.inf,
-                 db_precision=np.float16, sim_timeout=None, random_state=None):
+                 dbappend=False, parallel='seq', save_sim=True, breakpoint=None, 
+                 backup_every_rep=100, save_threshold=-np.inf, db_precision=np.float16, 
+                 sim_timeout=None, random_state=None, optimization_direction='grid', algorithm_name=''):
+
         # Initialize the user defined setup class
         self.setup = spot_setup
-        # Philipp: Changed from Tobi's version, now we are using both new class defined parameters
-        # as well as the parameters function. The new method get_parameters
-        # can deal with a missing parameters function
-        #
-        # For me (Philipp) it is totally unclear why all the samplers should call this function
-        # again and again instead of
-        # TODO: just storing a definite list of parameter objects here
         param_info = parameter.get_parameters_array(self.setup, unaccepted_parameter_types=self._unaccepted_parameter_types)
         self.all_params = param_info['random']
         self.constant_positions = parameter.get_constant_indices(spot_setup)
@@ -163,16 +224,13 @@ def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True,
             self.non_constant_positions = np.arange(0,len(self.all_params))
         self.parameter = self.get_parameters
         self.parnames = param_info['name']
-
+        self.algorithm_name = algorithm_name
         # Create a type to hold the parameter values using a namedtuple
         self.partype = parameter.ParameterSet(param_info)
 
-        # use alt_objfun if alt_objfun is defined in objectivefunctions,
-        # else self.setup.objectivefunction
-        self.objectivefunction = getattr(
-            objectivefunctions, alt_objfun or '', None) or self.setup.objectivefunction
         self.evaluation = self.setup.evaluation()
         self.save_sim = save_sim
+        self.optimization_direction = optimization_direction
         self.dbname = dbname or 'customDb'
         self.dbformat = dbformat or 'ram'
         self.db_precision = db_precision
@@ -228,7 +286,6 @@ def __init__(self, spot_setup, dbname=None, dbformat=None, dbinit=True,
         # the normal work on the chains
         self.repeat = ForEach(self.simulate)
 
-        self.status = _RunStatistic()
 
     def __str__(self):
         return '{type}({mtype}())->{dbname}'.format(
@@ -247,9 +304,8 @@ def get_parameters(self):
         return pars[self.non_constant_positions]
 
     def set_repetiton(self, repetitions):
-
-        self.status.repetitions = repetitions
-
+        self.status = _RunStatistic(repetitions, self.algorithm_name, 
+                                    self.optimization_direction, self.parnames)
         # In MPI, this command will do nothing on the master process
         # but the worker processes are going to wait for jobs.
         # Hence the workers will only receive parameters for the
@@ -262,13 +318,8 @@ def final_call(self):
             self.datawriter.finalize()
         except AttributeError:  # Happens if no database was assigned
             pass
-        print('End of sampling')
-        text = 'Best run at %i of %i (best like=%g) with parameter set:' % (
-            self.status.bestrep, self.status.repetitions, self.status.objectivefunction)
-        print(text)
-        print(self.status.params)
-        text = 'Duration:' + str(round((time.time() - self.status.starttime), 2)) + ' s'
-        print(text)
+        self.status.print_status_final()
+
 
     def _init_database(self, like, randompar, simulations):
         if self.dbinit:
@@ -332,10 +383,10 @@ def postprocessing(self, rep, params, simulation, chains=1, save_run=True, negat
         # Save everything in the database, if save is True
         # This is needed as some algorithms just want to know the fitness,
         # before they actually save the run in a database (e.g. sce-ua)
+
         self.status(like,params,block_print=block_print)
 
         if save_run is True and simulation is not None:
-
             self.save(like, params, simulations=simulation, chains=chains)
         if type(like)==type([]):
             return like[0]
@@ -349,11 +400,11 @@ def getfitness(self, simulation, params):
         """
         try:
             #print('Using parameters in fitness function')
-            return self.objectivefunction(evaluation=self.evaluation, simulation=simulation, params = (params,self.parnames))
+            return self.setup.objectivefunction(evaluation=self.evaluation, simulation=simulation, params = (params,self.parnames))
 
         except TypeError: # Happens if the user does not allow to pass parameter in the spot_setup.objectivefunction
             #print('Not using parameters in fitness function')            
-            return self.objectivefunction(evaluation=self.evaluation, simulation=simulation)
+            return self.setup.objectivefunction(evaluation=self.evaluation, simulation=simulation)
 
     def simulate(self, id_params_tuple):
         """This is a simple wrapper of the model, returning the result together with

diff --git a/spotpy/algorithms/abc.py b/spotpy/algorithms/abc.py
@@ -16,7 +16,7 @@
 
 class abc(_algorithm):
     """
-    This class holds the Artificial Bee Colony(ABC) algorithm, based on Karaboga (2007).
+    This class holds the Artificial Bee Colony (ABC) algorithm, based on Karaboga (2007).
     D. Karaboga, AN IDEA BASED ON HONEY BEE SWARM FOR NUMERICAL OPTIMIZATION,TECHNICAL REPORT-TR06, Erciyes University, Engineering Faculty, Computer Engineering Department 2005.
     D. Karaboga, B. Basturk, A powerful and Efficient Algorithm for Numerical Function Optimization: Artificial Bee Colony (ABC) Algorithm, Journal of Global Optimization, Volume:39, Issue:3,pp:459-171, November 2007,ISSN:0925-5001 , doi: 10.1007/s10898-007-9149-x
 
@@ -54,7 +54,8 @@ def __init__(self, *args, **kwargs):
             * True:  Simulation results will be saved
             * False: Simulation results will not be saved
         """
-
+        kwargs['optimization_direction'] = 'maximize'
+        kwargs['algorithm_name'] = 'Artificial Bee Colony (ABC) algorithm'
         super(abc, self).__init__(*args, **kwargs)
 
 
@@ -196,7 +197,7 @@ def sample(self, repetitions, eb=48, a=(1 / 10), peps=0.0001, ownlimit=False, li
                     if self.status.stop:
                         print('Stopping samplig')
                         break
-            gnrng = -self.status.objectivefunction
+            gnrng = -self.status.objectivefunction_max
             if icall >= repetitions:
                 print('*** OPTIMIZATION SEARCH TERMINATED BECAUSE THE LIMIT')
                 print('ON THE MAXIMUM NUMBER OF TRIALS ')