From 4ae08b612467fd4ea8b48c180067ae277b7e8ab9 Mon Sep 17 00:00:00 2001
From: thouska <tobias.houska@umwelt.uni-giessen.de>
Date: Fri, 9 Oct 2020 13:43:39 +0200
Subject: [PATCH] Include NSGAii in SPOTPY infrastructur, fix minor padds error

---
 .gitignore                              |   1 -
 README.md                               |  15 +-
 README.rst                              |  15 +-
 setup.py                                |   4 +-
 spotpy/__init__.py                      |   3 +-
 spotpy/algorithms/nsgaii.py             |   4 +-
 spotpy/algorithms/padds.py              |  10 +-
 spotpy/examples/tutorial_nsgaii.py      |  81 ++++-------
 spotpy/examples/tutorial_padds.py       |   2 +-
 spotpy/examples/tutorial_padds_hymod.py | 178 ++++++++++--------------
 tests/test_algorithms.py                |   2 +-
 11 files changed, 139 insertions(+), 176 deletions(-)

diff --git a/.gitignore b/.gitignore
index f13e9241..e7aa88d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,7 +8,6 @@ __pycache__/
 
 #other files
 *.out
-
 *.png
 *.csv
 site/*
diff --git a/README.md b/README.md
index 1c325503..2c7f67ff 100644
--- a/README.md
+++ b/README.md
@@ -64,6 +64,7 @@ Some features you can use with the SPOTPY package are:
   * Fitness Scaled Chaotic Artificial Bee Colony (`FSCABC`)
   * Dynamically Dimensioned Search algorithm (`DDS`)
   * Pareto Archived - Dynamicallly Dimensioned Search algorithm (`PA-DDS`)
+  * Fast and Elitist Multiobjective Genetic Algorithm (`NSGA-II`)
 
 * Wide range of objective functions (also known as loss function, fitness function or energy function) to validate the sampled results. Available functions are
 
@@ -136,15 +137,23 @@ Some features you can use with the SPOTPY package are:
 Install
 =================
 
-Installing SPOTPY is easy. Just use:
+Classical Python options exist to install SPOTPY:
+
+From PyPi:
 
 	pip install spotpy
 
-Or, after downloading the [source code](https://pypi.python.org/pypi/spotpy "source code") and making sure python is in your OS path:
+From Conda-Forge:
+
+	conda config --add channels conda-forge
+	conda config --set channel_priority strict
+	conda install spotpy
+
+From [Source](https://pypi.python.org/pypi/spotpy):
 
 	python setup.py install
 
-	
+
 Support
 =================
 
diff --git a/README.rst b/README.rst
index cfe820de..eac7aa9c 100644
--- a/README.rst
+++ b/README.rst
@@ -39,6 +39,7 @@ We want to make this task as easy as possible. Some features you can use with th
   * Fitness Scaled Chaotic Artificial Bee Colony (`FSCABC`)
   * Dynamically Dimensioned Search algorithm (`DDS`)
   * Pareto Archived - Dynamicallly Dimensioned Search algorithm (`PA-DDS`)
+  * Fast and Elitist Multiobjective Genetic Algorithm (`NSGA-II`)
   
 * Wide range of objective functions (also known as loss function, fitness function or energy function) to validate the sampled results. Available functions are
 
@@ -136,15 +137,23 @@ Documentation is available at `<https://spotpy.readthedocs.io/en/latest>`__
 Install
 -------
 
-Installing SPOTPY is easy. Just use:
+Classical Python options exist to install SPOTPY:
+
+From PyPi:
 
 	pip install spotpy
 
-Or, after downloading the source code and making sure python is in your path:
+From Conda-Forge:
+
+	conda config --add channels conda-forge
+	conda config --set channel_priority strict
+	conda install spotpy
+
+From [Source](https://pypi.python.org/pypi/spotpy):
 
 	python setup.py install
 
-	
+
 Papers citing SPOTPY
 --------------------
 See `Google Scholar <https://scholar.google.de/scholar?cites=17155001516727704728&as_sdt=2005&sciodt=0,5&hl=de>`__ for a continuously updated list.
diff --git a/setup.py b/setup.py
index 8b786c28..7a51d7ae 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
   name = 'spotpy',
-  version = '1.5.10',
+  version = '1.5.14',
   description = 'A Statistical Parameter Optimization Tool',
   long_description=open(os.path.join(os.path.dirname(__file__),
                                        "README.rst")).read(),
@@ -15,7 +15,7 @@
   license = 'MIT',
   packages=find_packages(exclude=["tests*", "docs*"]),
   use_2to3 = True,
-  keywords = 'Monte Carlo, MCMC, MLE, SCE-UA, Simulated Annealing, DE-MCz, DREAM, ROPE, Artifical Bee Colony, DDS, PA-DDS, Uncertainty, Calibration, Model, Signatures',
+  keywords = 'Monte Carlo, MCMC, MLE, SCE-UA, Simulated Annealing, DE-MCz, DREAM, ROPE, Artifical Bee Colony, DDS, PA-DDS, NSGAii, Uncertainty, Calibration, Model, Signatures',
   classifiers = [
         'Development Status :: 4 - Beta',
         'Intended Audience :: Developers',
diff --git a/spotpy/__init__.py b/spotpy/__init__.py
index 89b1022b..ac788820 100644
--- a/spotpy/__init__.py
+++ b/spotpy/__init__.py
@@ -14,6 +14,7 @@
 sampling and an analyser class for the plotting of results by the sampling.
 
 :dependencies: - Numpy >1.8 (http://www.numpy.org/) 
+               - Scipy >1.5 (https://pypi.org/project/scipy/)
                - Pandas >0.13 (optional) (http://pandas.pydata.org/)
                - Matplotlib >1.4 (optional) (http://matplotlib.org/) 
                - CMF (optional) (http://fb09-pasig.umwelt.uni-giessen.de:8000/)
@@ -40,4 +41,4 @@
 from . import describe            # Contains some helper functions to describe samplers and set-ups
 from .hydrology import signatures # Quantifies goodness of fit between simulation and evaluation data with hydrological signatures
 
-__version__ = '1.5.10'
\ No newline at end of file
+__version__ = '1.5.14'
\ No newline at end of file
diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py
index a3867305..0c0c28b4 100644
--- a/spotpy/algorithms/nsgaii.py
+++ b/spotpy/algorithms/nsgaii.py
@@ -260,7 +260,7 @@ def crowdDist2(self,x):
 
 
 
-    def sample(self, generations, n_obj, n_pop = None, skip_duplicates = False, 
+    def sample(self, generations, n_obj, n_pop = None, 
                selection = TournamentSelection(pressure=2), 
                crossover = Crossover(crossProb=0.9),
                mutation = PolynomialMutation(prob_mut=0.25,eta_mut=30)):
@@ -273,7 +273,7 @@ def sample(self, generations, n_obj, n_pop = None, skip_duplicates = False,
         self.n_pop = n_pop
         self.generations= generations
         self.set_repetiton(self.generations*self.n_pop)
-        self.skip_duplicates = skip_duplicates
+        self.skip_duplicates = True # False does not work yet
 
         Pt = np.vstack([self.parameter()['random'] for i in range(self.n_pop)])
         
diff --git a/spotpy/algorithms/padds.py b/spotpy/algorithms/padds.py
index 7e9e8670..86cb179b 100644
--- a/spotpy/algorithms/padds.py
+++ b/spotpy/algorithms/padds.py
@@ -134,7 +134,7 @@ def get_next_x_curr(self):
             else:  # otherwise use the last generated solution
                 self.best_value.parameters, self.best_value.best_obj_val = (self.parameter_current, self.obj_func_current)
 
-            # This line is needed to get an array of data converted into a parameter object
+            # # This line is needed to get an array of data converted into a parameter object
             self.best_value.fix_format()
 
             yield rep, self.calculate_next_s_test(self.best_value.parameters, rep, self.generator_repetitions, self.r)
@@ -163,9 +163,9 @@ def calculate_initial_parameterset(self, repetitions, initial_objs, initial_para
             for i in range(initial_params.shape[0]):
                 self.parameter_current = initial_params[i]
                 if len(initial_objs[i]) > 0:
-                    self.obj_func_current = initial_objs[i]
+                    self.obj_func_current = np.array(initial_objs[i])
                 else:
-                    self.obj_func_current = self.getfitness(simulation=[], params=self.parameter_current)
+                    self.obj_func_current = np.array(self.getfitness(simulation=[], params=self.parameter_current))
 
                 if i == 0:  # Initial value
                     self.pareto_front = np.array([[self.obj_func_current, self.parameter_current]])
@@ -206,7 +206,8 @@ def sample(self, repetitions, trials=1, initial_objs=np.array([]), initial_param
             self.generator_repetitions = repetions_left
 
             for rep, x_curr, simulations in self.repeat(self.get_next_x_curr()):
-                self.obj_func_current = self.postprocessing(rep, x_curr, simulations)
+                obj_func_current = self.postprocessing(rep, x_curr, simulations)
+                self.obj_func_current = np.array(obj_func_current)
                 num_imp = np.sum(self.obj_func_current <= self.best_value.best_obj_val)
                 num_deg = np.sum(self.obj_func_current > self.best_value.best_obj_val)
 
@@ -335,6 +336,7 @@ def nd_check(nd_set_input, objective_values, parameter_set):
     dominance_flag = 0
 
     # These are simply reshaping problems if we want to loop over arrays but we have a single float given
+    objective_values = np.array(objective_values)
     try:
         like_struct_len = objective_values.shape[0]
     except IndexError:
diff --git a/spotpy/examples/tutorial_nsgaii.py b/spotpy/examples/tutorial_nsgaii.py
index 444f28ee..ccc096aa 100644
--- a/spotpy/examples/tutorial_nsgaii.py
+++ b/spotpy/examples/tutorial_nsgaii.py
@@ -1,6 +1,14 @@
 #!/usr/bin/env python
 # coding: utf-8
+# -*- coding: utf-8 -*-
+'''
+Copyright 2015 by Tobias Houska
+This file is part of Statistical Parameter Estimation Tool (SPOTPY).
 
+:author: Tobias Houska
+
+This class holds example code how to use the nsgaii algorithm
+'''
 
 from __future__ import division, print_function
 from __future__ import absolute_import
@@ -24,7 +32,22 @@
 
 
 def multi_obj_func(evaluation, simulation):
-    #used to overwrite objective function in hymod example
+    """
+    This function is used to overwrite objective function in hymod example
+    
+    Parameters
+    ----------
+    evaluation : array
+        The observation data.
+    simulation : array
+        The model input
+
+    Returns
+    -------
+    list
+        Three different kinds of objective functions.
+
+    """
     like1 = abs(spotpy.objectivefunctions.pbias(evaluation, simulation))
     like2 = spotpy.objectivefunctions.rmse(evaluation, simulation)
     like3 = spotpy.objectivefunctions.rsquared(evaluation, simulation)*-1
@@ -40,61 +63,7 @@ def multi_obj_func(evaluation, simulation):
     sp_setup=spot_setup(obj_func= multi_obj_func)
     sampler = spotpy.algorithms.NSGAII(spot_setup=sp_setup, dbname='NSGA2', dbformat="csv")
     
-    sampler.sample(generations, n_obj= 3, n_pop = n_pop, skip_duplicates = skip_duplicates)
-    #sampler.sample(generations=5, paramsamp=40)
-    
-    
-#    # user config
-#    
-#    n_var = 5
-#    
-#    
-#    last = None
-#    first = None
-#    
-#    # output calibration 
-#    
-#    df = pd.read_csv("NSGA2.csv")
-#    
-#    if last:
-#        df = df.iloc[-last:,:]
-#    elif first:
-#        df = df.iloc[:first,:]
-#    else:
-#        pass
-#    
-#    
-#    
-#    print(len(df))
-#    # plot objective functions
-#    fig = plt.figure()
-#    for i,name in enumerate(df.columns[:n_obj]):
-#        ax = fig.add_subplot(n_obj,1,i +1)
-#        df.loc[::5,name].plot(lw=0.5,figsize=(18,8),ax = ax,color="black")
-#        plt.title(name)
-#    plt.show()
-#    
-#    last = 100
-#    first = None
-#    
-#    x,y,z = df.iloc[-last:,0],df.iloc[-last:,1],df.iloc[-last:,2]
-#    fig = plt.figure()
-#    ax = fig.add_subplot(111, projection='3d')
-#    ax.scatter(x,y,z,marker="o")
-#    ax.set_xlabel("f0")
-#    ax.set_ylabel("f1")
-#    ax.set_zlabel("f2")
-#    plt.show()
-#    
-#    # plot parameters
-#    fig = plt.figure()
-#    for i,name in enumerate(df.columns[n_obj:8]):
-#        ax = fig.add_subplot(5,1,i +1)
-#        df.loc[:,name].plot(lw=0.5,figsize=(18,8),ax = ax,color="black")
-#        plt.title(name)
-#    plt.show()
-#    
-
+    sampler.sample(generations, n_obj= 3, n_pop = n_pop)
 
 
     results = sampler.getdata()
diff --git a/spotpy/examples/tutorial_padds.py b/spotpy/examples/tutorial_padds.py
index 3dd54cdc..8f337538 100644
--- a/spotpy/examples/tutorial_padds.py
+++ b/spotpy/examples/tutorial_padds.py
@@ -78,4 +78,4 @@ def objectivefunction(self, simulation, evaluation, params):
 spot_setup = padds_spot_setup()
 
 sampler = spotpy.algorithms.padds(spot_setup, dbname='padds_hymod', dbformat='csv')
-res = sampler.sample(10000,trials=1)
+res = sampler.sample(2000,trials=1)
diff --git a/spotpy/examples/tutorial_padds_hymod.py b/spotpy/examples/tutorial_padds_hymod.py
index 7a9c695c..b0966f06 100644
--- a/spotpy/examples/tutorial_padds_hymod.py
+++ b/spotpy/examples/tutorial_padds_hymod.py
@@ -16,10 +16,16 @@
     sys.path.append(".")
     import spotpy
 
-from spotpy.examples.spot_setup_hymod_python_pareto import spot_setup
+from spotpy.examples.spot_setup_hymod_python import spot_setup
 import matplotlib.pyplot as plt
 
-
+def multi_obj_func(evaluation, simulation):
+    #used to overwrite objective function in hymod example
+    like1 = abs(spotpy.objectivefunctions.pbias(evaluation, simulation))
+    like2 = spotpy.objectivefunctions.rmse(evaluation, simulation)
+    like3 = spotpy.objectivefunctions.rsquared(evaluation, simulation)*-1
+    return [like1, like2, like3]
+   
 if __name__ == "__main__":
     parallel ='seq' # Runs everthing in sequential mode
     np.random.seed(2000) # Makes the results reproduceable
@@ -27,29 +33,29 @@
     # Initialize the Hymod example
     # In this case, we tell the setup which algorithm we want to use, so
     # we can use this exmaple for different algorithms
-    spot_setup=spot_setup()
+    sp_setup=spot_setup(obj_func= multi_obj_func)
     
     #Select number of maximum allowed repetitions
-    rep=3000
+    rep=2000
         
     # Create the SCE-UA sampler of spotpy, alt_objfun is set to None to force SPOTPY
     # to jump into the def objectivefunction in the spot_setup class (default is
     # spotpy.objectivefunctions.rmse) 
-    sampler=spotpy.algorithms.padds(spot_setup, dbname='padds_hymod', dbformat='csv')
+    sampler=spotpy.algorithms.padds(sp_setup, dbname='padds_hymod', dbformat='csv')
     
-    #Start the sampler, one can specify ngs, kstop, peps and pcento id desired
-    print(sampler.sample(rep, metric="crowd_distance"))
+    #Start the sampler, one can specify metric if desired
+    sampler.sample(rep,metric='ones')
     
     # Load the results gained with the sceua sampler, stored in SCEUA_hymod.csv
     #results = spotpy.analyser.load_csv_results('DDS_hymod')
 
 
     results = sampler.getdata()
-    from pprint import pprint
-    #pprint(results)
-    pprint(results['chain'])
+    # from pprint import pprint
+    # #pprint(results)
+    # pprint(results['chain'])
 
-    for likno in range(1,5):
+    for likno in range(1,4):
         fig_like1 = plt.figure(1,figsize=(9,5))
         plt.plot(results['like'+str(likno)])
         plt.show()
@@ -61,104 +67,72 @@
 
     
     # Example plot to show the parameter distribution ###### 
+    def plot_parameter_trace(ax, results, parameter):
+        ax.plot(results['par'+parameter.name],'.')
+        ax.set_ylabel(parameter.name)
+        ax.set_ylim(parameter.minbound, parameter.maxbound)
+        
+    def plot_parameter_histogram(ax, results, parameter):
+        #chooses the last 20% of the sample
+        ax.hist(results['par'+parameter.name][int(len(results)*0.9):], 
+                 bins =np.linspace(parameter.minbound,parameter.maxbound,20))
+        ax.set_ylabel('Density')
+        ax.set_xlim(parameter.minbound, parameter.maxbound)
+        
     fig= plt.figure(2,figsize=(9,9))
-    normed_value = 1
-    
-    plt.subplot(5,2,1)
-    x = results['parcmax']
-    for i in range(int(max(results['chain'])-1)):
-        index=np.where(results['chain']==i+1) #Ignores burn-in chain
-        plt.plot(x[index],'.')
-    plt.ylabel('cmax')
-    plt.ylim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound)
-    
-    
-    plt.subplot(5,2,2)
-    x = x[int(len(results)*0.9):] #choose the last 10% of the sample
-    hist, bins = np.histogram(x, bins=20, density=True)
-    widths = np.diff(bins)
-    hist *= normed_value
-    plt.bar(bins[:-1], hist, widths)
-    plt.ylabel('cmax')
-    plt.xlim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound)
-    
-    
-    plt.subplot(5,2,3)
-    x = results['parbexp']
-    for i in range(int(max(results['chain'])-1)):
-        index=np.where(results['chain']==i+1)
-        plt.plot(x[index],'.')
-    plt.ylabel('bexp')
-    plt.ylim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound)
     
-    plt.subplot(5,2,4)
-    x = x[int(len(results)*0.9):]
-    hist, bins = np.histogram(x, bins=20, density=True)
-    widths = np.diff(bins)
-    hist *= normed_value
-    plt.bar(bins[:-1], hist, widths)
-    plt.ylabel('bexp')
-    plt.xlim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound)
+    ax1 = plt.subplot(5,2,1)
+    plot_parameter_trace(ax1, results, spot_setup.cmax)
     
+    ax2 = plt.subplot(5,2,2)
+    plot_parameter_histogram(ax2,results, spot_setup.cmax)
     
+    ax3 = plt.subplot(5,2,3)
+    plot_parameter_trace(ax3, results, spot_setup.bexp)
     
-    plt.subplot(5,2,5)
-    x = results['paralpha']
-    print(x)
-    for i in range(int(max(results['chain'])-1)):
-        index=np.where(results['chain']==i+1)
-        plt.plot(x[index],'.')
-    plt.ylabel('alpha')
-    plt.ylim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound)
-    
-    
-    plt.subplot(5,2,6)
-    x = x[int(len(results)*0.9):]
-    hist, bins = np.histogram(x, bins=20, density=True)
-    widths = np.diff(bins)
-    hist *= normed_value
-    plt.bar(bins[:-1], hist, widths)
-    plt.ylabel('alpha')
-    plt.xlim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound)
-    
-    
-    plt.subplot(5,2,7)
-    x = results['parKs']
-    for i in range(int(max(results['chain'])-1)):
-        index=np.where(results['chain']==i+1)
-        plt.plot(x[index],'.')
-    plt.ylabel('Ks')
-    plt.ylim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound)
-    
-    
-    plt.subplot(5,2,8)
-    x = x[int(len(results)*0.9):]
+    ax4 = plt.subplot(5,2,4)
+    plot_parameter_histogram(ax4, results, spot_setup.bexp)
+        
+    ax5 = plt.subplot(5,2,5)
+    plot_parameter_trace(ax5, results, spot_setup.alpha)
+     
+    ax6 = plt.subplot(5,2,6)
+    plot_parameter_histogram(ax6, results, spot_setup.alpha)
 
-    hist, bins = np.histogram(x, bins=20, density=True)
-    widths = np.diff(bins)
-    hist *= normed_value
-    plt.bar(bins[:-1], hist, widths)
-    plt.ylabel('Ks')
-    plt.xlim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound)
+    ax7 = plt.subplot(5,2,7)
+    plot_parameter_trace(ax7, results, spot_setup.Ks)
     
+    ax8 = plt.subplot(5,2,8)
+    plot_parameter_histogram(ax8, results, spot_setup.Ks)
+
+    ax9 = plt.subplot(5,2,9)
+    plot_parameter_trace(ax9, results, spot_setup.Kq)
+    ax9.set_xlabel('Iterations')
     
-    plt.subplot(5,2,9)
-    x = results['parKq']
-    for i in range(int(max(results['chain'])-1)):
-        index=np.where(results['chain']==i+1)
-        plt.plot(x[index],'.')
-    plt.ylabel('Kq')
-    plt.ylim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound)
-    plt.xlabel('Iterations')
+    ax10 = plt.subplot(5,2,10)
+    plot_parameter_histogram(ax10, results, spot_setup.Kq)
+    ax10.set_xlabel('Parameter range')
     
-    plt.subplot(5,2,10)
-    x = x[int(len(results)*0.9):]
-    hist, bins = np.histogram(x, bins=20, density=True)
-    widths = np.diff(bins)
-    hist *= normed_value
-    plt.bar(bins[:-1], hist, widths)
-    plt.ylabel('Kq')
-    plt.xlabel('Parameter range')
-    plt.xlim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound)
     plt.show()
-    fig.savefig('hymod_parameters.png',dpi=300)
\ No newline at end of file
+    fig.savefig('hymod_parameters.png',dpi=300)
+    
+    
+    
+    # Example plot to show remaining parameter uncertainty #
+    fields=[word for word in results.dtype.names if word.startswith('sim')]
+    fig= plt.figure(3, figsize=(16,9))
+    ax11 = plt.subplot(1,1,1)
+    q5,q25,q75,q95=[],[],[],[]
+    for field in fields:
+        q5.append(np.percentile(results[field][-100:-1],2.5))
+        q95.append(np.percentile(results[field][-100:-1],97.5))
+    ax11.plot(q5,color='dimgrey',linestyle='solid')
+    ax11.plot(q95,color='dimgrey',linestyle='solid')
+    ax11.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0,
+                    linewidth=0,label='parameter uncertainty')  
+    ax11.plot(sp_setup.evaluation(),'r.',label='data')
+    ax11.set_ylim(-50,450)
+    ax11.set_xlim(0,729)
+    ax11.legend()
+    fig.savefig('python_hymod.png',dpi=300)
+    #########################################################
\ No newline at end of file
diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py
index e8a809d9..e845bb31 100644
--- a/tests/test_algorithms.py
+++ b/tests/test_algorithms.py
@@ -133,7 +133,7 @@ def test_nsgaii(self):
         n_pop = 20
         skip_duplicates = True
         sampler=spotpy.algorithms.NSGAII(spot_setup(self.multi_obj_func),parallel=self.parallel, dbname='Rosen', dbformat=self.dbformat, sim_timeout=self.timeout)
-        sampler.sample(generations, n_obj= 3, n_pop = n_pop, skip_duplicates = skip_duplicates)
+        sampler.sample(generations, n_obj= 3, n_pop = n_pop)
         results = sampler.getdata()
         self.assertLessEqual(len(results), generations*n_pop)