From 4ae08b612467fd4ea8b48c180067ae277b7e8ab9 Mon Sep 17 00:00:00 2001 From: thouska Date: Fri, 9 Oct 2020 13:43:39 +0200 Subject: [PATCH] Include NSGAii in SPOTPY infrastructur, fix minor padds error --- .gitignore | 1 - README.md | 15 +- README.rst | 15 +- setup.py | 4 +- spotpy/__init__.py | 3 +- spotpy/algorithms/nsgaii.py | 4 +- spotpy/algorithms/padds.py | 10 +- spotpy/examples/tutorial_nsgaii.py | 81 ++++------- spotpy/examples/tutorial_padds.py | 2 +- spotpy/examples/tutorial_padds_hymod.py | 178 ++++++++++-------------- tests/test_algorithms.py | 2 +- 11 files changed, 139 insertions(+), 176 deletions(-) diff --git a/.gitignore b/.gitignore index f13e9241..e7aa88d8 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,6 @@ __pycache__/ #other files *.out - *.png *.csv site/* diff --git a/README.md b/README.md index 1c325503..2c7f67ff 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,7 @@ Some features you can use with the SPOTPY package are: * Fitness Scaled Chaotic Artificial Bee Colony (`FSCABC`) * Dynamically Dimensioned Search algorithm (`DDS`) * Pareto Archived - Dynamicallly Dimensioned Search algorithm (`PA-DDS`) + * Fast and Elitist Multiobjective Genetic Algorithm (`NSGA-II`) * Wide range of objective functions (also known as loss function, fitness function or energy function) to validate the sampled results. Available functions are @@ -136,15 +137,23 @@ Some features you can use with the SPOTPY package are: Install ================= -Installing SPOTPY is easy. Just use: +Classical Python options exist to install SPOTPY: + +From PyPi: pip install spotpy -Or, after downloading the [source code](https://pypi.python.org/pypi/spotpy "source code") and making sure python is in your OS path: +From Conda-Forge: + + conda config --add channels conda-forge + conda config --set channel_priority strict + conda install spotpy + +From [Source](https://pypi.python.org/pypi/spotpy): python setup.py install - + Support ================= diff --git a/README.rst b/README.rst index cfe820de..eac7aa9c 100644 --- a/README.rst +++ b/README.rst @@ -39,6 +39,7 @@ We want to make this task as easy as possible. Some features you can use with th * Fitness Scaled Chaotic Artificial Bee Colony (`FSCABC`) * Dynamically Dimensioned Search algorithm (`DDS`) * Pareto Archived - Dynamicallly Dimensioned Search algorithm (`PA-DDS`) + * Fast and Elitist Multiobjective Genetic Algorithm (`NSGA-II`) * Wide range of objective functions (also known as loss function, fitness function or energy function) to validate the sampled results. Available functions are @@ -136,15 +137,23 @@ Documentation is available at ``__ Install ------- -Installing SPOTPY is easy. Just use: +Classical Python options exist to install SPOTPY: + +From PyPi: pip install spotpy -Or, after downloading the source code and making sure python is in your path: +From Conda-Forge: + + conda config --add channels conda-forge + conda config --set channel_priority strict + conda install spotpy + +From [Source](https://pypi.python.org/pypi/spotpy): python setup.py install - + Papers citing SPOTPY -------------------- See `Google Scholar `__ for a continuously updated list. diff --git a/setup.py b/setup.py index 8b786c28..7a51d7ae 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name = 'spotpy', - version = '1.5.10', + version = '1.5.14', description = 'A Statistical Parameter Optimization Tool', long_description=open(os.path.join(os.path.dirname(__file__), "README.rst")).read(), @@ -15,7 +15,7 @@ license = 'MIT', packages=find_packages(exclude=["tests*", "docs*"]), use_2to3 = True, - keywords = 'Monte Carlo, MCMC, MLE, SCE-UA, Simulated Annealing, DE-MCz, DREAM, ROPE, Artifical Bee Colony, DDS, PA-DDS, Uncertainty, Calibration, Model, Signatures', + keywords = 'Monte Carlo, MCMC, MLE, SCE-UA, Simulated Annealing, DE-MCz, DREAM, ROPE, Artifical Bee Colony, DDS, PA-DDS, NSGAii, Uncertainty, Calibration, Model, Signatures', classifiers = [ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', diff --git a/spotpy/__init__.py b/spotpy/__init__.py index 89b1022b..ac788820 100644 --- a/spotpy/__init__.py +++ b/spotpy/__init__.py @@ -14,6 +14,7 @@ sampling and an analyser class for the plotting of results by the sampling. :dependencies: - Numpy >1.8 (http://www.numpy.org/) + - Scipy >1.5 (https://pypi.org/project/scipy/) - Pandas >0.13 (optional) (http://pandas.pydata.org/) - Matplotlib >1.4 (optional) (http://matplotlib.org/) - CMF (optional) (http://fb09-pasig.umwelt.uni-giessen.de:8000/) @@ -40,4 +41,4 @@ from . import describe # Contains some helper functions to describe samplers and set-ups from .hydrology import signatures # Quantifies goodness of fit between simulation and evaluation data with hydrological signatures -__version__ = '1.5.10' \ No newline at end of file +__version__ = '1.5.14' \ No newline at end of file diff --git a/spotpy/algorithms/nsgaii.py b/spotpy/algorithms/nsgaii.py index a3867305..0c0c28b4 100644 --- a/spotpy/algorithms/nsgaii.py +++ b/spotpy/algorithms/nsgaii.py @@ -260,7 +260,7 @@ def crowdDist2(self,x): - def sample(self, generations, n_obj, n_pop = None, skip_duplicates = False, + def sample(self, generations, n_obj, n_pop = None, selection = TournamentSelection(pressure=2), crossover = Crossover(crossProb=0.9), mutation = PolynomialMutation(prob_mut=0.25,eta_mut=30)): @@ -273,7 +273,7 @@ def sample(self, generations, n_obj, n_pop = None, skip_duplicates = False, self.n_pop = n_pop self.generations= generations self.set_repetiton(self.generations*self.n_pop) - self.skip_duplicates = skip_duplicates + self.skip_duplicates = True # False does not work yet Pt = np.vstack([self.parameter()['random'] for i in range(self.n_pop)]) diff --git a/spotpy/algorithms/padds.py b/spotpy/algorithms/padds.py index 7e9e8670..86cb179b 100644 --- a/spotpy/algorithms/padds.py +++ b/spotpy/algorithms/padds.py @@ -134,7 +134,7 @@ def get_next_x_curr(self): else: # otherwise use the last generated solution self.best_value.parameters, self.best_value.best_obj_val = (self.parameter_current, self.obj_func_current) - # This line is needed to get an array of data converted into a parameter object + # # This line is needed to get an array of data converted into a parameter object self.best_value.fix_format() yield rep, self.calculate_next_s_test(self.best_value.parameters, rep, self.generator_repetitions, self.r) @@ -163,9 +163,9 @@ def calculate_initial_parameterset(self, repetitions, initial_objs, initial_para for i in range(initial_params.shape[0]): self.parameter_current = initial_params[i] if len(initial_objs[i]) > 0: - self.obj_func_current = initial_objs[i] + self.obj_func_current = np.array(initial_objs[i]) else: - self.obj_func_current = self.getfitness(simulation=[], params=self.parameter_current) + self.obj_func_current = np.array(self.getfitness(simulation=[], params=self.parameter_current)) if i == 0: # Initial value self.pareto_front = np.array([[self.obj_func_current, self.parameter_current]]) @@ -206,7 +206,8 @@ def sample(self, repetitions, trials=1, initial_objs=np.array([]), initial_param self.generator_repetitions = repetions_left for rep, x_curr, simulations in self.repeat(self.get_next_x_curr()): - self.obj_func_current = self.postprocessing(rep, x_curr, simulations) + obj_func_current = self.postprocessing(rep, x_curr, simulations) + self.obj_func_current = np.array(obj_func_current) num_imp = np.sum(self.obj_func_current <= self.best_value.best_obj_val) num_deg = np.sum(self.obj_func_current > self.best_value.best_obj_val) @@ -335,6 +336,7 @@ def nd_check(nd_set_input, objective_values, parameter_set): dominance_flag = 0 # These are simply reshaping problems if we want to loop over arrays but we have a single float given + objective_values = np.array(objective_values) try: like_struct_len = objective_values.shape[0] except IndexError: diff --git a/spotpy/examples/tutorial_nsgaii.py b/spotpy/examples/tutorial_nsgaii.py index 444f28ee..ccc096aa 100644 --- a/spotpy/examples/tutorial_nsgaii.py +++ b/spotpy/examples/tutorial_nsgaii.py @@ -1,6 +1,14 @@ #!/usr/bin/env python # coding: utf-8 +# -*- coding: utf-8 -*- +''' +Copyright 2015 by Tobias Houska +This file is part of Statistical Parameter Estimation Tool (SPOTPY). +:author: Tobias Houska + +This class holds example code how to use the nsgaii algorithm +''' from __future__ import division, print_function from __future__ import absolute_import @@ -24,7 +32,22 @@ def multi_obj_func(evaluation, simulation): - #used to overwrite objective function in hymod example + """ + This function is used to overwrite objective function in hymod example + + Parameters + ---------- + evaluation : array + The observation data. + simulation : array + The model input + + Returns + ------- + list + Three different kinds of objective functions. + + """ like1 = abs(spotpy.objectivefunctions.pbias(evaluation, simulation)) like2 = spotpy.objectivefunctions.rmse(evaluation, simulation) like3 = spotpy.objectivefunctions.rsquared(evaluation, simulation)*-1 @@ -40,61 +63,7 @@ def multi_obj_func(evaluation, simulation): sp_setup=spot_setup(obj_func= multi_obj_func) sampler = spotpy.algorithms.NSGAII(spot_setup=sp_setup, dbname='NSGA2', dbformat="csv") - sampler.sample(generations, n_obj= 3, n_pop = n_pop, skip_duplicates = skip_duplicates) - #sampler.sample(generations=5, paramsamp=40) - - -# # user config -# -# n_var = 5 -# -# -# last = None -# first = None -# -# # output calibration -# -# df = pd.read_csv("NSGA2.csv") -# -# if last: -# df = df.iloc[-last:,:] -# elif first: -# df = df.iloc[:first,:] -# else: -# pass -# -# -# -# print(len(df)) -# # plot objective functions -# fig = plt.figure() -# for i,name in enumerate(df.columns[:n_obj]): -# ax = fig.add_subplot(n_obj,1,i +1) -# df.loc[::5,name].plot(lw=0.5,figsize=(18,8),ax = ax,color="black") -# plt.title(name) -# plt.show() -# -# last = 100 -# first = None -# -# x,y,z = df.iloc[-last:,0],df.iloc[-last:,1],df.iloc[-last:,2] -# fig = plt.figure() -# ax = fig.add_subplot(111, projection='3d') -# ax.scatter(x,y,z,marker="o") -# ax.set_xlabel("f0") -# ax.set_ylabel("f1") -# ax.set_zlabel("f2") -# plt.show() -# -# # plot parameters -# fig = plt.figure() -# for i,name in enumerate(df.columns[n_obj:8]): -# ax = fig.add_subplot(5,1,i +1) -# df.loc[:,name].plot(lw=0.5,figsize=(18,8),ax = ax,color="black") -# plt.title(name) -# plt.show() -# - + sampler.sample(generations, n_obj= 3, n_pop = n_pop) results = sampler.getdata() diff --git a/spotpy/examples/tutorial_padds.py b/spotpy/examples/tutorial_padds.py index 3dd54cdc..8f337538 100644 --- a/spotpy/examples/tutorial_padds.py +++ b/spotpy/examples/tutorial_padds.py @@ -78,4 +78,4 @@ def objectivefunction(self, simulation, evaluation, params): spot_setup = padds_spot_setup() sampler = spotpy.algorithms.padds(spot_setup, dbname='padds_hymod', dbformat='csv') -res = sampler.sample(10000,trials=1) +res = sampler.sample(2000,trials=1) diff --git a/spotpy/examples/tutorial_padds_hymod.py b/spotpy/examples/tutorial_padds_hymod.py index 7a9c695c..b0966f06 100644 --- a/spotpy/examples/tutorial_padds_hymod.py +++ b/spotpy/examples/tutorial_padds_hymod.py @@ -16,10 +16,16 @@ sys.path.append(".") import spotpy -from spotpy.examples.spot_setup_hymod_python_pareto import spot_setup +from spotpy.examples.spot_setup_hymod_python import spot_setup import matplotlib.pyplot as plt - +def multi_obj_func(evaluation, simulation): + #used to overwrite objective function in hymod example + like1 = abs(spotpy.objectivefunctions.pbias(evaluation, simulation)) + like2 = spotpy.objectivefunctions.rmse(evaluation, simulation) + like3 = spotpy.objectivefunctions.rsquared(evaluation, simulation)*-1 + return [like1, like2, like3] + if __name__ == "__main__": parallel ='seq' # Runs everthing in sequential mode np.random.seed(2000) # Makes the results reproduceable @@ -27,29 +33,29 @@ # Initialize the Hymod example # In this case, we tell the setup which algorithm we want to use, so # we can use this exmaple for different algorithms - spot_setup=spot_setup() + sp_setup=spot_setup(obj_func= multi_obj_func) #Select number of maximum allowed repetitions - rep=3000 + rep=2000 # Create the SCE-UA sampler of spotpy, alt_objfun is set to None to force SPOTPY # to jump into the def objectivefunction in the spot_setup class (default is # spotpy.objectivefunctions.rmse) - sampler=spotpy.algorithms.padds(spot_setup, dbname='padds_hymod', dbformat='csv') + sampler=spotpy.algorithms.padds(sp_setup, dbname='padds_hymod', dbformat='csv') - #Start the sampler, one can specify ngs, kstop, peps and pcento id desired - print(sampler.sample(rep, metric="crowd_distance")) + #Start the sampler, one can specify metric if desired + sampler.sample(rep,metric='ones') # Load the results gained with the sceua sampler, stored in SCEUA_hymod.csv #results = spotpy.analyser.load_csv_results('DDS_hymod') results = sampler.getdata() - from pprint import pprint - #pprint(results) - pprint(results['chain']) + # from pprint import pprint + # #pprint(results) + # pprint(results['chain']) - for likno in range(1,5): + for likno in range(1,4): fig_like1 = plt.figure(1,figsize=(9,5)) plt.plot(results['like'+str(likno)]) plt.show() @@ -61,104 +67,72 @@ # Example plot to show the parameter distribution ###### + def plot_parameter_trace(ax, results, parameter): + ax.plot(results['par'+parameter.name],'.') + ax.set_ylabel(parameter.name) + ax.set_ylim(parameter.minbound, parameter.maxbound) + + def plot_parameter_histogram(ax, results, parameter): + #chooses the last 20% of the sample + ax.hist(results['par'+parameter.name][int(len(results)*0.9):], + bins =np.linspace(parameter.minbound,parameter.maxbound,20)) + ax.set_ylabel('Density') + ax.set_xlim(parameter.minbound, parameter.maxbound) + fig= plt.figure(2,figsize=(9,9)) - normed_value = 1 - - plt.subplot(5,2,1) - x = results['parcmax'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) #Ignores burn-in chain - plt.plot(x[index],'.') - plt.ylabel('cmax') - plt.ylim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound) - - - plt.subplot(5,2,2) - x = x[int(len(results)*0.9):] #choose the last 10% of the sample - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('cmax') - plt.xlim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound) - - - plt.subplot(5,2,3) - x = results['parbexp'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('bexp') - plt.ylim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound) - plt.subplot(5,2,4) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('bexp') - plt.xlim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound) + ax1 = plt.subplot(5,2,1) + plot_parameter_trace(ax1, results, spot_setup.cmax) + ax2 = plt.subplot(5,2,2) + plot_parameter_histogram(ax2,results, spot_setup.cmax) + ax3 = plt.subplot(5,2,3) + plot_parameter_trace(ax3, results, spot_setup.bexp) - plt.subplot(5,2,5) - x = results['paralpha'] - print(x) - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('alpha') - plt.ylim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound) - - - plt.subplot(5,2,6) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('alpha') - plt.xlim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound) - - - plt.subplot(5,2,7) - x = results['parKs'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('Ks') - plt.ylim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound) - - - plt.subplot(5,2,8) - x = x[int(len(results)*0.9):] + ax4 = plt.subplot(5,2,4) + plot_parameter_histogram(ax4, results, spot_setup.bexp) + + ax5 = plt.subplot(5,2,5) + plot_parameter_trace(ax5, results, spot_setup.alpha) + + ax6 = plt.subplot(5,2,6) + plot_parameter_histogram(ax6, results, spot_setup.alpha) - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Ks') - plt.xlim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound) + ax7 = plt.subplot(5,2,7) + plot_parameter_trace(ax7, results, spot_setup.Ks) + ax8 = plt.subplot(5,2,8) + plot_parameter_histogram(ax8, results, spot_setup.Ks) + + ax9 = plt.subplot(5,2,9) + plot_parameter_trace(ax9, results, spot_setup.Kq) + ax9.set_xlabel('Iterations') - plt.subplot(5,2,9) - x = results['parKq'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('Kq') - plt.ylim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound) - plt.xlabel('Iterations') + ax10 = plt.subplot(5,2,10) + plot_parameter_histogram(ax10, results, spot_setup.Kq) + ax10.set_xlabel('Parameter range') - plt.subplot(5,2,10) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Kq') - plt.xlabel('Parameter range') - plt.xlim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound) plt.show() - fig.savefig('hymod_parameters.png',dpi=300) \ No newline at end of file + fig.savefig('hymod_parameters.png',dpi=300) + + + + # Example plot to show remaining parameter uncertainty # + fields=[word for word in results.dtype.names if word.startswith('sim')] + fig= plt.figure(3, figsize=(16,9)) + ax11 = plt.subplot(1,1,1) + q5,q25,q75,q95=[],[],[],[] + for field in fields: + q5.append(np.percentile(results[field][-100:-1],2.5)) + q95.append(np.percentile(results[field][-100:-1],97.5)) + ax11.plot(q5,color='dimgrey',linestyle='solid') + ax11.plot(q95,color='dimgrey',linestyle='solid') + ax11.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0, + linewidth=0,label='parameter uncertainty') + ax11.plot(sp_setup.evaluation(),'r.',label='data') + ax11.set_ylim(-50,450) + ax11.set_xlim(0,729) + ax11.legend() + fig.savefig('python_hymod.png',dpi=300) + ######################################################### \ No newline at end of file diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py index e8a809d9..e845bb31 100644 --- a/tests/test_algorithms.py +++ b/tests/test_algorithms.py @@ -133,7 +133,7 @@ def test_nsgaii(self): n_pop = 20 skip_duplicates = True sampler=spotpy.algorithms.NSGAII(spot_setup(self.multi_obj_func),parallel=self.parallel, dbname='Rosen', dbformat=self.dbformat, sim_timeout=self.timeout) - sampler.sample(generations, n_obj= 3, n_pop = n_pop, skip_duplicates = skip_duplicates) + sampler.sample(generations, n_obj= 3, n_pop = n_pop) results = sampler.getdata() self.assertLessEqual(len(results), generations*n_pop)