From d0c7494275e9fc451b5a6904b9a88240a5dd1664 Mon Sep 17 00:00:00 2001 From: thouska Date: Thu, 24 Sep 2020 13:29:48 +0200 Subject: [PATCH] Redesign for tutorial of hydrological model calibration #259 --- ...brating a hydrological model with DREAM.md | 174 +++++++++++ docs/Hydrological_model.md | 215 ------------- docs/Working with DREAM and Hymod.md | 287 ------------------ mkdocs.yml | 3 +- spotpy/analyser.py | 44 ++- spotpy/examples/tutorial_cmf_lumped.py | 4 +- spotpy/examples/tutorial_dream_hymod.py | 142 +-------- 7 files changed, 227 insertions(+), 642 deletions(-) create mode 100644 docs/Calibrating a hydrological model with DREAM.md delete mode 100644 docs/Hydrological_model.md delete mode 100644 docs/Working with DREAM and Hymod.md diff --git a/docs/Calibrating a hydrological model with DREAM.md b/docs/Calibrating a hydrological model with DREAM.md new file mode 100644 index 00000000..d85f79e7 --- /dev/null +++ b/docs/Calibrating a hydrological model with DREAM.md @@ -0,0 +1,174 @@ +# Calibration of HYMOD with DREAM + +This chapter shows you, how to link the external hydrological model HYMOD with SPOTPY (works only on Windows systems). + +We use the hydrological model HYMOD as an example, to calibrate it with the Differential Evolution Adaptive Metropolis (DREAM) algorithm. +For detailed information about the underlying theorie, have a look at the [Vrugt (2016)](https://doi.org/10.1016/j.envsoft.2015.08.013 "Vrugt (2016)"). +The SPOTPY package comes with an example which is desgined to help you to set up your own research project. + +First we need to import some functions we will need in the following: + + from spotpy.parameter import Uniform + from spotpy.examples.hymod_python.hymod import hymod + import os + +Now, we can setup the model within a spot setup class. The model needs some meteorological input data and five parameters to estimate discharge: + +## Connect HYMOD with SPOTPY + +Here we use class parameter, to initialize the parameter for our model. + + class spot_setup(object): + cmax = Uniform(low=1.0 , high=500, optguess=412.33) + bexp = Uniform(low=0.1 , high=2.0, optguess=0.1725) + alpha = Uniform(low=0.1 , high=0.99, optguess=0.8127) + Ks = Uniform(low=0.001 , high=0.10, optguess=0.0404) + Kq = Uniform(low=0.1 , high=0.99, optguess=0.5592) + + def __init__(self, obj_func=None): + #Just a way to keep this example flexible and applicable to various examples + self.obj_func = obj_func + #Transform [mm/day] into [l s-1], where 1.783 is the catchment area + self.Factor = 1.783 * 1000 * 1000 / (60 * 60 * 24) + self.PET,self.Precip = [], [] + self.date,self.trueObs = [], [] + #Find Path to Hymod on users system + self.owd = os.path.dirname(os.path.realpath(__file__)) + self.hymod_path = self.owd+os.sep+'hymod_python' + #Load Observation data from file + climatefile = open(self.hymod_path+os.sep+'hymod_input.csv', 'r') + headerline = climatefile.readline()[:-1] + + #Read model forcing in working storage (this is done only ones) + if ';' in headerline: + self.delimiter = ';' + else: + self.delimiter = ',' + self.header = headerline.split(self.delimiter) + for line in climatefile: + values = line.strip().split(self.delimiter) + self.date.append(str(values[0])) + self.Precip.append(float(values[1])) + self.PET.append(float(values[2])) + self.trueObs.append(float(values[3])) + climatefile.close() + +We use the simulation function to write one random parameter set into a parameter file, like it is needed for the HYMOD model, +start the model and read the model discharge output data: + + def simulation(self,x): + #Here the model is actualy startet with one paramter combination that it gets from spotpy for each time the model is called + data = hymod(self.Precip, self.PET, x[0], x[1], x[2], x[3], x[4]) + sim=[] + for val in data: + sim.append(val*self.Factor) + #The first year of simulation data is ignored (warm-up) + return sim[366:] + +And in a last step, we compare the observed and the simulated data. Here we set up the setup class in a way, that it can receive any +likein the SPOTPY package. Please mind that the selection of the Likelihood highly influences the results gained with this algorithm: + + def evaluation(self): + #The first year of simulation data is ignored (warm-up) + return self.trueObs[366:] + + def objectivefunction(self,simulation,evaluation, params=None): + like = spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation,simulation) + return like + +## Sample with DREAM + +Now we can initialize the Hymod example: + + spot_setup=spot_setup() + +Create the Dream sampler of spotpy, alt_objfun is set to None to force SPOTPY +to jump into the def objectivefunction in the spot_setup class (default is +spotpy.objectivefunctions.log_p). Results are saved in a DREAM_hymod.csv file: + + sampler=spotpy.algorithms.dream(spot_setup, dbname='DREAM_hymod', dbformat='csv') + +Select number of maximum repetitions, the number of chains used by dream (default = 5) and set the Gelman-Rubin convergence limit (default 1.2). +We further allow 100 runs after convergence is achieved: + + #Select number of maximum repetitions + rep=5000 + + # Select five chains and set the Gelman-Rubin convergence limit + nChains = 4 + convergence_limit = 1.2 + + # Other possible settings to modify the DREAM algorithm, for details see Vrugt (2016) + nCr = 3 + eps = 10e-6 + runs_after_convergence = 100 + acceptance_test_option = 6 + +We start the sampler and collect the gained r_hat convergence values after the sampling: + + r_hat = sampler.sample(rep, nChains, nCr, eps, convergence_limit) + + +## Access the results +All other results can be accessed from the SPOTPY csv-database: + + results = spotpy.analyser.load_csv_results('DREAM_hymod') + +These results are structured as a numpy array. Accordingly, you can access the different columns by using simple Python code, +e.g. to access all the simulations: + + fields=[word for word in results.dtype.names if word.startswith('sim')] + print(results[fields) + +## Plot model uncertainty +For the analysis we provide some examples how to plor the data. +If you want to see the remaining posterior model uncertainty: + + fig= plt.figure(figsize=(16,9)) + ax = plt.subplot(1,1,1) + q5,q25,q75,q95=[],[],[],[] + for field in fields: + q5.append(np.percentile(results[field][-100:-1],2.5)) + q95.append(np.percentile(results[field][-100:-1],97.5)) + ax.plot(q5,color='dimgrey',linestyle='solid') + ax.plot(q95,color='dimgrey',linestyle='solid') + ax.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0, + linewidth=0,label='parameter uncertainty') + ax.plot(spot_setup.evaluation(),'r.',label='data') + ax.set_ylim(-50,450) + ax.set_xlim(0,729) + ax.legend() + fig.savefig('python_hymod.png',dpi=300) + +![Posterior model uncertainty](../img/python_hymod_simulation.png) +*Figure 1: Posterior model uncertainty of HYMOD.* + +## Plot convergence diagnostic +If you want to check the convergence of the DREAM algorithm: + + spotpy.analyser.plot_gelman_rubin(results, r_hat, fig_name = 'python_hymod_convergence.png') + +![Convergence diagnostic](../img/python_hymod_convergence.png) + +*Figure 2: Gelman-Rubin onvergence diagnostic of DREAM results.* + + +## Plot parameter uncertainty +Or if you want to check the posterior parameter distribution: + + parameters = spotpy.parameter.get_parameters_array(spot_setup) + + fig, ax = plt.subplots(nrows=5, ncols=2) + for par_id in range(len(parameters)): + plot_parameter_trace(ax[par_id][0], results, parameters[par_id]) + plot_posterior_parameter_histogram(ax[par_id][1], results, parameters[par_id]) + + ax[-1][0].set_xlabel('Iterations') + ax[-1][1].set_xlabel('Parameter range') + + plt.show() + fig.savefig('hymod_parameters.png',dpi=300) + +![Parameter distribution](../img/python_hymod_parameters.png) + +*Figure 3: Posterior parameter distribution of HYMOD.* diff --git a/docs/Hydrological_model.md b/docs/Hydrological_model.md deleted file mode 100644 index 09cf2bd4..00000000 --- a/docs/Hydrological_model.md +++ /dev/null @@ -1,215 +0,0 @@ -# A hydrological model - -This chapter shows you, how to link an external model with SPOTPY. This is might be the most interesting chapter for users. -We use an hydrological model as an example, the Catchment Modelling Framework (CMF). For detailed information and download options about this model, have a look at the [CMF Homepage](http://fb09-pasig.umwelt.uni-giessen.de:8000/ "CMF Homepage"). -This example will, most likely, be different from your actual use, but it will give you hints how to solve your specific issue. - -![CMF model](../img/cmf_model.png) - -*Figure 6: One soil column model build with cmf. The coloured bars represent soil layers, the grey ring a Free air carbon dioxide enrichment (FACE) ring and the black tube a Piezometer.* - -Our model is driven by some meteorological data and a varying groundwater table. Evapotransipration is simulate with the Shuttleworth Wallace concept -and infiltration with the Richards equation. The retention curve is build with the VanGenuchten equation. We want to calibrate the VanGenuchten parameter -*alpha*, *ksat*, *n* and *porosity* to simulate the observed soil mositure in the upper most soil layer. - -## Creating the setup file - -To create our setup file, first have to import SPOTPY, the model cmf and some extra packages, we will need: - - import spotpy - import cmf - from datetime import timedelta, datetime # Standard in Python to work with data that have date and time stamp - import Load_Data as loader # Used to import meterological data and the evaluation data from csv-files. - import numpy as np - -Before we start to write our spotpy_setup, we have to write a class or a function, which setting up your model. -In this case, we use the cmf model which directly callable from Python. We use the \__init\__ function to separate the forcing and -evaluation data loading from the runtime loop. The run function is supposed to be called with a set of parameters, which setup the rest of the model and runs it. -Important here is that our run function returns just the simulations, on which evaluation data is available. You will have to do the same, if you use a different model. - - class model(object): - ''' - Input: datastart: e.g. datetime(1998,6,1) - dataend: e.g. datetime(2000,1,1) - analysestart: e.g. datetime(1999,1,1) - - Output: Initialised model instance with forcing data (climate, groundwater) and evaluation data - (soil moisture) - ''' - def __init__(self,datastart,dataend,analysestart): - self.d_s=datastart - self.d_end=dataend - self.a_start=analysestart - self.bound= [[0.0001,0.6],[0.01,3],[1.05,1.4],[0.4,0.7]] # Physical parameter boundaries - DataLoader = loader.load_data(self.a_start,self.d_s,self.d_end) - cmf.set_parallel_threads(1) - - ###################### Load Forcing data #################################### - ClimateFilename = 'Climate_Face_new2.csv' - self.md=np.load(ClimateFilename+str(d_start.date())+str(self.d_end.date())+'.npy') - self.rain=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Nd_mm_day']) - self.rHmean= cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Rh']) - self.Windspeed=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Wind']) - self.Rs=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Rs_meas']) - self.T=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Temp']) - self.piezometer = 'P4' - self.gw_array = DataLoader.groundwater(self.piezometer) - ############################################################################# - - ###################### Load Evaluation data ################################# - eval_soil_moisture = DataLoader.soil_moisture('A1') - self.eval_dates = eval_soil_moisture['Date'] - self.observations = eval_soil_moisture['A1'] - ########################################################################### - - def _load_meteo(self,project): - #Create meteo station for project - meteo=project.meteo_stations.add_station('FACE',position = (0,0,0),timezone=1,timestep=cmf.h) - rain = self.rain - meteo.rHmean = self.rHmean - meteo.Windspeed = self.Windspeed - meteo.Rs = self.Rs - meteo.T = self.T - meteo.Tmax = meteo.T.reduce_max(begin = self.d_start, step = timedelta(days=1)) - meteo.Tmin = meteo.T.reduce_min(begin = self.d_start, step = timedelta(days=1)) - project.rainfall_stations.add('FACE',rain,(0,0,0)) - project.use_nearest_rainfall() - # Use the meteorological station for each cell of the project - project.use_nearest_meteo() - - def run(self,args): - return self._run(*args) - - def _run(self,alpha=None,n=None,porosity=None,ksat=None): - #return alpha,n,porosity,ksat - ''' - Runs the model instance - - Input: Parameter set (in this case VanGenuchten Parameter alpha,n,porosity,ksat) - Output: Simulated values on given observation days - ''' - #Check if given parameter set is in realistic boundaries - if alphaself.bound[0][1] or ksatself.bound[1][1] or nself.bound[2][1] or \ - porosityself.bound[3][1]: - print 'The following combination was ignored:' - print 'n= '+str(n) - print 'alpha='+str(alpha) - print 'ksat= '+str(ksat) - print 'porosity= '+str(porosity) - print '##############################' - return self.observations*-np.inf - else: - project=cmf.project() - cell = project.NewCell(x=0,y=0,z=0,area=1000, with_surfacewater=True) - print 'n= '+str(n) - print 'alpha='+str(alpha) - print 'ksat= '+str(ksat) - print 'porosity= '+str(porosity) - print '##############################' - r_curve = cmf.VanGenuchtenMualem(Ksat=ksat,phi=porosity,alpha=alpha,n=n) - layers=5 - ldepth=.01 - for i in range(layers): - depth = (i+1) * ldepth - cell.add_layer(depth,r_curve) - cell.install_connection(cmf.Richards) - cell.install_connection(cmf.ShuttleworthWallace) - cell.saturated_depth =.5 - solver = cmf.CVodeIntegrator(project,1e-6) - self._load_meteo(project) - gw = project.NewOutlet('groundwater',x=0,y=0,z=.9)#layers*ldepth) - cmf.Richards(cell.layers[-1],gw) - gw.potential = -.5 #IMPORTANT - gw.is_source=True - solver.t = self.d_start - Evalstep,evallist=0,[] - rundays=(self.d_end-self.d_start).days - for t in solver.run(solver.t,solver.t + timedelta(days=rundays),timedelta(hours=1)): - if self.gw_array['Date'].__contains__(t)==True: - Gw_Index=np.where(self.gw_array['Date']==t) - gw.potential=self.gw_array[self.piezometer][Gw_Index] - print gw.potential #TO DO: CHECK IF SOMETHING HAPPENS HERE!!!! - if t > self.a_start: - if Evalstep !=len(self.eval_dates) and t == self.eval_dates[Evalstep]: - evallist.append(cell.layers.wetness[0]*cell.layers.porosity[0]*100) - Evalstep+=1 - return evallist - -Now we can create our spotpy_setup class. Here we use to \__init\__ function, to initialize our model. -At this point it is recommended to load all needed data into the working storage (in this case meteorological data, soil moisture and groundwater table data). -Keep in mind, that the \__init\__ function is only called once during the sampling, while the other functions are called within every iteration of the algorithm. -The more you can separate from you model into the \__init\__ function, the faster you sampling will be. - - class spotpy_setup(object): - def __init__(self, obj_func=None): - #Just a way to keep this example flexible and applicable to various examples - self.obj_func = obj_func - datastart = datetime(1998,6,1) - dataend = datetime(2000,1,1) - analysestart = datetime(1999,1,1) - self.cmfmodel = model(datastart,dataend,analysestart) - self.params = [spotpy.parameter.Normal('alpha',0.3,0.1,0.02,0.2), - spotpy.parameter.Normal('n',1.2,0.035,0.01,1.22), - spotpy.parameter.Normal('ksat',1,0.3,0.1,2.0), - spotpy.parameter.Normal('porosity',.55,0.04,0.02,0.6), - ] - -Now we have setup our model. It has a warm up from 06/01/1998 until 01/01/1999 and will then save soil moisture simulations whenever evaluation data is available. - -To define the VanGenuchten parameter boundaries we use a normal distribution. - - def parameters(self): - return spotpy.parameter.generate(self.params) - - def simulation(self,vector): - simulations= self.cmfmodel._run(alpha=vector[0],n=vector[1],ksat=vector[2],porosity=vector[3]) - return simulations - - def evaluation(self,evaldates=False): - if evaldates: - return self.cmfmodel.eval_dates - else: - return self.cmfmodel.observations - - def objectivefunction(self,simulation,evaluation): - #SPOTPY expects to get one or multiple values back, - #that define the performence of the model run - if not self.obj_func: - # This is used if not overwritten by user - # RMSE (root mean squared error) works good for the SCE-UA algorithm, - # as it minimizes the objective function. - # All other implemented algorithm maximize the objectivefunction - model_performance = spotpy.objectivefunctions.rmse(evaluation,simulation) - else: - #Way to ensure flexible spot setup class - model_performance = self.obj_func(evaluation,simulation) - return model_performance - - -## Sampling - - sampler = spotpy.algorithms.mc(spotpy_setup(),dbname='MC_CMF',dbformat='csv') - sampler = spotpy.algorithms.mle(spotpy_setup(),dbname='MLE_CMF',dbformat='csv') - sampler = spotpy.algorithms.lhs(spotpy_setup(),dbname='LHS_CMF',dbformat='csv') - sampler = spotpy.algorithms.sceua(spotpy_setup(used_algorithm='sceua'),dbname='SCEUA_CMF',dbformat='csv') - sampler = spotpy.algorithms.demcz(spotpy_setup(),dbname='DE-MCz_CMF',dbformat='csv') - sampler = spotpy.algorithms.sa(spotpy_setup(),dbname='SA_CMF',dbformat='csv') - sampler = spotpy.algorithms.rope(spotpy_setup(),dbname='ROPE_CMF',dbformat='csv') - - algorithms=['MC','LHS','MLE','MCMC','SCE-UA','SA','DE-MCz','ROPE'] - results=[] - for algorithm in algorithms: - sampler.sample(10000) - results.append(sampler.getdata) - -## Plotting - - evaluation = spotpy_setup().evaluation() - evaldates= spotpy_setup().evaluation(evaldates=True) - - spotpy.analyser.plot_bestmodelruns(res,evaluation,algorithms=algorithms,dates=evaldates, ylabel='Soil moisture [%]') - -![CMF model](../img/cmf_bestmodelruns.png) - -*Figure 7: Best model runs of the different algorithms.* diff --git a/docs/Working with DREAM and Hymod.md b/docs/Working with DREAM and Hymod.md deleted file mode 100644 index 89496e5a..00000000 --- a/docs/Working with DREAM and Hymod.md +++ /dev/null @@ -1,287 +0,0 @@ -# Calibration of HYMOD with DREAM - -This chapter shows you, how to link the external hydrological model HYMOD with SPOTPY (works only on Windows systems). - -We use the hydrological model HYMOD as an example, to calibrate it with the Differential Evolution Adaptive Metropolis (DREAM) algorithm. -For detailed information about the underlying theorie, have a look at the [Vrugt (2016)](https://doi.org/10.1016/j.envsoft.2015.08.013 "Vrugt (2016)"). -The SPOTPY package comes with an example which is desgined to help you to set up your own research project. - -First, we need to setup the model within a spot setup class. The model needs some meteorological input data and five parameters to estimate discharge: - -## Connect HYMOD with SPOTPY - -Here we use to \__init\__ function, to initialize the parameter for our model. - - class spot_setup(object): - def __init__(self): - - self.params = [spotpy.parameter.Uniform('x1',low=1.0 , high=500, optguess=412.33), - spotpy.parameter.Uniform('x2',low=0.1 , high=2.0, optguess=0.1725), - spotpy.parameter.Uniform('x3',low=0.1 , high=0.99, optguess=0.8127), - spotpy.parameter.Uniform('x4',low=0.0 , high=0.10, optguess=0.0404), - spotpy.parameter.Uniform('x5',low=0.1 , high=0.99, optguess=0.5592) - ] - self.curdir = os.getcwd() - self.owd = os.path.realpath(__file__)+os.sep+'..' - self.evals = list(np.genfromtxt(self.owd+os.sep+'hymod'+os.sep+'bound.txt',skip_header=65)[:,3])[:730] - self.Factor = 1944 * (1000 * 1000 ) / (1000 * 60 * 60 * 24) - print(len(self.evals)) - - def parameters(self): - return spotpy.parameter.generate(self.params) - -We use the simulation function to write one random parameter set into a parameter file, like it is needed for the HYMOD model, -start the model and read the model discharge output data: - - def simulation(self,x): - os.chdir(self.owd+os.sep+'hymod') - if sys.version_info.major == 2: - params = file('Param.in', 'w') - elif sys.version_info.major == 3: - params = open('Param.in','w') - else: - raise Exception("Your python is too old for this example") - for i in range(len(x)): - if i == len(x): - params.write(str(round(x[i],5))) - else: - params.write(str(round(x[i],5))+' ') - params.close() - os.system('HYMODsilent.exe') - - #try: - if sys.version_info.major == 2: - SimRR = file('Q.out', 'r') - elif sys.version_info.major == 3: - SimRR = open('Q.out', 'r') - else: - raise Exception("Your python is too old for this example") - simulations=[] - for i in range(64): - SimRR.readline() - for i in range(730): - val= SimRR.readline() - simulations.append(float(val)*self.Factor) - #except:#Assign bad values - model might have crashed - # SimRR = 795 * [np.nan] - os.chdir(self.curdir) - - return simulations - -And in a last step, we compare the observed and the simulated data. Here we choose one of the implemented Likelihood functions -in the SPOTPY package. Please mind that the selection of the Likelihood highly influences the results gained with this algorithm: - - def evaluation(self): - return self.evals - - def objectivefunction(self,simulation,evaluation, params=None): - like = spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation,simulation) - return like - -## Sample with DREAM - -Now we can initialize the Hymod example: - - spot_setup=spot_setup() - -Create the Dream sampler of spotpy, alt_objfun is set to None to force SPOTPY -to jump into the def objectivefunction in the spot_setup class (default is -spotpy.objectivefunctions.log_p). Results are saved in a DREAM_hymod.csv file: - - sampler=spotpy.algorithms.dream(spot_setup, dbname='DREAM_hymod', dbformat='csv', alt_objfun=None) - -Select number of maximum repetitions, the number of chains used by dream (default = 5) and set the Gelman-Rubin convergence limit (default 1.2). -We further allow 100 runs after convergence is achieved: - - nChains = 4 - convergence_limit = 1.2 - runs_after_convergence = 100 - -We start the sampler and collect the gained r_hat convergence values after the sampling: - - r_hat = sampler.sample(rep,nChains=nChains,convergence_limit=convergence_limit, - runs_after_convergence=runs_after_convergence) - - -## Access the results -All other results can be accessed from the SPOTPY csv-database: - - results = spotpy.analyser.load_csv_results('DREAM_hymod') - -These results are structured as a numpy array. Accordingly, you can access the different columns by using simple Python code, -e.g. to access all the simulations: - - fields=[word for word in results.dtype.names if word.startswith('sim')] - print(results[fields) - -## Plot model uncertainty -For the analysis we provide some examples how to plor the data. -If you want to see the remaining posterior model uncertainty: - - fig= plt.figure(figsize=(16,9)) - ax = plt.subplot(1,1,1) - q5,q25,q75,q95=[],[],[],[] - for field in fields: - q5.append(np.percentile(results[field][-100:-1],2.5)) - q95.append(np.percentile(results[field][-100:-1],97.5)) - ax.plot(q5,color='dimgrey',linestyle='solid') - ax.plot(q95,color='dimgrey',linestyle='solid') - ax.fill_between(np.arange(0,730,1),list(q5),list(q95),facecolor='dimgrey',zorder=0, - linewidth=0,label='parameter uncertainty') - ax.plot(spot_setup.evaluation(),'r.',label='data') - ax.set_ylim(-50,450) - ax.set_xlim(0,729) - ax.legend() - fig.savefig('python_hymod.png',dpi=300) - -![Posterior model uncertainty](../img/python_hymod_simulation.png) -*Figure 1: Posterior model uncertainty of HYMOD.* - -## Plot convergence diagnostic -If you want to check the convergence of the DREAM algorithm: - - fig= plt.figure(figsize=(16,9)) - plt.subplot(2,1,1) - for i in range(int(max(results['chain']))+1): - index=np.where(results['chain']==i) - plt.plot(results['like1'][index], label='Chain '+str(i+1)) - - plt.ylabel('Likelihood value') - plt.legend() - - ax =plt.subplot(2,1,2) - r_hat=np.array(r_hat) - ax.plot([1.2]*len(r_hat),'k--') - for i in range(len(r_hat[0])): - ax.plot(r_hat[:,i],label='x'+str(i+1)) - - ax.set_yscale("log", nonposy='clip') - ax.set_ylim(-1,50) - ax.set_ylabel('R$^d$ - convergence diagnostic') - plt.xlabel('Number of chainruns') - plt.legend() - fig.savefig('python_hymod_convergence.png',dpi=300) - -![Convergence diagnostic](../img/python_hymod_convergence.png) - -*Figure 2: Gelman-Rubin onvergence diagnostic of DREAM results.* - - -## Plot parameter uncertainty -Or if you want to check the posterior parameter distribution: - - def find_min_max(spot_setup): - randompar=spot_setup.parameters()['random'] - for i in range(1000): - randompar=np.column_stack((randompar,spot_setup.parameters()['random'])) - return np.amin(randompar,axis=1),np.amax(randompar,axis=1) - - - min_vs,max_vs = find_min_max(spot_setup) - - fig= plt.figure(figsize=(16,16)) - plt.subplot(5,2,1) - x = results['par'+spot_setup.parameters()['name'][0]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x1') - plt.ylim(min_vs[0],max_vs[0]) - - - plt.subplot(5,2,2) - x = results['par'+spot_setup.parameters()['name'][0]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x1') - plt.xlim(min_vs[0],max_vs[0]) - - - - plt.subplot(5,2,3) - x = results['par'+spot_setup.parameters()['name'][1]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x2') - plt.ylim(min_vs[1],max_vs[1]) - - plt.subplot(5,2,4) - x = results['par'+spot_setup.parameters()['name'][1]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x2') - plt.xlim(min_vs[1],max_vs[1]) - - - - plt.subplot(5,2,5) - x = results['par'+spot_setup.parameters()['name'][2]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x3') - plt.ylim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,6) - x = results['par'+spot_setup.parameters()['name'][2]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x3') - plt.xlim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,7) - x = results['par'+spot_setup.parameters()['name'][3]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x4') - plt.ylim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,8) - x = results['par'+spot_setup.parameters()['name'][3]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x4') - plt.xlim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,9) - x = results['par'+spot_setup.parameters()['name'][4]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x5') - plt.ylim(min_vs[4],max_vs[4]) - plt.xlabel('Iterations') - - plt.subplot(5,2,10) - x = results['par'+spot_setup.parameters()['name'][4]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x5') - plt.xlabel('Parameter range') - plt.xlim(min_vs[4],max_vs[4]) - fig.savefig('python_parameters.png',dpi=300) - -![Parameter distribution](../img/python_hymod_parameters.png) - -*Figure 3: Posterior parameter distribution of HYMOD.* diff --git a/mkdocs.yml b/mkdocs.yml index 44b31de5..a68e12e2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,8 +20,7 @@ nav: - Rosenbrock: Rosenbrock.md - Griewank: Griewank.md - Ackley: Ackley.md - - Hydrological_model: Hydrological_model.md - - Working with DREAM and Hymod: Working with DREAM and Hymod.md + - Calibrating a hydrological model with DREAM: Calibrating a hydrological model with DREAM.md - Advanced_hints: Advanced_hints.md - Algorithm_guide: Algorithm_guide.md diff --git a/spotpy/analyser.py b/spotpy/analyser.py index 7767d8ee..1c7db8de 100644 --- a/spotpy/analyser.py +++ b/spotpy/analyser.py @@ -279,6 +279,21 @@ def get_posterior(results,percentage=10, maximize=True): index = np.where(results['like1']>=np.percentile(results['like1'],100.0-percentage)) return results[index] +def plot_parameter_trace(ax, results, parameter): + #THis function plots the parameter setting for each run + for i in range(int(max(results['chain']))): + index=np.where(results['chain']==i) + ax.plot(results['par'+parameter['name']][index],'.') + ax.set_ylabel(parameter['name']) + ax.set_ylim(parameter['minbound'], parameter['maxbound']) + +def plot_posterior_parameter_histogram(ax, results, parameter): + #This functing is plotting the last 10% of the results + ax.hist(results['par'+parameter['name']][int(len(results)*0.9):], + bins =np.linspace(parameter['minbound'],parameter['maxbound'],20)) + ax.set_ylabel('Density') + ax.set_xlim(parameter['minbound'], parameter['maxbound']) + def plot_parameter_uncertainty(posterior_results,evaluation, fig_name='Posterior_parameter_uncertainty.png'): import matplotlib.pyplot as plt @@ -969,16 +984,31 @@ def plot_autocorellation(parameterdistribution,parametername,fig_name='Autocorre plt.savefig(fig_name,dpi=300) -def plot_gelman_rubin(r_hat_values,fig_name='gelman_rub.png'): +def plot_gelman_rubin(results, r_hat_values,fig_name='gelman_rub.png'): '''Input: List of R_hat values of chains (see Gelman & Rubin 1992) Output: Plot as seen for e.g. in (Sadegh and Vrugt 2014)''' import matplotlib.pyplot as plt - fig=plt.figure(figsize=(16,9)) - ax = plt.subplot(1,1,1) - ax.plot(r_hat_values) - ax.plot([1.2]*len(r_hat_values),'k--') - ax.set_xlabel='r_hat' - plt.savefig(fig_name,dpi=300) + + fig= plt.figure(figsize=(12,16)) + ax1 = plt.subplot(2,1,1) + for i in range(int(max(results['chain']))+1): + index=np.where(results['chain']==i) + ax1.plot(results['like1'][index], label='Chain '+str(i+1)) + + ax1.set_ylabel('Likelihood value') + ax1.legend() + + ax2 =plt.subplot(2,1,2) + r_hat=np.array(r_hat_values) + ax2.plot([1.2]*len(r_hat),'k--') + for i in range(len(r_hat[0])): + ax2.plot(r_hat[:,i],label='x'+str(i+1)) + + ax2.set_yscale("log", nonpositive='clip') + ax2.set_ylabel('R$^d$ - convergence diagnostic') + ax2.set_xlabel('Number of chainruns') + ax2.legend() + fig.savefig(fig_name,dpi=300) def gelman_rubin(x): '''NOT USED YET''' diff --git a/spotpy/examples/tutorial_cmf_lumped.py b/spotpy/examples/tutorial_cmf_lumped.py index 49a87713..a79904fa 100644 --- a/spotpy/examples/tutorial_cmf_lumped.py +++ b/spotpy/examples/tutorial_cmf_lumped.py @@ -46,11 +46,11 @@ def get_runs(default=1): model = SingleStorage(datetime.datetime(1980, 1, 1), datetime.datetime(1985, 12, 31)) - runs = get_runs(default=5) + runs = get_runs(default=100) # Create the sampler sampler = spotpy.algorithms.mc(model, parallel=parallel(), - dbname=model.dbname, dbformat='hdf5', + dbname=model.dbname, dbformat='csv', save_sim=True, sim_timeout=300) # Now we can sample with the implemented Monte Carlo algorithm: diff --git a/spotpy/examples/tutorial_dream_hymod.py b/spotpy/examples/tutorial_dream_hymod.py index 72a9f7dc..36de14fc 100644 --- a/spotpy/examples/tutorial_dream_hymod.py +++ b/spotpy/examples/tutorial_dream_hymod.py @@ -14,14 +14,15 @@ from spotpy.examples.spot_setup_hymod_python import spot_setup import matplotlib.pyplot as plt from spotpy.likelihoods import gaussianLikelihoodMeasErrorOut as GausianLike - +from spotpy.analyser import plot_parameter_trace +from spotpy.analyser import plot_posterior_parameter_histogram if __name__ == "__main__": parallel ='seq' # Initialize the Hymod example (will only work on Windows systems) #spot_setup=spot_setup(parallel=parallel) spot_setup=spot_setup(GausianLike) - # Create the Dream sampler of spotpy, al_objfun is set to None to force SPOTPY + # Create the Dream sampler of spotpy, alt_objfun is set to None to force SPOTPY # to jump into the def objectivefunction in the spot_setup class (default is # spotpy.objectivefunctions.log_p) @@ -71,27 +72,7 @@ # Example plot to show the convergence ################# - fig= plt.figure(figsize=(12,16)) - plt.subplot(2,1,1) - for i in range(int(max(results['chain']))+1): - index=np.where(results['chain']==i) - plt.plot(results['like1'][index], label='Chain '+str(i+1)) - - plt.ylabel('Likelihood value') - plt.legend() - - ax =plt.subplot(2,1,2) - r_hat=np.array(r_hat) - ax.plot([1.2]*len(r_hat),'k--') - for i in range(len(r_hat[0])): - ax.plot(r_hat[:,i],label='x'+str(i+1)) - - ax.set_yscale("log", nonposy='clip') - ax.set_ylim(-1,50) - ax.set_ylabel('R$^d$ - convergence diagnostic') - plt.xlabel('Number of chainruns') - plt.legend() - fig.savefig('python_hymod_convergence.png',dpi=300) + spotpy.analyser.plot_gelman_rubin(results, r_hat) ######################################################## @@ -100,111 +81,14 @@ # Example plot to show the parameter distribution ###### parameters = spotpy.parameter.get_parameters_array(spot_setup) + fig, ax = plt.subplots(nrows=5, ncols=2) + for par_id in range(len(parameters)): + plot_parameter_trace(ax[par_id][0], results, parameters[par_id]) + plot_posterior_parameter_histogram(ax[par_id][1], results, parameters[par_id]) - min_vs,max_vs = parameters['minbound'], parameters['maxbound'] - - fig= plt.figure(figsize=(16,16)) - plt.subplot(5,2,1) - x = results['par'+str(parameters['name'][0])] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('cmax') - plt.ylim(min_vs[0],max_vs[0]) - - - plt.subplot(5,2,2) - x = results['par'+parameters['name'][0]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('cmax') - plt.xlim(min_vs[0],max_vs[0]) - - - - plt.subplot(5,2,3) - x = results['par'+parameters['name'][1]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('bexp') - plt.ylim(min_vs[1],max_vs[1]) - - plt.subplot(5,2,4) - x = results['par'+parameters['name'][1]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('bexp') - plt.xlim(min_vs[1],max_vs[1]) - - - - plt.subplot(5,2,5) - x = results['par'+parameters['name'][2]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('alpha') - plt.ylim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,6) - x = results['par'+parameters['name'][2]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('alpha') - plt.xlim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,7) - x = results['par'+parameters['name'][3]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('Ks') - plt.ylim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,8) - x = results['par'+parameters['name'][3]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Ks') - plt.xlim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,9) - x = results['par'+parameters['name'][4]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('Kq') - plt.ylim(min_vs[4],max_vs[4]) - plt.xlabel('Iterations') - - plt.subplot(5,2,10) - x = results['par'+parameters['name'][4]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Kq') - plt.xlabel('Parameter range') - plt.xlim(min_vs[4],max_vs[4]) - plt.show() - fig.savefig('python_parameters.png',dpi=300) - ######################################################## + ax[-1][0].set_xlabel('Iterations') + ax[-1][1].set_xlabel('Parameter range') + plt.show() + fig.savefig('hymod_parameters.png',dpi=300) + ####################################################### \ No newline at end of file