diff --git a/.travis.yml b/.travis.yml index 12c199e6..eda4af49 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,12 @@ language: python python: #- "2.7" - #- "3.5" - "3.7" # - "3.8" #Errors on Travis, build times out because no output was received... +# Uses cache to build faster +cache: pip + # this ubuntu distribution has sure the libopenmpi-dev packages available dist: bionic @@ -31,9 +33,27 @@ install: script: - - py.test tests/test_* --cov spotpy --cov-report term-missing -v + - py.test tests/test_* --cov spotpy --cov-report term-missing -v # - mpirun -c 4 python spotpy/examples/tutorial_parallel_computing_hymod.py 100 - pip uninstall spotpy -y -after_success: - - coveralls + +jobs: + include: + - stage: Coveralls + name: Report Coveralls + after_success: python3 -m coveralls + +# Publish spotpy on PyPi if taged +deploy: + provider: pypi + distributions: "sdist bdist_wheel" + on: + # In this case I want to deploy only when a tag is present... + tags: true + # ... and when tag is on master and respects the form "v0.0.0" + branch: + - master + - /v?(\d+\.)?(\d+\.)?(\*|\d+)$/ + user: thouska + password : ${deploying} diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..8b43b17b --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,10 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +## [Unreleased Changes](https://github.com/thouska/spotpy/compare/v1.5.13...master) + +## Spotpy Version [1.5.13](https://github.com/thouska/spotpy/compare/v1.5.12...v1.5.13) (2020-09-07) + +* Introducing package dependencies as requested [#249](https://github.com/thouska/spotpy/issues/249) +* Introducing chanchelog.md as requested [#225](https://github.com/thouska/spotpy/issues/225) diff --git a/docs/Advanced_hints.md b/docs/Advanced_hints.md index da74b6c3..e8f90c0d 100644 --- a/docs/Advanced_hints.md +++ b/docs/Advanced_hints.md @@ -1,4 +1,5 @@ + # Advanced settings This chapter will show you, how to get the full power out of SPOTPY: @@ -20,7 +21,7 @@ To tell SPOTPY to use MPI, just give this information to the sampler: sampler = spotpy.algorithms.sceua(spotpy_setup,() dbname='RosenSCEUA', dbformat='csv', parallel='mpi') sampler.sample(10000) - + Now save this file and start it from a console: `mpirun -c 20 your_script.py`, where 20 is the number of cores you want to use. This should give you and speed of neerly 20 times compared with the standard sequential sampling. @@ -38,29 +39,7 @@ DE-MCz will parallelize the selcted number of chains [terBrack and Vrugt, 2008]( Th algorithms `MLE`, `MCMC` and `SA` can not run in parallel. -## FAST - Sensitivity analysis -SPOTPY gives you the opportunity to start a sensitivity analysis of your model. In this case, we included a global sensitivity analysis called "Extended FAST" based on -Saltelli et al. (1999). This is besides the Sobol´ sensitivity test the only algorithm available that is taking parameter interaction into account. - -The algorithm will tell you, how sensitive your parameters are on whatever is given back by your objective function. Before you start to sample, you should know how how many -iterations you need to get an reliable information about your parameter. The number of iteration can be calculate after [Henkel et al. 
GLOBAL SENSITIVITY ANALYSIS OF NONLINEAR MATHEMATICAL MODELS - AN -IMPLEMENTATION OF TWO COMPLEMENTING VARIANCE-BASED ALGORITHMS, 2012] (https://www.informs-sim.org/wsc12papers/includes/files/con308.pdf): - -$$N = (1+4M^2(1+(k-2)d))k$$ - -with N = needed parameter iterations, M= inference factor (SPOTPY default M=4) and d= frequency step (SPOTPY default d=2) and k as the number of parameters of your model. -You can start the simulation with - - sampler = spotpy.algorithms.fast(spotpy_setup,() dbname='Fast_sensitivity', dbformat='csv') - -and you can analyse your results with - - results = sampler.get_data() - analyser.plot_fast_sensitivity(results,number_of_sensitiv_pars=5) - -This will show you a Plot with the total Sensitivity index of all your parameters and in this case the five most sensitive parameters (can be adjusted). - ## Plotting time If you want create plots out of your samples and you don't want to sample your results again do something like this: diff --git a/docs/Bayesian_uncertainty_analysis_with_DREAM.md b/docs/Bayesian_uncertainty_analysis_with_DREAM.md new file mode 100644 index 00000000..cbeabb2b --- /dev/null +++ b/docs/Bayesian_uncertainty_analysis_with_DREAM.md @@ -0,0 +1,128 @@ +# Bayesian uncertainty analysis of HYMOD with DREAM + +This chapter shows you, how to calibrate an external hydrological model (HYMOD) with SPOTPY. + +We use the previously created hydrological model HYMOD spotpy_setup class as an [example](https://github.com/thouska/spotpy/blob/master/spotpy/examples/spot_setup_hymod_python.py), +to perform a parameter uncertainty analysis and Bayesian calibration with the Differential Evolution Adaptive Metropolis (DREAM) algorithm. +For detailed information about the underlying theory, have a look at the [Vrugt (2016)](https://doi.org/10.1016/j.envsoft.2015.08.013 "Vrugt (2016)"). + + +First some relevant functions are imported Hymod example: + + import numpy as np + import spotpy + import matplotlib.pyplot as plt + from spotpy.likelihoods import gaussianLikelihoodMeasErrorOut as GausianLike + from spotpy.analyser import plot_parameter_trace + from spotpy.analyser import plot_posterior_parameter_histogram + +Further we need the spotpy_setup class, which links the model to spotpy: + + from spotpy.examples.spot_setup_hymod_python import spot_setup + +It is important that this function is initialized before it is further used by SPOTPY: + + spot_setup=spot_setup() + +In this special case, we want to change the implemented rmse objective function with a likelihood function, which is compatible with the Bayesian +calibration approach, used by the DREAM sampler: + + # Initialize the Hymod example (will only work on Windows systems) + spot_setup=spot_setup(GausianLike) + + +## Sample with DREAM + +Now we create a sampler, by using one of the implemented algorithms in SPOTPY. +The algorithm needs the inititalized spotpy setup class, and wants you to define a database name and database format. 
Here we create a DREAM_hymod.csv file,
+which will contain the returned likelihood value, the corresponding parameter set, the model results and a chain ID (DREAM starts several independent
+optimization chains, which need to be in agreement with each other to receive robust results):
+
+    sampler=spotpy.algorithms.dream(spot_setup, dbname='DREAM_hymod', dbformat='csv')
+
+To actually start the algorithm, spotpy needs some further details, like the maximum allowed number of repetitions, the number of chains used by DREAM (default = 5) and the Gelman-Rubin convergence limit (default 1.2).
+We further allow 100 runs after convergence is achieved:
+
+    #Select number of maximum repetitions
+    rep=5000
+
+    # Select five chains and set the Gelman-Rubin convergence limit
+    nChains = 4
+    convergence_limit = 1.2
+
+    # Other possible settings to modify the DREAM algorithm, for details see Vrugt (2016)
+    nCr = 3
+    eps = 10e-6
+    runs_after_convergence = 100
+    acceptance_test_option = 6
+
+We start the sampler and collect the gained r_hat convergence values after the sampling:
+
+    r_hat = sampler.sample(rep, nChains, nCr, eps, convergence_limit)
+
+
+## Access the results
+All gained results can be accessed from the SPOTPY csv-database:
+
+    results = spotpy.analyser.load_csv_results('DREAM_hymod')
+
+These results are structured as a numpy array. Accordingly, you can access the different columns by using simple Python code,
+e.g. to access all the simulations:
+
+    fields=[word for word in results.dtype.names if word.startswith('sim')]
+    print(results[fields])
+
+## Plot model uncertainty
+For the analysis we provide some examples of how to plot the data.
+If you want to see the remaining posterior model uncertainty:
+
+    fig= plt.figure(figsize=(16,9))
+    ax = plt.subplot(1,1,1)
+    q5,q25,q75,q95=[],[],[],[]
+    for field in fields:
+        q5.append(np.percentile(results[field][-100:-1],2.5))
+        q95.append(np.percentile(results[field][-100:-1],97.5))
+    ax.plot(q5,color='dimgrey',linestyle='solid')
+    ax.plot(q95,color='dimgrey',linestyle='solid')
+    ax.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0,
+                    linewidth=0,label='parameter uncertainty')
+    ax.plot(spot_setup.evaluation(),'r.',label='data')
+    ax.set_ylim(-50,450)
+    ax.set_xlim(0,729)
+    ax.legend()
+    fig.savefig('python_hymod.png',dpi=300)
+
+![Posterior simulation uncertainty](../img/DREAM_simulation_uncertainty_Hymod.png)
+*Figure 1: Posterior model uncertainty of HYMOD.*
+
+## Plot convergence diagnostic
+If you want to check the convergence of the DREAM algorithm:
+
+    spotpy.analyser.plot_gelman_rubin(results, r_hat, fig_name='python_hymod_convergence.png')
+
+![Convergence diagnostic](../img/DREAM_r_hat.png)
+
+*Figure 2: Gelman-Rubin convergence diagnostic of DREAM results.*
+
+
+## Plot parameter uncertainty
+Or if you want to check the posterior parameter distribution:
+
+    parameters = spotpy.parameter.get_parameters_array(spot_setup)
+
+    fig, ax = plt.subplots(nrows=5, ncols=2)
+    for par_id in range(len(parameters)):
+        plot_parameter_trace(ax[par_id][0], results, parameters[par_id])
+        plot_posterior_parameter_histogram(ax[par_id][1], results, parameters[par_id])
+
+    ax[-1][0].set_xlabel('Iterations')
+    ax[-1][1].set_xlabel('Parameter range')
+
+    plt.show()
+    fig.savefig('hymod_parameters.png',dpi=300)
+
+![DREAM_parameter_uncertainty_Hymod](../img/DREAM_parameter_uncertainty_Hymod.png)
+
+*Figure 3: Posterior parameter distribution of HYMOD, plotted as histograms of the last 100 repetitions of the algorithm.*
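+
+If you prefer numbers to plots, the posterior sample can also be summarized directly. The following lines are a minimal sketch (not part of the original tutorial), assuming the `results` array loaded above; `spotpy.analyser.get_posterior` keeps the best 10 % of all runs by default:
+
+    import numpy as np  # already imported at the top of this tutorial
+
+    # Keep the best 10 % of all runs (highest likelihood values)
+    posterior = spotpy.analyser.get_posterior(results, percentage=10)
+    # Print the posterior median and 95 % interval of every parameter column
+    for name in posterior.dtype.names:
+        if name.startswith('par'):
+            print(name, np.median(posterior[name]), np.percentile(posterior[name], [2.5, 97.5]))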
+
+The corresponding code is available for download [here](https://github.com/thouska/spotpy/blob/master/spotpy/examples/tutorial_dream_hymod.py).
\ No newline at end of file
diff --git a/docs/Calibration_with_SCE-UA.md b/docs/Calibration_with_SCE-UA.md
new file mode 100644
index 00000000..d522f4e7
--- /dev/null
+++ b/docs/Calibration_with_SCE-UA.md
@@ -0,0 +1,89 @@
+# Calibration of HYMOD with SCE-UA
+
+This chapter shows you how to calibrate an external hydrological model (HYMOD) with SPOTPY.
+
+We use the previously created hydrological model HYMOD spotpy_setup class as an [example](https://github.com/thouska/spotpy/blob/master/spotpy/examples/spot_setup_hymod_python.py),
+to perform a calibration with the Shuffled Complex Evolution (SCE-UA) algorithm.
+For detailed information about the underlying theory, have a look at
+Duan, Q., Sorooshian, S. and Gupta, V. K. (1994):
+Optimal use of the SCE-UA global optimization method for calibrating watershed models, J. Hydrol.
+
+First, some relevant functions and the Hymod example are imported:
+
+    import numpy as np
+    import spotpy
+    from spotpy.examples.spot_setup_hymod_python import spot_setup
+    import matplotlib.pyplot as plt
+
+As SCE-UA minimizes the objective function, we need an objective function that gets better with decreasing values.
+The Root Mean Squared Error (RMSE) objective function is a suitable choice:
+
+    spot_setup=spot_setup(spotpy.objectivefunctions.rmse)
+
+Now we create a sampler, by using one of the implemented algorithms in SPOTPY.
+The algorithm needs the initialized spotpy setup class, and wants you to define a database name and database format. Here we create a SCEUA_hymod.csv file,
+which will contain the returned objective function value, the corresponding parameter set, the model results and a chain ID (SCE-UA starts different independent
+optimization complexes, which need to be in agreement with each other to receive robust results; they are herein called chains):
+
+    sampler=spotpy.algorithms.sceua(spot_setup, dbname='SCEUA_hymod', dbformat='csv')
+
+To actually start the algorithm, spotpy needs some further details,
+like the maximum allowed number of repetitions:
+
+    #Select number of maximum repetitions
+    rep=5000
+
+We start the sampler and set some optional algorithm specific settings (check out the publication of SCE-UA for details):
+
+    sampler.sample(rep, ngs=7, kstop=3, peps=0.1, pcento=0.1)
+
+
+## Access the results
+All gained results can be accessed from the SPOTPY csv-database:
+
+    results = spotpy.analyser.load_csv_results('SCEUA_hymod')
+
+These results are stored in a simple structured numpy array, giving you all the flexibility at hand to analyse the results.
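+
+As a quick orientation (a minimal sketch, not part of the original tutorial), the structure of this array can be inspected with plain numpy before any plotting:
+
+    # Column names: objective function ('like1'), parameters ('par...'), simulations ('sim...') and the chain ID
+    print(results.dtype.names)
+    # Number of model runs stored in the database
+    print(len(results))
+    # Best (lowest) RMSE value found during the optimization
+    print(results['like1'].min())
+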
+Below we show an example of how the objective function was minimized during sampling:
+
+    fig= plt.figure(1,figsize=(9,5))
+    plt.plot(results['like1'])
+    plt.ylabel('RMSE')
+    plt.xlabel('Iteration')
+    fig.savefig('SCEUA_objectivefunctiontrace.png',dpi=300)
+    plt.show()
+
+![Objective function trace](../img/SCEUA_objectivefunctiontrace.png)
+*Figure 1: Root Mean Squared Error of the Hymod model results during the optimization with the SCE-UA algorithm.*
+
+
+## Plot best model run
+Or if you want to check the best model run, we first need to find the run_id with the minimal objective function value:
+
+    bestindex,bestobjf = spotpy.analyser.get_minlikeindex(results)
+
+Then we select the best model run:
+
+    best_model_run = results[bestindex]
+
+And filter the results for simulation results only:
+
+    fields=[word for word in best_model_run.dtype.names if word.startswith('sim')]
+    best_simulation = list(best_model_run[fields])
+
+The best model run can be plotted by using basic Matplotlib skills:
+
+    fig= plt.figure(figsize=(16,9))
+    ax = plt.subplot(1,1,1)
+    ax.plot(best_simulation,color='black',linestyle='solid', label='Best objf.='+str(bestobjf))
+    ax.plot(spot_setup.evaluation(),'r.',markersize=3, label='Observation data')
+    plt.xlabel('Number of Observation Points')
+    plt.ylabel('Discharge [l s-1]')
+    plt.legend(loc='upper right')
+    fig.savefig('SCEUA_best_modelrun.png',dpi=300)
+
+![SCEUA_best_modelrun](../img/SCEUA_best_modelrun.png)
+
+*Figure 2: Best model run of HYMOD, calibrated with SCE-UA, using RMSE as objective function.*
+
+The corresponding code is available for download [here](https://github.com/thouska/spotpy/blob/master/spotpy/examples/tutorial_sceua_hymod.py).
\ No newline at end of file
diff --git a/docs/How_to_link_a_model_to_SPOTPY.md b/docs/How_to_link_a_model_to_SPOTPY.md
new file mode 100644
index 00000000..ccb08d07
--- /dev/null
+++ b/docs/How_to_link_a_model_to_SPOTPY.md
@@ -0,0 +1,126 @@
+## Linking a model with SPOTPY
+
+The SPOTPY package comes with several examples, which are designed to help you set up your own research project.
+
+Herein we show how to link an external model with SPOTPY.
+
+The hydrological model HYMOD, a commonly used model in the hydrological community, serves as an example.
+It takes meteorological data as input and produces discharge as output.
+
+Basically any model can be linked to SPOTPY as long as it can somehow be started from the Python programming language.
+Linking a model to SPOTPY is done by following a few consecutive steps, which are grouped in a spotpy_setup class.
+If these steps are followed, SPOTPY can work with this class and analyse the model in an automatized way, giving you
+powerful tools at hand.
+
+## Step 0: Import relevant packages and generate a spotpy setup class
+
+    from spotpy.parameter import Uniform
+    from spotpy.objectivefunctions import rmse
+    from spotpy.examples.hymod_python.hymod import hymod
+    import os
+
+Generating a spotpy setup class is as easy as creating a class in Python:
+
+    class spot_setup(object):
+
+## Step 1: Define the parameters of the model as class parameters
+
+Now, we can fill the setup class. In our case the model comes along with five parameters.
+Here we use Python class parameters to initialize the parameters for our model and make them readable for SPOTPY.
+Needed information is the prior distribution (herein we choose a Uniform distribution), the minimum allowed parameter
+setting (low) and the maximum allowed setting (high).
+ + cmax = Uniform(low=1.0 , high=500) + bexp = Uniform(low=0.1 , high=2.0) + alpha = Uniform(low=0.1 , high=0.99) + Ks = Uniform(low=0.001 , high=0.10) + Kq = Uniform(low=0.1 , high=0.99) + +## Step 2: Write the def __init__ function, which takes care of any things which need to be done only once + +In this step, we can load all the data needed to start the model and set information, which might be needed in the following, +but want change during the model assessment. In our case, we want to set the path to the model: + + def __init__(self,obj_func=None): + #Find Path to Hymod on users system + self.owd = os.path.dirname(os.path.realpath(__file__)) + self.hymod_path = self.owd+os.sep+'hymod_python' + +Further we want to read in the forcing meteorological data and the observation data from file: + + self.PET,self.Precip = [], [] + self.date,self.trueObs = [], [] + #Load Observation data from file + climatefile = open(self.hymod_path+os.sep+'hymod_input.csv', 'r') + headerline = climatefile.readline()[:-1] + + #Read model forcing in working storage (this is done only ones) + if ';' in headerline: + self.delimiter = ';' + else: + self.delimiter = ',' + self.header = headerline.split(self.delimiter) + for line in climatefile: + values = line.strip().split(self.delimiter) + self.date.append(str(values[0])) + self.Precip.append(float(values[1])) + self.PET.append(float(values[2])) + self.trueObs.append(float(values[3])) + climatefile.close() + +Finaly, in this case a transformation factor is needed, which brings the output of the model from [mm/day] into [l s-1], where 1.783 is the catchment area [km²] + + self.Factor = 1.783 * 1000 * 1000 / (60 * 60 * 24) + +The keyword `obj_func` is not necessary in most cases. Herein we use it to change the function in Step 5, which makes this example flexible and applicable to various algorithms. + + self.obj_func = obj_func + +## Step 3: Write the def simulation function, which starts your model + +This function need to do three things. First it needs to be able to receive a parameter set from SPOTPY. +This will be an list of, in our case five values. Each value represents one setting for one parameter. +The HYMOD model receives these settings through a function. However, one could also write the settings into a file, which is read by the model (most common way). +Finally we start the model with the parameter set and collect the data, which are at the end returned. + + def simulation(self,x): + #Here the model is actualy started with a unique parameter combination that it gets from spotpy for each time the model is called + data = hymod(self.Precip, self.PET, x[0], x[1], x[2], x[3], x[4]) + sim=[] + for val in data: + sim.append(val*self.Factor) + #The first year of simulation data is ignored (warm-up) + return sim[366:] + +## Step 4: Write the def evaluation function, which returns your observation data + +Model assessment needs data, which tell you something about the reality that the model aims to reproduce. The HYMOD model produces discharge data, so herein we need to return observed discharge data + + def evaluation(self): + #The first year of simulation data is ignored (warm-up) + return self.trueObs[366:] + +## Step 5: Write the def objectivefunction, which returns how good the model fits the observation data + +In this last step, we compare the observed and the simulated data. So we need a function which can receive both information (simulation and evaluation). 
+SPOTPY will internally take care of times when this function needs to be called and will bring the corresponding data to this function. +Important is that the first keyword is handled as simulation/model results and the second as observation/evaluation results. +The SPOTPY package gives you several functions at hand, which compare the model and observation data with each other. +Please note that the selection of such a function highly influences the results gained with a optimization algorithm. Further some optimization algorithms +have specific needs at this function, as they wont to minimize or maximize the returned value by playing around with the parameter settings. Further some algorithms +need specific likelihood function. Herein we choose the Root Mean Squared Error (rmse) as an example. +A value of 0 would be a perfect fit of the simulation and observation data, +inf would be the worst possible fit. + + def objectivefunction(self,simulation,evaluation, params=None): + #SPOTPY expects to get one or multiple values back, + #that define the performance of the model run + if not self.obj_func: + # This is used if not overwritten by user + like = rmse(evaluation,simulation) + else: + #Way to ensure flexible spot setup class + like = self.obj_func(evaluation,simulation) + return like +Finally, save the file and check out the next example how to use the spotpy setup class with an algorithm. + +The shown example code is available [here](https://github.com/thouska/spotpy/blob/master/spotpy/examples/spot_setup_hymod_python.py) \ No newline at end of file diff --git a/docs/Hydrological_model.md b/docs/Hydrological_model.md deleted file mode 100644 index 09cf2bd4..00000000 --- a/docs/Hydrological_model.md +++ /dev/null @@ -1,215 +0,0 @@ -# A hydrological model - -This chapter shows you, how to link an external model with SPOTPY. This is might be the most interesting chapter for users. -We use an hydrological model as an example, the Catchment Modelling Framework (CMF). For detailed information and download options about this model, have a look at the [CMF Homepage](http://fb09-pasig.umwelt.uni-giessen.de:8000/ "CMF Homepage"). -This example will, most likely, be different from your actual use, but it will give you hints how to solve your specific issue. - -![CMF model](../img/cmf_model.png) - -*Figure 6: One soil column model build with cmf. The coloured bars represent soil layers, the grey ring a Free air carbon dioxide enrichment (FACE) ring and the black tube a Piezometer.* - -Our model is driven by some meteorological data and a varying groundwater table. Evapotransipration is simulate with the Shuttleworth Wallace concept -and infiltration with the Richards equation. The retention curve is build with the VanGenuchten equation. We want to calibrate the VanGenuchten parameter -*alpha*, *ksat*, *n* and *porosity* to simulate the observed soil mositure in the upper most soil layer. - -## Creating the setup file - -To create our setup file, first have to import SPOTPY, the model cmf and some extra packages, we will need: - - import spotpy - import cmf - from datetime import timedelta, datetime # Standard in Python to work with data that have date and time stamp - import Load_Data as loader # Used to import meterological data and the evaluation data from csv-files. - import numpy as np - -Before we start to write our spotpy_setup, we have to write a class or a function, which setting up your model. -In this case, we use the cmf model which directly callable from Python. 
We use the \__init\__ function to separate the forcing and -evaluation data loading from the runtime loop. The run function is supposed to be called with a set of parameters, which setup the rest of the model and runs it. -Important here is that our run function returns just the simulations, on which evaluation data is available. You will have to do the same, if you use a different model. - - class model(object): - ''' - Input: datastart: e.g. datetime(1998,6,1) - dataend: e.g. datetime(2000,1,1) - analysestart: e.g. datetime(1999,1,1) - - Output: Initialised model instance with forcing data (climate, groundwater) and evaluation data - (soil moisture) - ''' - def __init__(self,datastart,dataend,analysestart): - self.d_s=datastart - self.d_end=dataend - self.a_start=analysestart - self.bound= [[0.0001,0.6],[0.01,3],[1.05,1.4],[0.4,0.7]] # Physical parameter boundaries - DataLoader = loader.load_data(self.a_start,self.d_s,self.d_end) - cmf.set_parallel_threads(1) - - ###################### Load Forcing data #################################### - ClimateFilename = 'Climate_Face_new2.csv' - self.md=np.load(ClimateFilename+str(d_start.date())+str(self.d_end.date())+'.npy') - self.rain=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Nd_mm_day']) - self.rHmean= cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Rh']) - self.Windspeed=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Wind']) - self.Rs=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Rs_meas']) - self.T=cmf.timeseries.from_array(begin=self.d_s,step=timedelta(hours=1),data=self.md['Temp']) - self.piezometer = 'P4' - self.gw_array = DataLoader.groundwater(self.piezometer) - ############################################################################# - - ###################### Load Evaluation data ################################# - eval_soil_moisture = DataLoader.soil_moisture('A1') - self.eval_dates = eval_soil_moisture['Date'] - self.observations = eval_soil_moisture['A1'] - ########################################################################### - - def _load_meteo(self,project): - #Create meteo station for project - meteo=project.meteo_stations.add_station('FACE',position = (0,0,0),timezone=1,timestep=cmf.h) - rain = self.rain - meteo.rHmean = self.rHmean - meteo.Windspeed = self.Windspeed - meteo.Rs = self.Rs - meteo.T = self.T - meteo.Tmax = meteo.T.reduce_max(begin = self.d_start, step = timedelta(days=1)) - meteo.Tmin = meteo.T.reduce_min(begin = self.d_start, step = timedelta(days=1)) - project.rainfall_stations.add('FACE',rain,(0,0,0)) - project.use_nearest_rainfall() - # Use the meteorological station for each cell of the project - project.use_nearest_meteo() - - def run(self,args): - return self._run(*args) - - def _run(self,alpha=None,n=None,porosity=None,ksat=None): - #return alpha,n,porosity,ksat - ''' - Runs the model instance - - Input: Parameter set (in this case VanGenuchten Parameter alpha,n,porosity,ksat) - Output: Simulated values on given observation days - ''' - #Check if given parameter set is in realistic boundaries - if alphaself.bound[0][1] or ksatself.bound[1][1] or nself.bound[2][1] or \ - porosityself.bound[3][1]: - print 'The following combination was ignored:' - print 'n= '+str(n) - print 'alpha='+str(alpha) - print 'ksat= '+str(ksat) - print 'porosity= '+str(porosity) - print '##############################' - return self.observations*-np.inf - else: - project=cmf.project() - cell 
= project.NewCell(x=0,y=0,z=0,area=1000, with_surfacewater=True) - print 'n= '+str(n) - print 'alpha='+str(alpha) - print 'ksat= '+str(ksat) - print 'porosity= '+str(porosity) - print '##############################' - r_curve = cmf.VanGenuchtenMualem(Ksat=ksat,phi=porosity,alpha=alpha,n=n) - layers=5 - ldepth=.01 - for i in range(layers): - depth = (i+1) * ldepth - cell.add_layer(depth,r_curve) - cell.install_connection(cmf.Richards) - cell.install_connection(cmf.ShuttleworthWallace) - cell.saturated_depth =.5 - solver = cmf.CVodeIntegrator(project,1e-6) - self._load_meteo(project) - gw = project.NewOutlet('groundwater',x=0,y=0,z=.9)#layers*ldepth) - cmf.Richards(cell.layers[-1],gw) - gw.potential = -.5 #IMPORTANT - gw.is_source=True - solver.t = self.d_start - Evalstep,evallist=0,[] - rundays=(self.d_end-self.d_start).days - for t in solver.run(solver.t,solver.t + timedelta(days=rundays),timedelta(hours=1)): - if self.gw_array['Date'].__contains__(t)==True: - Gw_Index=np.where(self.gw_array['Date']==t) - gw.potential=self.gw_array[self.piezometer][Gw_Index] - print gw.potential #TO DO: CHECK IF SOMETHING HAPPENS HERE!!!! - if t > self.a_start: - if Evalstep !=len(self.eval_dates) and t == self.eval_dates[Evalstep]: - evallist.append(cell.layers.wetness[0]*cell.layers.porosity[0]*100) - Evalstep+=1 - return evallist - -Now we can create our spotpy_setup class. Here we use to \__init\__ function, to initialize our model. -At this point it is recommended to load all needed data into the working storage (in this case meteorological data, soil moisture and groundwater table data). -Keep in mind, that the \__init\__ function is only called once during the sampling, while the other functions are called within every iteration of the algorithm. -The more you can separate from you model into the \__init\__ function, the faster you sampling will be. - - class spotpy_setup(object): - def __init__(self, obj_func=None): - #Just a way to keep this example flexible and applicable to various examples - self.obj_func = obj_func - datastart = datetime(1998,6,1) - dataend = datetime(2000,1,1) - analysestart = datetime(1999,1,1) - self.cmfmodel = model(datastart,dataend,analysestart) - self.params = [spotpy.parameter.Normal('alpha',0.3,0.1,0.02,0.2), - spotpy.parameter.Normal('n',1.2,0.035,0.01,1.22), - spotpy.parameter.Normal('ksat',1,0.3,0.1,2.0), - spotpy.parameter.Normal('porosity',.55,0.04,0.02,0.6), - ] - -Now we have setup our model. It has a warm up from 06/01/1998 until 01/01/1999 and will then save soil moisture simulations whenever evaluation data is available. - -To define the VanGenuchten parameter boundaries we use a normal distribution. - - def parameters(self): - return spotpy.parameter.generate(self.params) - - def simulation(self,vector): - simulations= self.cmfmodel._run(alpha=vector[0],n=vector[1],ksat=vector[2],porosity=vector[3]) - return simulations - - def evaluation(self,evaldates=False): - if evaldates: - return self.cmfmodel.eval_dates - else: - return self.cmfmodel.observations - - def objectivefunction(self,simulation,evaluation): - #SPOTPY expects to get one or multiple values back, - #that define the performence of the model run - if not self.obj_func: - # This is used if not overwritten by user - # RMSE (root mean squared error) works good for the SCE-UA algorithm, - # as it minimizes the objective function. 
- # All other implemented algorithm maximize the objectivefunction - model_performance = spotpy.objectivefunctions.rmse(evaluation,simulation) - else: - #Way to ensure flexible spot setup class - model_performance = self.obj_func(evaluation,simulation) - return model_performance - - -## Sampling - - sampler = spotpy.algorithms.mc(spotpy_setup(),dbname='MC_CMF',dbformat='csv') - sampler = spotpy.algorithms.mle(spotpy_setup(),dbname='MLE_CMF',dbformat='csv') - sampler = spotpy.algorithms.lhs(spotpy_setup(),dbname='LHS_CMF',dbformat='csv') - sampler = spotpy.algorithms.sceua(spotpy_setup(used_algorithm='sceua'),dbname='SCEUA_CMF',dbformat='csv') - sampler = spotpy.algorithms.demcz(spotpy_setup(),dbname='DE-MCz_CMF',dbformat='csv') - sampler = spotpy.algorithms.sa(spotpy_setup(),dbname='SA_CMF',dbformat='csv') - sampler = spotpy.algorithms.rope(spotpy_setup(),dbname='ROPE_CMF',dbformat='csv') - - algorithms=['MC','LHS','MLE','MCMC','SCE-UA','SA','DE-MCz','ROPE'] - results=[] - for algorithm in algorithms: - sampler.sample(10000) - results.append(sampler.getdata) - -## Plotting - - evaluation = spotpy_setup().evaluation() - evaldates= spotpy_setup().evaluation(evaldates=True) - - spotpy.analyser.plot_bestmodelruns(res,evaluation,algorithms=algorithms,dates=evaldates, ylabel='Soil moisture [%]') - -![CMF model](../img/cmf_bestmodelruns.png) - -*Figure 7: Best model runs of the different algorithms.* diff --git a/docs/Sensitivity_analysis_with_FAST.md b/docs/Sensitivity_analysis_with_FAST.md new file mode 100644 index 00000000..a0ed499b --- /dev/null +++ b/docs/Sensitivity_analysis_with_FAST.md @@ -0,0 +1,65 @@ + + +# Sensitivity analysis of HYMOD with FAST + +SPOTPY gives you the opportunity to start a sensitivity analysis of your model. In this case, we included a global sensitivity analysis called "FAST" based on +Saltelli et al. (1999). + +The algorithm will tell you, how sensitive your parameters are based on whatever is given back by your objective function. Before you start to sample, you should know how how many +iterations you need to get an reliable information about your parameter. The number of iteration can be calculate after [Henkel et al. GLOBAL SENSITIVITY ANALYSIS OF NONLINEAR MATHEMATICAL MODELS - AN +IMPLEMENTATION OF TWO COMPLEMENTING VARIANCE-BASED ALGORITHMS, 2012] (https://www.informs-sim.org/wsc12papers/includes/files/con308.pdf): + +$$N = (1+4M^2(1+(k-2)d))k$$ + +with N = needed parameter iterations, M= inference factor (SPOTPY default M=4) and d= frequency step (SPOTPY default d=2) and k as the number of parameters of your model. + +In our case, we provide the hymod model as an [example](https://github.com/thouska/spotpy/blob/master/spotpy/examples/spot_setup_hymod_python.py) +You can start the simulation with + + +So the first step is as always to initialize the spotpy setup class example: + import spotpy + from spotpy.examples.spot_setup_hymod_python import spot_setup + spot_setup=spot_setup() + +As a next step we apply the above formular to determine the number of repetitions needed for the FAST analysis. In our case we have k=5 parameter +in the hymod model, so we need N=1345 repetitions to get reliable results: + + rep = 1345 + +And that's already it. Now we can start the sensitivity analysis: + sampler = spotpy.algorithms.fast(spot_setup, dbname='FAST_hymod', dbformat='csv') + sampler.sample(rep) + +This will take some time. Meanwhile SPOTPY will report you about the progress and the approximate duration. 
+Further SPOTPY will create a database for you with the name 'Fast_hymod.csv', depending on the users selection above. +This file will contain the return objectivefunction values, the sampled parameter sets, the simulation results and some information +about a chain (not relevant for FAST). + +These results can be loaded, e.g. directly from the sampler: + + results = sampler.get_data() + +OR, by loading the results from the database: + + results = spotpy.analyser.load_csv_results('FAST_hymod') + +Finally SPOTPY gives you some tools at hand, to analyse these results. E.g. if you want to determine, which are the three most important parameter of your +model, you can use the following: + + spotpy.analyser.plot_fast_sensitivity(results, number_of_sensitiv_pars=3) + + + analyser.plot_fast_sensitivity(results,number_of_sensitiv_pars=5) + +This will show you a Plot with the total Sensitivity index of all your parameters and in this case the five most sensitive parameters (can be adjusted). +Herein we use a already create spotpy setup class from the tutorial. The code for this class +is available + + +![FAST sensitivity results](../img/FAST_sensitivity.png) + +*Figure 1: Example output of a sensitivity analysis using the FAST (Fourier Amplitude Sensitivity Test). +Total sensitivity index is plotted for every model parameter. Sensitive parameters are plotted in blue, insensitive parameter in orange.* + +The corresponding code is available for download [here](https://github.com/thouska/spotpy/blob/master/spotpy/examples/tutorial_fast_hymod.py). \ No newline at end of file diff --git a/docs/Working with DREAM and Hymod.md b/docs/Working with DREAM and Hymod.md deleted file mode 100644 index 89496e5a..00000000 --- a/docs/Working with DREAM and Hymod.md +++ /dev/null @@ -1,287 +0,0 @@ -# Calibration of HYMOD with DREAM - -This chapter shows you, how to link the external hydrological model HYMOD with SPOTPY (works only on Windows systems). - -We use the hydrological model HYMOD as an example, to calibrate it with the Differential Evolution Adaptive Metropolis (DREAM) algorithm. -For detailed information about the underlying theorie, have a look at the [Vrugt (2016)](https://doi.org/10.1016/j.envsoft.2015.08.013 "Vrugt (2016)"). -The SPOTPY package comes with an example which is desgined to help you to set up your own research project. - -First, we need to setup the model within a spot setup class. The model needs some meteorological input data and five parameters to estimate discharge: - -## Connect HYMOD with SPOTPY - -Here we use to \__init\__ function, to initialize the parameter for our model. - - class spot_setup(object): - def __init__(self): - - self.params = [spotpy.parameter.Uniform('x1',low=1.0 , high=500, optguess=412.33), - spotpy.parameter.Uniform('x2',low=0.1 , high=2.0, optguess=0.1725), - spotpy.parameter.Uniform('x3',low=0.1 , high=0.99, optguess=0.8127), - spotpy.parameter.Uniform('x4',low=0.0 , high=0.10, optguess=0.0404), - spotpy.parameter.Uniform('x5',low=0.1 , high=0.99, optguess=0.5592) - ] - self.curdir = os.getcwd() - self.owd = os.path.realpath(__file__)+os.sep+'..' 
- self.evals = list(np.genfromtxt(self.owd+os.sep+'hymod'+os.sep+'bound.txt',skip_header=65)[:,3])[:730] - self.Factor = 1944 * (1000 * 1000 ) / (1000 * 60 * 60 * 24) - print(len(self.evals)) - - def parameters(self): - return spotpy.parameter.generate(self.params) - -We use the simulation function to write one random parameter set into a parameter file, like it is needed for the HYMOD model, -start the model and read the model discharge output data: - - def simulation(self,x): - os.chdir(self.owd+os.sep+'hymod') - if sys.version_info.major == 2: - params = file('Param.in', 'w') - elif sys.version_info.major == 3: - params = open('Param.in','w') - else: - raise Exception("Your python is too old for this example") - for i in range(len(x)): - if i == len(x): - params.write(str(round(x[i],5))) - else: - params.write(str(round(x[i],5))+' ') - params.close() - os.system('HYMODsilent.exe') - - #try: - if sys.version_info.major == 2: - SimRR = file('Q.out', 'r') - elif sys.version_info.major == 3: - SimRR = open('Q.out', 'r') - else: - raise Exception("Your python is too old for this example") - simulations=[] - for i in range(64): - SimRR.readline() - for i in range(730): - val= SimRR.readline() - simulations.append(float(val)*self.Factor) - #except:#Assign bad values - model might have crashed - # SimRR = 795 * [np.nan] - os.chdir(self.curdir) - - return simulations - -And in a last step, we compare the observed and the simulated data. Here we choose one of the implemented Likelihood functions -in the SPOTPY package. Please mind that the selection of the Likelihood highly influences the results gained with this algorithm: - - def evaluation(self): - return self.evals - - def objectivefunction(self,simulation,evaluation, params=None): - like = spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation,simulation) - return like - -## Sample with DREAM - -Now we can initialize the Hymod example: - - spot_setup=spot_setup() - -Create the Dream sampler of spotpy, alt_objfun is set to None to force SPOTPY -to jump into the def objectivefunction in the spot_setup class (default is -spotpy.objectivefunctions.log_p). Results are saved in a DREAM_hymod.csv file: - - sampler=spotpy.algorithms.dream(spot_setup, dbname='DREAM_hymod', dbformat='csv', alt_objfun=None) - -Select number of maximum repetitions, the number of chains used by dream (default = 5) and set the Gelman-Rubin convergence limit (default 1.2). -We further allow 100 runs after convergence is achieved: - - nChains = 4 - convergence_limit = 1.2 - runs_after_convergence = 100 - -We start the sampler and collect the gained r_hat convergence values after the sampling: - - r_hat = sampler.sample(rep,nChains=nChains,convergence_limit=convergence_limit, - runs_after_convergence=runs_after_convergence) - - -## Access the results -All other results can be accessed from the SPOTPY csv-database: - - results = spotpy.analyser.load_csv_results('DREAM_hymod') - -These results are structured as a numpy array. Accordingly, you can access the different columns by using simple Python code, -e.g. to access all the simulations: - - fields=[word for word in results.dtype.names if word.startswith('sim')] - print(results[fields) - -## Plot model uncertainty -For the analysis we provide some examples how to plor the data. 
-If you want to see the remaining posterior model uncertainty: - - fig= plt.figure(figsize=(16,9)) - ax = plt.subplot(1,1,1) - q5,q25,q75,q95=[],[],[],[] - for field in fields: - q5.append(np.percentile(results[field][-100:-1],2.5)) - q95.append(np.percentile(results[field][-100:-1],97.5)) - ax.plot(q5,color='dimgrey',linestyle='solid') - ax.plot(q95,color='dimgrey',linestyle='solid') - ax.fill_between(np.arange(0,730,1),list(q5),list(q95),facecolor='dimgrey',zorder=0, - linewidth=0,label='parameter uncertainty') - ax.plot(spot_setup.evaluation(),'r.',label='data') - ax.set_ylim(-50,450) - ax.set_xlim(0,729) - ax.legend() - fig.savefig('python_hymod.png',dpi=300) - -![Posterior model uncertainty](../img/python_hymod_simulation.png) -*Figure 1: Posterior model uncertainty of HYMOD.* - -## Plot convergence diagnostic -If you want to check the convergence of the DREAM algorithm: - - fig= plt.figure(figsize=(16,9)) - plt.subplot(2,1,1) - for i in range(int(max(results['chain']))+1): - index=np.where(results['chain']==i) - plt.plot(results['like1'][index], label='Chain '+str(i+1)) - - plt.ylabel('Likelihood value') - plt.legend() - - ax =plt.subplot(2,1,2) - r_hat=np.array(r_hat) - ax.plot([1.2]*len(r_hat),'k--') - for i in range(len(r_hat[0])): - ax.plot(r_hat[:,i],label='x'+str(i+1)) - - ax.set_yscale("log", nonposy='clip') - ax.set_ylim(-1,50) - ax.set_ylabel('R$^d$ - convergence diagnostic') - plt.xlabel('Number of chainruns') - plt.legend() - fig.savefig('python_hymod_convergence.png',dpi=300) - -![Convergence diagnostic](../img/python_hymod_convergence.png) - -*Figure 2: Gelman-Rubin onvergence diagnostic of DREAM results.* - - -## Plot parameter uncertainty -Or if you want to check the posterior parameter distribution: - - def find_min_max(spot_setup): - randompar=spot_setup.parameters()['random'] - for i in range(1000): - randompar=np.column_stack((randompar,spot_setup.parameters()['random'])) - return np.amin(randompar,axis=1),np.amax(randompar,axis=1) - - - min_vs,max_vs = find_min_max(spot_setup) - - fig= plt.figure(figsize=(16,16)) - plt.subplot(5,2,1) - x = results['par'+spot_setup.parameters()['name'][0]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x1') - plt.ylim(min_vs[0],max_vs[0]) - - - plt.subplot(5,2,2) - x = results['par'+spot_setup.parameters()['name'][0]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x1') - plt.xlim(min_vs[0],max_vs[0]) - - - - plt.subplot(5,2,3) - x = results['par'+spot_setup.parameters()['name'][1]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x2') - plt.ylim(min_vs[1],max_vs[1]) - - plt.subplot(5,2,4) - x = results['par'+spot_setup.parameters()['name'][1]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x2') - plt.xlim(min_vs[1],max_vs[1]) - - - - plt.subplot(5,2,5) - x = results['par'+spot_setup.parameters()['name'][2]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x3') - plt.ylim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,6) - x = results['par'+spot_setup.parameters()['name'][2]][int(len(results)*0.5):] - normed_value = 1 - hist, 
bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x3') - plt.xlim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,7) - x = results['par'+spot_setup.parameters()['name'][3]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x4') - plt.ylim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,8) - x = results['par'+spot_setup.parameters()['name'][3]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x4') - plt.xlim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,9) - x = results['par'+spot_setup.parameters()['name'][4]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('x5') - plt.ylim(min_vs[4],max_vs[4]) - plt.xlabel('Iterations') - - plt.subplot(5,2,10) - x = results['par'+spot_setup.parameters()['name'][4]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('x5') - plt.xlabel('Parameter range') - plt.xlim(min_vs[4],max_vs[4]) - fig.savefig('python_parameters.png',dpi=300) - -![Parameter distribution](../img/python_hymod_parameters.png) - -*Figure 3: Posterior parameter distribution of HYMOD.* diff --git a/docs/img/DREAM_parameter_uncertainty_Hymod.png b/docs/img/DREAM_parameter_uncertainty_Hymod.png new file mode 100644 index 00000000..e29b48e2 Binary files /dev/null and b/docs/img/DREAM_parameter_uncertainty_Hymod.png differ diff --git a/docs/img/DREAM_r_hat.png b/docs/img/DREAM_r_hat.png new file mode 100644 index 00000000..2cf526b5 Binary files /dev/null and b/docs/img/DREAM_r_hat.png differ diff --git a/docs/img/DREAM_simulation_uncertainty_Hymod.png b/docs/img/DREAM_simulation_uncertainty_Hymod.png new file mode 100644 index 00000000..3187fa8d Binary files /dev/null and b/docs/img/DREAM_simulation_uncertainty_Hymod.png differ diff --git a/docs/img/FAST_sensitivity.png b/docs/img/FAST_sensitivity.png new file mode 100644 index 00000000..e0815f85 Binary files /dev/null and b/docs/img/FAST_sensitivity.png differ diff --git a/docs/img/SCEUA_best_modelrun.png b/docs/img/SCEUA_best_modelrun.png new file mode 100644 index 00000000..f2c47c70 Binary files /dev/null and b/docs/img/SCEUA_best_modelrun.png differ diff --git a/docs/img/SCEUA_objectivefunctiontrace.png b/docs/img/SCEUA_objectivefunctiontrace.png new file mode 100644 index 00000000..b2fc9ed3 Binary files /dev/null and b/docs/img/SCEUA_objectivefunctiontrace.png differ diff --git a/docs/index.md b/docs/index.md index 2afedac2..be990ea0 100644 --- a/docs/index.md +++ b/docs/index.md @@ -157,8 +157,10 @@ The results can be analysed with some pre-build statistics and plotting features rope.py # RObust Parameter Estimation fast.py # Fourier Amplitude Sensitivity Testing abc.py # Artificial Bee Colony - fscabc.py # Fitness Scaled Chaotic Artificial Bee Colony - dream.py # Differential Evolution Adaptive Metropolis + fscabc.py # Fitness Scaled Chaotic Artificial Bee Colony + dream.py # Differential Evolution Adaptive Metropolis + dds.py # Dynamically Dimensioned Search + parallel/ mpi.py #Basic Parralel Computing features diff --git a/mkdocs.yml b/mkdocs.yml index 44b31de5..5348b679 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -20,8 +20,10 @@ 
nav: - Rosenbrock: Rosenbrock.md - Griewank: Griewank.md - Ackley: Ackley.md - - Hydrological_model: Hydrological_model.md - - Working with DREAM and Hymod: Working with DREAM and Hymod.md + - How to link a model to SPOTPY: How_to_link_a_model_to_SPOTPY.md + - Sensitivity analysis with FAST: Sensitivity_analysis_with_FAST.md + - Calibrating with SCE-UA: Calibration_with_SCE-UA.md + - Bayesian uncertainty analysis with DREAM: Bayesian_uncertainty_analysis_with_DREAM.md - Advanced_hints: Advanced_hints.md - Algorithm_guide: Algorithm_guide.md diff --git a/setup.py b/setup.py index 7a51d7ae..d2f1e1dd 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,8 @@ author_email = 'tobias.houska@umwelt.uni-giessen.de', url = 'https://spotpy.readthedocs.io/en/latest/', license = 'MIT', + install_requires=[ + 'scipy', 'numpy'], packages=find_packages(exclude=["tests*", "docs*"]), use_2to3 = True, keywords = 'Monte Carlo, MCMC, MLE, SCE-UA, Simulated Annealing, DE-MCz, DREAM, ROPE, Artifical Bee Colony, DDS, PA-DDS, NSGAii, Uncertainty, Calibration, Model, Signatures', diff --git a/spotpy/algorithms/_algorithm.py b/spotpy/algorithms/_algorithm.py index b366f892..65c4e1a9 100644 --- a/spotpy/algorithms/_algorithm.py +++ b/spotpy/algorithms/_algorithm.py @@ -368,17 +368,19 @@ def read_breakdata(self, dbname): Reason: In case of incomplete optimizations, old data can be restored. ''' import pickle with open(dbname+'.break', 'rb') as breakfile: - work,backuptime,repos,obmin,obmax=pickle.load(breakfile) + work,backuptime,repos,obmin,obmax,pmin,pmax=pickle.load(breakfile) self.status.starttime=self.status.starttime-backuptime self.status.rep=repos self.status.objectivefunction_min=obmin self.status.objectivefunction_max=obmax + self.status.params_min=pmin + self.status.params_max=pmax return work def write_breakdata(self, dbname, work): ''' Write data to a pickle file if a breakpoint has been set.''' import pickle - work=(work,self.status.last_print-self.status.starttime,self.status.rep,self.status.objectivefunction_min,self.status.objectivefunction_max) + work=(work,self.status.last_print-self.status.starttime,self.status.rep,self.status.objectivefunction_min,self.status.objectivefunction_max,self.status.params_min,self.status.params_max) with open(str(dbname)+'.break', 'wb') as breakfile: pickle.dump(work, breakfile) @@ -437,26 +439,31 @@ def simulate(self, id_params_tuple): self.all_params[self.non_constant_positions] = params #TODO: List parameters are not updated if not accepted for the algorithm, we may have to warn/error if list is given all_params = self.all_params - # we need a layer to fetch returned data from a threaded process into a queue. - def model_layer(q,all_params): - # Call self.model with a namedtuple instead of another sequence - q.put(self.setup.simulation(self.partype(*all_params))) + if self.sim_timeout: + # we need a layer to fetch returned data from a threaded process into a queue. + def model_layer(q,all_params): + # Call self.model with a namedtuple instead of another sequence + q.put(self.setup.simulation(self.partype(*all_params))) - # starting a queue, where in python2.7 this is a multiprocessing class and can cause errors because of - # incompability which the main thread. 
Therefore only for older Python version a workaround follows - que = Queue() - sim_thread = threading.Thread(target=model_layer, args=(que, all_params)) - sim_thread.daemon = True - sim_thread.start() + # starting a queue, where in python2.7 this is a multiprocessing class and can cause errors because of + # incompability which the main thread. Therefore only for older Python version a workaround follows + que = Queue() + sim_thread = threading.Thread(target=model_layer, args=(que, all_params)) + sim_thread.daemon = True + sim_thread.start() - # If self.sim_timeout is not None the self.model will break after self.sim_timeout seconds otherwise is runs as - # long it needs to run - sim_thread.join(self.sim_timeout) + # If self.sim_timeout is not None the self.model will break after self.sim_timeout seconds otherwise is runs as + # long it needs to run + sim_thread.join(self.sim_timeout) + + # If no result from the thread is given, i.e. the thread was killed from the watcher the default result is + # '[nan]' and will not be saved. Otherwise get the result from the thread + model_result = None + if not que.empty(): + model_result = que.get() + + else: + model_result = self.setup.simulation(self.partype(*all_params)) - # If no result from the thread is given, i.e. the thread was killed from the watcher the default result is - # '[nan]' and will not be saved. Otherwise get the result from the thread - model_result = None - if not que.empty(): - model_result = que.get() return id, params, model_result diff --git a/spotpy/algorithms/dds.py b/spotpy/algorithms/dds.py index 11c488cc..1cd368b5 100644 --- a/spotpy/algorithms/dds.py +++ b/spotpy/algorithms/dds.py @@ -396,9 +396,9 @@ def calculate_next_s_test(self, previous_x_curr, rep, rep_limit, r): new_x_curr[j] = new_value # change relevant dec var value in x_curr if dvn_count == 0: # no DVs selected at random, so select ONE - dec_var = np.int(np.ceil(amount_params * self.np_random.rand())) - new_value = self.dds_generator.neigh_value_mixed(previous_x_curr, r, dec_var - 1, self.min_bound[j],self.max_bound[j]) - - new_x_curr[dec_var - 1] = new_value # change relevant decision variable value in s_test + dec_var = np.int(np.ceil(amount_params * self.np_random.rand())) - 1 + new_value = self.dds_generator.neigh_value_mixed(previous_x_curr, r, dec_var, self.min_bound[dec_var], + self.max_bound[dec_var]) + new_x_curr[dec_var] = new_value # change relevant decision variable value in s_test return new_x_curr diff --git a/spotpy/algorithms/padds.py b/spotpy/algorithms/padds.py index 86cb179b..ab861efe 100644 --- a/spotpy/algorithms/padds.py +++ b/spotpy/algorithms/padds.py @@ -89,7 +89,11 @@ def __init__(self, *args, **kwargs): self.r = kwargs.pop("r") except KeyError: self.r = 0.2 # default value - self._return_all_likes=True #alloes multi-objective calibration + + self._return_all_likes=True #allows multi-objective calibration + kwargs['optimization_direction'] = 'minimize' + kwargs['algorithm_name'] = 'Pareto Archived Dynamically Dimensioned Search (PADDS) algorithm' + super(padds, self).__init__(*args, **kwargs) self.np_random = np.random @@ -307,11 +311,11 @@ def calculate_next_s_test(self, previous_x_curr, rep, rep_limit, r): new_x_curr[j] = new_value # change relevant dec var value in x_curr if dvn_count == 0: # no DVs selected at random, so select ONE + dec_var = np.int(np.ceil(amount_params * self.np_random.rand())) new_value = self.dds_generator.neigh_value_mixed(previous_x_curr, r, dec_var - 1, self.min_bound[dec_var - 1],self.max_bound[dec_var - 
1]) new_x_curr[dec_var - 1] = new_value # change relevant decision variable value in s_test - return new_x_curr diff --git a/spotpy/algorithms/sceua.py b/spotpy/algorithms/sceua.py index bdcc5f9a..fe5e59bf 100644 --- a/spotpy/algorithms/sceua.py +++ b/spotpy/algorithms/sceua.py @@ -15,7 +15,7 @@ class sceua(_algorithm): """ - This class holds the Shuffled Complex Evolution Algortithm (SCE-UA) algorithm, + This class holds the Shuffled Complex Evolution Algorithm (SCE-UA) algorithm, based on: Duan, Q., Sorooshian, S. and Gupta, V. K. (1994) Optimal use of the SCE-UA global optimization method for calibrating watershed models, J. Hydrol. @@ -63,7 +63,6 @@ def __init__(self, *args, **kwargs): kwargs['optimization_direction'] = 'minimize' kwargs['algorithm_name'] = 'Shuffled Complex Evolution (SCE-UA) algorithm' super(sceua, self).__init__(*args, **kwargs) - def simulate(self, id_params_tuple): """This overwrites the simple wrapper function of _algorithms.py @@ -128,7 +127,7 @@ def simulate(self, id_params_tuple): # Replace the complex back into the population; return igs, likes, pars, sims, cx, cf, k1, k2, discarded_runs - def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.0000001): + def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.0000001, max_loop_inc=None): """ Samples from parameter distributions using SCE-UA (Duan, 2004), converted to python by Van Hoey (2011), restructured and parallelized by Houska et al (2015). @@ -146,6 +145,8 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 the percentage change allowed in the past kstop loops below which convergence is assumed to be achieved. peps: float Value of the normalized geometric range of the parameters in the population below which convergence is deemed achieved. 
+ max_loop_inc: int + Number of loops executed at max in this function call """ self.set_repetiton(repetitions) # Initialize SCE parameters: @@ -163,6 +164,10 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 bound = self.bu - self.bl # np.array self.stochastic_parameters = bound != 0 proceed = True + + # burnin_only, needed to indictat if only the burnin phase should be run + burnin_only = False + if self.breakpoint == 'read' or self.breakpoint == 'readandwrite': data_frombreak = self.read_breakdata(self.dbname) icall = data_frombreak[0] @@ -177,7 +182,7 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 icall = 0 xf = np.zeros(npt) - print ('Starting burn-in sampling...') + print('Starting burn-in sampling...') # Burn in param_generator = ((rep, x[rep]) for rep in range(int(npt))) @@ -194,6 +199,12 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 idx = np.argsort(xf) xf = np.sort(xf) x = x[idx, :] + + if max_loop_inc == 1: + burnin_only = True + + print('Burn-in sampling completed...') + else: raise ValueError("Don't know the breakpoint keyword {}".format(self.breakpoint)) @@ -221,16 +232,29 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 print( 'THE POPULATION HAS CONVERGED TO A PRESPECIFIED SMALL PARAMETER SPACE') - print ('Burn-in sampling completed...') + + # Begin evolution loops: nloop = 0 criter = [] criter_change_pcent = 1e+5 - - self.repeat.setphase('ComplexEvo') - print ('Starting Complex Evolution...') proceed = True + + # if only burnin, stop the following while loop to be started + # write brakpoint if only a single generation shall be computed and + # the main loop will not be executed + if burnin_only: + if self.breakpoint == 'write' or self.breakpoint == 'readandwrite': + work = (self.status.rep, (x, xf), gnrng) + self.write_breakdata(self.dbname, work) + proceed = False + print('ONLY THE BURNIN PHASE WAS COMPUTED') + + else: + self.repeat.setphase('ComplexEvo') + print('Starting Complex Evolution...') + while icall < repetitions and gnrng > peps and criter_change_pcent > pcento and proceed == True: nloop += 1 print ('ComplexEvo loop #%d in progress...' 
% nloop) @@ -260,10 +284,10 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 self.discarded_runs+=1 print('Skipping saving') - if self.breakpoint == 'write' or self.breakpoint == 'readandwrite'\ - and self.status.rep >= self.backup_every_rep: - work = (self.status.rep, (x, xf), gnrng) - self.write_breakdata(self.dbname, work) + if self.breakpoint == 'write' or self.breakpoint == 'readandwrite'\ + and self.status.rep >= self.backup_every_rep: + work = (self.status.rep, (x, xf), gnrng) + self.write_breakdata(self.dbname, work) # End of Loop on Complex Evolution; @@ -315,7 +339,13 @@ def sample(self, repetitions, ngs=20, kstop=100, pcento=0.0000001, peps=0.000000 'CONVERGENCY HAS ACHIEVED BASED ON OBJECTIVE FUNCTION CRITERIA!!!') elif self.status.stop: proceed = False - break + break + + # stop, if max number of loop iteration was reached + elif max_loop_inc and nloop >= max_loop_inc: + proceed = False + print('THE MAXIMAL NUMBER OF LOOPS PER EXECUTION WAS REACHED') + break # End of the Outer Loops print('SEARCH WAS STOPPED AT TRIAL NUMBER: %d' % self.status.rep) @@ -413,4 +443,4 @@ def _sampleinputmatrix(self, nrows, npars): x = np.zeros((nrows, npars)) for i in range(nrows): x[i, :] = self.parameter()['random'] - return x \ No newline at end of file + return x diff --git a/spotpy/analyser.py b/spotpy/analyser.py index 7767d8ee..690d363e 100644 --- a/spotpy/analyser.py +++ b/spotpy/analyser.py @@ -187,7 +187,7 @@ def get_minlikeindex(results): textv=text+str(index[0][0])+text2+value print(textv) - return index, minimum + return index[0][0], minimum def get_percentiles(results,sim_number=''): @@ -279,6 +279,21 @@ def get_posterior(results,percentage=10, maximize=True): index = np.where(results['like1']>=np.percentile(results['like1'],100.0-percentage)) return results[index] +def plot_parameter_trace(ax, results, parameter): + #This function plots the sampled values of one parameter for each chain + for i in range(int(max(results['chain']))): + index=np.where(results['chain']==i) + ax.plot(results['par'+parameter['name']][index],'.', markersize=2) + ax.set_ylabel(parameter['name']) + ax.set_ylim(parameter['minbound'], parameter['maxbound']) + +def plot_posterior_parameter_histogram(ax, results, parameter): + #This function plots a histogram of the parameter values of the last 100 runs + ax.hist(results['par'+parameter['name']][-100:], + bins =np.linspace(parameter['minbound'],parameter['maxbound'],20)) + ax.set_ylabel('Density') + ax.set_xlim(parameter['minbound'], parameter['maxbound']) + def plot_parameter_uncertainty(posterior_results,evaluation, fig_name='Posterior_parameter_uncertainty.png'): import matplotlib.pyplot as plt @@ -453,7 +468,7 @@ def plot_fast_sensitivity(results,like_index=1,number_of_sensitiv_pars=10,fig_na import matplotlib.pyplot as plt parnames=get_parameternames(results) - fig=plt.figure(figsize=(16,6)) + fig=plt.figure(figsize=(9,6)) ax = plt.subplot(1,1,1) Si = get_sensitivity_of_fast(results, like_index=like_index) @@ -515,7 +530,7 @@ def plot_fast_sensitivity(results,like_index=1,number_of_sensitiv_pars=10,fig_na ax.plot(np.arange(-1,len(parnames)+1,1),[threshold]*(len(parnames)+2),'r--') ax.set_xlim(-0.5,len(parnames)-0.5) plt.tight_layout() - fig.savefig(fig_name,dpi=300) + fig.savefig(fig_name,dpi=150) def plot_heatmap_griewank(results,algorithms, fig_name='heatmap_griewank.png'): @@ -969,16 +984,31 @@ def plot_autocorellation(parameterdistribution,parametername,fig_name='Autocorre plt.savefig(fig_name,dpi=300) -def plot_gelman_rubin(r_hat_values,fig_name='gelman_rub.png'): +def
plot_gelman_rubin(results, r_hat_values,fig_name='gelman_rub.png'): '''Input: List of R_hat values of chains (see Gelman & Rubin 1992) Output: Plot as seen for e.g. in (Sadegh and Vrugt 2014)''' import matplotlib.pyplot as plt - fig=plt.figure(figsize=(16,9)) - ax = plt.subplot(1,1,1) - ax.plot(r_hat_values) - ax.plot([1.2]*len(r_hat_values),'k--') - ax.set_xlabel='r_hat' - plt.savefig(fig_name,dpi=300) + + fig= plt.figure(figsize=(9,6)) + ax1 = plt.subplot(2,1,1) + for i in range(int(max(results['chain']))+1): + index=np.where(results['chain']==i) + ax1.plot(results['like1'][index], label='Chain '+str(i+1)) + + ax1.set_ylabel('Likelihood value') + ax1.legend() + + ax2 =plt.subplot(2,1,2) + r_hat=np.array(r_hat_values) + ax2.plot([1.2]*len(r_hat),'k--') + for i in range(len(r_hat[0])): + ax2.plot(r_hat[:,i],label='x'+str(i+1)) + + ax2.set_yscale("log", nonpositive='clip') + ax2.set_ylabel('R$^d$ - convergence diagnostic') + ax2.set_xlabel('Number of chainruns') + ax2.legend() + fig.savefig(fig_name,dpi=150) def gelman_rubin(x): '''NOT USED YET''' diff --git a/spotpy/examples/hymod_python/hymod.py b/spotpy/examples/hymod_python/hymod.py index ae10739b..f74bebdf 100644 --- a/spotpy/examples/hymod_python/hymod.py +++ b/spotpy/examples/hymod_python/hymod.py @@ -1,9 +1,26 @@ +# -*- coding: utf-8 -*- +''' +Copyright (c) 2015 by Tobias Houska + +This file is part of Statistical Parameter Estimation Tool (SPOTPY). + +:author: Tobias Houska and Benjamin Manns + +:paper: Houska, T., Kraft, P., Chamorro-Chavez, A. and Breuer, L.: +SPOTting Model Parameters Using a Ready-Made Python Package, +PLoS ONE, 10(12), e0145180, doi:10.1371/journal.pone.0145180, 2015. +''' + from numba import jit def hymod(Precip, PET, cmax,bexp,alpha,Rs,Rq): """ - See https://www.proc-iahs.net/368/180/2015/piahs-368-180-2015.pdf for a scientific paper. + See https://www.proc-iahs.net/368/180/2015/piahs-368-180-2015.pdf for a scientific paper: + + Quan, Z.; Teng, J.; Sun, W.; Cheng, T. & Zhang, J. (2015): Evaluation of the HYMOD model + for rainfall–runoff simulation using the GLUE method. Remote Sensing and GIS for Hydrology + and Water Resources, 180 - 185, IAHS Publ. 368. DOI: 10.5194/piahs-368-180-2015. 
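The reworked `plot_gelman_rubin` above now needs the results array in addition to the R-hat values, so it can draw the likelihood trace per chain next to the convergence diagnostic. A short, hedged call sketch, mirroring how the DREAM tutorial further below uses it (repetition count and chain number are placeholders):

    import spotpy
    from spotpy.likelihoods import gaussianLikelihoodMeasErrorOut as GausianLike
    from spotpy.examples.spot_setup_hymod_python import spot_setup

    setup = spot_setup(GausianLike)
    sampler = spotpy.algorithms.dream(setup, dbname='DREAM_hymod', dbformat='csv')
    r_hat = sampler.sample(5000, nChains=4)   # list of R-hat vectors, one entry per loop
    results = sampler.getdata()               # structured array with all sampled runs

    # New signature: results first, then the R-hat history
    spotpy.analyser.plot_gelman_rubin(results, r_hat, fig_name='DREAM_r_hat.png')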
:param cmax: :param bexp: diff --git a/spotpy/examples/spot_setup_dds.py b/spotpy/examples/spot_setup_dds.py index 67e93585..e39e85dd 100644 --- a/spotpy/examples/spot_setup_dds.py +++ b/spotpy/examples/spot_setup_dds.py @@ -53,6 +53,21 @@ def _objfunc_switcher(self, name): for j in range(2)] + [Uniform('c' + str(j), -500, 700, 1.5, 3.0, -500, 700, doc=str(j) + 'continuous parameter within a boundary') for j in range(8)] + elif name == "cmf_style": + self.objfunc = ackley10 + self.params = [Uniform(.5, 5., optguess=1.5, doc='saturated depth at beginning'), + Uniform(.001, .8, optguess=.1, doc='porosity of matrix [m3 Pores / m3 Soil]'), + Uniform(1., 240., optguess=10., + doc='ssaturated conductivity of macropores [m/day]'), + Uniform(.0001, .5, optguess=.05, doc='macropore fraction [m3/m3]'), + Uniform(.005, 1., optguess=.05, + doc='mean distance between the macropores [m]'), + Uniform(0., 1., optguess=0., + doc='water content when matric potential pointing towards -infinity'), + Uniform(.5, 1., optguess=.99, + doc='wetness above which the parabolic extrapolation is used instead of VGM'), + Uniform(0., 50, optguess=.1, + doc='exchange rate [1/day] for macropore-matrix-exchange')] def parameters(self): if self.params is None: diff --git a/spotpy/examples/spot_setup_hymod_python.py b/spotpy/examples/spot_setup_hymod_python.py index 876f1f41..38f6d6a1 100644 --- a/spotpy/examples/spot_setup_hymod_python.py +++ b/spotpy/examples/spot_setup_hymod_python.py @@ -68,7 +68,7 @@ def evaluation(self): def objectivefunction(self,simulation,evaluation, params=None): #SPOTPY expects to get one or multiple values back, - #that define the performence of the model run + #that define the performance of the model run if not self.obj_func: # This is used if not overwritten by user like = rmse(evaluation,simulation) diff --git a/spotpy/examples/spot_setup_hymod_python_pareto.py b/spotpy/examples/spot_setup_hymod_python_pareto.py index ceb2fffa..b4e97b31 100644 --- a/spotpy/examples/spot_setup_hymod_python_pareto.py +++ b/spotpy/examples/spot_setup_hymod_python_pareto.py @@ -61,12 +61,6 @@ def evaluation(self): return self.trueObs[366:] def objectivefunction(self,simulation,evaluation, params=None): - return np.array([ - spotpy.likelihoods.gaussianLikelihoodMeasErrorOut(evaluation, simulation), - -spotpy.objectivefunctions.rmse(evaluation, simulation), - -spotpy.objectivefunctions.mse(evaluation, simulation), - -spotpy.objectivefunctions.pbias(evaluation, simulation), - spotpy.likelihoods.NashSutcliffeEfficiencyShapingFactor(evaluation, simulation), - spotpy.likelihoods.ABCBoxcarLikelihood(evaluation, simulation), - spotpy.likelihoods.LikelihoodAR1NoC(evaluation, simulation) - ]) \ No newline at end of file + return [-abs(spotpy.objectivefunctions.bias(evaluation, simulation)), + spotpy.objectivefunctions.rsquared(evaluation, simulation), + spotpy.objectivefunctions.nashsutcliffe(evaluation, simulation)] \ No newline at end of file diff --git a/spotpy/examples/tutorial_cmf_lumped.py b/spotpy/examples/tutorial_cmf_lumped.py index 49a87713..a79904fa 100644 --- a/spotpy/examples/tutorial_cmf_lumped.py +++ b/spotpy/examples/tutorial_cmf_lumped.py @@ -46,11 +46,11 @@ def get_runs(default=1): model = SingleStorage(datetime.datetime(1980, 1, 1), datetime.datetime(1985, 12, 31)) - runs = get_runs(default=5) + runs = get_runs(default=100) # Create the sampler sampler = spotpy.algorithms.mc(model, parallel=parallel(), - dbname=model.dbname, dbformat='hdf5', + dbname=model.dbname, dbformat='csv', save_sim=True, 
sim_timeout=300) # Now we can sample with the implemented Monte Carlo algorithm: diff --git a/spotpy/examples/tutorial_dds_hymod.py b/spotpy/examples/tutorial_dds_hymod.py index f94a061c..7eb52eb2 100644 --- a/spotpy/examples/tutorial_dds_hymod.py +++ b/spotpy/examples/tutorial_dds_hymod.py @@ -27,7 +27,7 @@ # Initialize the Hymod example # In this case, we tell the setup which algorithm we want to use, so # we can use this exmaple for different algorithms - spot_setup=spot_setup(users_objective_function=spotpy.objectivefunctions.nashsutcliffe) + spot_setup=spot_setup(spotpy.objectivefunctions.nashsutcliffe) #Select number of maximum allowed repetitions rep=1000 diff --git a/spotpy/examples/tutorial_dream_hymod.py b/spotpy/examples/tutorial_dream_hymod.py index c7cc3d9a..08a609c9 100644 --- a/spotpy/examples/tutorial_dream_hymod.py +++ b/spotpy/examples/tutorial_dream_hymod.py @@ -14,14 +14,15 @@ from spotpy.examples.spot_setup_hymod_python import spot_setup import matplotlib.pyplot as plt from spotpy.likelihoods import gaussianLikelihoodMeasErrorOut as GausianLike - +from spotpy.analyser import plot_parameter_trace +from spotpy.analyser import plot_posterior_parameter_histogram if __name__ == "__main__": parallel ='seq' # Initialize the Hymod example (will only work on Windows systems) #spot_setup=spot_setup(parallel=parallel) spot_setup=spot_setup(GausianLike) - # Create the Dream sampler of spotpy, al_objfun is set to None to force SPOTPY + # Create the Dream sampler of spotpy, alt_objfun is set to None to force SPOTPY # to jump into the def objectivefunction in the spot_setup class (default is # spotpy.objectivefunctions.log_p) @@ -31,11 +32,16 @@ # Select five chains and set the Gelman-Rubin convergence limit nChains = 4 convergence_limit = 1.2 + + # Other possible settings to modify the DREAM algorithm, for details see Vrugt (2016) + nCr = 3 + eps = 10e-6 runs_after_convergence = 100 + acceptance_test_option = 6 sampler=spotpy.algorithms.dream(spot_setup, dbname='DREAM_hymod', dbformat='csv') - r_hat = sampler.sample(rep,nChains=nChains,convergence_limit=convergence_limit, - runs_after_convergence=runs_after_convergence) + r_hat = sampler.sample(rep, nChains, nCr, eps, convergence_limit, + runs_after_convergence,acceptance_test_option) @@ -47,46 +53,28 @@ # Example plot to show remaining parameter uncertainty # - fig= plt.figure(figsize=(16,9)) + fig= plt.figure(figsize=(9,6)) ax = plt.subplot(1,1,1) q5,q25,q75,q95=[],[],[],[] for field in fields: - q5.append(np.percentile(results[field][-100:-1],2.5)) - q95.append(np.percentile(results[field][-100:-1],97.5)) + q5.append(np.percentile(results[field][-100:-1],2.5))# ALl 100 runs after convergence + q95.append(np.percentile(results[field][-100:-1],97.5))# ALl 100 runs after convergence ax.plot(q5,color='dimgrey',linestyle='solid') ax.plot(q95,color='dimgrey',linestyle='solid') ax.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0, - linewidth=0,label='parameter uncertainty') - ax.plot(spot_setup.evaluation(),'r.',label='data') + linewidth=0,label='simulation uncertainty') + ax.plot(spot_setup.evaluation(),color='red', markersize=2,label='data') ax.set_ylim(-50,450) ax.set_xlim(0,729) + ax.set_ylabel('Discharge [l s-1]') + ax.set_xlabel('Days') ax.legend() - fig.savefig('python_hymod.png',dpi=300) + fig.savefig('DREAM_simulation_uncertainty_Hymod.png',dpi=150) ######################################################### # Example plot to show the convergence ################# - fig= 
plt.figure(figsize=(12,16)) - plt.subplot(2,1,1) - for i in range(int(max(results['chain']))+1): - index=np.where(results['chain']==i) - plt.plot(results['like1'][index], label='Chain '+str(i+1)) - - plt.ylabel('Likelihood value') - plt.legend() - - ax =plt.subplot(2,1,2) - r_hat=np.array(r_hat) - ax.plot([1.2]*len(r_hat),'k--') - for i in range(len(r_hat[0])): - ax.plot(r_hat[:,i],label='x'+str(i+1)) - - ax.set_yscale("log", nonposy='clip') - ax.set_ylim(-1,50) - ax.set_ylabel('R$^d$ - convergence diagnostic') - plt.xlabel('Number of chainruns') - plt.legend() - fig.savefig('python_hymod_convergence.png',dpi=300) + spotpy.analyser.plot_gelman_rubin(results, r_hat, fig_name='DREAM_r_hat.png') ######################################################## @@ -95,111 +83,17 @@ # Example plot to show the parameter distribution ###### parameters = spotpy.parameter.get_parameters_array(spot_setup) + fig, ax = plt.subplots(nrows=5, ncols=2) + fig.set_figheight(9) + fig.set_figwidth(9) + for par_id in range(len(parameters)): + plot_parameter_trace(ax[par_id][0], results, parameters[par_id]) + plot_posterior_parameter_histogram(ax[par_id][1], results, parameters[par_id]) - min_vs,max_vs = parameters['minbound'], parameters['maxbound'] - - fig= plt.figure(figsize=(16,16)) - plt.subplot(5,2,1) - x = results['par'+str(parameters['name'][0])] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('cmax') - plt.ylim(min_vs[0],max_vs[0]) - - - plt.subplot(5,2,2) - x = results['par'+parameters['name'][0]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('cmax') - plt.xlim(min_vs[0],max_vs[0]) - - - - plt.subplot(5,2,3) - x = results['par'+parameters['name'][1]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('bexp') - plt.ylim(min_vs[1],max_vs[1]) - - plt.subplot(5,2,4) - x = results['par'+parameters['name'][1]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('bexp') - plt.xlim(min_vs[1],max_vs[1]) - - - - plt.subplot(5,2,5) - x = results['par'+parameters['name'][2]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('alpha') - plt.ylim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,6) - x = results['par'+parameters['name'][2]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('alpha') - plt.xlim(min_vs[2],max_vs[2]) - - - plt.subplot(5,2,7) - x = results['par'+parameters['name'][3]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) - plt.plot(x[index],'.') - plt.ylabel('Ks') - plt.ylim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,8) - x = results['par'+parameters['name'][3]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Ks') - plt.xlim(min_vs[3],max_vs[3]) - - - plt.subplot(5,2,9) - x = results['par'+parameters['name'][4]] - for i in range(int(max(results['chain']))): - index=np.where(results['chain']==i) 
- plt.plot(x[index],'.') - plt.ylabel('Kq') - plt.ylim(min_vs[4],max_vs[4]) - plt.xlabel('Iterations') - - plt.subplot(5,2,10) - x = results['par'+parameters['name'][4]][int(len(results)*0.5):] - normed_value = 1 - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Kq') - plt.xlabel('Parameter range') - plt.xlim(min_vs[4],max_vs[4]) - plt.show() - fig.savefig('python_parameters.png',dpi=300) - ######################################################## + ax[-1][0].set_xlabel('Iterations') + ax[-1][1].set_xlabel('Parameter range') + plt.show() + fig.tight_layout() + fig.savefig('DREAM_parameter_uncertainty_Hymod.png',dpi=300) + ####################################################### \ No newline at end of file diff --git a/spotpy/examples/tutorial_fast_hymod.py b/spotpy/examples/tutorial_fast_hymod.py index 4ea0f05e..8c350cf4 100644 --- a/spotpy/examples/tutorial_fast_hymod.py +++ b/spotpy/examples/tutorial_fast_hymod.py @@ -5,15 +5,10 @@ :author: Tobias Houska -This class holds example code how to use the dream algorithm +This class holds example code how to use the FAST algorithm ''' -try: - import spotpy -except ImportError: - import sys - sys.path.append(".") - import spotpy +import spotpy from spotpy.examples.spot_setup_hymod_python import spot_setup @@ -23,7 +18,7 @@ spot_setup = spot_setup() #Select number of maximum repetitions - rep = 1000 + rep = 1345 #Start a sensitivity analysis sampler = spotpy.algorithms.fast(spot_setup, dbname='FAST_hymod', dbformat='csv') diff --git a/spotpy/examples/tutorial_padds.py b/spotpy/examples/tutorial_padds.py index 8f337538..9d279583 100644 --- a/spotpy/examples/tutorial_padds.py +++ b/spotpy/examples/tutorial_padds.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np import sys +from spotpy.parameter import Uniform try: import spotpy except ModuleNotFoundError: @@ -48,10 +49,27 @@ def ZDT1(x): class padds_spot_setup(object): - def __init__(self): + def __init__(self, default=True): self.params = [] - for i in range(30): - self.params.append(spotpy.parameter.Uniform(str(i+1), 0, 1, 0, 0, 0, 1,doc="param no " + str(i+1))) + if default: + for i in range(30): + self.params.append(spotpy.parameter.Uniform(str(i+1), 0, 1, 0, 0, 0, 1,doc="param no " + str(i+1))) + else: + self.params = [Uniform(.5, 5., optguess=1.5, doc='saturated depth at beginning'), + Uniform(.001, .8, optguess=.1, doc='porosity of matrix [m3 Pores / m3 Soil]'), + Uniform(1., 240., optguess=10., + doc='ssaturated conductivity of macropores [m/day]'), + Uniform(.0001, .5, optguess=.05, doc='macropore fraction [m3/m3]'), + Uniform(.005, 1., optguess=.05, + doc='mean distance between the macropores [m]'), + Uniform(0., 1., optguess=0., + doc='water content when matric potential pointing towards -infinity'), + Uniform(.5, 1., optguess=.99, + doc='wetness above which the parabolic extrapolation is used instead of VGM'), + Uniform(0., 50, optguess=.1, + doc='exchange rate [1/day] for macropore-matrix-exchange')] + for i in range(8,30): + self.params.append(Uniform(str(i+1), 0, 1, 0, 0, 0, 1,doc="param no " + str(i+1))) def parameters(self): return spotpy.parameter.generate(self.params) diff --git a/spotpy/examples/tutorial_padds_hymod.py b/spotpy/examples/tutorial_padds_hymod.py index b0966f06..76250f52 100644 --- a/spotpy/examples/tutorial_padds_hymod.py +++ b/spotpy/examples/tutorial_padds_hymod.py @@ -19,13 +19,14 @@ from spotpy.examples.spot_setup_hymod_python import spot_setup 
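For the FAST tutorial changed above (tutorial_fast_hymod.py, rep raised from 1000 to 1345), a hedged end-to-end sketch of running the sensitivity analysis and plotting the result with the resized `plot_fast_sensitivity`; the number of highlighted parameters is an arbitrary choice, not part of this patch:

    import spotpy
    from spotpy.examples.spot_setup_hymod_python import spot_setup

    setup = spot_setup()
    sampler = spotpy.algorithms.fast(setup, dbname='FAST_hymod', dbformat='csv')
    sampler.sample(1345)  # repetition count as chosen in tutorial_fast_hymod.py

    # Load the stored runs and plot the total sensitivity index of each parameter
    results = spotpy.analyser.load_csv_results('FAST_hymod')
    spotpy.analyser.plot_fast_sensitivity(results, number_of_sensitiv_pars=3)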
import matplotlib.pyplot as plt + def multi_obj_func(evaluation, simulation): #used to overwrite objective function in hymod example like1 = abs(spotpy.objectivefunctions.pbias(evaluation, simulation)) like2 = spotpy.objectivefunctions.rmse(evaluation, simulation) like3 = spotpy.objectivefunctions.rsquared(evaluation, simulation)*-1 - return [like1, like2, like3] - + return np.array([like1, like2, like3]) + if __name__ == "__main__": parallel ='seq' # Runs everything in sequential mode np.random.seed(2000) # Makes the results reproducible @@ -33,12 +34,13 @@ def multi_obj_func(evaluation, simulation): # Initialize the Hymod example # In this case, we tell the setup which algorithm we want to use, so # we can use this example for different algorithms - sp_setup=spot_setup(obj_func= multi_obj_func) - + spot_setup=spot_setup(multi_obj_func) + #Select number of maximum allowed repetitions rep=2000 + - # Create the SCE-UA sampler of spotpy, alt_objfun is set to None to force SPOTPY + # Create the PADDS sampler of spotpy, alt_objfun is set to None to force SPOTPY # to jump into the def objectivefunction in the spot_setup class (default is # spotpy.objectivefunctions.rmse) sampler=spotpy.algorithms.padds(sp_setup, dbname='padds_hymod', dbformat='csv') @@ -46,11 +48,10 @@ def multi_obj_func(evaluation, simulation): #Start the sampler, one can specify metric if desired sampler.sample(rep,metric='ones') - # Load the results gained with the sceua sampler, stored in SCEUA_hymod.csv - #results = spotpy.analyser.load_csv_results('DDS_hymod') - - + # Load the results gained with the sceua sampler, stored in padds_hymod.csv + #results = spotpy.analyser.load_csv_results('padds_hymod') results = sampler.getdata() + # from pprint import pprint # #pprint(results) # pprint(results['chain']) @@ -62,8 +63,14 @@ def multi_obj_func(evaluation, simulation): fig_like1.savefig('hymod_padds_objectivefunction_' + str(likno) + '.png', dpi=300) - plt.ylabel('RMSE') - plt.xlabel('Iteration') + fig, ax=plt.subplots(3) + for likenr in range(3): + ax[likenr].plot(results['like'+str(likenr+1)]) + ax[likenr].set_ylabel('like'+str(likenr+1)) + fig.savefig('hymod_padds_objectivefunction.png', dpi=300) + + + # Example plot to show the parameter distribution ###### @@ -117,7 +124,6 @@ def plot_parameter_histogram(ax, results, parameter): fig.savefig('hymod_parameters.png',dpi=300) - # Example plot to show remaining parameter uncertainty # fields=[word for word in results.dtype.names if word.startswith('sim')] fig= plt.figure(3, figsize=(16,9)) @@ -129,8 +135,8 @@ def plot_parameter_histogram(ax, results, parameter): ax11.plot(q5,color='dimgrey',linestyle='solid') ax11.plot(q95,color='dimgrey',linestyle='solid') ax11.fill_between(np.arange(0,len(q5),1),list(q5),list(q95),facecolor='dimgrey',zorder=0, - linewidth=0,label='parameter uncertainty') - ax11.plot(sp_setup.evaluation(),'r.',label='data') + linewidth=0,label='parameter uncertainty') + ax11.plot(spot_setup.evaluation(),'r.',label='data') ax11.set_ylim(-50,450) ax11.set_xlim(0,729) ax11.legend() diff --git a/spotpy/examples/tutorial_sceua_hymod.py b/spotpy/examples/tutorial_sceua_hymod.py index 25c7129c..abd2b380 100644 --- a/spotpy/examples/tutorial_sceua_hymod.py +++ b/spotpy/examples/tutorial_sceua_hymod.py @@ -21,130 +21,44 @@ # Initialize the Hymod example # In this case, we tell the setup which algorithm we want to use, so # we can use this example for different algorithms - spot_setup=spot_setup(users_objective_function=spotpy.objectivefunctions.rmse) +
spot_setup=spot_setup(spotpy.objectivefunctions.rmse) #Select number of maximum allowed repetitions - rep=500 - filename = 'SCEUA_hymod' - # Create the SCE-UA sampler of spotpy, alt_objfun is set to None to force SPOTPY - # to jump into the def objectivefunction in the spot_setup class (default is - # spotpy.objectivefunctions.rmse) + rep=5000 sampler=spotpy.algorithms.sceua(spot_setup, dbname='SCEUA_hymod', dbformat='csv') #Start the sampler, one can specify ngs, kstop, peps and pcento id desired - sampler.sample(rep)#,ngs=10, kstop=50, peps=0.1, pcento=0.1) + sampler.sample(rep, ngs=7, kstop=3, peps=0.1, pcento=0.1) # Load the results gained with the sceua sampler, stored in SCEUA_hymod.csv results = spotpy.analyser.load_csv_results('SCEUA_hymod') - - print(len(results), 'runs were saved.') - fig= plt.figure(1,figsize=(9,5)) + #Plot how the objective function was minimized during sampling + fig= plt.figure(1,figsize=(9,6)) plt.plot(results['like1']) plt.show() plt.ylabel('RMSE') plt.xlabel('Iteration') - fig.savefig('hymod_objectivefunction.png',dpi=300) - - # Example plot to show the parameter distribution ###### - fig= plt.figure(2,figsize=(9,9)) - normed_value = 1 - - plt.subplot(5,2,1) - x = results['parcmax'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) #Ignores burn-in chain - plt.plot(x[index],'.') - plt.ylabel('cmax') - plt.ylim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound) - - - plt.subplot(5,2,2) - x = x[int(len(results)*0.9):] #choose the last 10% of the sample - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('cmax') - plt.xlim(spot_setup.cmax.minbound, spot_setup.cmax.maxbound) - - - plt.subplot(5,2,3) - x = results['parbexp'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('bexp') - plt.ylim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound) - - plt.subplot(5,2,4) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('bexp') - plt.xlim(spot_setup.bexp.minbound, spot_setup.bexp.maxbound) - - - - plt.subplot(5,2,5) - x = results['paralpha'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('alpha') - plt.ylim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound) + fig.savefig('SCEUA_objectivefunctiontrace.png',dpi=150) - - plt.subplot(5,2,6) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('alpha') - plt.xlim(spot_setup.alpha.minbound, spot_setup.alpha.maxbound) - - - plt.subplot(5,2,7) - x = results['parKs'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('Ks') - plt.ylim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound) - - - plt.subplot(5,2,8) - x = x[int(len(results)*0.9):] + # Plot the best model run + #Find the run_id with the minimal objective function value + bestindex,bestobjf = spotpy.analyser.get_minlikeindex(results) - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Ks') - plt.xlim(spot_setup.Ks.minbound, spot_setup.Ks.maxbound) + # 
Select best model run + best_model_run = results[bestindex] - - plt.subplot(5,2,9) - x = results['parKq'] - for i in range(int(max(results['chain'])-1)): - index=np.where(results['chain']==i+1) - plt.plot(x[index],'.') - plt.ylabel('Kq') - plt.ylim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound) - plt.xlabel('Iterations') - - plt.subplot(5,2,10) - x = x[int(len(results)*0.9):] - hist, bins = np.histogram(x, bins=20, density=True) - widths = np.diff(bins) - hist *= normed_value - plt.bar(bins[:-1], hist, widths) - plt.ylabel('Kq') - plt.xlabel('Parameter range') - plt.xlim(spot_setup.Kq.minbound, spot_setup.Kq.maxbound) - plt.show() - fig.savefig('hymod_parameters.png',dpi=300) \ No newline at end of file + #Filter results for simulation results + fields=[word for word in best_model_run.dtype.names if word.startswith('sim')] + best_simulation = list(best_model_run[fields]) + + fig= plt.figure(figsize=(9,6)) + ax = plt.subplot(1,1,1) + ax.plot(best_simulation,color='black',linestyle='solid', label='Best objf.='+str(bestobjf)) + ax.plot(spot_setup.evaluation(),'r.',markersize=3, label='Observation data') + plt.xlabel('Number of Observation Points') + plt.ylabel ('Discharge [l s-1]') + plt.legend(loc='upper right') + fig.savefig('SCEUA_best_modelrun.png',dpi=150) + diff --git a/spotpy/likelihoods.py b/spotpy/likelihoods.py index dfc37a57..b6545f06 100644 --- a/spotpy/likelihoods.py +++ b/spotpy/likelihoods.py @@ -12,7 +12,7 @@ # Dream has now a bunch of options. We tested only likelihoods, which do not need a additional parameter # from this we saw that # ExponentialTransformErrVarShapingFactor with option 2 and 6 is good -# InverseErrorVarianceShapingFactor with option 2 ist good +# InverseErrorVarianceShapingFactor with option 2 is good # gaussianLikelihoodMeasErrorOut with option 6 is good # NoisyABCGaussianLikelihood with Option 6 is good # ABCBoxcarLikelihood with Option 2 and 6 is good @@ -33,6 +33,14 @@ class LikelihoodError(Exception): def __generateMeaserror(data): return np.array(data) * 0.1 +def __calcSimpleDeviation_bias(data, comparedata, mu_h): + __standartChecksBeforeStart(data, comparedata) + d = np.array(data) + c = np.array(comparedata) + #Adjust c for multiplicative bias parameter: + mu_t = np.exp(mu_h * c) + Et = c * mu_t + return d - Et def __calcSimpleDeviation(data, comparedata): __standartChecksBeforeStart(data, comparedata) @@ -429,7 +437,6 @@ def generalizedLikelihoodFunction(data, comparedata, measerror=None, params=None """ __standartChecksBeforeStart(data, comparedata) - errorArr = __calcSimpleDeviation(data, comparedata) if measerror is None: measerror = __generateMeaserror(data) measerror = np.array(measerror) @@ -505,7 +512,7 @@ def generalizedLikelihoodFunction(data, comparedata, measerror=None, params=None mu_xi = 0.0 n = data.__len__() - + errorArr = __calcSimpleDeviation_bias(data, comparedata, muh) sum_at = 0 # formula for a_t is from page 3, (6) for j in range(n - 1): diff --git a/spotpy/parameter.py b/spotpy/parameter.py index c03e9608..c786172c 100644 --- a/spotpy/parameter.py +++ b/spotpy/parameter.py @@ -431,7 +431,7 @@ def __init__(self, *args, **kwargs): default is median of rndfunc(*rndargs, size=1000) :optguess: (optional) number for start point of parameter default is quantile(0.5) - quantile(0.4) of - rndfunc(*rndargs, size=1000) + rndfunc(*rndargs, size=1000) """ super(Exponential, self).__init__(rnd.exponential, 'Exponential', *args, **kwargs) @@ -474,7 +474,7 @@ def __init__(self, *args, **kwargs): default is median of rndfunc(*rndargs, 
size=1000) :optguess: (optional) number for start point of parameter default is quantile(0.5) - quantile(0.4) of - rndfunc(*rndargs, size=1000) + rndfunc(*rndargs, size=1000) """ super(Wald, self).__init__(rnd.wald, 'Wald', *args, **kwargs) diff --git a/tests/test_analyser.py b/tests/test_analyser.py index ba35f13f..7c8444d8 100644 --- a/tests/test_analyser.py +++ b/tests/test_analyser.py @@ -362,7 +362,7 @@ def test_plot_autocorellation(self): def test_plot_gelman_rubin(self): if sys.version_info >= (3, 6): - spotpy.analyser.plot_gelman_rubin(self.r_hat, fig_name =self.fig_name) + spotpy.analyser.plot_gelman_rubin(self.hymod_results, self.r_hat, fig_name =self.fig_name) self.assertGreaterEqual(abs(os.path.getsize(self.fig_name)), 100) diff --git a/tests/test_dds.py b/tests/test_dds.py index 9f51027b..da6b0df5 100644 --- a/tests/test_dds.py +++ b/tests/test_dds.py @@ -126,6 +126,24 @@ def test_run_own_initial_1(self): def test_run_own_initial_2(self): self.run_a_dds("own_input_2") + def outside_bound(self, x_curr, min_bound, max_bound): + out_left = min_bound > x_curr # [x x_curr # [x
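The test_dds.py hunk is cut off above mid-helper. Purely as an illustration of the element-wise bound check that the truncated `outside_bound` fragment appears to implement, here is a minimal sketch; only the first line is taken from the fragment, the rest is an assumption and not the actual test code:

    import numpy as np

    def outside_bound(x_curr, min_bound, max_bound):
        # True if any entry of x_curr falls below its lower or above its upper bound
        out_left = min_bound > np.asarray(x_curr)
        out_right = max_bound < np.asarray(x_curr)
        return np.any(out_left) or np.any(out_right)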