From 432fa69f83d09b4477f67e3ba692b7254b29c5dd Mon Sep 17 00:00:00 2001 From: AHerzog Date: Tue, 5 Mar 2024 15:34:35 +0100 Subject: [PATCH 1/6] added analyser functions for eFAST algorithm --- src/spotpy/analyser.py | 160 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) diff --git a/src/spotpy/analyser.py b/src/spotpy/analyser.py index 04f80874..bd80b7b4 100644 --- a/src/spotpy/analyser.py +++ b/src/spotpy/analyser.py @@ -1217,3 +1217,163 @@ def _Geweke(samples, intervals=20): # calculate the Geweke test z[k] = (mean1 - mean2) / np.sqrt(var1 + var2) return z + +def na2mean(x): + + """ + Function to replace nan values in an array by the mean of neighbouring values. + Example (1, nan, 3) is converted to (1, 2, 3) + This function workes only for singe nan occurences, it fails with multiple nans + in a row. + + adapted from R package FAST (Reusser et al. 2011) + + Parameters + ---------- + x: array of float + array to be filled + + Returns + ---------- + value: array of float + filled array with nans replaced by mean + + """ + + x1 = np.append(x[1:], [np.nan, np.nan]) + x2 = np.append(np.array([np.nan]), x) + mean = (x1 + x2) /2 + np.place(x, np.isnan(x), mean[np.isnan(x)[-1]]) + return x + +def double_serie(x): + """ + This function reverses the model output series from a number of model runs for + the FAST analysis and appends it to the original series. The last element of the + existing series is not duplicated during this process. + This is required in order to process the model run results for the FAST + analysis with the fft function. + + adapted from R package FAST (Reusser et al. 2011) + + Parameters + ---------- + x: np.array of float + dataseries to be doubled + + Returns + ---------- + value: np.array of float + doubled x + """ + + return(np.append(x, np.flip(x)[1:])) + +def efast_sensitivity(x, numberf, t_runs, t_freq, order = 4, make_plot = False, include_total_variance = False, cukier = True): + """ + function that calculates the sensitivity from a series of model outputs + according to the FAST algorithm + + adapted from R package FAST (Reusser et al. 2011) + + Parameters + ---------- + x: np.array of float, + 1D array of model outputs where parameters vary between runs according to the fast algorithm + numberf: int + Number of parameters vaired + t_runs: float + number of runs required by algorithm (calculated internally in eFAST algorithm) + t_freq: float + frequencies used for parameter sampling (calculated internally in eFAST algorithm) + order: int, optional + Order of parameter frequency independance (see Cukier) + default = 4 + make_plot: bool, optional + plot the Fourier spectrum? + default = False + include_total_variance: bool, optional + include the sum of all variances in the result array? + default = False + + + Returns + ---------- + value: np.array of float + partial variance values accounted for by each parameter + + """ + + if len(x) < t_runs: + raise Exception("Not enough model runs loaded. Expected " + str(t_runs) +" runs, but found only " + str(len(x))) + + y = na2mean(x) + fft = np.fft.fft(double_serie(y)) + ff = abs(fft/len(x))[:len(x)+1] + + freq = np.outer(t_freq, range(1, order+1)) + + total = sum(ff[1:,]**2) + + sens = np.full(freq.shape[0], np.nan) + + for i in range(freq.shape[0]): + index = freq[i,:][freq[i,:] < len(ff)] + sens[i] = np.nansum(ff[index]**2)/total + + if include_total_variance: + np.append(sens, total) + + return sens + +def void_to_arr(results): + """ + Function to transform Spotpy results array (array of void) into np.array (float) + + Parameters + ---------- + results: Spotpy results array + + Returns + ---------- + value: np.array of float + """ + + arr = np.full((results.shape[0], len(results[0])), np.nan) + for index in range(len(results)): + arr[index, :] = list(results[index]) + + return arr + +def plot_efast(dbname, fig_name = "efast_sensitivities.png"): + + """ + Function to plot the series of parameter sensitvities generated by the eFAST algorithm + + Parameters + ---------- + dbname: str + name of the database with the parameter sensitvities generated by eFAST algorithm + fig_name: (optional) str + custom name of the figure + + Returns + ---------- + fig: saves figure as .png + """ + + import matplotlib.pyplot as plt + + senstivities = load_csv_results(dbname) + + parameters = senstivities.dtype.names + sens_values = void_to_arr(senstivities).T + + fig, axs = plt.subplots(len(parameters), sharey=True) + + for i in range(len(parameters)): + axs[i].plot(range(len(sens_values[i,:])), sens_values[i,:]) + axs[i].set_ylabel(parameters[i]) + + fig.savefig(fig_name, dpi=150) + From 0d2c2f06e9b964120d3f1c70f764785074e346b1 Mon Sep 17 00:00:00 2001 From: AHerzog Date: Tue, 5 Mar 2024 15:36:34 +0100 Subject: [PATCH 2/6] added eFAST algorithm adapted from R package Fast by Reusser et al. 2011 --- src/spotpy/algorithms/__init__.py | 1 + src/spotpy/algorithms/efast.py | 425 ++++++++++++++++++++++++++++++ tutorials/tutorial_efast_hymod.py | 39 +++ 3 files changed, 465 insertions(+) create mode 100644 src/spotpy/algorithms/efast.py create mode 100644 tutorials/tutorial_efast_hymod.py diff --git a/src/spotpy/algorithms/__init__.py b/src/spotpy/algorithms/__init__.py index 427b32a5..6b9dd198 100644 --- a/src/spotpy/algorithms/__init__.py +++ b/src/spotpy/algorithms/__init__.py @@ -20,6 +20,7 @@ from .dds import dds # Dynamically Dimensioned Search algorithm from .demcz import demcz # Differential Evolution Markov Chain from .dream import dream # DiffeRential Evolution Adaptive Metropolis +from .efast import efast # eFAST algorithm adapted from FAST R package (Reusser et al. 2011) from .fast import fast # Fourier Amplitude Sensitivity Test from .fscabc import fscabc # Fitness Scaling Artificial Bee Colony from .lhs import lhs # Latin Hypercube Sampling diff --git a/src/spotpy/algorithms/efast.py b/src/spotpy/algorithms/efast.py new file mode 100644 index 00000000..6a97b844 --- /dev/null +++ b/src/spotpy/algorithms/efast.py @@ -0,0 +1,425 @@ +# -*- coding: utf-8 -*- +""" +EFAST algorithm after Reusser et al. transfered from R +Reusser, Dominik E., Wouter Buytaert, and Erwin Zehe. "Temporal dynamics of model parameter sensitivity for computationally expensive models with FAST (Fourier Amplitude Sensitivity Test)." Water Resources Research 47 (2011): W07551. + +Further References: +CUKIER, R. I.; LEVINE, H. B. & SHULER, K. E. Non-Linear Sensitivity Analysis Of Multi-Parameter Model Systems Journal Of Computational Physics, 1978 , 26 , 1-42 +CUKIER, R. I.; FORTUIN, C. M.; SHULER, K. E.; PETSCHEK, A. G. & SCHAIBLY, J. H. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .1. Theory Journal Of Chemical Physics, 1973 , 59 , 3873-3878 +SCHAIBLY, J. H. & SHULER, K. E. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .2. Applications Journal Of Chemical Physics, 1973 , 59 , 3879-3888 +CUKIER, R. I.; SCHAIBLY, J. H. & SHULER, K. E. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .3. Analysis Of Approximations Journal Of Chemical Physics, 1975 , 63 , 1140-1149 + +Transferred form R and implemented by Anna Herzog (2024) + +Copyright (c) 2018 by Tobias Houska +This file is part of Statistical Parameter Optimization Tool for Python(SPOTPY). +:author: Anna Herzog, Tobias Houska +""" + +import numpy as np +import warnings + +from . import _algorithm +from ..analyser import efast_sensitivity, get_modelruns + +class efast(_algorithm): + """ + Efast Algorithm for (distributed) parameter Sensitivity after FAST algorithm according to Cukier 1975 or McRae 1982 + """ + + _unaccepted_parameter_types = () + + def __init__(self, *args, **kwargs): + """ + Input + ---------- + spot_setup: class + model: function + Should be callable with a parameter combination of the parameter-function + and return an list of simulation results (as long as evaluation list) + parameter: function + When called, it should return a random parameter combination. Which can + be e.g. uniform or Gaussian + objectivefunction: function + Should return the objectivefunction for a given list of a model simulation and + observation. + evaluation: function + Should return the true values as return by the model. + + dbname: str + * Name of the database where parameter, objectivefunction value and simulation results will be saved. + + dbformat: str + * ram: fast suited for short sampling time. no file will be created and results are saved in an array. + * csv: A csv file will be created, which you can import afterwards. + + parallel: str + * seq: Sequentiel sampling (default): Normal iterations on one core of your cpu. + * mpi: Message Passing Interface: Parallel computing on cluster pcs (recommended for unix os). + + save_sim: boolean + * True: Simulation results will be saved + * False: Simulation results will not be saved + """ + + + + kwargs["algorithm_name"] = "EFast Sampler" + super(efast, self).__init__(*args, **kwargs) + + # hard coded repetition and frequency values from R package Fast + d_m_cukier75 = [4,8,6,10,20,22,32,40,38,26,56,62,46,76,96,60,86,126,134,112,92,128,154,196,34,416,106,208,328,198,382,88,348,186,140,170,284,568,302,438,410,248,448,388,596,217,100,488,166] + min_runs_cukier75 = [np.nan, np.nan, 19, 39, 71, 91, 167, 243, 315, 403, 487, 579, 687, 907, 1019, 1223, 1367, 1655, 1919, 2087, 2351, 2771, 3087, 3427, 3555, 4091, 4467, 4795, 5679, 5763, 6507, 7103, 7523, 8351, 9187, 9667, 10211, 10775, 11339, 7467, 12891, 13739, 14743, 15743, 16975, 18275, 18927, 19907, 20759, 21803] + omega_m_cukier75 = [np.nan, np.nan, 1, 5, 11, 1, 17, 23, 19, 25, 41, 31, 23, 87, 67, 73, 58, 143, 149, 99, 119, 237, 267, 283, 151, 385, 157, 215, 449, 163, 337, 253, 375, 441, 673, 773, 875, 873, 587, 849, 623, 637, 891, 943, 1171, 1225, 1335, 1725, 1663, 2019] + d_m_mcrae82 = [4, 8, 6, 10, 20, 22, 32, 40, 38, 26, 56, 62, 46, 76, 96] + min_runs_mcrae82 = [0, 15, 27, 47, 79, 99, 175, 251, 323, 411, 495, 587, 695, 915, 1027] + omega_m_mcrae82 = [0, 3, 1, 5, 11, 1, 17, 23, 19, 25, 41, 31, 23, 87, 67] + + def freq_cukier(self, m, i = 1, omega_before = -1): + + """ + This function creates an array of independant frequencies accroding to + Cukier 1975 for usage in the fast method. + + Parameters + ---------- + m: int + number of parameters (frequencies) needed + i: (intern) int + internally used recursion counter + omega_before: (intern) int + internally used previous frequency + + Returns + ---------- + value: np.array of float + A 1d-Array of independant frequencies to the order of 4 + + """ + + if i <= 1: + if m >= len(self.omega_m_cukier75): + raise Exception("Parameter number m to high, not implemented") + else: + o = self.omega_m_cukier75[m-1] + return np.append(o, self.freq_cukier(m, i+1, o)) + else: + o = omega_before + self.d_m_cukier75[m-i] + if i == m: + return o + else: + return np.append(o, self.freq_cukier(m, i+1, o)) + + + def freq_mcrae82(self, m, i = 1, omega_before = -1): + + """ + This function creates an array of independant frequencies accroding to + McRae 1982 for usage in the fast method. + + Parameters + ---------- + m: int + number of parameters (frequencies) needed + i: (intern) int + internally used recursion counter + omega_before: (intern) int + internally used previous frequency + + Returns + ---------- + value: np.array of float + A 1d-Array of independant frequencies to the order of 4 + + """ + + if i <= 1: + if m >= len(self.omega_m_mcrae82): + raise Exception("Parameter number m to high, not implemented") + else: + o = self.omega_m_mcrae82[m-1] + return np.append(o, self.freq_mcrae82(m, i+1, o)) + else: + o = omega_before + self.d_m_mcrae82[m-i] + if i == m: + return o + else: + return np.append(o, self.freq_mcrae82(m, i+1, o)) + + + def s(self, m, factor = 1, cukier = True): + + """ + Function that generates a number of equally spaced values between -p1/2 + and pi/2. The number is determined by the number of runs required for the + FAST method for a number of parameters (min_runs_cukier or min_runs_mcrae) + + Parameters + ---------- + m: int + number of parameters/ frequencies + factor: (optional) int + used if more than the minimum required shall be generated + default: the length of the returned array is the minimum number + required for the FAST time factor + cukier: (optional) bool + indicates weather to use the frequencies after Cukier or McRae + Default is Cukier + + Returns + ---------- + value: array of float + an array of equally spaced values between -pi/2 and pi/2 + + """ + + if cukier: + min_runs = self.min_runs_cukier75 + else: + min_runs = self.min_runs_mcrae82 + + r = np.round(min_runs[m-1]*factor) + r_range = np.array(range(1,r+1)) + s = np.pi/r * (2*r_range-r-1)/2 + + return s + + + def S(self, m, factor = 1, cukier = True): # , par_names = np.nan, reorder = range(0,m) + + """ + Function to generate an array of values with provide the base for parameters + for the FAST method. It is usally not used directly but called from the + fast_parameters function. + + Parameters + ---------- + m: int + number of parameters/frequencies + factor: (optional) int + used to create more values than the minimum required. + cukier: (optional) bool + indicates weather to use the frequencies after Cukier or McRae + Default is Cukier + + Returns + ---------- + value: array of float + an array with the shape (number of runs, parameters) + """ + + if cukier: + omega = self.freq_cukier(m) + else: + omega = self.freq_mcrae82(m) + + tab = np.outer(self.s(m, factor, cukier), omega) + + toreturn = np.arcsin(np.sin(tab))/np.pi + + # naming array dimensions is not possible with numpy but convention would be toreturn.shape () = (runs, parameternames) + + return toreturn + + + def rerange(self, data, min_goal = 0, max_goal = 1, center = np.nan): + + """ + This function performes a linear transformation of the data, such that + afterwards range(data) = (theMin, theMax) + + Parameters + ---------- + data: array of flaot + an array with the data to transform + in this case the parameter distribution generated by function S + min_goal: float + the new minimum value (lower parameter bound) + max_goal: float + the new maximum value (upper parameter bound) + center: (optional) float + indicates which old value should become the new center + default: (max_goal+min_goal)/2 + + Returns + ---------- + value: array of float + array with the transformed data + + """ + + min_data = min(data) + max_data = max(data) + + if np.isnan(center): + max_data = max_data + dat = data - min_data + dat = dat/(max_data-min_data) * (max_goal-min_goal) + dat = dat + min_goal + return(dat) + else: + # split linear transformation, the data is split into two segments, one blow center, one above, + # each segment undergoes it's own linear transformation + below = data <= center + above = data > center + new_data = np.copy(data) + np.place(new_data, below, self.rerange(np.insert(data[below], 0, center), min_goal = min_goal, max_goal= max_goal/2)[1:]) + np.place(new_data, above, self.rerange(np.insert(data[above], 0, center), min_goal = max_goal/2, max_goal= max_goal)[1:]) + return(new_data) + + + def fast_parameters(self, minimum, maximum, cukier = True, factor = 1, logscale = np.nan, names = np.nan): + + """ + Function for the creation of a FAST Parameter set based on thier range + + Parameters + ---------- + minimum: array of float + array containing the lower parameter boundries + maximum: array of float + array containing the upper parameter boundries + names: (optional) str + array containing the parameter names + factor: (optional) int + used to create more values than the minimum required. + cukier: (optional) bool + indicates weather to use the frequencies after Cukier or McRae + Default is Cukier + logscale: (optional) bool + array containing bool values indicating weather a parameter is varied + on a logarithmic scale. In that case minimum and maximum are exponents + + Returns + ---------- + value: array of float + an array with the shape (number of runs, parameters) containing the + required parameter sets + + """ + + if np.isnan(logscale): + logscale = np.full(minimum.shape, False) + if np.isnan(names): + names = ["P"+str(i) for i in range(minimum.shape[0])] + names = np.array(names) + + n = len(minimum) + + if (n != len(maximum)): + raise Exception("Expecting minimum and maximum of same size") + elif(n != len(names)): + raise Exception("Expecting minimum and names of same size") + elif(n != len(logscale)): + raise Exception("Expecting minimum and logscale of same size") + + toreturn = self.S(m=n, cukier = cukier, factor = factor) #par_names = names, + + for i in range(0,n): + toreturn[:,i] = self.rerange(toreturn[:,i], minimum[i], maximum[i]) + if logscale[i]: + toreturn[:,i] = 10**toreturn[:,i] + + return toreturn + + + def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): + """ + Samples from the EFAST algorithm. + + Input + ---------- + repetitions: int + Maximum number of runs. + """ + + print("generating eFAST Parameters") + # Get the names of the parameters to analyse + names = self.parameter()["name"] + # Get the minimum and maximum value for each parameter from the + # distribution + parmin, parmax = self.parameter()["minbound"], self.parameter()["maxbound"] + self.numberf = len(parmin) + min_runs = self.min_runs_cukier75[self.numberf-1] + + if min_runs > repetitions: + warnings.warn("specified number of repetitions is smaller than minimum required number for FAST analysis!\n"+ + "Simultions will be perfomed but eFAST analysis might not be possible") + else: + repetitions = min_runs + print("Number of specified repetitions equals or exeeds number of minimum required repetitions for eFAST analysis\n"+ + "programm will stop after required runs.") + + self.repetitions = repetitions + self.set_repetiton(repetitions) + + # Generate Matrix with eFAST parameter sets + N = self.fast_parameters(parmin, parmax, cukier, factor, logscale) + + print("Starting the eFast algorithm with {} repetitions...".format(repetitions)) + + # A generator that produces parametersets if called + param_generator = ( + (rep, N[rep,:]) for rep in range(int(repetitions)) + ) + for rep, randompar, simulations in self.repeat(param_generator): + # A function that calculates the fitness of the run and the manages the database + self.postprocessing(rep, randompar, simulations) + + if self.breakpoint == "write" or self.breakpoint == "readandwrite": + if rep >= lastbackup + self.backup_every_rep: + work = (rep, N[rep,:]) + self.write_breakdata(self.dbname, work) + lastbackup = rep + self.final_call() + + def calc_sensitivity(self, results, dbname, cukier = True): + """ + Function to calcultae the sensitivity for a data series (eg. a time series) + + Parameters + ---------- + + data: np.array of float + spopty restults array + data array containing the model output used for the sensitivity calculation + with one row per parameter set + + numberf : int + number of parameters used + + xval: int, optional + + """ + print("call analyser") + data = get_modelruns(results) + + # convert array of void to array of float + mod_results = np.full((data.shape[0], len(data[0])), np.nan) + + for index in range(len(data)): + mod_results[index, :] = list(data[index]) + + numberf = len(self.parameter()["minbound"]) + + # sens_data = np.full((len(results[0]), numberf), np.nan) + + if cukier: + t_runs = self.min_runs_cukier75[numberf-1] + t_freq = self.freq_cukier(numberf) + else: + t_runs = self.min_runs_mcrae82[numberf-1] + t_freq = self.freq_mcrae82(numberf) + + # Get the names of the parameters to analyse + names = ["par" + name for name in self.parameter()["name"]] + + f = open(dbname+".csv", 'w+') + np.savetxt(f, [names], "%s", delimiter=",") + + for index in range(mod_results.shape[1]): + x_data = mod_results[:, index] + sens_data = efast_sensitivity(x_data, numberf, t_runs, t_freq) + np.savetxt(f, [sens_data], delimiter=",", fmt='%1.5f') + + f.close() \ No newline at end of file diff --git a/tutorials/tutorial_efast_hymod.py b/tutorials/tutorial_efast_hymod.py new file mode 100644 index 00000000..a165572e --- /dev/null +++ b/tutorials/tutorial_efast_hymod.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +""" +Copyright 2015 by Tobias Houska +This file is part of Statistical Parameter Estimation Tool (SPOTPY). + +:author: Anna Herzog, Tobias Houska + +This class holds example code how to use the eFAST algorithm, which was adapted from the R fast package by Reusser et al. 2011 +""" + +import numpy as np + +import spotpy +from spotpy.examples.spot_setup_hymod_python import spot_setup + +if __name__ == "__main__": + parallel = "seq" + # Initialize the Hymod example + spot_setup = spot_setup() + + # Select number of maximum repetitions + # CHeck out https://spotpy.readthedocs.io/en/latest/Sensitivity_analysis_with_FAST/ + # How to determine an appropriate number of repetitions + rep = 100 + + # Start a sensitivity analysis + sampler = spotpy.algorithms.efast( + spot_setup, dbname="eFAST_hymod", dbformat="csv", db_precision=np.float32 + ) + sampler.sample(rep) + + # Load the results gained with the fast sampler, stored in FAST_hymod.csv + results = spotpy.analyser.load_csv_results("eFAST_hymod") + + # calculate the sensitivities + sampler.calc_sensitivity(results, dbname = "eFAST_sens_hymod") + + # plot the temporal parameter sensitivities + spotpy.analyser.plot_efast(dbname="eFast_sens_hymod") From 782cd48366d97489c9590facfc46b32f91f6d37e Mon Sep 17 00:00:00 2001 From: AHerzog Date: Tue, 5 Mar 2024 16:13:53 +0100 Subject: [PATCH 3/6] removed na2mean --- src/spotpy/analyser.py | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/src/spotpy/analyser.py b/src/spotpy/analyser.py index bd80b7b4..11c874cf 100644 --- a/src/spotpy/analyser.py +++ b/src/spotpy/analyser.py @@ -1218,34 +1218,6 @@ def _Geweke(samples, intervals=20): z[k] = (mean1 - mean2) / np.sqrt(var1 + var2) return z -def na2mean(x): - - """ - Function to replace nan values in an array by the mean of neighbouring values. - Example (1, nan, 3) is converted to (1, 2, 3) - This function workes only for singe nan occurences, it fails with multiple nans - in a row. - - adapted from R package FAST (Reusser et al. 2011) - - Parameters - ---------- - x: array of float - array to be filled - - Returns - ---------- - value: array of float - filled array with nans replaced by mean - - """ - - x1 = np.append(x[1:], [np.nan, np.nan]) - x2 = np.append(np.array([np.nan]), x) - mean = (x1 + x2) /2 - np.place(x, np.isnan(x), mean[np.isnan(x)[-1]]) - return x - def double_serie(x): """ This function reverses the model output series from a number of model runs for @@ -1307,8 +1279,7 @@ def efast_sensitivity(x, numberf, t_runs, t_freq, order = 4, make_plot = False, if len(x) < t_runs: raise Exception("Not enough model runs loaded. Expected " + str(t_runs) +" runs, but found only " + str(len(x))) - y = na2mean(x) - fft = np.fft.fft(double_serie(y)) + fft = np.fft.fft(double_serie(x)) ff = abs(fft/len(x))[:len(x)+1] freq = np.outer(t_freq, range(1, order+1)) From 6eae30670557590941cc10d94145501a3eed0f1b Mon Sep 17 00:00:00 2001 From: AHerzog Date: Tue, 5 Mar 2024 16:14:19 +0100 Subject: [PATCH 4/6] added parameter descriptions --- src/spotpy/algorithms/efast.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/spotpy/algorithms/efast.py b/src/spotpy/algorithms/efast.py index 6a97b844..bbc6ec88 100644 --- a/src/spotpy/algorithms/efast.py +++ b/src/spotpy/algorithms/efast.py @@ -331,6 +331,14 @@ def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): ---------- repetitions: int Maximum number of runs. + factor: (optional) int + used to create more values than the minimum required. + cukier: (optional) bool + indicates weather to use the frequencies after Cukier or McRae + Default is Cukier + logscale: (optional) bool + array containing bool values indicating weather a parameter is varied + on a logarithmic scale. In that case minimum and maximum are exponents """ print("generating eFAST Parameters") @@ -350,7 +358,6 @@ def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): print("Number of specified repetitions equals or exeeds number of minimum required repetitions for eFAST analysis\n"+ "programm will stop after required runs.") - self.repetitions = repetitions self.set_repetiton(repetitions) # Generate Matrix with eFAST parameter sets @@ -380,18 +387,18 @@ def calc_sensitivity(self, results, dbname, cukier = True): Parameters ---------- - data: np.array of float + results: np.array of void spopty restults array - data array containing the model output used for the sensitivity calculation - with one row per parameter set - numberf : int - number of parameters used + dbname: (optional) str + name of file to store sensitivity values + + cukier: (optional) bool + indicates weather to use the frequencies after Cukier or McRae + Default is Cukier - xval: int, optional """ - print("call analyser") data = get_modelruns(results) # convert array of void to array of float From 1709d7ca71890be0f5adb83c762ca1762c9211ca Mon Sep 17 00:00:00 2001 From: AHerzog Date: Tue, 5 Mar 2024 16:45:25 +0100 Subject: [PATCH 5/6] removed factor, changed freqency definition --- src/spotpy/algorithms/efast.py | 68 +++++++++++++++++----------------- src/spotpy/analyser.py | 2 +- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/src/spotpy/algorithms/efast.py b/src/spotpy/algorithms/efast.py index bbc6ec88..d6d531db 100644 --- a/src/spotpy/algorithms/efast.py +++ b/src/spotpy/algorithms/efast.py @@ -147,7 +147,7 @@ def freq_mcrae82(self, m, i = 1, omega_before = -1): return np.append(o, self.freq_mcrae82(m, i+1, o)) - def s(self, m, factor = 1, cukier = True): + def s(self, m, freq = 'cukier'): """ Function that generates a number of equally spaced values between -p1/2 @@ -158,12 +158,9 @@ def s(self, m, factor = 1, cukier = True): ---------- m: int number of parameters/ frequencies - factor: (optional) int - used if more than the minimum required shall be generated - default: the length of the returned array is the minimum number - required for the FAST time factor - cukier: (optional) bool - indicates weather to use the frequencies after Cukier or McRae + + freq: (optional) str + indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier Returns @@ -173,19 +170,21 @@ def s(self, m, factor = 1, cukier = True): """ - if cukier: + if freq == 'cukier': min_runs = self.min_runs_cukier75 - else: + elif freq == 'mcrae': min_runs = self.min_runs_mcrae82 + else: + raise Exception("Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!") - r = np.round(min_runs[m-1]*factor) + r = min_runs[m-1] r_range = np.array(range(1,r+1)) s = np.pi/r * (2*r_range-r-1)/2 return s - def S(self, m, factor = 1, cukier = True): # , par_names = np.nan, reorder = range(0,m) + def S(self, m, freq = 'cukier'): """ Function to generate an array of values with provide the base for parameters @@ -196,10 +195,9 @@ def S(self, m, factor = 1, cukier = True): # , par_names = np.nan, reorder = ran ---------- m: int number of parameters/frequencies - factor: (optional) int - used to create more values than the minimum required. - cukier: (optional) bool - indicates weather to use the frequencies after Cukier or McRae + + freq: (optional) str + indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier Returns @@ -208,12 +206,14 @@ def S(self, m, factor = 1, cukier = True): # , par_names = np.nan, reorder = ran an array with the shape (number of runs, parameters) """ - if cukier: + if freq == 'cukier': omega = self.freq_cukier(m) - else: + elif freq == 'mcrae': omega = self.freq_mcrae82(m) + else: + raise Exception("Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!") - tab = np.outer(self.s(m, factor, cukier), omega) + tab = np.outer(self.s(m, freq), omega) toreturn = np.arcsin(np.sin(tab))/np.pi @@ -268,7 +268,7 @@ def rerange(self, data, min_goal = 0, max_goal = 1, center = np.nan): return(new_data) - def fast_parameters(self, minimum, maximum, cukier = True, factor = 1, logscale = np.nan, names = np.nan): + def fast_parameters(self, minimum, maximum, freq = 'cukier', logscale = np.nan, names = np.nan): """ Function for the creation of a FAST Parameter set based on thier range @@ -281,10 +281,8 @@ def fast_parameters(self, minimum, maximum, cukier = True, factor = 1, logscale array containing the upper parameter boundries names: (optional) str array containing the parameter names - factor: (optional) int - used to create more values than the minimum required. - cukier: (optional) bool - indicates weather to use the frequencies after Cukier or McRae + freq: (optional) str + indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier logscale: (optional) bool array containing bool values indicating weather a parameter is varied @@ -313,7 +311,7 @@ def fast_parameters(self, minimum, maximum, cukier = True, factor = 1, logscale elif(n != len(logscale)): raise Exception("Expecting minimum and logscale of same size") - toreturn = self.S(m=n, cukier = cukier, factor = factor) #par_names = names, + toreturn = self.S(m=n, freq = freq) #par_names = names, for i in range(0,n): toreturn[:,i] = self.rerange(toreturn[:,i], minimum[i], maximum[i]) @@ -323,7 +321,7 @@ def fast_parameters(self, minimum, maximum, cukier = True, factor = 1, logscale return toreturn - def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): + def sample(self, repetitions, freq = 'cukier', logscale = np.nan): """ Samples from the EFAST algorithm. @@ -331,10 +329,8 @@ def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): ---------- repetitions: int Maximum number of runs. - factor: (optional) int - used to create more values than the minimum required. - cukier: (optional) bool - indicates weather to use the frequencies after Cukier or McRae + freq: (optional) str + indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier logscale: (optional) bool array containing bool values indicating weather a parameter is varied @@ -361,7 +357,7 @@ def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): self.set_repetiton(repetitions) # Generate Matrix with eFAST parameter sets - N = self.fast_parameters(parmin, parmax, cukier, factor, logscale) + N = self.fast_parameters(parmin, parmax, freq, logscale) print("Starting the eFast algorithm with {} repetitions...".format(repetitions)) @@ -380,7 +376,7 @@ def sample(self, repetitions, cukier = True, factor = 1, logscale = np.nan): lastbackup = rep self.final_call() - def calc_sensitivity(self, results, dbname, cukier = True): + def calc_sensitivity(self, results, dbname, freq = 'cukier'): """ Function to calcultae the sensitivity for a data series (eg. a time series) @@ -393,8 +389,8 @@ def calc_sensitivity(self, results, dbname, cukier = True): dbname: (optional) str name of file to store sensitivity values - cukier: (optional) bool - indicates weather to use the frequencies after Cukier or McRae + freq: (optional) str + indicates weather to use the frequencies after Cukier 'cukier' or McRae 'mcrae' Default is Cukier @@ -411,12 +407,14 @@ def calc_sensitivity(self, results, dbname, cukier = True): # sens_data = np.full((len(results[0]), numberf), np.nan) - if cukier: + if freq == 'cukier': t_runs = self.min_runs_cukier75[numberf-1] t_freq = self.freq_cukier(numberf) - else: + elif freq == 'mcrae': t_runs = self.min_runs_mcrae82[numberf-1] t_freq = self.freq_mcrae82(numberf) + else: + raise Exception("Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!") # Get the names of the parameters to analyse names = ["par" + name for name in self.parameter()["name"]] diff --git a/src/spotpy/analyser.py b/src/spotpy/analyser.py index 11c874cf..8a150d75 100644 --- a/src/spotpy/analyser.py +++ b/src/spotpy/analyser.py @@ -1241,7 +1241,7 @@ def double_serie(x): return(np.append(x, np.flip(x)[1:])) -def efast_sensitivity(x, numberf, t_runs, t_freq, order = 4, make_plot = False, include_total_variance = False, cukier = True): +def efast_sensitivity(x, t_runs, t_freq, order = 4, include_total_variance = False): """ function that calculates the sensitivity from a series of model outputs according to the FAST algorithm From 55cbeec523cd176a39cee7e4866a0091a0efd6ee Mon Sep 17 00:00:00 2001 From: AHerzog Date: Wed, 6 Mar 2024 11:33:13 +0100 Subject: [PATCH 6/6] extended documentation --- src/spotpy/algorithms/efast.py | 167 +++++++++++++++++++++++++++------ src/spotpy/analyser.py | 23 ++++- 2 files changed, 159 insertions(+), 31 deletions(-) diff --git a/src/spotpy/algorithms/efast.py b/src/spotpy/algorithms/efast.py index d6d531db..aedb1561 100644 --- a/src/spotpy/algorithms/efast.py +++ b/src/spotpy/algorithms/efast.py @@ -1,19 +1,30 @@ # -*- coding: utf-8 -*- """ -EFAST algorithm after Reusser et al. transfered from R -Reusser, Dominik E., Wouter Buytaert, and Erwin Zehe. "Temporal dynamics of model parameter sensitivity for computationally expensive models with FAST (Fourier Amplitude Sensitivity Test)." Water Resources Research 47 (2011): W07551. +EFAST algorithm transfered from CRAN R package 'fast' by Dominik Reusser (2015) +Copy of most functions and descriptions and implementation in SPOTPY framework by Anna Herzog (2024) + +This Version of FAST differs from the previously impemented Version, as it does not require a precalculation of minimum model runs by the user. It rather uses a hard coded definition of minimum runs based on Cukier or McRae. +It therefore requieres considreably less model runs (for 5 Parameters 2245 (fast) vs. 71 runs (efast)). + +For further information on the functions, the origninal R package and the fast algorithm please have a look at the R package description from CRAN, or see the following references: + +Reusser, Dominik E., Wouter Buytaert, and Erwin Zehe. +"Temporal dynamics of model parameter sensitivity for computationally expensive models with FAST (Fourier Amplitude Sensitivity Test)." Water Resources Research 47 (2011): W07551. Further References: -CUKIER, R. I.; LEVINE, H. B. & SHULER, K. E. Non-Linear Sensitivity Analysis Of Multi-Parameter Model Systems Journal Of Computational Physics, 1978 , 26 , 1-42 -CUKIER, R. I.; FORTUIN, C. M.; SHULER, K. E.; PETSCHEK, A. G. & SCHAIBLY, J. H. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .1. Theory Journal Of Chemical Physics, 1973 , 59 , 3873-3878 -SCHAIBLY, J. H. & SHULER, K. E. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .2. Applications Journal Of Chemical Physics, 1973 , 59 , 3879-3888 -CUKIER, R. I.; SCHAIBLY, J. H. & SHULER, K. E. Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .3. Analysis Of Approximations Journal Of Chemical Physics, 1975 , 63 , 1140-1149 +CUKIER, R. I.; LEVINE, H. B. & SHULER, K. E. Non-Linear +"Sensitivity Analysis Of Multi-Parameter Model Systems" Journal Of Computational Physics, 1978 , 26 , 1-42 +CUKIER, R. I.; FORTUIN, C. M.; SHULER, K. E.; PETSCHEK, A. G. & SCHAIBLY, J. H. +"Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .1. Theory" Journal Of Chemical Physics, 1973 , 59 , 3873-3878 +SCHAIBLY, J. H. & SHULER, K. E. +"Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .2. Applications" Journal Of Chemical Physics, 1973 , 59 , 3879-3888 +CUKIER, R. I.; SCHAIBLY, J. H. & SHULER, K. E. +"Study Of Sensitivity Of Coupled Reaction Systems To Uncertainties In Rate Coefficients .3. Analysis Of Approximations" Journal Of Chemical Physics, 1975 , 63 , 1140-1149 -Transferred form R and implemented by Anna Herzog (2024) Copyright (c) 2018 by Tobias Houska This file is part of Statistical Parameter Optimization Tool for Python(SPOTPY). -:author: Anna Herzog, Tobias Houska +:author: Anna Herzog, Tobias Houska, Dominik Reusser """ import numpy as np @@ -24,7 +35,9 @@ class efast(_algorithm): """ - Efast Algorithm for (distributed) parameter Sensitivity after FAST algorithm according to Cukier 1975 or McRae 1982 + EFAST Algorithm for (distributed) parameter Sensitivity after FAST algorithm according to Cukier 1975 or McRae 1982 + The Fourier Amplitude Sensitivity Test (FAST) is a method to determine global sensitvities of a model on parameter changes + within relatively few model runs. """ _unaccepted_parameter_types = () @@ -79,7 +92,7 @@ def freq_cukier(self, m, i = 1, omega_before = -1): """ This function creates an array of independant frequencies accroding to - Cukier 1975 for usage in the fast method. + Cukier 1975 for usage in the eFAST method. Parameters ---------- @@ -90,11 +103,22 @@ def freq_cukier(self, m, i = 1, omega_before = -1): omega_before: (intern) int internally used previous frequency + Raises + ---------- + Exeption: + "Parameter number m to high, not implemented" + Execution is stopped, if the number of Parameters is to high (max. Number of Parameters for Cukier = 50) + Returns ---------- - value: np.array of float + np.array of float: A 1d-Array of independant frequencies to the order of 4 + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog + """ if i <= 1: @@ -115,7 +139,7 @@ def freq_mcrae82(self, m, i = 1, omega_before = -1): """ This function creates an array of independant frequencies accroding to - McRae 1982 for usage in the fast method. + McRae 1982 for usage in the eFAST method. Parameters ---------- @@ -126,11 +150,22 @@ def freq_mcrae82(self, m, i = 1, omega_before = -1): omega_before: (intern) int internally used previous frequency + Raises + ---------- + Exeption: + "Parameter number m to high, not implemented" + Execution is stopped, if the number of Parameters is to high (max. Number of Parameters for McRae = 15) + Returns ---------- - value: np.array of float + np.array of float: A 1d-Array of independant frequencies to the order of 4 + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog + """ if i <= 1: @@ -151,8 +186,8 @@ def s(self, m, freq = 'cukier'): """ Function that generates a number of equally spaced values between -p1/2 - and pi/2. The number is determined by the number of runs required for the - FAST method for a number of parameters (min_runs_cukier or min_runs_mcrae) + and pi/2. The number is determined by the number of runs required of the + eFAST method for a number of parameters (min_runs_cukier or min_runs_mcrae) Parameters ---------- @@ -163,11 +198,22 @@ def s(self, m, freq = 'cukier'): indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier + Raises + ---------- + Exeption: + "Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!" + Execution is stopped if an invalid method for the frequency selection is provided + Returns ---------- - value: array of float + 2D array of float: an array of equally spaced values between -pi/2 and pi/2 + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog + """ if freq == 'cukier': @@ -187,8 +233,8 @@ def s(self, m, freq = 'cukier'): def S(self, m, freq = 'cukier'): """ - Function to generate an array of values with provide the base for parameters - for the FAST method. It is usally not used directly but called from the + Function to generate an array of values, which provides the base for the parameterdistribution + in the FAST method. It is usally not used directly but called from the fast_parameters function. Parameters @@ -200,10 +246,21 @@ def S(self, m, freq = 'cukier'): indicates weather to use the frequencies after 'cukier' or McRae 'mcrae' Default is Cukier + Raises + ---------- + Exeption: + "Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!" + Execution is stopped if an invalid method for the frequency selection is provided + Returns ---------- - value: array of float + 2D array of float: an array with the shape (number of runs, parameters) + + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog """ if freq == 'cukier': @@ -243,8 +300,13 @@ def rerange(self, data, min_goal = 0, max_goal = 1, center = np.nan): Returns ---------- - value: array of float + 2D array of float: array with the transformed data + + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog """ @@ -288,12 +350,23 @@ def fast_parameters(self, minimum, maximum, freq = 'cukier', logscale = np.nan, array containing bool values indicating weather a parameter is varied on a logarithmic scale. In that case minimum and maximum are exponents + Raises + ---------- + Exeption: + "Expecting minimum and maximum of same size" + Execution is stopped if the number of minimum, maximum or logscale values differ + Returns ---------- value: array of float an array with the shape (number of runs, parameters) containing the required parameter sets + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog + """ if np.isnan(logscale): @@ -323,7 +396,7 @@ def fast_parameters(self, minimum, maximum, freq = 'cukier', logscale = np.nan, def sample(self, repetitions, freq = 'cukier', logscale = np.nan): """ - Samples from the EFAST algorithm. + Samples from the eFAST algorithm. Input ---------- @@ -335,6 +408,30 @@ def sample(self, repetitions, freq = 'cukier', logscale = np.nan): logscale: (optional) bool array containing bool values indicating weather a parameter is varied on a logarithmic scale. In that case minimum and maximum are exponents + + Raises + ---------- + Warning: + "specified number of repetitions is smaller than minimum required for eFAST analysis! + Simultions will be perfomed but eFAST analysis might not be possible" + + If the specified number of repetitions is smaller than required by the algorithm, the + algortihm will still execute, but the usage of spotpy.analyser.efast.calc_sensitivity() + might not be possible. Rather define a large number of reps as only required minimum runs + will be performed anyway. + + Warning: + "Number of specified repetitions equals or exeeds number of minimum required repetitions for eFAST analysis + programm will stop after required runs." + + Returns + ---------- + database: + spotpy data base in chosen format with objective function values, parameter values and simulation results + + Author + ---------- + Tobias Houska, Anna Herzog """ print("generating eFAST Parameters") @@ -347,7 +444,7 @@ def sample(self, repetitions, freq = 'cukier', logscale = np.nan): min_runs = self.min_runs_cukier75[self.numberf-1] if min_runs > repetitions: - warnings.warn("specified number of repetitions is smaller than minimum required number for FAST analysis!\n"+ + warnings.warn("specified number of repetitions is smaller than minimum required for eFAST analysis!\n"+ "Simultions will be perfomed but eFAST analysis might not be possible") else: repetitions = min_runs @@ -378,21 +475,35 @@ def sample(self, repetitions, freq = 'cukier', logscale = np.nan): def calc_sensitivity(self, results, dbname, freq = 'cukier'): """ - Function to calcultae the sensitivity for a data series (eg. a time series) + Function to calcultae the sensitivity for a data series (eg. a time series) + based on the eFAST algorithm. Parameters ---------- - results: np.array of void - spopty restults array - + spopty restults array as loaded with spotpy.analyser.load_csv_results() dbname: (optional) str - name of file to store sensitivity values - + name of file to store sensitivity values freq: (optional) str indicates weather to use the frequencies after Cukier 'cukier' or McRae 'mcrae' Default is Cukier + Raises + ---------- + Exeption: + "Missing option for Frequency selection for Parameter definition, choose cukier or mcrae!" + Execution is stopped if an invalid method for the frequency selection is provided + + Returns + ---------- + database: + database containing the temporal parameter sensitivities with seperate column + for each parameter and row for eg. time step + + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog """ data = get_modelruns(results) diff --git a/src/spotpy/analyser.py b/src/spotpy/analyser.py index 8a150d75..209ad45c 100644 --- a/src/spotpy/analyser.py +++ b/src/spotpy/analyser.py @@ -1221,7 +1221,7 @@ def _Geweke(samples, intervals=20): def double_serie(x): """ This function reverses the model output series from a number of model runs for - the FAST analysis and appends it to the original series. The last element of the + the eFAST analysis and appends it to the original series. The last element of the existing series is not duplicated during this process. This is required in order to process the model run results for the FAST analysis with the fft function. @@ -1237,6 +1237,11 @@ def double_serie(x): ---------- value: np.array of float doubled x + + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog """ return(np.append(x, np.flip(x)[1:])) @@ -1244,7 +1249,7 @@ def double_serie(x): def efast_sensitivity(x, t_runs, t_freq, order = 4, include_total_variance = False): """ function that calculates the sensitivity from a series of model outputs - according to the FAST algorithm + according to the eFAST algorithm adapted from R package FAST (Reusser et al. 2011) @@ -1268,12 +1273,24 @@ def efast_sensitivity(x, t_runs, t_freq, order = 4, include_total_variance = Fal include the sum of all variances in the result array? default = False - + Raises + ---------- + Exeption: + "Not enough model runs loaded. Expected runs, but found only " + Function is stoped, if the loaded data base contains not enough model + runs for the given number of parameters. A pervious warning might have + appeared during execution of the sample() function. + Returns ---------- value: np.array of float partial variance values accounted for by each parameter + Author + ---------- + Dominic Reusser + spotpy translation: Anna Herzog + """ if len(x) < t_runs: