diff --git a/.travis.yml b/.travis.yml
index 125eed4b..f4597719 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,3 +1,7 @@
+# do not build pull requests from the base repo twice (only build PRs from forks)
+if: type != pull_request OR fork = true
+
+# language is python
 language: python
 python: 3.8
diff --git a/examples/02_cov_model/00_intro.py b/examples/02_cov_model/00_intro.py
index 55d18fdc..ed69a072 100644
--- a/examples/02_cov_model/00_intro.py
+++ b/examples/02_cov_model/00_intro.py
@@ -18,7 +18,7 @@
 # use CovModel as the base-class
 class Gau(gs.CovModel):
     def cor(self, h):
-        return np.exp(-h ** 2)
+        return np.exp(-(h ** 2))


###############################################################################
diff --git a/examples/02_cov_model/05_additional_para.py b/examples/02_cov_model/05_additional_para.py
index dc9012a2..7f2a70ac 100755
--- a/examples/02_cov_model/05_additional_para.py
+++ b/examples/02_cov_model/05_additional_para.py
@@ -19,7 +19,7 @@ def default_opt_arg(self):
         return {"alpha": 1.5}

     def cor(self, h):
-        return np.exp(-h ** self.alpha)
+        return np.exp(-(h ** self.alpha))


###############################################################################
diff --git a/examples/02_cov_model/06_fitting_para_ranges.py b/examples/02_cov_model/06_fitting_para_ranges.py
index f73253ce..c1a7db6f 100755
--- a/examples/02_cov_model/06_fitting_para_ranges.py
+++ b/examples/02_cov_model/06_fitting_para_ranges.py
@@ -15,7 +15,7 @@ def default_opt_arg(self):
         return {"alpha": 1.5}

     def cor(self, h):
-        return np.exp(-h ** self.alpha)
+        return np.exp(-(h ** self.alpha))


# Exemplary variogram data (e.g. estimated from field observations)
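A note on the ``-(h ** 2)`` changes above: they are purely cosmetic, since unary minus binds more loosely than ``**`` in Python, so ``-h ** 2`` already evaluates as ``-(h ** 2)``; the added parentheses only make the sign explicit. For orientation, a minimal sketch of how such a user-defined model is used (mirroring the intro example; ``correlation(r)`` evaluates ``cor(r / len_scale)``)::

    import numpy as np
    import gstools as gs

    class Gau(gs.CovModel):
        def cor(self, h):
            # dimensionless correlation, evaluated at h = r / len_scale
            return np.exp(-(h ** 2))

    model = Gau(dim=2, var=1.0, len_scale=2.0)
    print(model.correlation(1.0))  # np.exp(-0.25)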
diff --git a/examples/03_variogram/01_variogram_estimation.py b/examples/03_variogram/01_variogram_estimation.py
index 6685eb40..08619386 100644
--- a/examples/03_variogram/01_variogram_estimation.py
+++ b/examples/03_variogram/01_variogram_estimation.py
@@ -230,7 +230,7 @@ def generate_transmissivity():
 # dashed lines.

 plt.figure()  # new figure
-line, = plt.plot(bin_center, gamma, label="estimated variogram (isotropic)")
+(line,) = plt.plot(bin_center, gamma, label="estimated variogram (isotropic)")
 plt.plot(
     bin_center,
     fit_model.variogram(bin_center),
@@ -239,7 +239,7 @@ def generate_transmissivity():
     label="exp. variogram (isotropic)",
 )

-line, = plt.plot(x_plot, gamma_x[:21], label="estimated variogram in x-dir")
+(line,) = plt.plot(x_plot, gamma_x[:21], label="estimated variogram in x-dir")
 plt.plot(
     x_plot,
     fit_model_x.variogram(x_plot),
@@ -248,7 +248,7 @@ def generate_transmissivity():
     label="exp. variogram in x-dir",
 )

-line, = plt.plot(y_plot, gamma_y[:21], label="estimated variogram in y-dir")
+(line,) = plt.plot(y_plot, gamma_y[:21], label="estimated variogram in y-dir")
 plt.plot(
     y_plot,
     fit_model_y.variogram(y_plot),
diff --git a/examples/03_variogram/02_find_best_model.py b/examples/03_variogram/02_find_best_model.py
new file mode 100755
index 00000000..dc5d0f0b
--- /dev/null
+++ b/examples/03_variogram/02_find_best_model.py
@@ -0,0 +1,64 @@
+"""
+Finding the best fitting variogram model
+----------------------------------------
+"""
+import numpy as np
+import gstools as gs
+from matplotlib import pyplot as plt
+
+###############################################################################
+# Generate a synthetic field with an exponential model.
+
+x = np.random.RandomState(19970221).rand(1000) * 100.0
+y = np.random.RandomState(20011012).rand(1000) * 100.0
+model = gs.Exponential(dim=2, var=2, len_scale=8)
+srf = gs.SRF(model, mean=0, seed=19970221)
+field = srf((x, y))
+
+###############################################################################
+# Estimate the variogram of the field with 40 bins.
+
+bins = np.arange(40)
+bin_center, gamma = gs.vario_estimate_unstructured((x, y), field, bins)
+
+###############################################################################
+# Define a set of models to test.
+
+models = {
+    "gaussian": gs.Gaussian,
+    "exponential": gs.Exponential,
+    "matern": gs.Matern,
+    "stable": gs.Stable,
+    "rational": gs.Rational,
+    "linear": gs.Linear,
+    "circular": gs.Circular,
+    "spherical": gs.Spherical,
+}
+scores = {}
+
+###############################################################################
+# Iterate over all models, fit their variogram and calculate the r2 score.
+
+# plot the estimated variogram
+plt.scatter(bin_center, gamma, label="data")
+ax = plt.gca()
+
+# fit all models to the estimated variogram
+for model in models:
+    fit_model = models[model](dim=2)
+    para, pcov, r2 = fit_model.fit_variogram(bin_center, gamma, return_r2=True)
+    fit_model.plot(x_max=40, ax=ax)
+    scores[model] = r2
+
+###############################################################################
+# Create a ranking based on the score and determine the best model.
+
+ranking = [
+    (k, v)
+    for k, v in sorted(scores.items(), key=lambda item: item[1], reverse=True)
+]
+print("RANKING")
+for i, (model, score) in enumerate(ranking, 1):
+    print(i, model, score)
+
+plt.show()
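The ranking can be used directly afterwards, e.g. to refit and inspect the winning model; a small sketch reusing the names defined in the script above::

    best_name = ranking[0][0]
    best_model = models[best_name](dim=2)
    best_model.fit_variogram(bin_center, gamma)
    print("best model:", best_model)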
diff --git a/gstools/covmodel/base.py b/gstools/covmodel/base.py
index cbac3892..cb39d3ab 100644
--- a/gstools/covmodel/base.py
+++ b/gstools/covmodel/base.py
@@ -15,7 +15,7 @@
 import copy
 import numpy as np
 from scipy.integrate import quad as integral
-from scipy.optimize import curve_fit, root
+from scipy.optimize import root
 from hankel import SymmetricFourierTransform as SFT
 from gstools.field.tools import make_isotropic, unrotate_mesh
 from gstools.tools.geometric import pos2xyz
@@ -25,8 +25,11 @@
     set_len_anis,
     set_angles,
     check_bounds,
+    check_arg_in_bounds,
+    default_arg_from_bounds,
 )
 from gstools.covmodel import plot
+from gstools.covmodel.fit import fit_variogram

 __all__ = ["CovModel"]

@@ -127,8 +130,17 @@ def __init__(
             raise TypeError("Don't instantiate 'CovModel' directly!")

         # optional arguments for the variogram-model
+        self._opt_arg = []
         # look up the defaults for the optional arguments (defined by the user)
         default = self.default_opt_arg()
+        for opt_name in opt_arg:
+            if opt_name not in default:
+                warnings.warn(
+                    "The given optional argument '{}' ".format(opt_name)
+                    + "is unknown or has at least no defined standard value. "
+                    + "Maybe you made a typo?",
+                    AttributeWarning,
+                )
         # add the default vaules if not specified
         for def_arg in default:
             if def_arg not in opt_arg:
@@ -144,13 +156,6 @@ def __init__(
                     + "' has a 'bad' name, since it is already present in "
                     + "the class. It could not be added to the model"
                 )
-            if opt_name not in self.default_opt_arg().keys():
-                warnings.warn(
-                    "The given optional argument '{}' ".format(opt_name)
-                    + "is unknown or has at least no defined standard value. "
-                    + "Or you made a Typo... hehe.",
-                    AttributeWarning,
-                )
             # Magic happens here
             setattr(self, opt_name, opt_arg[opt_name])

@@ -161,7 +166,7 @@ def __init__(
         self._opt_arg_bounds = {}
         bounds = self.default_arg_bounds()
         bounds.update(self.default_opt_arg_bounds())
-        self.set_arg_bounds(**bounds)
+        self.set_arg_bounds(check_args=False, **bounds)

         # prepare dim setting
         self._dim = None
@@ -502,7 +507,10 @@ def default_opt_arg(self):

         Should be given as a dictionary.
         """
-        return {}
+        res = {}
+        for opt in self.opt_arg:
+            res[opt] = 0.0
+        return res

     def default_opt_arg_bounds(self):
         """Provide default boundaries for optional arguments."""
@@ -611,11 +619,11 @@ def spectral_rad_pdf(self, r):
             r_gz = np.logical_not(np.isclose(r, 0))
             # to prevent numerical errors, we just calculate where r>0
             res = np.zeros_like(r, dtype=np.double)
-            res[r_gz] = rad_fac(self.dim, r[r_gz]) * self.spectral_density(
-                r[r_gz]
+            res[r_gz] = rad_fac(self.dim, r[r_gz]) * np.abs(
+                self.spectral_density(r[r_gz])
             )
         else:
-            res = rad_fac(self.dim, r) * self.spectral_density(r)
+            res = rad_fac(self.dim, r) * np.abs(self.spectral_density(r))
         # prevent numerical errors in hankel for small r values (set 0)
         res[np.logical_not(np.isfinite(res))] = 0.0
         # prevent numerical errors in hankel for big r (set non-negative)
@@ -637,7 +645,20 @@ def _has_ppf(self):

     ### fitting routine #######################################################

-    def fit_variogram(self, x_data, y_data, maxfev=1000, **para_deselect):
+    def fit_variogram(
+        self,
+        x_data,
+        y_data,
+        sill=None,
+        init_guess="default",
+        weights=None,
+        method="trf",
+        loss="soft_l1",
+        max_eval=None,
+        return_r2=False,
+        curve_fit_kwargs=None,
+        **para_select
+    ):
         """
         Fiting the isotropic variogram-model to given data.
@@ -647,13 +668,87 @@ def fit_variogram(
         Parameters
         ----------
         x_data : :class:`numpy.ndarray`
             The radii of the meassured variogram.
         y_data : :class:`numpy.ndarray`
             The messured variogram
-        maxfev : int, optional
-            The maximum number of calls to the function in scipy curvefit.
-            Default: 1000
-        **para_deselect
-            You can deselect the parameters to be fitted, by setting
-            them "False" as keywords. By default, all parameters are
-            fitted.
+        sill : :class:`float` or :class:`bool`, optional
+            Here you can provide a fixed sill for the variogram.
+            It needs to be within the fitting range of the var and nugget
+            bounds. If variance or nugget are not selected for estimation,
+            the nugget will be recalculated to fulfill:
+
+                * sill = var + nugget
+                * if the variance is bigger than the sill,
+                  the nugget will be set to its lower bound and the variance
+                  will be set to the resulting partial sill.
+
+            If variance is deselected, it needs to be less than the sill,
+            otherwise a ValueError is raised. The same holds for the nugget.
+            If sill=False, it will be deselected from estimation and set to
+            the current sill of the model. Then, the procedure above is
+            applied.
+            Default: None
+        init_guess : :class:`str` or :class:`dict`, optional
+            Initial guess for the estimation. Either:
+
+                * "default": using the default values of the covariance model
+                * "current": using the current values of the covariance model
+                * dict(name: val): specified value for each parameter by name
+
+            Default: "default"
+        weights : :class:`str`, :class:`numpy.ndarray`, :class:`callable`, optional
+            Weights applied to each point in the estimation. Either:
+
+                * 'inv': inverse distance ``1 / (x_data + 1)``
+                * list: weights given per bin
+                * callable: function applied to x_data
+
+            If callable, it must take a 1-d ndarray.
+            Then ``weights = f(x_data)``.
+            Default: None
+        method : {'trf', 'dogbox'}, optional
+            Algorithm to perform minimization.
+
+                * 'trf' : Trust Region Reflective algorithm, particularly
+                  suitable for large sparse problems with bounds.
+                  Generally a robust method.
+                * 'dogbox' : dogleg algorithm with rectangular trust regions,
+                  typical use case is small problems with bounds.
+                  Not recommended for problems with rank-deficient Jacobian.
+
+            Default: 'trf'
+        loss : :class:`str` or :class:`callable`, optional
+            Determines the loss function in scipy's curve_fit.
+            The following keyword values are allowed:
+
+                * 'linear' : ``rho(z) = z``. Gives a standard
+                  least-squares problem.
+                * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
+                  approximation of l1 (absolute value) loss. Usually a good
+                  choice for robust least squares.
+                * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
+                  similarly to 'soft_l1'.
+                * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the
+                  influence of outliers, but may cause difficulties in the
+                  optimization process.
+                * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
+                  a single residual, has properties similar to 'cauchy'.
+
+            If callable, it must take a 1-d ndarray ``z=f**2`` and return an
+            array_like with shape (3, m) where row 0 contains function values,
+            row 1 contains first derivatives and row 2 contains second
+            derivatives. Default: 'soft_l1'
+        max_eval : :class:`int` or :any:`None`, optional
+            Maximum number of function evaluations before the termination.
+            If None (default), the value is chosen automatically: 100 * n.
+        return_r2 : :class:`bool`, optional
+            Whether to return the r2 score of the estimation.
+            Default: False
+        curve_fit_kwargs : :class:`dict`, optional
+            Other keyword arguments passed to scipy's curve_fit. Default: None
+        **para_select
+            You can deselect parameters from fitting by setting them "False"
+            via their names as keywords. You may also pass fixed values for
+            parameters; these values will be applied and the corresponding
+            parameters won't be fitted.
+            By default, all parameters are fitted.

         Returns
         -------
@@ -661,7 +756,11 @@ def fit_variogram(
         fit_para : :class:`dict`
             Dictonary with the fitted parameter values
         pcov : :class:`numpy.ndarray`
             The estimated covariance of `popt` from
-            :any:`scipy.optimize.curve_fit`
+            :any:`scipy.optimize.curve_fit`.
+            To compute one standard deviation errors
+            on the parameters use ``perr = np.sqrt(np.diag(pcov))``.
+        r2_score : :class:`float`, optional
+            r2 score of the curve fitting results. Only if return_r2 is True.

         Notes
         -----
         You can set the bounds for each parameter by accessing
         :any:`CovModel.set_arg_bounds`.
@@ -670,93 +769,20 @@ def fit_variogram(
         The fitted parameters will be instantly set in the model.
         """
-        # select all parameters to be fitted
-        para = {"var": True, "len_scale": True, "nugget": True}
-        for opt in self.opt_arg:
-            para[opt] = True
-        # deselect unwanted parameters
-        para.update(para_deselect)
-
-        # we need arg1, otherwise curve_fit throws an error (bug?!)
-        def curve(x, arg1, *args):
-            """Adapted Variogram function."""
-            args = (arg1,) + args
-            para_skip = 0
-            opt_skip = 0
-            if para["var"]:
-                var_tmp = args[para_skip]
-                para_skip += 1
-            if para["len_scale"]:
-                self.len_scale = args[para_skip]
-                para_skip += 1
-            if para["nugget"]:
-                self.nugget = args[para_skip]
-                para_skip += 1
-            for opt in self.opt_arg:
-                if para[opt]:
-                    setattr(self, opt, args[para_skip + opt_skip])
-                    opt_skip += 1
-            # set var at last because of var_factor (other parameter needed)
-            if para["var"]:
-                self.var = var_tmp
-            return self.variogram(x)
-
-        # set the lower/upper boundaries for the variogram-parameters
-        low_bounds = []
-        top_bounds = []
-        if para["var"]:
-            low_bounds.append(self.var_bounds[0])
-            top_bounds.append(self.var_bounds[1])
-        if para["len_scale"]:
-            low_bounds.append(self.len_scale_bounds[0])
-            top_bounds.append(self.len_scale_bounds[1])
-        if para["nugget"]:
-            low_bounds.append(self.nugget_bounds[0])
-            top_bounds.append(self.nugget_bounds[1])
-        for opt in self.opt_arg:
-            if para[opt]:
-                low_bounds.append(self.opt_arg_bounds[opt][0])
-                top_bounds.append(self.opt_arg_bounds[opt][1])
-        # fit the variogram
-        popt, pcov = curve_fit(
-            curve,
-            x_data,
-            y_data,
-            bounds=(low_bounds, top_bounds),
-            maxfev=maxfev,
+        return fit_variogram(
+            model=self,
+            x_data=x_data,
+            y_data=y_data,
+            sill=sill,
+            init_guess=init_guess,
+            weights=weights,
+            method=method,
+            loss=loss,
+            max_eval=max_eval,
+            return_r2=return_r2,
+            curve_fit_kwargs=curve_fit_kwargs,
+            **para_select
         )
-        fit_para = {}
-        para_skip = 0
-        opt_skip = 0
-        if para["var"]:
-            var_tmp = popt[para_skip]
-            fit_para["var"] = popt[para_skip]
-            para_skip += 1
-        else:
-            fit_para["var"] = self.var
-        if para["len_scale"]:
-            self.len_scale = popt[para_skip]
-            fit_para["len_scale"] = popt[para_skip]
-            para_skip += 1
-        else:
-            fit_para["len_scale"] = self.len_scale
-        if para["nugget"]:
-            self.nugget = popt[para_skip]
-            fit_para["nugget"] = popt[para_skip]
-            para_skip += 1
-        else:
-            fit_para["nugget"] = self.nugget
-        for opt in self.opt_arg:
-            if para[opt]:
-                setattr(self, opt, popt[para_skip + opt_skip])
-                fit_para[opt] = popt[para_skip + opt_skip]
-                opt_skip += 1
-            else:
-                fit_para[opt] = getattr(self, opt)
-        # set var at last because of var_factor (other parameter needed)
-        if para["var"]:
-            self.var = var_tmp
-        return fit_para, pcov
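The reworked interface is exercised by the new tests further below; a short sketch of the new keywords, where ``x_data`` and ``y_data`` stand for any estimated variogram::

    model = gs.Stable(dim=2)
    # fix the sill and deselect the nugget -> var is set to sill - nugget
    model.fit_variogram(x_data, y_data, sill=2, nugget=False)
    # pass a fixed value instead of True/False to pin a parameter
    model.fit_variogram(x_data, y_data, alpha=1.5)
    # additionally retrieve the r2 score of the fit
    fit_para, pcov, r2 = model.fit_variogram(x_data, y_data, return_r2=True)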
     ### bounds setting and checks #############################################

@@ -772,11 +798,16 @@ def default_arg_bounds(self):
         }
         return res

-    def set_arg_bounds(self, **kwargs):
+    def set_arg_bounds(self, check_args=True, **kwargs):
         r"""Set bounds for the parameters of the model.

         Parameters
         ----------
+        check_args : bool, optional
+            Whether to check if the arguments need to be reset to be within
+            the given bounds. In that case, a proper default value will be
+            determined from the given bounds.
+            Default: True
         **kwargs
             Parameter name as keyword ("var", "len_scale", "nugget", )
             and a list of 2 or 3 values as value:

                 * ``[a, b]`` or
                 * ``[a, b, <type>]``

             <type> is one of ``"oo"``, ``"cc"``, ``"oc"`` or ``"co"``
             to define if the bounds are open ("o") or closed ("c").
""" - for opt in kwargs: - if opt in self.opt_arg: - if not check_bounds(kwargs[opt]): - raise ValueError( - "Given bounds for '" - + opt - + "' are not valid, got: " - + str(kwargs[opt]) + # if variance needs to be resetted, do this at last + var_bnds = [] + for arg in kwargs: + if not check_bounds(kwargs[arg]): + raise ValueError( + "Given bounds for '{0}' are not valid, got: {1}".format( + arg, kwargs[arg] ) - self._opt_arg_bounds[opt] = kwargs[opt] - if opt == "var": - self.var_bounds = kwargs[opt] - if opt == "len_scale": - self.len_scale_bounds = kwargs[opt] - if opt == "nugget": - self.nugget_bounds = kwargs[opt] + ) + if arg in self.opt_arg: + self._opt_arg_bounds[arg] = kwargs[arg] + elif arg == "var": + var_bnds = kwargs[arg] + continue + elif arg == "len_scale": + self.len_scale_bounds = kwargs[arg] + elif arg == "nugget": + self.nugget_bounds = kwargs[arg] + else: + raise ValueError( + "set_arg_bounds: unknown argument '{}'".format(arg) + ) + if check_args and check_arg_in_bounds(self, arg) > 0: + setattr(self, arg, default_arg_from_bounds(kwargs[arg])) + # set var last like allways + if var_bnds: + self.var_bounds = var_bnds + if check_args and check_arg_in_bounds(self, "var") > 0: + self.var = default_arg_from_bounds(var_bnds) def check_arg_bounds(self): """Check arguments to be within the given bounds.""" @@ -810,44 +854,23 @@ def check_arg_bounds(self): for arg in self.arg_bounds: bnd = list(self.arg_bounds[arg]) val = getattr(self, arg) - if len(bnd) == 2: - bnd.append("cc") # use closed intervals by default - if bnd[2][0] == "c": - if val < bnd[0]: - raise ValueError( - str(arg) - + " needs to be >= " - + str(bnd[0]) - + ", got: " - + str(val) - ) - else: - if val <= bnd[0]: - raise ValueError( - str(arg) - + " needs to be > " - + str(bnd[0]) - + ", got: " - + str(val) - ) - if bnd[2][1] == "c": - if val > bnd[1]: - raise ValueError( - str(arg) - + " needs to be <= " - + str(bnd[1]) - + ", got: " - + str(val) - ) - else: - if val >= bnd[1]: - raise ValueError( - str(arg) - + " needs to be < " - + str(bnd[1]) - + ", got: " - + str(val) - ) + error_case = check_arg_in_bounds(self, arg) + if error_case == 1: + raise ValueError( + "{0} needs to be >= {1}, got: {2}".format(arg, bnd[0], val) + ) + if error_case == 2: + raise ValueError( + "{0} needs to be > {1}, got: {2}".format(arg, bnd[0], val) + ) + if error_case == 3: + raise ValueError( + "{0} needs to be <= {1}, got: {2}".format(arg, bnd[1], val) + ) + if error_case == 4: + raise ValueError( + "{0} needs to be < {1}, got: {2}".format(arg, bnd[1], val) + ) ### bounds properties ##################################################### @@ -1252,7 +1275,7 @@ def __repr__(self): ) -if __name__ == "__main__": # pragma: no cover +if __name__ == "__main__": import doctest doctest.testmod() diff --git a/gstools/covmodel/fit.py b/gstools/covmodel/fit.py new file mode 100755 index 00000000..cbf1aecf --- /dev/null +++ b/gstools/covmodel/fit.py @@ -0,0 +1,414 @@ +# -*- coding: utf-8 -*- +""" +GStools subpackage providing tools for the covariance-model. + +.. currentmodule:: gstools.covmodel.fit + +The following classes and functions are provided + +.. 
diff --git a/gstools/covmodel/fit.py b/gstools/covmodel/fit.py
new file mode 100755
index 00000000..cbf1aecf
--- /dev/null
+++ b/gstools/covmodel/fit.py
@@ -0,0 +1,414 @@
+# -*- coding: utf-8 -*-
+"""
+GStools subpackage providing tools for the covariance-model.
+
+.. currentmodule:: gstools.covmodel.fit
+
+The following classes and functions are provided
+
+.. autosummary::
+   fit_variogram
+"""
+# pylint: disable=C0103
+import numpy as np
+from scipy.optimize import curve_fit
+from gstools.covmodel.tools import check_arg_in_bounds, default_arg_from_bounds
+
+
+__all__ = ["fit_variogram"]
+
+
+DEFAULT_PARA = ["var", "len_scale", "nugget"]
+
+
+def fit_variogram(
+    model,
+    x_data,
+    y_data,
+    sill=None,
+    init_guess="default",
+    weights=None,
+    method="trf",
+    loss="soft_l1",
+    max_eval=None,
+    return_r2=False,
+    curve_fit_kwargs=None,
+    **para_select
+):
+    """
+    Fitting the isotropic variogram model to given data.
+
+    Parameters
+    ----------
+    model : :any:`CovModel`
+        Covariance Model to fit.
+    x_data : :class:`numpy.ndarray`
+        The radii of the measured variogram.
+    y_data : :class:`numpy.ndarray`
+        The measured variogram.
+    sill : :class:`float` or :class:`bool`, optional
+        Here you can provide a fixed sill for the variogram.
+        It needs to be within the fitting range of the var and nugget bounds.
+        If variance or nugget are not selected for estimation,
+        the nugget will be recalculated to fulfill:
+
+            * sill = var + nugget
+            * if the variance is bigger than the sill,
+              the nugget will be set to its lower bound
+              and the variance will be set to the resulting partial sill.
+
+        If variance is deselected, it needs to be less than the sill,
+        otherwise a ValueError is raised. The same holds for the nugget.
+        If sill=False, it will be deselected from estimation
+        and set to the current sill of the model.
+        Then, the procedure above is applied.
+        Default: None
+    init_guess : :class:`str` or :class:`dict`, optional
+        Initial guess for the estimation. Either:
+
+            * "default": using the default values of the covariance model
+            * "current": using the current values of the covariance model
+            * dict(name: val): specified value for each parameter by name
+
+        Default: "default"
+    weights : :class:`str`, :class:`numpy.ndarray`, :class:`callable`, optional
+        Weights applied to each point in the estimation. Either:
+
+            * 'inv': inverse distance ``1 / (x_data + 1)``
+            * list: weights given per bin
+            * callable: function applied to x_data
+
+        If callable, it must take a 1-d ndarray. Then ``weights = f(x_data)``.
+        Default: None
+    method : {'trf', 'dogbox'}, optional
+        Algorithm to perform minimization.
+
+            * 'trf' : Trust Region Reflective algorithm, particularly suitable
+              for large sparse problems with bounds. Generally a robust method.
+            * 'dogbox' : dogleg algorithm with rectangular trust regions,
+              typical use case is small problems with bounds. Not recommended
+              for problems with rank-deficient Jacobian.

+        Default: 'trf'
+    loss : :class:`str` or :class:`callable`, optional
+        Determines the loss function in scipy's curve_fit.
+        The following keyword values are allowed:
+
+            * 'linear' : ``rho(z) = z``. Gives a standard
+              least-squares problem.
+            * 'soft_l1' : ``rho(z) = 2 * ((1 + z)**0.5 - 1)``. The smooth
+              approximation of l1 (absolute value) loss. Usually a good
+              choice for robust least squares.
+            * 'huber' : ``rho(z) = z if z <= 1 else 2*z**0.5 - 1``. Works
+              similarly to 'soft_l1'.
+            * 'cauchy' : ``rho(z) = ln(1 + z)``. Severely weakens the
+              influence of outliers, but may cause difficulties in the
+              optimization process.
+            * 'arctan' : ``rho(z) = arctan(z)``. Limits a maximum loss on
+              a single residual, has properties similar to 'cauchy'.
+
+        If callable, it must take a 1-d ndarray ``z=f**2`` and return an
+        array_like with shape (3, m) where row 0 contains function values,
+        row 1 contains first derivatives and row 2 contains second
+        derivatives. Default: 'soft_l1'
+    max_eval : :class:`int` or :any:`None`, optional
+        Maximum number of function evaluations before the termination.
+        If None (default), the value is chosen automatically: 100 * n.
+    return_r2 : :class:`bool`, optional
+        Whether to return the r2 score of the estimation.
+        Default: False
+    curve_fit_kwargs : :class:`dict`, optional
+        Other keyword arguments passed to scipy's curve_fit. Default: None
+    **para_select
+        You can deselect parameters from fitting by setting them "False"
+        via their names as keywords. You may also pass fixed values for
+        parameters; these values will be applied and the corresponding
+        parameters won't be fitted.
+        By default, all parameters are fitted.
+
+    Returns
+    -------
+    fit_para : :class:`dict`
+        Dictionary with the fitted parameter values.
+    pcov : :class:`numpy.ndarray`
+        The estimated covariance of `popt` from
+        :any:`scipy.optimize.curve_fit`.
+        To compute one standard deviation errors
+        on the parameters use ``perr = np.sqrt(np.diag(pcov))``.
+    r2_score : :class:`float`, optional
+        r2 score of the curve fitting results. Only if return_r2 is True.
+
+    Notes
+    -----
+    You can set the bounds for each parameter by accessing
+    :any:`CovModel.set_arg_bounds`.
+
+    The fitted parameters will be instantly set in the model.
+    """
+    # preprocess selected parameters
+    para, sill, constrain_sill = _pre_para(model, para_select, sill)
+    # check curve_fit kwargs
+    curve_fit_kwargs = {} if curve_fit_kwargs is None else curve_fit_kwargs
+    # check method
+    if method not in ["trf", "dogbox"]:
+        raise ValueError("fit: method needs to be either 'trf' or 'dogbox'")
+    # set weights
+    _set_weights(weights, x_data, curve_fit_kwargs)
+    # set the lower/upper boundaries for the variogram-parameters
+    bounds, init_guess_list = _init_curve_fit_para(
+        model, para, init_guess, constrain_sill, sill
+    )
+    # create the fitting curve
+    curve_fit_kwargs["f"] = _get_curve(model, para, constrain_sill, sill)
+    # set the remaining kwargs for curve_fit
+    curve_fit_kwargs["bounds"] = bounds
+    curve_fit_kwargs["p0"] = init_guess_list
+    curve_fit_kwargs["xdata"] = np.array(x_data)
+    curve_fit_kwargs["ydata"] = np.array(y_data)
+    curve_fit_kwargs["loss"] = loss
+    curve_fit_kwargs["max_nfev"] = max_eval
+    curve_fit_kwargs["method"] = method
+    # fit the variogram
+    popt, pcov = curve_fit(**curve_fit_kwargs)
+    # convert the results
+    fit_para = _post_fitting(model, para, popt)
+    # calculate the r2 score if wanted
+    if return_r2:
+        return fit_para, pcov, _r2_score(model, x_data, y_data)
+    return fit_para, pcov
+
+
+def _pre_para(model, para_select, sill):
+    """Preprocess selected parameters."""
+    var_last = False
+    for par in para_select:
+        if par not in model.arg_bounds:
+            raise ValueError(
+                "fit: unknown parameter in selection: {}".format(par)
+            )
+        if not isinstance(para_select[par], bool):
+            if par == "var":
+                var_last = True
+                var_tmp = float(para_select[par])
+            else:
+                setattr(model, par, float(para_select[par]))
+            para_select[par] = False
+    # set variance last due to possible recalculations
+    if var_last:
+        model.var = var_tmp
+    # remove those that were set to True
+    para_select = {k: v for k, v in para_select.items() if not v}
+    # handling the sill
+    sill = None if (isinstance(sill, bool) and sill) else sill
+    if sill is not None:
+        sill = model.sill if isinstance(sill, bool) else float(sill)
+        constrain_sill = True
+        sill_low = model.arg_bounds["var"][0] + model.arg_bounds["nugget"][0]
+        sill_up = model.arg_bounds["var"][1] + model.arg_bounds["nugget"][1]
+        if not sill_low <= sill <= sill_up:
+            raise ValueError("fit: sill out of bounds.")
+        if "var" in para_select and "nugget" in para_select:
+            if model.var > sill:
+                model.nugget = model.arg_bounds["nugget"][0]
+                model.var = sill - model.nugget
+            else:
+                model.nugget = sill - model.var
+        elif "var" in para_select:
+            if model.var > sill:
+                raise ValueError(
+                    "fit: if sill is fixed and variance deselected, "
+                    + "the set variance should be less than the given sill."
+                )
+            para_select["nugget"] = False
+            model.nugget = sill - model.var
+        elif "nugget" in para_select:
+            if model.nugget > sill:
+                raise ValueError(
+                    "fit: if sill is fixed and nugget deselected, "
+                    + "the set nugget should be less than the given sill."
+                )
+            para_select["var"] = False
+            model.var = sill - model.nugget
+        else:
+            # deselect the nugget, to recalculate it accordingly
+            # nugget = sill - var
+            para_select["nugget"] = False
+    else:
+        constrain_sill = False
+    # select all parameters to be fitted
+    para = {par: True for par in DEFAULT_PARA}
+    para.update({opt: True for opt in model.opt_arg})
+    # now deselect unwanted parameters
+    para.update(para_select)
+    return para, sill, constrain_sill
+
+
+def _set_weights(weights, x_data, curve_fit_kwargs):
+    """Convert given weights to 'sigma' values for scipy's curve_fit."""
+    if weights is not None:
+        if callable(weights):
+            weights = 1.0 / weights(np.array(x_data))
+        elif weights == "inv":
+            weights = 1.0 + np.array(x_data)
+        else:
+            weights = 1.0 / np.array(weights)
+        curve_fit_kwargs["sigma"] = weights
+        curve_fit_kwargs["absolute_sigma"] = True
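Regarding ``_set_weights``: ``curve_fit`` expects standard deviations ``sigma``, so the given weights are inverted before being passed on. A sketch of the three accepted forms, with ``x_data``/``y_data`` standing for any estimated variogram::

    model.fit_variogram(x_data, y_data, weights="inv")  # 1 / (1 + x_data)
    model.fit_variogram(x_data, y_data, weights=np.ones_like(x_data))
    model.fit_variogram(x_data, y_data, weights=lambda x: 1.0 / (1.0 + x))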
+
+
+def _get_curve(model, para, constrain_sill, sill):
+    """Create the curve for scipy's curve_fit."""
+    # we need arg1, otherwise curve_fit throws an error (bug?!)
+    def curve(x, arg1, *args):
+        """Adapted Variogram function."""
+        args = (arg1,) + args
+        para_skip = 0
+        opt_skip = 0
+        if para["var"]:
+            var_tmp = args[para_skip]
+            if constrain_sill:
+                nugget_tmp = sill - var_tmp
+                # penalty, if the resulting nugget is out of range
+                # for the fixed sill
+                if check_arg_in_bounds(model, "nugget", nugget_tmp) > 0:
+                    return np.full_like(x, np.inf)
+                # nugget estimation deselected in this case
+                model.nugget = nugget_tmp
+            para_skip += 1
+        if para["len_scale"]:
+            model.len_scale = args[para_skip]
+            para_skip += 1
+        if para["nugget"]:
+            model.nugget = args[para_skip]
+            para_skip += 1
+        for opt in model.opt_arg:
+            if para[opt]:
+                setattr(model, opt, args[para_skip + opt_skip])
+                opt_skip += 1
+        # set var at last because of var_factor (other parameter needed)
+        if para["var"]:
+            model.var = var_tmp
+        return model.variogram(x)
+
+    return curve
+
+
+def _init_curve_fit_para(model, para, init_guess, constrain_sill, sill):
+    """Create initial guess and bounds for fitting."""
+    low_bounds = []
+    top_bounds = []
+    init_guess_list = []
+    for par in DEFAULT_PARA:
+        if para[par]:
+            low_bounds.append(model.arg_bounds[par][0])
+            if par == "var" and constrain_sill:  # var <= sill in this case
+                top_bounds.append(sill)
+            else:
+                top_bounds.append(model.arg_bounds[par][1])
+            init_guess_list.append(
+                _init_guess(
+                    bounds=[low_bounds[-1], top_bounds[-1]],
+                    current=getattr(model, par),
+                    default=1.0,
+                    typ=init_guess,
+                    para_name=par,
+                )
+            )
+    for opt in model.opt_arg:
+        if para[opt]:
+            low_bounds.append(model.arg_bounds[opt][0])
+            top_bounds.append(model.arg_bounds[opt][1])
+            init_guess_list.append(
+                _init_guess(
+                    bounds=[low_bounds[-1], top_bounds[-1]],
+                    current=getattr(model, opt),
+                    default=model.default_opt_arg()[opt],
+                    typ=init_guess,
+                    para_name=opt,
+                )
+            )
+    return (low_bounds, top_bounds), init_guess_list
+
+
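Note that a dict passed as ``init_guess`` has to name every fitted parameter (see ``_init_guess`` below, which raises otherwise); a sketch with hypothetical starting values::

    model = gs.Stable(dim=2)
    model.fit_variogram(
        x_data,
        y_data,
        init_guess={"var": 2.0, "len_scale": 10.0, "nugget": 0.0, "alpha": 1.5},
    )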
+def _init_guess(bounds, current, default, typ, para_name):
+    """Proper determination of initial guess."""
+    if isinstance(typ, dict):
+        if para_name in typ:
+            return typ[para_name]
+        # if we have a dict, all parameters need a given init_guess
+        raise ValueError(
+            "CovModel.fit: missing init guess for: '{}'".format(para_name)
+        )
+    if typ == "default":
+        if bounds[0] < default < bounds[1]:
+            return default
+        return default_arg_from_bounds(bounds)
+    if typ == "current":
+        return current
+    raise ValueError("CovModel.fit: unknown init_guess: '{}'".format(typ))
+
+
+def _post_fitting(model, para, popt):
+    """Postprocess fitting results and apply them to the model."""
+    fit_para = {}
+    para_skip = 0
+    opt_skip = 0
+    for par in DEFAULT_PARA:
+        if para[par]:
+            if par == "var":  # set variance last
+                var_tmp = popt[para_skip]
+            else:
+                setattr(model, par, popt[para_skip])
+            fit_para[par] = popt[para_skip]
+            para_skip += 1
+        else:
+            fit_para[par] = getattr(model, par)
+    for opt in model.opt_arg:
+        if para[opt]:
+            setattr(model, opt, popt[para_skip + opt_skip])
+            fit_para[opt] = popt[para_skip + opt_skip]
+            opt_skip += 1
+        else:
+            fit_para[opt] = getattr(model, opt)
+    # set var at last because of var_factor (other parameter needed)
+    if para["var"]:
+        model.var = var_tmp
+    return fit_para
+
+
+def _r2_score(model, x_data, y_data):
+    """Calculate the R2 score."""
+    residuals = y_data - model.variogram(x_data)
+    ss_res = np.sum(residuals ** 2)
+    ss_tot = np.sum((y_data - np.mean(y_data)) ** 2)
+    return 1.0 - (ss_res / ss_tot)
+
+
+def logistic_weights(p=0.1, mean=0.7):  # pragma: no cover
+    """
+    Return a logistic weights function.
+
+    Parameters
+    ----------
+    p : :class:`float`, optional
+        Parameter for the growth rate.
+        Within this percentage of the data range, the function will
+        be in the upper resp. lower percentile p. The default is 0.1.
+    mean : :class:`float`, optional
+        Percentage of the data range, where this function has its
+        sigmoid's midpoint. The default is 0.7.
+
+    Returns
+    -------
+    callable
+        Weighting function.
+    """
+    # define the callable weights function
+    def func(x_data):
+        """Callable function for the weights."""
+        x_range = np.amax(x_data) - np.amin(x_data)
+        # logit function for growth rate
+        growth = np.log(p / (1 - p)) / (p * x_range)
+        x_mean = mean * x_range + np.amin(x_data)
+        return 1.0 / (1.0 + np.exp(growth * (x_mean - x_data)))
+
+    return func
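``logistic_weights`` produces exactly such a callable: it gives full weight to small lags and fades out towards the largest distances, with the sigmoid midpoint at ``mean`` times the data range. A sketch (the helper lives in this new module, so the import path is assumed from the file above)::

    from gstools.covmodel.fit import logistic_weights

    weight_func = logistic_weights(p=0.1, mean=0.7)
    model.fit_variogram(x_data, y_data, weights=weight_func)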
+ """ + # define the callable weights function + def func(x_data): + """Callable function for the weights.""" + x_range = np.amax(x_data) - np.amin(x_data) + # logit function for growth rate + growth = np.log(p / (1 - p)) / (p * x_range) + x_mean = mean * x_range + np.amin(x_data) + return 1.0 / (1.0 + np.exp(growth * (x_mean - x_data))) + + return func diff --git a/gstools/covmodel/models.py b/gstools/covmodel/models.py index 171f1bfc..8cce6919 100644 --- a/gstools/covmodel/models.py +++ b/gstools/covmodel/models.py @@ -66,7 +66,7 @@ def correlation(self, r): def spectral_density(self, k): # noqa: D102 k = np.array(k, dtype=np.double) return (self.len_scale / np.pi) ** self.dim * np.exp( - -(k * self.len_scale) ** 2 / np.pi + -((k * self.len_scale) ** 2) / np.pi ) def spectral_rad_cdf(self, r): @@ -75,12 +75,12 @@ def spectral_rad_cdf(self, r): if self.dim == 1: return sps.erf(self.len_scale * r / np.sqrt(np.pi)) if self.dim == 2: - return 1.0 - np.exp(-(r * self.len_scale) ** 2 / np.pi) + return 1.0 - np.exp(-((r * self.len_scale) ** 2) / np.pi) if self.dim == 3: return sps.erf( self.len_scale * r / np.sqrt(np.pi) ) - 2 * r * self.len_scale / np.pi * np.exp( - -(r * self.len_scale) ** 2 / np.pi + -((r * self.len_scale) ** 2) / np.pi ) return None @@ -98,9 +98,11 @@ def spectral_rad_ppf(self, u): return np.sqrt(np.pi) / self.len_scale * np.sqrt(-np.log(1.0 - u)) return None + def _has_cdf(self): + return self.dim in [1, 2, 3] + def _has_ppf(self): - # since the ppf is not analytical for dim=3, we have to state that - return False if self.dim == 3 else True + return self.dim in [1, 2] def calc_integral_scale(self): # noqa: D102 return self.len_scale @@ -179,9 +181,11 @@ def spectral_rad_ppf(self, u): return np.sqrt(u_power - 1.0) / self.len_scale return None + def _has_cdf(self): + return self.dim in [1, 2, 3] + def _has_ppf(self): - # since the ppf is not analytical for dim=3, we have to state that - return False if self.dim == 3 else True + return self.dim in [1, 2] def calc_integral_scale(self): # noqa: D102 return self.len_scale @@ -250,6 +254,14 @@ def correlation(self, r): 1 + 0.5 / self.alpha * (r / self.len_scale) ** 2, -self.alpha ) + def calc_integral_scale(self): # noqa: D102 + return ( + self.len_scale + * np.sqrt(np.pi * self.alpha * 0.5) + * sps.gamma(self.alpha - 0.5) + / sps.gamma(self.alpha) + ) + # Stable Model ################################################################ @@ -325,6 +337,9 @@ def correlation(self, r): r = np.array(np.abs(r), dtype=np.double) return np.exp(-np.power(r / self.len_scale, self.alpha)) + def calc_integral_scale(self): # noqa: D102 + return self.len_scale * sps.gamma(1.0 + 1.0 / self.alpha) + # Matérn Model ################################################################ @@ -399,7 +414,7 @@ def correlation(self, r): r = np.array(np.abs(r), dtype=np.double) # for nu > 20 we just use the gaussian model if self.nu > 20.0: - return np.exp(-(r / self.len_scale) ** 2 / 4) + return np.exp(-((r / self.len_scale) ** 2) / 4) # calculate by log-transformation to prevent numerical errors r_gz = r[r > 0.0] res = np.ones_like(r) @@ -421,7 +436,7 @@ def spectral_density(self, k): # noqa: D102 if self.nu > 20.0: return ( (self.len_scale / np.sqrt(np.pi)) ** self.dim - * np.exp(-(k * self.len_scale) ** 2) + * np.exp(-((k * self.len_scale) ** 2)) * ( 1 + ( @@ -634,7 +649,7 @@ class Intersection(CovModel): 0 & r\geq\ell \end{cases} - In 3D: + In >=3D: .. 
diff --git a/gstools/covmodel/plot.py b/gstools/covmodel/plot.py
index c2a3f68b..cc9535f7 100644
--- a/gstools/covmodel/plot.py
+++ b/gstools/covmodel/plot.py
@@ -63,7 +63,7 @@ def plot_vario_spatial(
     field = gstools.field.base.Field(model)
     field._value_type = "scalar"
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     field.mesh_type = "structured"
     x_s = np.linspace(-x_max, x_max) + x_min
     pos = [x_s] * model.dim
@@ -81,7 +81,7 @@ def plot_cov_spatial(
     field = gstools.field.base.Field(model)
     field._value_type = "scalar"
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     field.mesh_type = "structured"
     x_s = np.linspace(-x_max, x_max) + x_min
     pos = [x_s] * model.dim
@@ -99,7 +99,7 @@ def plot_cor_spatial(
     field = gstools.field.base.Field(model)
     field._value_type = "scalar"
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     field.mesh_type = "structured"
     x_s = np.linspace(-x_max, x_max) + x_min
     pos = [x_s] * model.dim
@@ -116,7 +116,7 @@ def plot_variogram(
     """Plot variogram of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(x_s, model.variogram(x_s), label=model.name + " variogram")
     ax.legend()
@@ -130,7 +130,7 @@ def plot_covariance(
     """Plot covariance of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(x_s, model.covariance(x_s), label=model.name + " covariance")
     ax.legend()
@@ -144,7 +144,7 @@ def plot_correlation(
     """Plot correlation function of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 * model.integral_scale
+        x_max = 3 * model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(x_s, model.correlation(x_s), label=model.name + " correlation")
     ax.legend()
@@ -158,7 +158,7 @@ def plot_spectrum(
     """Plot specturm of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 / model.integral_scale
+        x_max = 3 / model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(
         x_s,
@@ -176,7 +176,7 @@ def plot_spectral_density(
     """Plot spectral density of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 / model.integral_scale
+        x_max = 3 / model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(
         x_s,
@@ -194,7 +194,7 @@ def plot_spectral_rad_pdf(
     """Plot radial spectral pdf of a given CovModel."""
     fig, ax = _get_fig_ax(fig, ax)
     if x_max is None:
-        x_max = 3 / model.integral_scale
+        x_max = 3 / model.len_scale
     x_s = np.linspace(x_min, x_max)
     ax.plot(
         x_s,
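With the changed defaults above, plot ranges follow the length scale directly instead of the integral scale. A sketch of the effect, assuming ``CovModel.plot`` forwards to these helpers::

    model = gs.Exponential(dim=2, var=2, len_scale=8)
    model.plot()          # x-axis now spans 3 * len_scale = 24 by default
    model.plot(x_max=40)  # or pick the range explicitly, as in the example above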
diff --git a/gstools/covmodel/tools.py b/gstools/covmodel/tools.py
index 953631fc..0065dece 100644
--- a/gstools/covmodel/tools.py
+++ b/gstools/covmodel/tools.py
@@ -10,17 +10,29 @@
    InitSubclassMeta
    rad_fac
    set_len_anis
+   set_angles
+   no_of_angles
+   rot_planes
    check_bounds
-   inc_gamma
-   exp_int
-   inc_beta
+   check_arg_in_bounds
+   default_arg_from_bounds
 """
 # pylint: disable=C0103
 import numpy as np
 from scipy import special as sps

-__all__ = ["InitSubclassMeta", "rad_fac", "set_len_anis", "check_bounds"]
+__all__ = [
+    "InitSubclassMeta",
+    "rad_fac",
+    "set_len_anis",
+    "set_angles",
+    "no_of_angles",
+    "rot_planes",
+    "check_bounds",
+    "check_arg_in_bounds",
+    "default_arg_from_bounds",
+]


# __init_subclass__ hack ######################################################

@@ -135,7 +147,7 @@ def set_len_anis(dim, len_scale, anis):
             constant_values=1.0,
         )
     else:
-        # fill up length-scales with main len_scale, such that len()==dim
+        # fill up length-scales with the last len_scale, such that len()==dim
         if len(ls_tmp) < dim:
             ls_tmp = np.pad(ls_tmp, (0, dim - len(ls_tmp)), "edge")
     # if multiple length-scales are given, calculate the anisotropies
@@ -157,41 +169,64 @@ def set_angles(dim, angles):

     Parameters
     ----------
     dim : :class:`int`
-        spatial dimension (anything different from 1 and 2 is interpreted as 3)
-    angles : :class:`float`/list
+        spatial dimension
+    angles : :class:`float` or :class:`list`
         the angles of the SRF

     Returns
     -------
     angles : :class:`float`
         the angles fitting to the dimension
+
+    Notes
+    -----
+    If too few angles are given, they are filled up with `0`.
     """
-    if dim == 1:
-        # no rotation in 1D
-        out_angles = np.empty(0)
-    elif dim == 2:
-        # one rotation axis in 2D
-        out_angles = np.atleast_1d(angles)[:1]
-        # fill up the rotation angle array with zeros
-        out_angles = np.pad(
-            out_angles,
-            (0, 1 - len(out_angles)),
-            "constant",
-            constant_values=0.0,
-        )
-    else:
-        # three rotation axis in 3D
-        out_angles = np.atleast_1d(angles)[:3]
-        # fill up the rotation angle array with zeros
-        out_angles = np.pad(
-            out_angles,
-            (0, 3 - len(out_angles)),
-            "constant",
-            constant_values=0.0,
-        )
+    out_angles = np.atleast_1d(angles)[: no_of_angles(dim)]
+    # fill up the rotation angle array with zeros
+    out_angles = np.pad(
+        out_angles,
+        (0, no_of_angles(dim) - len(out_angles)),
+        "constant",
+        constant_values=0.0,
+    )
     return out_angles


+def no_of_angles(dim):
+    """
+    Calculate the number of rotation angles depending on the dimension.
+
+    Parameters
+    ----------
+    dim : :class:`int`
+        spatial dimension
+
+    Returns
+    -------
+    :class:`int`
+        Number of angles.
+    """
+    return (dim * (dim - 1)) // 2
+
+
+def rot_planes(dim):
+    """
+    Get all 2D sub-planes for rotation.
+
+    Parameters
+    ----------
+    dim : :class:`int`
+        spatial dimension
+
+    Returns
+    -------
+    :class:`list`
+        All 2D sub-planes for rotation.
+    """
+    return [(i, j) for i in range(dim - 1) for j in range(i + 1, dim)]
+
+
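The two helpers generalize the former 1D/2D/3D special-casing to any dimension (``dim * (dim - 1) / 2`` rotation planes, one angle per plane); a quick check::

    from gstools.covmodel.tools import no_of_angles, rot_planes

    print(no_of_angles(2))  # 1
    print(no_of_angles(3))  # 3
    print(no_of_angles(4))  # 6
    print(rot_planes(3))    # [(0, 1), (0, 2), (1, 2)]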
 def check_bounds(bounds):
     """
     Check if given bounds are valid.
@@ -217,3 +252,50 @@ def check_bounds(bounds):
     if len(bounds) == 3 and bounds[2] not in ("oo", "oc", "co", "cc"):
         return False
     return True
+
+
+def check_arg_in_bounds(model, arg, val=None):
+    """Check if given argument value is in bounds of the given model."""
+    if arg not in model.arg_bounds:
+        raise ValueError("check bounds: unknown argument: {}".format(arg))
+    bnd = list(model.arg_bounds[arg])
+    val = getattr(model, arg) if val is None else val
+    error_case = 0
+    if len(bnd) == 2:
+        bnd.append("cc")  # use closed intervals by default
+    if bnd[2][0] == "c":
+        if val < bnd[0]:
+            error_case = 1
+    else:
+        if val <= bnd[0]:
+            error_case = 2
+    if bnd[2][1] == "c":
+        if val > bnd[1]:
+            error_case = 3
+    else:
+        if val >= bnd[1]:
+            error_case = 4
+    return error_case
+
+
+def default_arg_from_bounds(bounds):
+    """
+    Determine a default value from given bounds.
+
+    Parameters
+    ----------
+    bounds : list
+        bounds for the value.
+
+    Returns
+    -------
+    float
+        Default value in the given bounds.
+    """
+    if bounds[0] > -np.inf and bounds[1] < np.inf:
+        return (bounds[0] + bounds[1]) / 2.0
+    if bounds[0] > -np.inf:
+        return bounds[0] + 1.0
+    if bounds[1] < np.inf:
+        return bounds[1] - 1.0
+    return 0.0
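``default_arg_from_bounds`` covers all four combinations of (in)finite bounds; a quick check::

    import numpy as np
    from gstools.covmodel.tools import default_arg_from_bounds

    print(default_arg_from_bounds([1, 2]))             # 1.5 (mean of finite bounds)
    print(default_arg_from_bounds([2, np.inf]))        # 3.0 (lower bound + 1)
    print(default_arg_from_bounds([-np.inf, 0]))       # -1.0 (upper bound - 1)
    print(default_arg_from_bounds([-np.inf, np.inf]))  # 0.0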
diff --git a/gstools/field/base.py b/gstools/field/base.py
index 75ae0467..39909af0 100755
--- a/gstools/field/base.py
+++ b/gstools/field/base.py
@@ -396,7 +396,7 @@ def __repr__(self):
         return "Field(model={0}, mean={1})".format(self.model, self.mean)


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
diff --git a/gstools/field/generator.py b/gstools/field/generator.py
index 3bbe7a58..67ed7ae9 100644
--- a/gstools/field/generator.py
+++ b/gstools/field/generator.py
@@ -502,7 +502,7 @@ def _set_dtype(x, y=None, z=None, dtype=np.double):
     return x, y, z


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
diff --git a/gstools/field/srf.py b/gstools/field/srf.py
index 74b6ea4c..bc6052fa 100644
--- a/gstools/field/srf.py
+++ b/gstools/field/srf.py
@@ -288,7 +288,7 @@ def __repr__(self):
     )


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
diff --git a/gstools/krige/base.py b/gstools/krige/base.py
index 366d02cd..eb71e0a4 100755
--- a/gstools/krige/base.py
+++ b/gstools/krige/base.py
@@ -384,7 +384,7 @@ def unbiased(self):
         return self._unbiased


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
diff --git a/gstools/krige/methods.py b/gstools/krige/methods.py
index bad1b059..7c0eed12 100644
--- a/gstools/krige/methods.py
+++ b/gstools/krige/methods.py
@@ -379,7 +379,7 @@ def __repr__(self):
     )


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
diff --git a/gstools/random/rng.py b/gstools/random/rng.py
index e2915f6f..8f6fd578 100644
--- a/gstools/random/rng.py
+++ b/gstools/random/rng.py
@@ -154,9 +154,11 @@ def sample_sphere(self, dim, size=None):
             x[, y[, z]] coordinates on the sphere with shape (dim, size)
         """
         if size is None:  # pragma: no cover
-            coord = np.empty(dim, dtype=np.double)
+            coord = np.empty((dim, 1), dtype=np.double)
         else:
-            coord = np.empty((dim, size), dtype=np.double)
+            coord = np.empty(  # safer conversion of size to resulting shape
+                (dim,) + tuple(np.atleast_1d(size)), dtype=np.double
+            )
         if dim == 1:
             coord[0] = self.random.choice([-1, 1], size=size)
         elif dim == 2:
@@ -169,7 +171,25 @@ def sample_sphere(self, dim, size=None):
             coord[0] = np.sqrt(1.0 - ang2 ** 2) * np.cos(ang1)
             coord[1] = np.sqrt(1.0 - ang2 ** 2) * np.sin(ang1)
             coord[2] = ang2
-        return coord
+        else:  # pragma: no cover
+            # http://corysimon.github.io/articles/uniformdistn-on-sphere/
+            coord = self.random.normal(size=coord.shape)
+            while True:  # loop until all norms are non-zero
+                norm = np.linalg.norm(coord, axis=0)
+                # check for zero norms
+                zero_norms = np.isclose(norm, 0)
+                # exit the loop if all norms are non-zero
+                if not np.any(zero_norms):
+                    break
+                # transpose, since the next transpose reverses axis order
+                zero_samples = zero_norms.T.nonzero()
+                # need to transpose to have dim-axis last
+                new_shape = coord.T[zero_samples].shape
+                # resample the zero norm samples
+                coord.T[zero_samples] = self.random.normal(size=new_shape)
+            # project onto the sphere
+            coord = coord / norm
+        return np.reshape(coord, dim) if size is None else coord

     @property
     def random(self):
@@ -203,7 +223,7 @@ def __repr__(self):
         return "RNG(seed={})".format(self.seed)


-if __name__ == "__main__":  # pragma: no cover
+if __name__ == "__main__":
     import doctest

     doctest.testmod()
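The rejection-sampling branch (normalized Gaussian vectors, with zero-norm samples redrawn) makes ``sample_sphere`` work in any dimension; a sketch, assuming ``RNG`` is reachable as ``gstools.random.RNG``::

    import numpy as np
    from gstools.random import RNG

    rng = RNG(seed=19970221)
    coord = rng.sample_sphere(dim=4, size=5)
    print(coord.shape)                    # (4, 5)
    print(np.linalg.norm(coord, axis=0))  # all ~ 1.0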
""" x, y, z, fields = _vtk_unstructured_helper(pos=pos, fields=fields) - try: - import pyvista as pv - + if pv is not None: grid = pv.PolyData(np.c_[x, y, z]).cast_to_unstructured_grid() grid.point_arrays.update(fields) - except ImportError: + else: raise ImportError("Please install PyVista to create VTK datasets.") return grid @@ -235,7 +230,4 @@ def vtk_export( """ if mesh_type == "structured": return vtk_export_structured(filename=filename, pos=pos, fields=fields) - else: - return vtk_export_unstructured( - filename=filename, pos=pos, fields=fields - ) + return vtk_export_unstructured(filename=filename, pos=pos, fields=fields) diff --git a/gstools/variogram/estimator.pyx b/gstools/variogram/estimator.pyx index 8762b946..41414f6c 100644 --- a/gstools/variogram/estimator.pyx +++ b/gstools/variogram/estimator.pyx @@ -77,7 +77,7 @@ cdef inline void normalization_cressie( if counts[i] == 0: counts[i] = 1 variogram[i] = ( - (1./counts[i] * variogram[i])**4 / + 0.5 * (1./counts[i] * variogram[i])**4 / (0.457 + 0.494 / counts[i] + 0.045 / counts[i]**2) ) diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index ab87562f..e13ddf32 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -21,7 +21,7 @@ def _set_estimator(estimator): - """Translate the verbose Python estimator identifier to single char""" + """Translate the verbose Python estimator identifier to single char.""" if estimator.lower() == "matheron": cython_estimator = "m" elif estimator.lower() == "cressie": @@ -50,16 +50,18 @@ def vario_estimate_unstructured( \gamma(r_k) = \frac{1}{2 N(r_k)} \sum_{i=1}^{N(r_k)} (z(\mathbf x_i) - z(\mathbf x_i'))^2 \; , - with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins. + with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` + being the bins. Or if the estimator "cressie" was chosen: .. math:: - \gamma(r_k) = \frac{\left(\frac{1}{N(r_k)} \sum_{i=1}^{N(r_k)} + \gamma(r_k) = \frac{\frac{1}{2}\left(\frac{1}{N(r_k)}\sum_{i=1}^{N(r_k)} \left|z(\mathbf x_i) - z(\mathbf x_i')\right|^{0.5}\right)^4} {0.457 + 0.494 / N(r_k) + 0.045 / N^2(r_k)} \; , - with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins. + with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` + being the bins. The Cressie estimator is more robust to outliers. Notes @@ -133,16 +135,18 @@ def vario_estimate_structured(field, direction="x", estimator="matheron"): \gamma(r_k) = \frac{1}{2 N(r_k)} \sum_{i=1}^{N(r_k)} (z(\mathbf x_i) - z(\mathbf x_i'))^2 \; , - with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins. + with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` + being the bins. Or if the estimator "cressie" was chosen: .. math:: - \gamma(r_k) = \frac{\left(\frac{1}{N(r_k)} \sum_{i=1}^{N(r_k)} + \gamma(r_k) = \frac{\frac{1}{2}\left(\frac{1}{N(r_k)}\sum_{i=1}^{N(r_k)} \left|z(\mathbf x_i) - z(\mathbf x_i')\right|^{0.5}\right)^4} {0.457 + 0.494 / N(r_k) + 0.045 / N^2(r_k)} \; , - with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins. + with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` + being the bins. The Cressie estimator is more robust to outliers. 
diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py
index ab87562f..e13ddf32 100644
--- a/gstools/variogram/variogram.py
+++ b/gstools/variogram/variogram.py
@@ -21,7 +21,7 @@


 def _set_estimator(estimator):
-    """Translate the verbose Python estimator identifier to single char"""
+    """Translate the verbose Python estimator identifier to single char."""
     if estimator.lower() == "matheron":
         cython_estimator = "m"
     elif estimator.lower() == "cressie":
@@ -50,16 +50,18 @@ def vario_estimate_unstructured(
        \gamma(r_k) = \frac{1}{2 N(r_k)} \sum_{i=1}^{N(r_k)} (z(\mathbf x_i) -
        z(\mathbf x_i'))^2 \; ,

-    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins.
+    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}`
+    being the bins.

     Or if the estimator "cressie" was chosen:

     .. math::
-       \gamma(r_k) = \frac{\left(\frac{1}{N(r_k)} \sum_{i=1}^{N(r_k)}
+       \gamma(r_k) = \frac{\frac{1}{2}\left(\frac{1}{N(r_k)}\sum_{i=1}^{N(r_k)}
        \left|z(\mathbf x_i) - z(\mathbf x_i')\right|^{0.5}\right)^4}
        {0.457 + 0.494 / N(r_k) + 0.045 / N^2(r_k)} \; ,

-    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins.
+    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}`
+    being the bins.
     The Cressie estimator is more robust to outliers.

     Notes
@@ -133,16 +135,18 @@ def vario_estimate_structured(field, direction="x", estimator="matheron"):
        \gamma(r_k) = \frac{1}{2 N(r_k)} \sum_{i=1}^{N(r_k)} (z(\mathbf x_i) -
        z(\mathbf x_i'))^2 \; ,

-    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins.
+    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}`
+    being the bins.

     Or if the estimator "cressie" was chosen:

     .. math::
-       \gamma(r_k) = \frac{\left(\frac{1}{N(r_k)} \sum_{i=1}^{N(r_k)}
+       \gamma(r_k) = \frac{\frac{1}{2}\left(\frac{1}{N(r_k)}\sum_{i=1}^{N(r_k)}
        \left|z(\mathbf x_i) - z(\mathbf x_i')\right|^{0.5}\right)^4}
        {0.457 + 0.494 / N(r_k) + 0.045 / N^2(r_k)} \; ,

-    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}` being the bins.
+    with :math:`r_k \leq \| \mathbf x_i - \mathbf x_i' \| < r_{k+1}`
+    being the bins.
     The Cressie estimator is more robust to outliers.

     Warnings
@@ -162,7 +166,7 @@ def vario_estimate_structured(field, direction="x", estimator="matheron"):
     estimator : :class:`str`, optional
         the estimator function, possible choices:

-            * "mathoron": the standard method of moments of Matheron
+            * "matheron": the standard method of moments of Matheron
             * "cressie": an estimator more robust to outliers

         Default: "matheron"
diff --git a/tests/test_covmodel.py b/tests/test_covmodel.py
index d331773d..138d6b39 100644
--- a/tests/test_covmodel.py
+++ b/tests/test_covmodel.py
@@ -14,6 +14,7 @@
     Linear,
     Circular,
     Spherical,
+    Intersection,
     TPLGaussian,
     TPLExponential,
     TPLStable,
@@ -31,6 +32,7 @@ def setUp(self):
             Linear,
             Circular,
             Spherical,
+            Intersection,
             TPLGaussian,
             TPLExponential,
             TPLStable,
@@ -44,6 +46,7 @@ def setUp(self):
             Linear,
             Circular,
             Spherical,
+            Intersection,
         ]
         self.dims = range(1, 4)
         self.lens = [[10, 5, 2]]
@@ -61,7 +64,7 @@ def test_creation(self):

         class User(CovModel):
             def cor(self, h):
-                return np.exp(-h ** 2)
+                return np.exp(-(h ** 2))

         user = User(len_scale=2)
         self.assertAlmostEqual(user.correlation(1), np.exp(-0.25))
@@ -120,6 +123,12 @@ def cor(self, h):
             if model.has_ppf:
                 model.spectral_rad_ppf([0.0, 0.99])
             model.pykrige_kwargs
+            # check arg bound setting
+            model.set_arg_bounds(
+                var=[2, np.inf], nugget=[1, 2]
+            )
+            self.assertAlmostEqual(model.var, 3)
+            self.assertAlmostEqual(model.nugget, 1.5)

     def test_fitting(self):
         for Model in self.std_cov_models:
@@ -127,6 +136,30 @@ def test_fitting(self):
                 model = Model(dim=dim)
                 model.fit_variogram(self.gamma_x, self.gamma_y, nugget=False)
                 self.assertAlmostEqual(model.nugget, 0.0)
+                model = Model(dim=dim)
+                # also check that var is reset when a lower sill is given
+                model.fit_variogram(self.gamma_x, self.gamma_y, sill=0.9)
+                self.assertAlmostEqual(model.nugget + model.var, 0.9)
+                model = Model(dim=dim)
+                # more detailed checks
+                model.fit_variogram(
+                    self.gamma_x, self.gamma_y, sill=2, nugget=False
+                )
+                self.assertAlmostEqual(model.var, 2.0)
+                model = Model(dim=dim)
+                model.fit_variogram(
+                    self.gamma_x, self.gamma_y, sill=2, nugget=1
+                )
+                self.assertAlmostEqual(model.var, 1)
+                model = Model(dim=dim)
+                ret = model.fit_variogram(
+                    self.gamma_x,
+                    self.gamma_y,
+                    loss="linear",
+                    return_r2=True,
+                    weights="inv",
+                )
+                self.assertEqual(len(ret), 3)


 if __name__ == "__main__":
diff --git a/tests/test_variogram_structured.py b/tests/test_variogram_structured.py
index 390ab5b3..e7ff0672 100644
--- a/tests/test_variogram_structured.py
+++ b/tests/test_variogram_structured.py
@@ -51,7 +51,7 @@ def test_list(self):

     def test_cressie_1d(self):
         z = [41.2, 40.2, 39.7, 39.2, 40.1, 38.3, 39.1, 40.0, 41.1, 40.3]
         gamma = variogram.vario_estimate_structured(z, estimator="cressie")
-        self.assertAlmostEqual(gamma[1], 1.546, places=3)
+        self.assertAlmostEqual(gamma[1], 1.546 / 2.0, places=3)

     def test_1d(self):
         # literature values
@@ -169,8 +169,7 @@ def test_uncorrelated_cressie_2d(self):
             field, direction="y", estimator="cressie"
         )

-        # TODO figure out what is going on here
-        var = 0.177
+        var = 1.0 / 12.0
         self.assertAlmostEqual(gamma_x[0], 0.0, places=1)
         self.assertAlmostEqual(gamma_x[len(gamma_x) // 2], var, places=1)
         self.assertAlmostEqual(gamma_y[0], 0.0, places=1)