diff --git a/skgstat/DirectionalVariogram.py b/skgstat/DirectionalVariogram.py index 880bada..e24779d 100644 --- a/skgstat/DirectionalVariogram.py +++ b/skgstat/DirectionalVariogram.py @@ -28,7 +28,7 @@ def __init__(self, model='spherical', dist_func='euclidean', bin_func='even', - normalize=True, + normalize=False, fit_method='trf', fit_sigma=None, directional_model='triangle', @@ -38,8 +38,7 @@ def __init__(self, use_nugget=False, maxlag=None, n_lags=10, - verbose=False, - harmonize=False + verbose=False ): r"""Variogram Class @@ -193,8 +192,7 @@ def __init__(self, function. verbose : bool Set the Verbosity of the class. Not Implemented yet. - harmonize : bool - this kind of works so far, but will be rewritten (and documented) + """ # Set coordinates self._X = np.asarray(coordinates) @@ -254,9 +252,6 @@ def __init__(self, # specify if the lag should be given absolute or relative to the maxlag self._normalized = normalize - # specify if the experimental variogram shall be harmonized - self.harmonize = harmonize - # set the fitting method and sigma array self.fit_method = fit_method self._fit_sigma = None diff --git a/skgstat/SpaceTimeVariogram.py b/skgstat/SpaceTimeVariogram.py index 17b8f7c..7ff06dd 100644 --- a/skgstat/SpaceTimeVariogram.py +++ b/skgstat/SpaceTimeVariogram.py @@ -30,7 +30,6 @@ def __init__(self, estimator='matheron', use_nugget=False, model='product-sum', - verbose=False ): # set coordinates array diff --git a/skgstat/Variogram.py b/skgstat/Variogram.py index 895a3fd..5ece781 100644 --- a/skgstat/Variogram.py +++ b/skgstat/Variogram.py @@ -3,13 +3,14 @@ """ import copy import os +import warnings import numpy as np from pandas import DataFrame import matplotlib.pyplot as plt from scipy.optimize import curve_fit from scipy.spatial.distance import pdist, squareform -from numba import jit +from sklearn.isotonic import IsotonicRegression from skgstat import estimators, models, binning @@ -29,14 +30,13 @@ def __init__(self, model='spherical', dist_func='euclidean', bin_func='even', - normalize=True, + normalize=False, fit_method='trf', fit_sigma=None, use_nugget=False, maxlag=None, n_lags=10, - verbose=False, - harmonize=False + verbose=False ): r"""Variogram Class @@ -149,19 +149,8 @@ def __init__(self, function. verbose : bool Set the Verbosity of the class. Not Implemented yet. - harmonize : bool - this kind of works so far, but will be rewritten (and documented) """ - # deprecation warnings - if normalize and 'SKG_SUPPRESS' not in os.environ.keys(): # pragma: no cover - print('Warning: normalize will change the default value \ -to False. You can add a SKG_SUPPRESS environment variable to suppress this warning.') - if 'SKG_SUPPRESS' not in os.environ.keys(): - print("Warning: 'harmonize' is deprecated and will be removed\ -with the next release. You can add a 'SKG_SUPPRESS' environment variable to \ -suppress this warning.") - # Set coordinates self._X = np.asarray(coordinates) @@ -188,6 +177,9 @@ def __init__(self, self._maxlag = None self.maxlag = maxlag + # harmonize model placeholder + self._harmonize = False + # estimator can be a function or a string self._estimator = None self.set_estimator(estimator_name=estimator) @@ -205,12 +197,6 @@ def __init__(self, # specify if the lag should be given absolute or relative to the maxlag self._normalized = normalize - # specify if the experimental variogram shall be harmonized - self.harmonize = harmonize - if harmonize: - raise DeprecationWarning('Argument will be removed with 0.3') - self._harmonize = None - # set if nugget effect shall be used self._use_nugget = None self.use_nugget = use_nugget @@ -431,8 +417,8 @@ def bins(self, bins): def n_lags(self): """Number of lag bins - Pass the number of lag bins to be used on - this Variogram instance. This will reset + Pass the number of lag bins to be used on + this Variogram instance. This will reset the grouping index and fitting parameters """ @@ -444,12 +430,12 @@ def n_lags(self, n): # string are not implemented yet if isinstance(n, str): raise NotImplementedError('n_lags string values not implemented') - + # n_lags is int elif isinstance(n, int): if n < 1: raise ValueError('n_lags has to be a positive integer') - + # set parameter self._n_lags = n @@ -459,7 +445,7 @@ def n_lags(self, n): # else else: raise ValueError('n_lags has to be a positive integer') - + # reset the fitting self.cof = None self.cov = None @@ -518,6 +504,8 @@ def set_model(self, model_name): self.cof, self.cov = None, None if isinstance(model_name, str): + # at first reset harmonize + self._harmonize = False if model_name.lower() == 'spherical': self._model = models.spherical elif model_name.lower() == 'exponential': @@ -530,12 +518,55 @@ def set_model(self, model_name): self._model = models.stable elif model_name.lower() == 'matern': self._model = models.matern + elif model_name.lower() == 'harmonize': + self._harmonize = True + self._model = self._build_harmonized_model() else: raise ValueError( 'The theoretical Variogram function %s is not understood, please provide the function' % model_name) else: self._model = model_name + def _build_harmonized_model(self): + x = self.bins + y = self.experimental + + _x = x[~np.isnan(y)] + _y = y[~np.isnan(y)] + regr = IsotonicRegression(increasing=True).fit(_x, _y) + + # create the model function + def harmonize(x): + """Monotonized Variogram + + Return the isotonic harmonized experimental variogram. + This means, the experimental variogram is monotonic after harmonization. + + The harmonization is done using following Hinterding (2003) using + the PAVA algorithm (Barlow and Bartholomew, 1972). + + Returns + ------- + gamma : numpy.ndarray + monotonized experimental variogram + + References + ---------- + Barlow, R., D. Bartholomew, et al. (1972): Statistical Interference Under Order Restrictions. + John Wiley and Sons, New York. + Hiterding, A. (2003): Entwicklung hybrider Interpolationsverfahren für den automatisierten Betrieb am + Beispiel meteorologischer Größen. Dissertation, Institut für Geoinformatik, Westphälische + Wilhelms-Universität Münster, IfGIprints, Münster. ISBN: 3-936616-12-4 + + """ + + if isinstance(x, (list, tuple, np.ndarray)): + return regr.transform(x) + else: + return regr.transform([x]) + + return harmonize + @property def use_nugget(self): return self._use_nugget @@ -668,7 +699,6 @@ def fit_sigma(self): ------- void - Notes ----- The cost function is defined as: @@ -855,9 +885,22 @@ def fit(self, force=False, method=None, sigma=None, **kwargs): _x = x[~np.isnan(y)] _y = y[~np.isnan(y)] - # the model function is re-defined. otherwise scipy cannot determine - # the number of parameters - # TODO: def f(n of par) + # handle harmonized models + if self._harmonize: + _x = np.linspace(0, np.max(_x), 100) + _y = self._model(_x) + + # get the params + s = 0.95 * np.nanmax(_y) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + r = np.argwhere(_y >= s)[0][0] + n = _y[0] if self.use_nugget else 0.0 + + # set the params + self.cof = [r, s, n] + return + # Switch the method # Trust Region Reflective @@ -883,11 +926,6 @@ def fit(self, force=False, method=None, sigma=None, **kwargs): **kwargs ) - # monotonization - elif self.fit_method == 'harmonize': - # TODO, make the harmonization and infer the cof from that - raise NotImplementedError - else: raise ValueError("fit method has to be one of ['trf', 'lm']") @@ -914,51 +952,9 @@ def transform(self, x): # if instance is not fitted, fit it if self.cof is None: self.fit(force=True) - - return np.fromiter(map(self.compiled_model, x), dtype=float) - - @property - def compiled_model(self): - """Compiled theoretical variogram model - - Compile the model using the actual fitting parameters to return a - function implementing them. - - The compiled_model will be removed in version 0.3. Use the - `Variogram.fitted_model` property instead. It is works in the same - way, but is significantly faster - - Returns - ------- - callable - - """ - if not 'SKG_SUPPRESS' in os.environ: - print('Warning: compiled_model is deprecated and will be removed. \ -Use Variogram.fitted_model instead. You can add an SKG_SUPPRESS environment variable to supress this warning.') - if self.cof is None: - self.fit(force=True) - - # get the function - func = self._model - - # get the pars - params = self.describe() - ming = params['nugget'] - maxg = params['sill'] - - # define the wrapper - def model(x): - gamma = func(x, *self.cof) - if int(x) == 0 and not np.isfinite(gamma): - return ming - elif int(x) > 0 and not np.isfinite(gamma): - return maxg - else: - return gamma - - # return - return model + + # return the result + return self.fitted_model(x) @property def fitted_model(self): @@ -967,8 +963,7 @@ def fitted_model(self): Returns a callable that takes a distance value and returns a semivariance. This model is fitted to the current Variogram parameters. The function will be interpreted at return time with the - parameters hard-coded into the function code. This makes it way - faster than`Variogram.compiled_model`. + parameters hard-coded into the function code. Returns ------- @@ -985,7 +980,10 @@ def fitted_model(self): # get the function func = self._model - code = """model = lambda x: func(x, %s)""" % \ + if self._harmonize: + code = """model = lambda x: func(x)""" + else: + code = """model = lambda x: func(x, %s)""" % \ (', '.join([str(_) for _ in cof])) # run the code @@ -1101,10 +1099,7 @@ def experimental(self): Variogram.isotonic """ - if self.harmonize: - return self.isotonic - else: - return self._experimental + return self._experimental @property # @jit @@ -1138,35 +1133,6 @@ def _experimental(self): # apply return y.copy() - @property - def isotonic(self): - """ - Return the isotonic harmonized experimental variogram. - This means, the experimental variogram is monotonic after harmonization. - - The harmonization is done using PAVA algorithm: - - Barlow, R., D. Bartholomew, et al. (1972): Statistical Interference Under Order Restrictions. - John Wiley and Sons, New York. - Hiterding, A. (2003): Entwicklung hybrider Interpolationsverfahren für den automatisierten Betrieb am - Beispiel meteorologischer Größen. Dissertation, Institut für Geoinformatik, Westphälische - Wilhelms-Universität Münster, IfGIprints, Münster. ISBN: 3-936616-12-4 - - TODO: solve the import - - :return: np.ndarray, monotonized experimental variogram - """ - # TODO this is imported in the function as sklearn is not a dependency (and should not be for now) - raise NotImplementedError - - try: - from sklearn.isotonic import IsotonicRegression - y = self._experimental - x = self.bins - return IsotonicRegression().fit_transform(x,y) - except ImportError: - raise NotImplementedError('scikit-learn is not installed, but the isotonic function without sklear dependency is not installed yet.') - def __get_fit_bounds(self, x, y): """ Return the bounds for parameter space in fitting a variogram model. diff --git a/skgstat/interfaces/variogram_estimator.py b/skgstat/interfaces/variogram_estimator.py index 1e73fd7..9e26b2b 100644 --- a/skgstat/interfaces/variogram_estimator.py +++ b/skgstat/interfaces/variogram_estimator.py @@ -18,7 +18,6 @@ def __init__(self, maxlag=None, n_lags=10, verbose=False, - harmonize=False, use_score='rmse' ): r"""VariogramEstimator class @@ -65,7 +64,6 @@ def __init__(self, self.maxlag = maxlag self.n_lags = n_lags self.verbose = verbose - self.harmonize = harmonize # add Estimator specific attributes self.use_score = use_score @@ -105,8 +103,7 @@ def fit(self, X, y): fit_sigma=self.fit_sigma, use_nugget=self.use_nugget, maxlag=self.maxlag, - n_lags=self.n_lags, - harmonize=self.harmonize + n_lags=self.n_lags ) # append the data diff --git a/skgstat/tests/DirectionalVariogram.py b/skgstat/tests/DirectionalVariogram.py index cb213d1..ab773fe 100644 --- a/skgstat/tests/DirectionalVariogram.py +++ b/skgstat/tests/DirectionalVariogram.py @@ -15,13 +15,13 @@ def setUp(self): self.v = np.random.normal(5, 4, 30) def test_standard_settings(self): - DV = DirectionalVariogram(self.c, self.v) + DV = DirectionalVariogram(self.c, self.v, normalize=True) for x, y in zip(DV.parameters, [406., 2145., 0]): self.assertAlmostEqual(x, y, places=0) def test_azimuth(self): - DV = DirectionalVariogram(self.c, self.v, azimuth=-45) + DV = DirectionalVariogram(self.c, self.v, azimuth=-45, normalize=True) for x, y in zip(DV.parameters, [27.288, 131.644, 0]): self.assertAlmostEqual(x, y, places=3) @@ -36,7 +36,7 @@ def test_invalid_azimuth(self): ) def test_tolerance(self): - DV = DirectionalVariogram(self.c, self.v, tolerance=15) + DV = DirectionalVariogram(self.c, self.v, tolerance=15, normalize=True) for x, y in zip(DV.parameters, [32.474, 2016.601, 0]): self.assertAlmostEqual(x, y, places=3) @@ -51,7 +51,7 @@ def test_invalid_tolerance(self): ) def test_bandwidth(self): - DV = DirectionalVariogram(self.c, self.v, bandwidth=12) + DV = DirectionalVariogram(self.c, self.v, bandwidth=12, normalize=True) for x, y in zip(DV.parameters, [435.733, 2746.608, 0]): self.assertAlmostEqual(x, y, places=3) diff --git a/skgstat/tests/Variogram.py b/skgstat/tests/Variogram.py index df936c6..9160294 100644 --- a/skgstat/tests/Variogram.py +++ b/skgstat/tests/Variogram.py @@ -1,6 +1,8 @@ import unittest +import os import numpy as np +import pandas as pd from numpy.testing import assert_array_almost_equal import matplotlib.pyplot as plt @@ -19,7 +21,7 @@ def setUp(self): def test_standard_settings(self): V = Variogram(self.c, self.v) - for x, y in zip(V.parameters, [301.266, 291.284, 0]): + for x, y in zip(V.parameters, [7.122, 13.966, 0]): self.assertAlmostEqual(x, y, places=3) def test_pass_median_maxlag_on_instantiation(self): @@ -215,7 +217,7 @@ def test_maxlag_custom_value(self): self.assertAlmostEqual(V.maxlag, 33.3, places=1) def test_use_nugget_setting(self): - V = Variogram(self.c, self.v) + V = Variogram(self.c, self.v, normalize=True) # test the property and setter self.assertEqual(V.use_nugget, False) @@ -491,6 +493,20 @@ def test_implicit_run_fit_transform(self): assert_array_almost_equal(result, res, decimal=2) + def test_harmonize_model(self): + # load data sample + data = pd.read_csv(os.path.dirname(__file__) + '/sample.csv') + V = Variogram(data[['x', 'y']].values, data.z.values) + + V.model = 'harmonize' + x = np.linspace(0, np.max(V.bins), 10) + + assert_array_almost_equal( + V.transform(x), + [np.NaN, 0.57, 1.01, 1.12, 1.15, 1.15, 1.15, 1.15, 1.21, 1.65], + decimal=2 + ) + class TestVariogramQaulityMeasures(unittest.TestCase): def setUp(self):