From 95b14311e4333c73d3c229c9e5a494d62aa93235 Mon Sep 17 00:00:00 2001 From: Tobias Glaubach <34371519+TobiasGlaubach@users.noreply.github.com> Date: Fri, 6 Nov 2020 16:35:27 +0100 Subject: [PATCH] Implemented directional variograms for `unstructured` (#87) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * implemented a prototype for the unstructured function with angles. * minor fix with variable name * added docstring and arguments for angle estimation * added a working version which tests the basic functionality * docstring adaption * changed for automatic testcase data generation and split up the test cases * changed default angle tolerance to 25deg * added option to also return the counts (number of pairs) from unstructured * implemented a meaningful test for 2d variogram estimation * bugfix for 3d case when elevation is 90° or 270° * implemented some basic 3d test cases * vario: cleanup cython routines; use greate-circle for tolerance in 3D; check both directions between point pairs * vario: doc update; correct intervals for angles; general formatting of angles array * vario: better handling of angle ranges * vario: fix wrong assumption about hemisphere for angles Co-authored-by: MuellerSeb --- gstools/variogram/estimator.pyx | 97 ++++++++- gstools/variogram/variogram.py | 81 ++++++-- tests/test_variogram_unstructured.py | 296 ++++++++++++++++++++++++++- 3 files changed, 449 insertions(+), 25 deletions(-) diff --git a/gstools/variogram/estimator.pyx b/gstools/variogram/estimator.pyx index 41414f6c..5c05a32d 100644 --- a/gstools/variogram/estimator.pyx +++ b/gstools/variogram/estimator.pyx @@ -10,7 +10,7 @@ import numpy as np cimport cython from cython.parallel import prange, parallel from libcpp.vector cimport vector -from libc.math cimport fabs, sqrt +from libc.math cimport fabs, sqrt, atan2, acos, asin, sin, cos, M_PI cimport numpy as np @@ -47,6 +47,68 @@ cdef inline double _distance_3d( (y[i] - y[j]) * (y[i] - y[j]) + (z[i] - z[j]) * (z[i] - z[j])) +cdef inline bint _angle_test_1d( + const double[:] x, + const double[:] y, + const double[:] z, + const double[:] angles, + const double angles_tol, + const int i, + const int j +) nogil: + return True + +cdef inline bint _angle_test_2d( + const double[:] x, + const double[:] y, + const double[:] z, + const double[:] angles, + const double angles_tol, + const int i, + const int j +) nogil: + cdef double dx = x[i] - x[j] + cdef double dy = y[i] - y[j] + # azimuth + cdef double phi1 = atan2(dy,dx) % (2.0 * M_PI) + cdef double phi2 = atan2(-dy,-dx) % (2.0 * M_PI) + # check both directions (+/-) + cdef bint dir1 = fabs(phi1 - angles[0]) <= angles_tol + cdef bint dir2 = fabs(phi2 - angles[0]) <= angles_tol + return dir1 or dir2 + +cdef inline bint _angle_test_3d( + const double[:] x, + const double[:] y, + const double[:] z, + const double[:] angles, + const double angles_tol, + const int i, + const int j +) nogil: + cdef double dx = x[i] - x[j] + cdef double dy = y[i] - y[j] + cdef double dz = z[i] - z[j] + cdef double dr = sqrt(dx**2 + dy**2 + dz**2) + # azimuth + cdef double phi1 = atan2(dy, dx) % (2.0 * M_PI) + cdef double phi2 = atan2(-dy, -dx) % (2.0 * M_PI) + # elevation + cdef double theta1 = acos(dz / dr) + cdef double theta2 = acos(-dz / dr) + # definitions for great-circle distance (for tolerance check) + cdef double dx1 = sin(theta1) * cos(phi1) - sin(angles[1]) * cos(angles[0]) + cdef double dy1 = sin(theta1) * sin(phi1) - sin(angles[1]) * sin(angles[0]) + cdef double dz1 = cos(theta1) - cos(angles[1]) + cdef double dx2 = sin(theta2) * cos(phi2) - sin(angles[1]) * cos(angles[0]) + cdef double dy2 = sin(theta2) * sin(phi2) - sin(angles[1]) * sin(angles[0]) + cdef double dz2 = cos(theta2) - cos(angles[1]) + cdef double dist1 = 2.0 * asin(sqrt(dx1**2 + dy1**2 + dz1**2) * 0.5) + cdef double dist2 = 2.0 * asin(sqrt(dx2**2 + dy2**2 + dz2**2) * 0.5) + # check both directions (+/-) + cdef bint dir1 = dist1 <= angles_tol + cdef bint dir2 = dist2 <= angles_tol + return dir1 or dir2 cdef inline double estimator_matheron(const double f_diff) nogil: return f_diff * f_diff @@ -110,6 +172,16 @@ ctypedef double (*_dist_func)( const int ) nogil +ctypedef bint (*_angle_test_func)( + const double[:], + const double[:], + const double[:], + const double[:], + const double, + const int, + const int +) nogil + def unstructured( const double[:] f, @@ -117,6 +189,8 @@ def unstructured( const double[:] x, const double[:] y=None, const double[:] z=None, + const double[:] angles=None, + const double angles_tol=0.436332, str estimator_type='m' ): if x.shape[0] != f.shape[0]: @@ -126,21 +200,35 @@ def unstructured( raise ValueError('len(bin_edges) too small') cdef _dist_func distance + cdef _angle_test_func angle_test + # 3d if z is not None: if z.shape[0] != f.shape[0]: raise ValueError('len(z) = {0} != len(f) = {1} '. format(z.shape[0], f.shape[0])) distance = _distance_3d + angle_test = _angle_test_3d # 2d elif y is not None: if y.shape[0] != f.shape[0]: raise ValueError('len(y) = {0} != len(f) = {1} '. format(y.shape[0], f.shape[0])) distance = _distance_2d + angle_test = _angle_test_2d # 1d else: distance = _distance_1d + angle_test = _angle_test_1d + + if angles is not None: + if z is not None and angles.size < 2: + raise ValueError('3d requested but only one angle given') + if y is not None and angles.size < 1: + raise ValueError('2d with angle requested but no angle given') + + if angles_tol <= 0: + raise ValueError('tolerance for angle search masks must be > 0') cdef _estimator_func estimator_func = choose_estimator_func(estimator_type) cdef _normalization_func normalization_func = ( @@ -160,11 +248,12 @@ def unstructured( for k in range(j+1, k_max): dist = distance(x, y, z, k, j) if dist >= bin_edges[i] and dist < bin_edges[i+1]: - counts[i] += 1 - variogram[i] += estimator_func(f[k] - f[j]) + if angles is None or angle_test(x, y, z, angles, angles_tol, k, j): + counts[i] += 1 + variogram[i] += estimator_func(f[k] - f[j]) normalization_func(variogram, counts) - return np.asarray(variogram) + return np.asarray(variogram), np.asarray(counts) def structured(const double[:,:,:] f, str estimator_type='m'): diff --git a/gstools/variogram/variogram.py b/gstools/variogram/variogram.py index e13ddf32..353e7736 100644 --- a/gstools/variogram/variogram.py +++ b/gstools/variogram/variogram.py @@ -37,9 +37,12 @@ def vario_estimate_unstructured( pos, field, bin_edges, + angles=None, + angles_tol=0.436332, sampling_size=None, sampling_seed=None, estimator="matheron", + return_counts=False, ): r""" Estimates the variogram on a unstructured grid. @@ -64,10 +67,6 @@ def vario_estimate_unstructured( being the bins. The Cressie estimator is more robust to outliers. - Notes - ----- - Internally uses double precision and also returns doubles. - Parameters ---------- pos : :class:`list` @@ -77,6 +76,18 @@ def vario_estimate_unstructured( the spatially distributed data bin_edges : :class:`numpy.ndarray` the bins on which the variogram will be calculated + angles : :class:`numpy.ndarray` + the angles of the main axis to calculate the variogram for in radians + angle definitions from ISO standard 80000-2:2009 + for 1d this parameter will have no effect at all + for 2d supply one angle which is azimuth φ (ccw from +x in xy plane) + for 3d supply two angles which are azimuth φ (ccw from +x in xy plane) + and inclination θ (cw from +z) + angles_tol : class:`float` + the tolerance around the variogram angle to count a point as being + within this direction from another point (the angular tolerance around + the directional vector given by angles) + Default: 25°≈0.436332 sampling_size : :class:`int` or :any:`None`, optional for large input data, this method can take a long time to compute the variogram, therefore this argument specifies @@ -92,18 +103,43 @@ def vario_estimate_unstructured( * "cressie": an estimator more robust to outliers Default: "matheron" + return_counts: class:`bool`, optional + if set to true, this function will also return the number of data + points found at each lag distance as a third return value + Default: False Returns ------- :class:`tuple` of :class:`numpy.ndarray` - the estimated variogram and the bin centers + 1. the bin centers + 2. the estimated variogram values at bin centers + 3. (optional) the number of points found at each bin center + (see argument return_counts) + + Notes + ----- + Internally uses double precision and also returns doubles. """ # TODO check_mesh field = np.array(field, ndmin=1, dtype=np.double) bin_edges = np.array(bin_edges, ndmin=1, dtype=np.double) x, y, z, dim = pos2xyz(pos, calc_dim=True, dtype=np.double) + bin_centres = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + if angles is not None and dim > 1: + # there are at most (dim-1) angles + angles = np.array(angles, ndmin=1, dtype=np.double) # type convert + angles = angles.ravel()[: (dim - 1)] # cutoff + angles = np.pad( + angles, (0, dim - angles.size - 1), "constant", constant_values=0.0 + ) # fill with 0 if too less given + # correct intervalls for angles + angles[0] = angles[0] % (2 * np.pi) + angles[1:] = angles[1:] % np.pi + elif dim == 1: + angles = None + if sampling_size is not None and sampling_size < len(field): sampled_idx = np.random.RandomState(sampling_seed).choice( np.arange(len(field)), sampling_size, replace=False @@ -117,13 +153,22 @@ def vario_estimate_unstructured( cython_estimator = _set_estimator(estimator) - return ( - bin_centres, - unstructured( - field, bin_edges, x, y, z, estimator_type=cython_estimator - ), + estimates, counts = unstructured( + field, + bin_edges, + x, + y, + z, + angles, + angles_tol, + estimator_type=cython_estimator, ) + if return_counts: + return bin_centres, estimates, counts + else: + return bin_centres, estimates + def vario_estimate_structured(field, direction="x", estimator="matheron"): r"""Estimates the variogram on a regular grid. @@ -149,14 +194,6 @@ def vario_estimate_structured(field, direction="x", estimator="matheron"): being the bins. The Cressie estimator is more robust to outliers. - Warnings - -------- - It is assumed that the field is defined on an equidistant Cartesian grid. - - Notes - ----- - Internally uses double precision and also returns doubles. - Parameters ---------- field : :class:`numpy.ndarray` @@ -175,6 +212,14 @@ def vario_estimate_structured(field, direction="x", estimator="matheron"): ------- :class:`numpy.ndarray` the estimated variogram along the given direction. + + Warnings + -------- + It is assumed that the field is defined on an equidistant Cartesian grid. + + Notes + ----- + Internally uses double precision and also returns doubles. """ try: mask = np.array(field.mask, dtype=np.int32) diff --git a/tests/test_variogram_unstructured.py b/tests/test_variogram_unstructured.py index c5a59a85..c219ec22 100644 --- a/tests/test_variogram_unstructured.py +++ b/tests/test_variogram_unstructured.py @@ -5,12 +5,38 @@ import unittest import numpy as np -from gstools import vario_estimate_unstructured - +from gstools import vario_estimate_unstructured, Exponential, SRF +import gstools as gs class TestVariogramUnstructured(unittest.TestCase): def setUp(self): - pass + + # this code generates the testdata for the 2D rotation test cases + x = np.random.RandomState(19970221).rand(20) * 10.0 - 5 + y = np.zeros_like(x) + model = gs.Exponential(dim=2, var=2, len_scale=8) + srf = gs.SRF(model, mean=0, seed=19970221) + field = srf((x, y)) + + bins = np.arange(10) + bin_center, gamma = gs.vario_estimate_unstructured((x, ), field, bins) + idx = np.argsort(x) + self.test_data_rotation_1 = {'gamma': gamma, 'x': x[idx], 'field': field[idx], 'bins': bins, 'bin_center': bin_center} + + # CODE ABOVE SHOULD GENERATE THIS DATA + # x = np.array([ + # -4.86210059, -4.1984934 , -3.9246953 , -3.28490663, -2.16332379, + # -1.87553275, -1.74125124, -1.27224687, -1.20931578, -0.2413368 , + # 0.03200921, 1.17099773, 1.53863105, 1.64478688, 2.75252136, + # 3.3556915 , 3.89828775, 4.21485964, 4.5364357 , 4.79236969]), + # field = np.array([ + # -1.10318365, -0.53566629, -0.41789049, -1.06167529, 0.38449961, + # -0.36550477, -0.98905552, -0.19352766, 0.16264266, 0.26920833, + # 0.05379665, 0.71275006, 0.36651935, 0.17366865, 1.20022343, + # 0.79385446, 0.69456069, 1.0733393 , 0.71191592, 0.71969766]) + # gamma_exp = np.array([ + # 0.14260989, 0.18301197, 0.25855841, 0.29990083, 0.67914526, + # 0.60136535, 0.92875492, 1.46910435, 1.10165104]) def test_doubles(self): x = np.arange(1, 11, 1, dtype=np.double) @@ -217,6 +243,270 @@ def test_assertions(self): ValueError, vario_estimate_unstructured, [x], field_e, bins ) + def test_angles_2D_x2x(self): + + x = self.test_data_rotation_1['x'] + field = self.test_data_rotation_1['field'] + gamma_exp = self.test_data_rotation_1['gamma'] + bins = self.test_data_rotation_1['bins'] + y = np.zeros_like(x) + + # test case 1.) + # all along x axis on x axis + + bin_centres, gamma = vario_estimate_unstructured( + (x, y), + field, + bins, + angles=[0] + ) + + for i in range(gamma.size): + self.assertAlmostEqual(gamma_exp[i], gamma[i], places=3) + + def test_angles_2D_y2x(self): + + x = self.test_data_rotation_1['x'] + field = self.test_data_rotation_1['field'] + gamma_exp = self.test_data_rotation_1['gamma'] + bins = self.test_data_rotation_1['bins'] + y = np.zeros_like(x) + + # test case 2.) + # all along y axis on y axis but calculation for x axis + + bin_centres, gamma = vario_estimate_unstructured( + (y, x), + field, + bins, + angles=[0] + ) + + for i in range(gamma.size): + self.assertAlmostEqual(0, gamma[i], places=3) + + def test_angles_2D_y2y(self): + + x = self.test_data_rotation_1['x'] + field = self.test_data_rotation_1['field'] + gamma_exp = self.test_data_rotation_1['gamma'] + bins = self.test_data_rotation_1['bins'] + y = np.zeros_like(x) + + # test case 3.) + # all along y axis on y axis and calculation for y axis + + bin_centres, gamma = vario_estimate_unstructured( + (y, x), + field, + bins, + angles=[np.pi/2.] + ) + + for i in range(gamma.size): + self.assertAlmostEqual(gamma_exp[i], gamma[i], places=3) + + def test_angles_2D_xy2x(self): + + x = self.test_data_rotation_1['x'] + field = self.test_data_rotation_1['field'] + gamma_exp = self.test_data_rotation_1['gamma'] + bins = self.test_data_rotation_1['bins'] + y = np.zeros_like(x) + + + # test case 4.) + # data along 45deg axis but calculation for x axis + + ccos, csin = np.cos(np.pi/4.), np.sin(np.pi/4.) + + xr = [xx * ccos - yy * csin for xx, yy in zip(x, y)] + yr = [xx * csin + yy * ccos for xx, yy in zip(x, y)] + + bin_centres, gamma = vario_estimate_unstructured( + (xr, yr), + field, + bins, + angles=[0] + ) + + for i in range(gamma.size): + self.assertAlmostEqual(0, gamma[i], places=3) + + def test_angles_2D_estim(self): + + seed = gs.random.MasterRNG(19970221) + rng = np.random.RandomState(seed()) + rng = np.random + x = rng.randint(0, 100, size=3000) + y = rng.randint(0, 100, size=3000) + + model = gs.Exponential(dim=2, var=1, len_scale=[12, 3], angles=np.pi / 8) + model_maj = gs.Exponential(dim=1, var=1, len_scale=[12]) + model_min = gs.Exponential(dim=1, var=1, len_scale=[3]) + + srf = gs.SRF(model, seed=20170519) + field = srf((x, y)) + + + bins = np.arange(0, 50, 2.5) + angle_mask = 22.5 + angle_tol = 22.5 + + bin_centers_maj, gamma_maj = gs.vario_estimate_unstructured( + (x, y), + field, + bins, + angles=[np.deg2rad(angle_mask)], + angles_tol=np.deg2rad(angle_tol) + ) + + bin_centers_min, gamma_min = gs.vario_estimate_unstructured( + (x, y), + field, + bins, + angles=[np.deg2rad(angle_mask + 90.0)], + angles_tol=np.deg2rad(angle_tol) + ) + + gamma_maj_real = model_maj.variogram(bin_centers_maj) + gamma_min_real = model_min.variogram(bin_centers_min) + + # we have no real way of testing values, but we can test some basic properties which definitelly need to be true + # test that the major estimate aligns better with major real than minor real + self.assertTrue(np.sum((gamma_maj_real - gamma_maj)**2) < np.sum((gamma_min_real - gamma_maj)**2)) + # test that the minor estimate aligns better with minor real than major real + self.assertTrue(np.sum((gamma_min_real - gamma_min)**2) < np.sum((gamma_maj_real - gamma_min)**2)) + # test that both variograms converge within reasonable closeness (less than 10% rel error) to the actual field variance + self.assertTrue((np.mean(gamma_min[-5:]) - np.var(field)) / np.var(field) < 0.1) + self.assertTrue((np.mean(gamma_maj[-5:]) - np.var(field)) / np.var(field) < 0.1) + + def test_angles_line_3D(self): + + x = self.test_data_rotation_1['x'] + field = self.test_data_rotation_1['field'] + gamma_exp = self.test_data_rotation_1['gamma'] + bins = self.test_data_rotation_1['bins'] + + def test_xyz(x, y, z, angles, gamma_exp): + bin_centres, gamma = vario_estimate_unstructured( + (x, y, z), + field, + bins, + angles=angles + ) + + if np.ndim(gamma_exp) == 0: + gamma_exp = np.ones_like(gamma) * gamma_exp + + for i in range(gamma.size): + self.assertAlmostEqual(gamma_exp[i], gamma[i], places=3) + + # all along x axis and calculation for x axis + test_xyz(x, np.zeros_like(x), np.zeros_like(x), [0], gamma_exp) + # all along y axis and calculation for x axis + test_xyz(np.zeros_like(x), x, np.zeros_like(x), [0], 0) + # all along z axis and calculation for x axis + test_xyz(np.zeros_like(x), np.zeros_like(x), x, [0], 0) + + angles_rot_azim_90 = [np.pi/2] + # all along x axis and calculation for y axis + test_xyz(x, np.zeros_like(x), np.zeros_like(x), angles_rot_azim_90, 0) + # all along y axis and calculation for y axis + test_xyz(np.zeros_like(x), x, np.zeros_like(x), angles_rot_azim_90, gamma_exp) + # all along z axis and calculation for y axis + test_xyz(np.zeros_like(x), np.zeros_like(x), x, angles_rot_azim_90, 0) + + # for elevation it is important to check, that IF elevation is 90° or 270° it does + # not matter how we rotated before, since any rotation around z (in XY plane) + # followed by a rotation around x' (in YZ' plane) by 90° will result in the same + # coordinates, (when the structure is two points with zero extend) + + # test with [0, 90] + angles_rot_azim_90_elev_90 = [0, np.pi/2] + # all along x axis and calculation for z axis + test_xyz(x, np.zeros_like(x), np.zeros_like(x), angles_rot_azim_90_elev_90, 0) + # all along y axis and calculation for z axis + test_xyz(np.zeros_like(x), x, np.zeros_like(x), angles_rot_azim_90_elev_90, 0) + # all along z axis and calculation for z axis + test_xyz(np.zeros_like(x), np.zeros_like(x), x, angles_rot_azim_90_elev_90, gamma_exp) + + # test with [90, 90] + angles_rot_azim_90_elev_90 = [np.pi/2, np.pi/2] + # all along x axis and calculation for z axis + test_xyz(x, np.zeros_like(x), np.zeros_like(x), angles_rot_azim_90_elev_90, 0) + # all along y axis and calculation for z axis + test_xyz(np.zeros_like(x), x, np.zeros_like(x), angles_rot_azim_90_elev_90, 0) + # all along z axis and calculation for z axis + test_xyz(np.zeros_like(x), np.zeros_like(x), x, angles_rot_azim_90_elev_90, gamma_exp) + + def test_angles_3D_estim(self): + + seed = gs.random.MasterRNG(19970221) + rng = np.random.RandomState(seed()) + rng = np.random + x = rng.randint(0, 50, size=1000) + y = rng.randint(0, 50, size=1000) + z = rng.randint(0, 50, size=1000) + + model = gs.Exponential(dim=3, var=1, len_scale=[10, 5, 2], angles=[np.pi / 8, np.pi / 16]) + model_maj = gs.Exponential(dim=1, var=1, len_scale=[10]) + model_min1 = gs.Exponential(dim=1, var=1, len_scale=[5]) + model_min2 = gs.Exponential(dim=1, var=1, len_scale=[2]) + + srf = gs.SRF(model, seed=20170519) + field = srf((x, y, z)) + + bins = np.arange(0, 25, 5) + angle_mask = [np.pi / 8, np.pi / 16] + angle_tol = 22.5 + + # in x' + bin_centers_maj, gamma_maj = gs.vario_estimate_unstructured( + (x, y, z), + field, + bins, + angles=[angle_mask[0], angle_mask[1]], + angles_tol=np.deg2rad(angle_tol) + ) + + # in y' + bin_centers_min1, gamma_min1 = gs.vario_estimate_unstructured( + (x, y, z), + field, + bins, + angles=[angle_mask[0] + 0.5*np.pi, angle_mask[1]], + angles_tol=np.deg2rad(angle_tol) + ) + + # in z' + bin_centers_min2, gamma_min2 = gs.vario_estimate_unstructured( + (x, y, z), + field, + bins, + angles=[angle_mask[0], angle_mask[1] + 0.5*np.pi], + angles_tol=np.deg2rad(angle_tol) + ) + + gamma_maj_real = model_maj.variogram(bin_centers_maj) + gamma_min1_real = model_min1.variogram(bin_centers_min1) + gamma_min2_real = model_min2.variogram(bin_centers_min2) + + # we have no real way of testing values, but we can test some basic properties which definitelly need to be true + # test that the major estimate aligns better with major real than the minor reals + self.assertTrue(np.sum((gamma_maj_real - gamma_maj)**2) < np.sum((gamma_min1_real - gamma_maj)**2)) + self.assertTrue(np.sum((gamma_maj_real - gamma_maj)**2) < np.sum((gamma_min2_real - gamma_maj)**2)) + # test that the minor1 estimate aligns better with minor1 real than major real and minor2 real + self.assertTrue(np.sum((gamma_min1_real - gamma_min1)**2) < np.sum((gamma_maj_real - gamma_min1)**2)) + self.assertTrue(np.sum((gamma_min1_real - gamma_min1)**2) < np.sum((gamma_min2_real - gamma_min1)**2)) + # test that the minor2 estimate aligns better with minor2 real than major real and minor1 real + self.assertTrue(np.sum((gamma_min2_real - gamma_min2)**2) < np.sum((gamma_maj_real - gamma_min2)**2)) + self.assertTrue(np.sum((gamma_min2_real - gamma_min2)**2) < np.sum((gamma_min1_real - gamma_min2)**2)) + # test that all variograms converge within reasonable closeness (less than 10% rel error) to the actual field variance + self.assertTrue((np.mean(gamma_maj[-5:]) - np.var(field)) / np.var(field) < 0.1) + self.assertTrue((np.mean(gamma_min1[-5:]) - np.var(field)) / np.var(field) < 0.1) + self.assertTrue((np.mean(gamma_min2[-5:]) - np.var(field)) / np.var(field) < 0.1) + if __name__ == "__main__": unittest.main()