From 639e3b2e403d40d9ea7b2dd6cc3b95039d667778 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Fri, 31 May 2024 23:35:24 -0700 Subject: [PATCH 1/7] fix docstrings, fix centering calculations --- pymc/gp/hsgp_approx.py | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 8dd8c6edb3f..61dccc3c689 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -96,23 +96,31 @@ class HSGPParams(NamedTuple): def approx_hsgp_hyperparams( - x: np.ndarray, lengthscale_range: list[float], cov_func: str + x_range: list[float], lengthscale_range: list[float], cov_func: str ) -> HSGPParams: """Utility function that uses heuristics to recommend minimum `m` and `c` values, based on recommendations from Ruitort-Mayol et. al. In practice, you need to choose `c` large enough to handle the largest lengthscales, - and `m` large enough to accommodate the smallest lengthscales. + and `m` large enough to accommodate the smallest lengthscales. Use your prior on the + lengthscale as guidance for setting the prior range. For example, if you believe + that 95% of the prior mass of the lengthscale is between 1 and 5, set the + `lengthscale_range` to be [1, 5], or maybe a touch wider. + + Also, be sure to pass in an `x` that is exemplary of the domain not just of your + training data, but also where you intend to make predictions. For instance, if your + training x values are from [0, 10], and you intend to predict from [7, 15], you can + pass in `x_range = [0, 15]`. NB: These recommendations are based on a one-dimensional GP. Parameters ---------- - x : np.ndarray - The input variable on which the GP is going to be evaluated. - Careful: should be the X values you want to predict over, not *only* the training X. + x_range : list[float] + The range of the x values you intend to both train and predict over. Should be a list with + two elements, [x_min, x_max]. lengthscale_range : List[float] - The range of the lengthscales. Should be a list with two elements [lengthscale_min, lengthscale_max]. + The range of the lengthscales. Should be a list with two elements, [lengthscale_min, lengthscale_max]. cov_func : str The covariance function to use. Supported options are "expquad", "matern52", and "matern32". @@ -126,7 +134,7 @@ def approx_hsgp_hyperparams( Scaling factor such that L = c * S, where L is the boundary of the approximation. Increasing it helps approximate larger lengthscales, but may require increasing m. - `S` : float - The value of `S`, which is half the range of `x`. + The value of `S`, which is half the range, or radius, of `x`. Raises ------ @@ -139,11 +147,12 @@ def approx_hsgp_hyperparams( Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming """ if lengthscale_range[0] >= lengthscale_range[1]: - raise ValueError("One of the boundaries out of order") + raise ValueError("One of the `lengthscale_range` boundaries is out of order.") + + if x_range[0] >= x_range[1]: + raise ValueError("One of the `x_range` boundaries is out of order.") - X_center = (np.max(x, axis=0) - np.min(x, axis=0)) / 2 - Xs = x - X_center - S = np.max(np.abs(Xs), axis=0) + S = (x_range[1] - x_range[0]) / 2.0 if cov_func.lower() == "expquad": a1, a2 = 3.2, 1.75 @@ -394,7 +403,7 @@ def prior_linearized(self, Xs: TensorLike): # Important: fix the computation of the midpoint of X. # If X is mutated later, the training midpoint will be subtracted, not the testing one. if self._X_center is None: - self._X_center = (pt.max(Xs, axis=0) - pt.min(Xs, axis=0)).eval() / 2 + self._X_center = (pt.max(Xs, axis=0) + pt.min(Xs, axis=0)).eval() / 2 Xs = Xs - self._X_center # center for accurate computation # Index Xs using input_dim and active_dims of covariance function From 2b6ab5ab740dae4312a44318fc526135080db598 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Fri, 31 May 2024 23:43:30 -0700 Subject: [PATCH 2/7] Change Xs to X, because the s indicated scaled, which we dont have to do anymore --- pymc/gp/hsgp_approx.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 61dccc3c689..5ca80b7e8d4 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -330,7 +330,7 @@ def L(self) -> pt.TensorVariable: def L(self, value: TensorLike): self._L = pt.as_tensor_variable(value) - def prior_linearized(self, Xs: TensorLike): + def prior_linearized(self, X: TensorLike): """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root of the power spectral density needed to create the GP. @@ -343,7 +343,7 @@ def prior_linearized(self, Xs: TensorLike): Parameters ---------- - Xs: array-like + X: array-like Function input values. Returns @@ -371,9 +371,9 @@ def prior_linearized(self, Xs: TensorLike): # L = [10] means the approximation is valid from Xs = [-10, 10] gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func) + # Set X as Data so it can be mutated later, and then pass it to the GP X = pm.Data("X", X) - # Pass X to the GP - phi, sqrt_psd = gp.prior_linearized(Xs=X) + phi, sqrt_psd = gp.prior_linearized(X=X) # Specify standard normal prior in the coefficients, the number of which # is given by the number of basis vectors, saved in `n_basis_vectors`. @@ -403,8 +403,8 @@ def prior_linearized(self, Xs: TensorLike): # Important: fix the computation of the midpoint of X. # If X is mutated later, the training midpoint will be subtracted, not the testing one. if self._X_center is None: - self._X_center = (pt.max(Xs, axis=0) + pt.min(Xs, axis=0)).eval() / 2 - Xs = Xs - self._X_center # center for accurate computation + self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2 + Xs = X - self._X_center # center for accurate computation # Index Xs using input_dim and active_dims of covariance function Xs, _ = self.cov_func._slice(Xs) @@ -600,7 +600,7 @@ def __init__( super().__init__(mean_func=mean_func, cov_func=cov_func) - def prior_linearized(self, Xs: TensorLike): + def prior_linearized(self, X: TensorLike): """Linearized version of the approximation. Returns the cosine and sine bases and coefficients of the expansion needed to create the GP. @@ -615,8 +615,8 @@ def prior_linearized(self, Xs: TensorLike): Parameters ---------- - Xs: array-like - Function input values. Assumes they have been mean subtracted or centered at zero. + X: array-like + Function input values. Returns ------- @@ -640,15 +640,9 @@ def prior_linearized(self, Xs: TensorLike): # m=200 means 200 basis vectors gp = pm.gp.HSGPPeriodic(m=200, scale=scale, cov_func=cov_func) - # Order is important. First calculate the mean, then make X a shared variable, - # then subtract the mean. When X is mutated later, the correct mean will be - # subtracted. - X_mean = np.mean(X, axis=0) - X = pm.MutableData("X", X) - Xs = X - X_mean - - # Pass the zero-subtracted Xs in to the GP - (phi_cos, phi_sin), psd = gp.prior_linearized(Xs=Xs) + # Set X as Data so it can be mutated later, and then pass it to the GP + X = pm.Data("X", X) + (phi_cos, phi_sin), psd = gp.prior_linearized(X=X) # Specify standard normal prior in the coefficients. The number of which # is twice the number of basis vectors minus one. @@ -675,6 +669,13 @@ def prior_linearized(self, Xs: TensorLike): with model: ppc = pm.sample_posterior_predictive(idata, var_names=["f"]) """ + # Important: fix the computation of the midpoint of X. + # If X is mutated later, the training midpoint will be subtracted, not the testing one. + if self._X_center is None: + self._X_center = (pt.max(Xs, axis=0) + pt.min(Xs, axis=0)).eval() / 2 + Xs = Xs - self._X_center # center for accurate computation + + # Index Xs using input_dim and active_dims of covariance function Xs, _ = self.cov_func._slice(Xs) phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m, tl=pt) @@ -697,9 +698,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign dims: None Dimension name for the GP random variable. """ - self._X_mean = pt.mean(X, axis=0) - - (phi_cos, phi_sin), psd = self.prior_linearized(X - self._X_mean) + (phi_cos, phi_sin), psd = self.prior_linearized(X) m = self._m self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1)) From 54bc64eaf79e9a1bef055247ad9d5b0923b407c4 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Fri, 31 May 2024 23:55:37 -0700 Subject: [PATCH 3/7] fix test fails --- pymc/gp/hsgp_approx.py | 20 ++++++++++++-------- tests/gp/test_hsgp_approx.py | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 5ca80b7e8d4..1f40e85b34b 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -102,14 +102,14 @@ def approx_hsgp_hyperparams( based on recommendations from Ruitort-Mayol et. al. In practice, you need to choose `c` large enough to handle the largest lengthscales, - and `m` large enough to accommodate the smallest lengthscales. Use your prior on the - lengthscale as guidance for setting the prior range. For example, if you believe + and `m` large enough to accommodate the smallest lengthscales. Use your prior on the + lengthscale as guidance for setting the prior range. For example, if you believe that 95% of the prior mass of the lengthscale is between 1 and 5, set the `lengthscale_range` to be [1, 5], or maybe a touch wider. - Also, be sure to pass in an `x` that is exemplary of the domain not just of your + Also, be sure to pass in an `x` that is exemplary of the domain not just of your training data, but also where you intend to make predictions. For instance, if your - training x values are from [0, 10], and you intend to predict from [7, 15], you can + training x values are from [0, 10], and you intend to predict from [7, 15], you can pass in `x_range = [0, 15]`. NB: These recommendations are based on a one-dimensional GP. @@ -295,6 +295,7 @@ def __init__( if parametrization is not None: parametrization = parametrization.lower().replace("-", "").replace("_", "") + if parametrization not in ["centered", "noncentered"]: raise ValueError("`parametrization` must be either 'centered' or 'noncentered'.") @@ -597,6 +598,7 @@ def __init__( self._m = m self.scale = scale + self._X_center = None super().__init__(mean_func=mean_func, cov_func=cov_func) @@ -672,8 +674,8 @@ def prior_linearized(self, X: TensorLike): # Important: fix the computation of the midpoint of X. # If X is mutated later, the training midpoint will be subtracted, not the testing one. if self._X_center is None: - self._X_center = (pt.max(Xs, axis=0) + pt.min(Xs, axis=0)).eval() / 2 - Xs = Xs - self._X_center # center for accurate computation + self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2 + Xs = X - self._X_center # center for accurate computation # Index Xs using input_dim and active_dims of covariance function Xs, _ = self.cov_func._slice(Xs) @@ -715,7 +717,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None): # type: ign def _build_conditional(self, Xnew): try: - beta, X_mean = self._beta, self._X_mean + beta, X_center = self._beta, self._X_center except AttributeError: raise ValueError( @@ -724,7 +726,9 @@ def _build_conditional(self, Xnew): Xnew, _ = self.cov_func._slice(Xnew) - phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m, tl=pt) + phi_cos, phi_sin = calc_basis_periodic( + Xnew - X_center, self.cov_func.period, self._m, tl=pt + ) m = self._m J = pt.arange(0, m, 1) # rescale basis coefficients by the sqrt variance term diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py index 09eb13970e9..d80db1bb76f 100644 --- a/tests/gp/test_hsgp_approx.py +++ b/tests/gp/test_hsgp_approx.py @@ -129,7 +129,7 @@ def test_mean_invariance(self): _ = pm.Data("X", X) cov_func = pm.gp.cov.ExpQuad(1, ls=3) gp = pm.gp.HSGP(m=[20], L=[10], cov_func=cov_func) - _ = gp.prior_linearized(Xs=X) + _ = gp.prior_linearized(X=X) x_new = np.linspace(-10, 20, 100)[:, None] with model: From 4d96c281ee1bec6f6a0b0242e75b115490a1695f Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Sat, 1 Jun 2024 15:26:13 -0700 Subject: [PATCH 4/7] remove c, HSGPParams tuple, fix docstring --- pymc/gp/hsgp_approx.py | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 1f40e85b34b..7fbb6573aa5 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -17,7 +17,6 @@ from collections.abc import Sequence from types import ModuleType -from typing import NamedTuple import numpy as np import pytensor.tensor as pt @@ -89,15 +88,9 @@ def calc_basis_periodic( return phi_cos, phi_sin -class HSGPParams(NamedTuple): - m: int - c: float - S: float - - def approx_hsgp_hyperparams( x_range: list[float], lengthscale_range: list[float], cov_func: str -) -> HSGPParams: +) -> tuple[int, float]: """Utility function that uses heuristics to recommend minimum `m` and `c` values, based on recommendations from Ruitort-Mayol et. al. @@ -107,10 +100,10 @@ def approx_hsgp_hyperparams( that 95% of the prior mass of the lengthscale is between 1 and 5, set the `lengthscale_range` to be [1, 5], or maybe a touch wider. - Also, be sure to pass in an `x` that is exemplary of the domain not just of your + Also, be sure to pass in an `x_range` that is exemplary of the domain not just of your training data, but also where you intend to make predictions. For instance, if your - training x values are from [0, 10], and you intend to predict from [7, 15], you can - pass in `x_range = [0, 15]`. + training x values are from [0, 10], and you intend to predict from [7, 15], the narrowest + `x_range` you should pass in would be `x_range = [0, 15]`. NB: These recommendations are based on a one-dimensional GP. @@ -126,15 +119,11 @@ def approx_hsgp_hyperparams( Returns ------- - HSGPParams - A named tuple containing the recommended values for `m`, `c`, and `S`. - - `m` : int - Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost. - - `c` : float - Scaling factor such that L = c * S, where L is the boundary of the approximation. - Increasing it helps approximate larger lengthscales, but may require increasing m. - - `S` : float - The value of `S`, which is half the range, or radius, of `x`. + - `m` : int + Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost. + - `c` : float + Scaling factor such that L = c * S, where L is the boundary of the approximation. + Increasing it helps approximate larger lengthscales, but may require increasing m. Raises ------ @@ -171,7 +160,7 @@ def approx_hsgp_hyperparams( c = max(a1 * (lengthscale_range[1] / S), 1.2) m = int(a2 * c / (lengthscale_range[0] / S)) - return HSGPParams(m=m, c=c, S=S) + return m, c class HSGP(Base): From 7640a18f5ea371ec71ca7b220409c36bce3c6b61 Mon Sep 17 00:00:00 2001 From: Bill Engels Date: Sat, 1 Jun 2024 15:43:25 -0700 Subject: [PATCH 5/7] add a bit more detail to docstring of set_boundary --- pymc/gp/hsgp_approx.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 7fbb6573aa5..0c2db8a5f99 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -31,10 +31,13 @@ def set_boundary(Xs: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: - """Set the boundary using the `Xs` centered around 0 and `c`. `c` is usually a scalar - multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`. + """Set the boundary using the `Xs` and `c`. `Xs` must be centered around zero, and `c` + is usually a scalar multiplier greater than 1.0, but it may also be one value per dimension + or column of `Xs`. """ - S = pt.max(pt.abs(Xs), axis=0) # important: the Xs should be centered around 0 + S = pt.max( + pt.abs(Xs), axis=0 + ) # important: the Xs should have previously been centered around 0 L = (c * S).eval() # eval() makes sure L is not changed with out-of-sample preds return L From 0ad3f9c31c5e588a5eb7ced5f1e081e9b8c0c9e9 Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 10 Jun 2024 14:30:05 -0300 Subject: [PATCH 6/7] Compute S with more generic formula for radius --- pymc/gp/hsgp_approx.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index 0c2db8a5f99..f04af07f251 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -30,14 +30,14 @@ TensorLike = np.ndarray | pt.TensorVariable -def set_boundary(Xs: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: - """Set the boundary using the `Xs` and `c`. `Xs` must be centered around zero, and `c` - is usually a scalar multiplier greater than 1.0, but it may also be one value per dimension - or column of `Xs`. +def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: + """Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be, + and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per + dimension or column of `X`. """ - S = pt.max( - pt.abs(Xs), axis=0 - ) # important: the Xs should have previously been centered around 0 + # compute radius. Works whether X is 0-centered or not + S = (pt.max(X, axis=0) - pt.min(X, axis=0)) / 2.0 + L = (c * S).eval() # eval() makes sure L is not changed with out-of-sample preds return L From 5f760b955120bd4d62bc45de92a58252b2f3c0cb Mon Sep 17 00:00:00 2001 From: AlexAndorra Date: Mon, 10 Jun 2024 14:31:27 -0300 Subject: [PATCH 7/7] Compute S with more generic formula for radius --- pymc/gp/hsgp_approx.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py index f04af07f251..760262276a0 100644 --- a/pymc/gp/hsgp_approx.py +++ b/pymc/gp/hsgp_approx.py @@ -31,8 +31,8 @@ def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray: - """Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be, - and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per + """Set the boundary using `X` and `c`. `X` can be centered around zero but doesn't have to be, + and `c` is usually a scalar multiplier greater than 1.0, but it may also be one value per dimension or column of `X`. """ # compute radius. Works whether X is 0-centered or not