diff --git a/pymc/gp/hsgp_approx.py b/pymc/gp/hsgp_approx.py
index 8dd8c6edb3f..760262276a0 100644
--- a/pymc/gp/hsgp_approx.py
+++ b/pymc/gp/hsgp_approx.py
@@ -17,7 +17,6 @@
 from collections.abc import Sequence
 from types import ModuleType
-from typing import NamedTuple
 
 import numpy as np
 import pytensor.tensor as pt
 
@@ -31,11 +30,14 @@
 TensorLike = np.ndarray | pt.TensorVariable
 
 
-def set_boundary(Xs: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
-    """Set the boundary using the `Xs` centered around 0 and `c`. `c` is usually a scalar
-    multiplier greater than 1.0, but it may be one value per dimension or column of `Xs`.
+def set_boundary(X: TensorLike, c: numbers.Real | TensorLike) -> np.ndarray:
+    """Set the boundary using `X` and `c`. `X` need not be centered around zero, and `c` is
+    usually a scalar multiplier greater than 1.0, but it may also be one value per dimension
+    or column of `X`.
     """
-    S = pt.max(pt.abs(Xs), axis=0)  # important: the Xs should be centered around 0
+    # Compute the radius; works whether or not X is centered around zero
+    S = (pt.max(X, axis=0) - pt.min(X, axis=0)) / 2.0
+
     L = (c * S).eval()  # eval() makes sure L is not changed with out-of-sample preds
     return L
 
@@ -89,44 +91,42 @@ def calc_basis_periodic(
     return phi_cos, phi_sin
 
 
-class HSGPParams(NamedTuple):
-    m: int
-    c: float
-    S: float
-
-
 def approx_hsgp_hyperparams(
-    x: np.ndarray, lengthscale_range: list[float], cov_func: str
-) -> HSGPParams:
+    x_range: list[float], lengthscale_range: list[float], cov_func: str
+) -> tuple[int, float]:
     """Utility function that uses heuristics to recommend minimum `m` and `c` values,
     based on recommendations from Ruitort-Mayol et. al.
 
     In practice, you need to choose `c` large enough to handle the largest lengthscales,
-    and `m` large enough to accommodate the smallest lengthscales.
+    and `m` large enough to accommodate the smallest lengthscales. Use your prior on the
+    lengthscale as guidance for setting `lengthscale_range`. For example, if you believe
+    that 95% of the prior mass of the lengthscale is between 1 and 5, set
+    `lengthscale_range` to [1, 5], or perhaps a touch wider.
+
+    Also, be sure to pass in an `x_range` that covers the domain of both your training data
+    and any points where you intend to make predictions. For instance, if your training x
+    values are from [0, 10], and you intend to predict from [7, 15], the narrowest `x_range`
+    you should pass in would be `x_range = [0, 15]`.
 
     NB: These recommendations are based on a one-dimensional GP.
 
     Parameters
     ----------
-    x : np.ndarray
-        The input variable on which the GP is going to be evaluated.
-        Careful: should be the X values you want to predict over, not *only* the training X.
+    x_range : list[float]
+        The range of the x values you intend to both train and predict over. Should be a list with
+        two elements, [x_min, x_max].
     lengthscale_range : List[float]
-        The range of the lengthscales. Should be a list with two elements [lengthscale_min, lengthscale_max].
+        The range of the lengthscales. Should be a list with two elements, [lengthscale_min, lengthscale_max].
     cov_func : str
         The covariance function to use. Supported options are "expquad", "matern52", and "matern32".
 
     Returns
     -------
-    HSGPParams
-        A named tuple containing the recommended values for `m`, `c`, and `S`.
-        - `m` : int
-            Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
-        - `c` : float
-            Scaling factor such that L = c * S, where L is the boundary of the approximation.
-            Increasing it helps approximate larger lengthscales, but may require increasing m.
-        - `S` : float
-            The value of `S`, which is half the range of `x`.
+    - `m` : int
+        Number of basis vectors. Increasing it helps approximate smaller lengthscales, but increases computational cost.
+    - `c` : float
+        Scaling factor such that L = c * S, where L is the boundary of the approximation.
+        Increasing it helps approximate larger lengthscales, but may require increasing m.
 
     Raises
     ------
@@ -139,11 +139,12 @@ def approx_hsgp_hyperparams(
         Practical Hilbert Space Approximate Bayesian Gaussian Processes for Probabilistic Programming
     """
     if lengthscale_range[0] >= lengthscale_range[1]:
-        raise ValueError("One of the boundaries out of order")
+        raise ValueError("One of the `lengthscale_range` boundaries is out of order.")
 
-    X_center = (np.max(x, axis=0) - np.min(x, axis=0)) / 2
-    Xs = x - X_center
-    S = np.max(np.abs(Xs), axis=0)
+    if x_range[0] >= x_range[1]:
+        raise ValueError("One of the `x_range` boundaries is out of order.")
+
+    S = (x_range[1] - x_range[0]) / 2.0
 
     if cov_func.lower() == "expquad":
         a1, a2 = 3.2, 1.75
@@ -162,7 +163,7 @@ def approx_hsgp_hyperparams(
     c = max(a1 * (lengthscale_range[1] / S), 1.2)
     m = int(a2 * c / (lengthscale_range[0] / S))
 
-    return HSGPParams(m=m, c=c, S=S)
+    return m, c
 
 
 class HSGP(Base):
@@ -286,6 +287,7 @@ def __init__(
 
         if parametrization is not None:
             parametrization = parametrization.lower().replace("-", "").replace("_", "")
+
             if parametrization not in ["centered", "noncentered"]:
                 raise ValueError("`parametrization` must be either 'centered' or 'noncentered'.")
 
@@ -321,7 +323,7 @@ def L(self) -> pt.TensorVariable:
     def L(self, value: TensorLike):
         self._L = pt.as_tensor_variable(value)
 
-    def prior_linearized(self, Xs: TensorLike):
+    def prior_linearized(self, X: TensorLike):
         """Linearized version of the HSGP. Returns the Laplace eigenfunctions and the square root
         of the power spectral density needed to create the GP.
 
@@ -334,7 +336,7 @@ def prior_linearized(self, Xs: TensorLike):
 
         Parameters
        ----------
-        Xs: array-like
+        X: array-like
             Function input values.
 
         Returns
@@ -362,9 +364,9 @@ def prior_linearized(self, Xs: TensorLike):
             # L = [10] means the approximation is valid from Xs = [-10, 10]
             gp = pm.gp.HSGP(m=[200], L=[10], cov_func=cov_func)
 
+            # Set X as Data so it can be mutated later, and then pass it to the GP
             X = pm.Data("X", X)
-            # Pass X to the GP
-            phi, sqrt_psd = gp.prior_linearized(Xs=X)
+            phi, sqrt_psd = gp.prior_linearized(X=X)
 
             # Specify standard normal prior in the coefficients, the number of which
             # is given by the number of basis vectors, saved in `n_basis_vectors`.
@@ -394,8 +396,8 @@ def prior_linearized(self, Xs: TensorLike):
         # Important: fix the computation of the midpoint of X.
        # If X is mutated later, the training midpoint will be subtracted, not the testing one.
         if self._X_center is None:
-            self._X_center = (pt.max(Xs, axis=0) - pt.min(Xs, axis=0)).eval() / 2
-        Xs = Xs - self._X_center  # center for accurate computation
+            self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
+        Xs = X - self._X_center  # center for accurate computation
 
         # Index Xs using input_dim and active_dims of covariance function
         Xs, _ = self.cov_func._slice(Xs)
 
@@ -588,10 +590,11 @@ def __init__(
         self._m = m
         self.scale = scale
+        self._X_center = None
 
         super().__init__(mean_func=mean_func, cov_func=cov_func)
 
-    def prior_linearized(self, Xs: TensorLike):
+    def prior_linearized(self, X: TensorLike):
         """Linearized version of the approximation. Returns the cosine and sine bases and
         coefficients of the expansion needed to create the GP.
 
@@ -606,8 +609,8 @@
 
         Parameters
         ----------
-        Xs: array-like
-            Function input values. Assumes they have been mean subtracted or centered at zero.
+        X: array-like
+            Function input values.
 
         Returns
         -------
@@ -631,15 +634,9 @@ def prior_linearized(self, Xs: TensorLike):
             # m=200 means 200 basis vectors
             gp = pm.gp.HSGPPeriodic(m=200, scale=scale, cov_func=cov_func)
 
-            # Order is important. First calculate the mean, then make X a shared variable,
-            # then subtract the mean. When X is mutated later, the correct mean will be
-            # subtracted.
-            X_mean = np.mean(X, axis=0)
-            X = pm.MutableData("X", X)
-            Xs = X - X_mean
-
-            # Pass the zero-subtracted Xs in to the GP
-            (phi_cos, phi_sin), psd = gp.prior_linearized(Xs=Xs)
+            # Set X as Data so it can be mutated later, and then pass it to the GP
+            X = pm.Data("X", X)
+            (phi_cos, phi_sin), psd = gp.prior_linearized(X=X)
 
             # Specify standard normal prior in the coefficients. The number of which
             # is twice the number of basis vectors minus one.
@@ -666,6 +663,13 @@ def prior_linearized(self, Xs: TensorLike):
             with model:
                 ppc = pm.sample_posterior_predictive(idata, var_names=["f"])
         """
+        # Important: fix the computation of the midpoint of X.
+        # If X is mutated later, the training midpoint will be subtracted, not the testing one.
+        if self._X_center is None:
+            self._X_center = (pt.max(X, axis=0) + pt.min(X, axis=0)).eval() / 2
+        Xs = X - self._X_center  # center for accurate computation
+
+        # Index Xs using input_dim and active_dims of covariance function
         Xs, _ = self.cov_func._slice(Xs)
         phi_cos, phi_sin = calc_basis_periodic(Xs, self.cov_func.period, self._m, tl=pt)
 
@@ -688,9 +692,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ign
         dims: None
             Dimension name for the GP random variable.
         """
-        self._X_mean = pt.mean(X, axis=0)
-
-        (phi_cos, phi_sin), psd = self.prior_linearized(X - self._X_mean)
+        (phi_cos, phi_sin), psd = self.prior_linearized(X)
 
         m = self._m
         self._beta = pm.Normal(f"{name}_hsgp_coeffs_", size=(m * 2 - 1))
@@ -707,7 +709,7 @@ def prior(self, name: str, X: TensorLike, dims: str | None = None):  # type: ign
 
     def _build_conditional(self, Xnew):
         try:
-            beta, X_mean = self._beta, self._X_mean
+            beta, X_center = self._beta, self._X_center
 
         except AttributeError:
             raise ValueError(
@@ -716,7 +718,9 @@
 
         Xnew, _ = self.cov_func._slice(Xnew)
 
-        phi_cos, phi_sin = calc_basis_periodic(Xnew - X_mean, self.cov_func.period, self._m, tl=pt)
+        phi_cos, phi_sin = calc_basis_periodic(
+            Xnew - X_center, self.cov_func.period, self._m, tl=pt
+        )
         m = self._m
         J = pt.arange(0, m, 1)
         # rescale basis coefficients by the sqrt variance term
diff --git a/tests/gp/test_hsgp_approx.py b/tests/gp/test_hsgp_approx.py
index 09eb13970e9..d80db1bb76f 100644
--- a/tests/gp/test_hsgp_approx.py
+++ b/tests/gp/test_hsgp_approx.py
@@ -129,7 +129,7 @@ def test_mean_invariance(self):
             _ = pm.Data("X", X)
             cov_func = pm.gp.cov.ExpQuad(1, ls=3)
             gp = pm.gp.HSGP(m=[20], L=[10], cov_func=cov_func)
-            _ = gp.prior_linearized(Xs=X)
+            _ = gp.prior_linearized(X=X)
 
         x_new = np.linspace(-10, 20, 100)[:, None]
         with model: