Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Set data when building LinearModel #249

Merged
merged 10 commits into from
Nov 13, 2023
4 changes: 2 additions & 2 deletions pymc_experimental/linearmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,6 @@ def build_model(self, X: pd.DataFrame, y: pd.Series):
"""
cfg = self.model_config

# The model is built with placeholder data.
# Actual data will be set by _data_setter when fitting or evaluating the model.
# Data array size can change but number of dimensions must stay the same.
with pm.Model() as self.model:
x = pm.MutableData("x", np.zeros((1,)), dims="observation")
Expand All @@ -94,6 +92,8 @@ def build_model(self, X: pd.DataFrame, y: pd.Series):
dims="observation",
)

self._data_setter(X, y)

def _data_setter(self, X: pd.DataFrame, y: Optional[Union[pd.DataFrame, pd.Series]] = None):
with self.model:
pm.set_data({"x": X.squeeze()})
Expand Down
27 changes: 13 additions & 14 deletions pymc_experimental/model_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,8 @@ def sample_model(self, **kwargs):
with self.model:
sampler_args = {**self.sampler_config, **kwargs}
idata = pm.sample(**sampler_args)
idata.extend(pm.sample_prior_predictive())
idata.extend(pm.sample_posterior_predictive(idata))
idata.extend(pm.sample_prior_predictive(), join="right")
idata.extend(pm.sample_posterior_predictive(idata), join="right")

idata = self.set_idata_attrs(idata)
return idata
Expand Down Expand Up @@ -601,17 +601,16 @@ def sample_prior_predictive(

if self.model is None:
self.build_model(X_pred, y_pred)

self._data_setter(X_pred, y_pred)
if self.model is not None:
with self.model: # sample with new input data
prior_pred: az.InferenceData = pm.sample_prior_predictive(samples, **kwargs)
self.set_idata_attrs(prior_pred)
if extend_idata:
if self.idata is not None:
self.idata.extend(prior_pred)
else:
self.idata = prior_pred
else:
self._data_setter(X_pred, y_pred)
with self.model: # sample with new input data
prior_pred: az.InferenceData = pm.sample_prior_predictive(samples, **kwargs)
self.set_idata_attrs(prior_pred)
if extend_idata:
if self.idata is not None:
self.idata.extend(prior_pred, join="right")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we test that all these join="right" are doing the right thing (i.e., discarding the old value and replacing the new one), and that extend_idata=False is being respected?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible. I'll work on it and update the PR accordingly.

else:
self.idata = prior_pred

prior_predictive_samples = az.extract(prior_pred, "prior_predictive", combined=combined)

Expand Down Expand Up @@ -641,7 +640,7 @@ def sample_posterior_predictive(self, X_pred, extend_idata, combined, **kwargs):
with self.model: # sample with new input data
post_pred = pm.sample_posterior_predictive(self.idata, **kwargs)
if extend_idata:
self.idata.extend(post_pred)
self.idata.extend(post_pred, join="right")

posterior_predictive_samples = az.extract(
post_pred, "posterior_predictive", combined=combined
Expand Down
39 changes: 33 additions & 6 deletions pymc_experimental/tests/test_linearmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
sklearn_available = False


@pytest.fixture(scope="module")
def toy_actual_params():
    """Ground-truth parameters used to generate the toy regression data."""
    params = dict(intercept=3, slope=5, obs_error=0.5)
    return params


@pytest.fixture(scope="module")
def toy_X():
x = np.linspace(start=0, stop=1, num=100)
Expand All @@ -44,23 +53,24 @@ def toy_X():


@pytest.fixture(scope="module")
def toy_y(toy_X):
y = 5 * toy_X["input"] + 3
y = y + np.random.normal(0, 1, size=len(toy_X))
def toy_y(toy_X, toy_actual_params):
    """Synthetic targets: a noisy linear function of toy_X, seeded for reproducibility."""
    # Fixed seed so every test run sees the same noise realization.
    rng = np.random.default_rng(427)
    signal = toy_actual_params["slope"] * toy_X["input"] + toy_actual_params["intercept"]
    noise = rng.normal(0, toy_actual_params["obs_error"], size=len(toy_X))
    return pd.Series(signal + noise, name="output")


@pytest.fixture(scope="module")
def fitted_linear_model_instance(toy_X, toy_y):
sampler_config = {
"draws": 500,
"tune": 300,
"draws": 50,
"tune": 30,
"chains": 2,
"target_accept": 0.95,
}
model = LinearModel(sampler_config=sampler_config)
model.fit(toy_X, toy_y)
model.fit(toy_X, toy_y, random_seed=312)
return model


Expand Down Expand Up @@ -101,6 +111,23 @@ def test_fit(fitted_linear_model_instance):
assert isinstance(post_pred, xr.DataArray)


def test_parameter_fit(toy_X, toy_y, toy_actual_params):
    """Check that the fit model recovered the data-generating parameters."""
    # More draws/tuning than other tests so posterior means are accurate
    # enough for a 10% relative-tolerance comparison.
    model = LinearModel(
        sampler_config={
            "draws": 500,
            "tune": 300,
            "chains": 2,
            "target_accept": 0.95,
        }
    )
    model.fit(toy_X, toy_y, random_seed=312)
    posterior_means = model.idata.posterior.mean()
    # Posterior variable name -> key of the true value in toy_actual_params.
    for var_name, param_key in (
        ("intercept", "intercept"),
        ("slope", "slope"),
        ("σ_model_fmc", "obs_error"),
    ):
        np.testing.assert_allclose(
            posterior_means[var_name], toy_actual_params[param_key], rtol=0.1
        )


def test_predict(fitted_linear_model_instance):
model = fitted_linear_model_instance
X_pred = pd.DataFrame({"input": np.random.uniform(low=0, high=1, size=100)})
Expand Down
Loading