Add sample_prior function #2876

Closed · wants to merge 9 commits
4 changes: 2 additions & 2 deletions RELEASE-NOTES.md
@@ -11,8 +11,7 @@
   Gaussian Process implementation for efficient inference, along with
   lower-level functions such as `cartesian` and `kronecker` products.
 - Added `Coregion` covariance function.
-- Add new 'pairplot' function, for plotting scatter or hexbin matrices of sampled parameters.
-  Optionally it can plot divergences.
+- Add new 'pairplot' function, for plotting scatter or hexbin matrices of sampled parameters. Optionally it can plot divergences.
 - Plots of discrete distributions in the docstrings
 - Add logitnormal distribution
 - Densityplot: add support for discrete variables
@@ -21,6 +20,7 @@
 - Changed the `compare` function to accept a dictionary of model-trace pairs instead of two separate lists of models and traces.
 - add test and support for creating multivariate mixture and mixture of mixtures
 - `distribution.draw_values`, now is also able to draw values from conditionally dependent RVs, such as autotransformed RVs (Refer to PR #2902).
+- New function `pm.sample_prior` which generates test data from a model in the absence of data.
 
 ### Fixes
47 changes: 36 additions & 11 deletions pymc3/distributions/distribution.py
@@ -1,3 +1,4 @@
+import collections
 import numbers
 import numpy as np
 import theano.tensor as tt
@@ -210,7 +211,7 @@ def random(self, *args, **kwargs):
 
 
 
-def draw_values(params, point=None):
+def draw_values(params, point=None, size=None):
     """
     Draw (fix) parameter values. Handles a number of cases:
 
@@ -254,7 +255,7 @@ def draw_values(params, point=None):
 
     # Init givens and the stack of nodes to try to `_draw_value` from
     givens = {}
-    stored = set([])  # Some nodes
+    stored = set()  # Some nodes
     stack = list(leaf_nodes.values())  # A queue would be more appropriate
     while stack:
         next_ = stack.pop(0)
@@ -279,13 +280,14 @@
             # The named node's children givens values must also be taken
             # into account.
             children = named_nodes_children[next_]
-            temp_givens = [givens[k] for k in givens.keys() if k in children]
+            temp_givens = [givens[k] for k in givens if k in children]
             try:
                 # This may fail for autotransformed RVs, which don't
                 # have the random method
                 givens[next_.name] = (next_, _draw_value(next_,
                                                          point=point,
-                                                         givens=temp_givens))
+                                                         givens=temp_givens,
+                                                         size=size))
                 stored.add(next_.name)
             except theano.gof.fg.MissingInputError:
                 # The node failed, so we must add the node's parents to
@@ -295,10 +297,28 @@
                               if node is not None and
                               node.name not in stored and
                               node not in params])
-    values = []
-    for param in params:
-        values.append(_draw_value(param, point=point, givens=givens.values()))
-    return values
+
+    # Funny dance to resolve missing nodes
+    params = dict(enumerate(params))  # some nodes are not hashable
+    evaluated = {}
+    to_eval = set()
+    missing_inputs = set(params)
+    while to_eval or missing_inputs:
+        if to_eval == missing_inputs:
+            raise ValueError('Cannot resolve inputs for {}'.format([str(params[j]) for j in to_eval]))
+        to_eval = set(missing_inputs)
+        missing_inputs = set()
+        for param_idx in to_eval:
+            param = params[param_idx]
+            try:  # might evaluate in a bad order,
+                evaluated[param_idx] = _draw_value(param, point=point, givens=givens.values(), size=size)
+                if isinstance(param, collections.Hashable) and named_nodes_parents.get(param):
+                    givens[param.name] = (param, evaluated[param_idx])
+            except theano.gof.fg.MissingInputError:
+                missing_inputs.add(param_idx)
+
+    return [evaluated[j] for j in params]  # set the order back
 
 
 @memoize
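
The loop added above is a small fixed-point iteration: each pass retries every still-unevaluated parameter, and it only gives up when a full pass makes no progress. A self-contained sketch of the same pattern, with a plain dependency dict and a hypothetical `evaluate` standing in for the theano graph and `_draw_value`:

```python
deps = {'c': {'a', 'b'}, 'b': {'a'}, 'a': set()}  # toy dependency graph

def evaluate(name, known):
    if not deps[name] <= known:
        raise KeyError(name)  # plays the role of theano's MissingInputError
    return name.upper()       # dummy "drawn value"

params = dict(enumerate(['c', 'b', 'a']))  # index -> node; real nodes may be unhashable
evaluated, to_eval, missing_inputs = {}, set(), set(params)
while to_eval or missing_inputs:
    if to_eval == missing_inputs:  # a full pass made no progress: unresolvable inputs
        raise ValueError('Cannot resolve inputs for {}'.format([params[j] for j in to_eval]))
    to_eval, missing_inputs = set(missing_inputs), set()
    for j in to_eval:
        try:
            evaluated[j] = evaluate(params[j], {params[k] for k in evaluated})
        except KeyError:
            missing_inputs.add(j)

print([evaluated[j] for j in params])  # ['C', 'B', 'A'] -- original order preserved
```
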
@@ -326,7 +346,7 @@ def _compile_theano_function(param, vars, givens=None):
                                allow_input_downcast=True)
 
 
-def _draw_value(param, point=None, givens=None):
+def _draw_value(param, point=None, givens=None, size=None):
     """Draw a random value from a distribution or return a constant.
 
     Parameters
@@ -355,14 +375,19 @@
     if point and hasattr(param, 'model') and param.name in point:
         return point[param.name]
     elif hasattr(param, 'random') and param.random is not None:
-        return param.random(point=point, size=None)
+        return param.random(point=point, size=size)
+    elif hasattr(param, 'distribution') and hasattr(param.distribution, 'random') and param.distribution.random is not None:
+        return param.distribution.random(point=point, size=size)
     else:
         if givens:
             variables, values = list(zip(*givens))
         else:
             variables = values = []
         func = _compile_theano_function(param, variables)
-        return func(*values)
+        if size is None:
+            return func(*values)
+        else:
+            return np.array([func(*value) for value in zip(*values)])
Contributor:

The size seems to be imposed only on params that have a random method, and we just hope the contents of values end up the right size. Shouldn't there be some enforcement of size for the numbers.Number, np.ndarray, tt.TensorConstant, tt.sharedvar.SharedVariable and tt.TensorVariable cases, and for values found in point, so we can be sure the output will in fact have the desired size?

Member Author:

I am relying here on theano to catch those sorts of errors and to give more informative messages than I could. I am running this on a few different models to make sure it gives reasonable results, and so far those sorts of inputs get broadcast in a sensible manner.

     else:
         raise ValueError('Unexpected type in draw_value: %s' % type(param))
 
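
To make the `size` semantics discussed above concrete: parameters with a `random` method get `size` passed straight through, while the compiled-theano fallback is vectorized by zipping the drawn givens. A minimal sketch of that fallback, with `func` as a hypothetical stand-in for the compiled function, assuming each drawn given already has `size` leading entries:

```python
import numpy as np

def func(a, b):  # hypothetical stand-in for a compiled theano function
    return a + 2 * b

size = 4
values = [np.arange(size), np.ones(size)]  # one drawn value per given

print(func(*values))  # size is None: a single call on the raw values -> [2. 3. 4. 5.]
print(np.array([func(*value) for value in zip(*values)]))  # size given: one call per zipped draw -> [2. 3. 4. 5.]
```

For inputs that broadcast cleanly the two paths agree, which is the behaviour the author is relying on theano to enforce in the reply above.
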
49 changes: 47 additions & 2 deletions pymc3/sampling.py
@@ -11,11 +11,12 @@
 
 from .backends.base import BaseTrace, MultiTrace
 from .backends.ndarray import NDArray
+from .distributions import draw_values
 from .model import modelcontext, Point, all_continuous
 from .step_methods import (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                            BinaryGibbsMetropolis, CategoricalGibbsMetropolis,
                            Slice, CompoundStep, arraystep)
-from .util import update_start_vals
+from .util import update_start_vals, is_transformed_name, get_untransformed_name, get_default_varnames
 from .vartypes import discrete_types
 from pymc3.step_methods.hmc import quadpotential
 from pymc3 import plots
@@ -25,7 +26,7 @@
 import sys
 sys.setrecursionlimit(10000)
 
-__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts']
+__all__ = ['sample', 'iter_sample', 'sample_ppc', 'sample_ppc_w', 'init_nuts', 'sample_generative']
 
 STEP_METHODS = (NUTS, HamiltonianMC, Metropolis, BinaryMetropolis,
                 BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis)
@@ -1206,6 +1207,50 @@ def sample_ppc_w(traces, samples=None, models=None, weights=None,
     return {k: np.asarray(v) for k, v in ppc.items()}
 
 
+def sample_generative(samples=500, model=None, vars=None, random_seed=None):
+    """Generate samples from the generative model.
+
+    Parameters
+    ----------
+    samples : int
+        Number of samples from the prior to generate. Defaults to 500.
+    model : Model (optional if in `with` context)
+    vars : iterable
+        Variables for which to compute the prior samples.
+        Defaults to `model.named_vars`.
+    random_seed : int
+        Seed for the random number generator.
+
+    Returns
+    -------
+    dict
+        Dictionary with the variables as keys. The values are arrays of prior samples.
+    """
+    model = modelcontext(model)
+
+    if vars is None:
+        vars = set(model.named_vars.keys())
+
+    if random_seed is not None:
+        np.random.seed(random_seed)
+
+    names = get_default_varnames(model.named_vars, include_transformed=False)
+    # draw_values fails with auto-transformed variables. transform them later!
+    values = draw_values([model[name] for name in names], size=samples)
Member:

Wow, this is really efficient! However, are we sure that the values drawn for the children of the graph actually depend on the samples drawn for their parents? In the previous implementation, we always sampled by evaluating a point containing samples from the higher levels of the hierarchy. For example, if b ~ p(a), we sample a_tilde first and then sample b ~ p(a_tilde). Is that the case here as well?

Member:

A simple example:

X = theano.shared(np.arange(3))
with pm.Model() as m:
    ind = pm.Categorical('i', np.ones(3)/3)
    x = pm.Deterministic('X', X[ind])
    prior = pm.sample_generative(10)

prior
{'X': array([0, 0, 2, 1, 2, 2, 1, 0, 0, 1]),
 'i': array([1, 0, 0, 2, 2, 0, 2, 1, 0, 0])}

i and X should be identical.

Member Author:

This is a super helpful example! Let me take a look at it -- there is already some work in place to avoid edge cases like this, and I would have thought this one got caught.

Member Author:

Caught the bug (and will add tests for all of this, too). Your example now runs as desired!

I evaluate the params by building a dictionary from index integers to nodes (which avoids the non-hashability of ndarrays). After evaluating a node, though, I was accidentally using the index integer, rather than the node itself, to check whether it was a child of another node. That check was never true, so the drawn value was never supplied to the rest of the graph.
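
A minimal sketch of the bug shape described above -- `Node` and the parent map are hypothetical stand-ins for theano nodes and `named_nodes_parents`:

```python
class Node:  # dummy stand-in for a (possibly unhashable) theano node
    def __init__(self, name):
        self.name = name

node_a, node_b = Node('a'), Node('b')
params = dict(enumerate([node_a, node_b]))  # index -> node
named_nodes_parents = {node_b: [node_a]}    # hypothetical parent map

for param_idx, param in params.items():
    # The bug: named_nodes_parents.get(param_idx) -- an integer index is never
    # a key, so the check was always falsy and drawn values never became givens.
    if named_nodes_parents.get(param):      # the fix: look up the node itself
        print('register given for', param.name)  # prints only for 'b'
```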


+    data = {k: v for k, v in zip(names, values)}
+
+    prior = {}
+    for var_name in vars:
+        if var_name in data:
+            prior[var_name] = data[var_name]
+        elif is_transformed_name(var_name):
+            untransformed = get_untransformed_name(var_name)
+            if untransformed in data:
+                prior[var_name] = model[untransformed].transformation.forward(data[untransformed]).eval()
+    return prior


 def init_nuts(init='auto', chains=1, n_init=500000, model=None,
               random_seed=None, progressbar=True, **kwargs):
     """Set up the mass matrix initialization for NUTS.
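
For reference, a quick usage sketch of the new function. The model here is made up; the HalfNormal is chosen to exercise the transformed-name branch above, and the last check assumes the variable's forward transformation is the usual log:

```python
import numpy as np
import pymc3 as pm

with pm.Model():
    mu = pm.Normal('mu', mu=0, sd=1)
    sd = pm.HalfNormal('sd', sd=1)  # auto-transformed, so the model also tracks sd_log__
    pm.Normal('x', mu=mu, sd=sd, observed=np.zeros(10))
    prior = pm.sample_generative(samples=100, random_seed=1)

print(sorted(prior))       # ['mu', 'sd', 'sd_log__', 'x']
print(prior['mu'].shape)   # (100,) -- one draw per requested sample
print(np.allclose(prior['sd_log__'], np.log(prior['sd'])))  # True if forward() is log
```
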
14 changes: 14 additions & 0 deletions pymc3/tests/test_sampling.py
@@ -302,3 +302,17 @@ def test_exec_nuts_init(method):
     assert len(start) == 2
     assert isinstance(start[0], dict)
     assert 'a' in start[0] and 'b_log__' in start[0]
+
+
+def test_sample_generative():
+    observed = np.random.normal(10, 1, size=200)
+    with pm.Model():
+        # Use a prior that's way off to show we're actually sampling from it
+        mu = pm.Normal('mu', mu=-10, sd=1)
+        positive_mu = pm.Deterministic('positive_mu', np.abs(mu))
+        pm.Normal('x_obs', mu=mu, sd=1, observed=observed)
+        prior = pm.sample_generative()
+
+    assert (prior['mu'] < 0).all()
+    assert (prior['positive_mu'] > 0).all()
+    assert (prior['x_obs'] < 0).all()