Python scripts to BYM2 case study #228

Draft · wants to merge 5 commits into master

Changes from all commits
592 changes: 592 additions & 0 deletions jupyter/car-iar-poisson/car-iar-poisson.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions jupyter/car-iar-poisson/data/nyc_subset.json

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions jupyter/car-iar-poisson/data/scotland_data.json
@@ -0,0 +1 @@
{"N": 56, "y": [9, 39, 11, 9, 15, 8, 26, 7, 6, 20, 13, 5, 3, 8, 17, 9, 2, 7, 9, 7, 16, 31, 11, 7, 19, 15, 7, 10, 16, 11, 5, 3, 7, 8, 11, 9, 11, 8, 6, 4, 10, 8, 2, 6, 19, 3, 2, 3, 28, 6, 1, 1, 1, 1, 0, 0], "E": [1.4, 8.7, 3.0, 2.5, 4.3, 2.4, 8.1, 2.3, 2.0, 6.6, 4.4, 1.8, 1.1, 3.3, 7.8, 4.6, 1.1, 4.2, 5.5, 4.4, 10.5, 22.7, 8.8, 5.6, 15.5, 12.5, 6.0, 9.0, 14.4, 10.2, 4.8, 2.9, 7.0, 8.5, 12.3, 10.1, 12.7, 9.4, 7.2, 5.3, 18.8, 15.8, 4.3, 14.6, 50.7, 8.2, 5.6, 9.3, 88.7, 19.6, 3.4, 3.6, 5.7, 7.0, 4.2, 1.8], "x": [16, 16, 10, 24, 10, 24, 10, 7, 7, 16, 7, 16, 10, 24, 7, 16, 10, 7, 7, 10, 7, 16, 10, 7, 1, 1, 7, 7, 10, 10, 7, 24, 10, 7, 7, 0, 10, 1, 16, 0, 1, 16, 16, 0, 1, 7, 1, 1, 0, 1, 1, 0, 1, 1, 16, 10], "adj": [5, 9, 11, 19, 7, 10, 6, 12, 18, 20, 28, 1, 11, 12, 13, 19, 3, 8, 2, 10, 13, 16, 17, 6, 1, 11, 17, 19, 23, 29, 2, 7, 16, 22, 1, 5, 9, 12, 3, 5, 11, 5, 7, 17, 19, 31, 32, 35, 25, 29, 50, 7, 10, 17, 21, 22, 29, 7, 9, 13, 16, 19, 29, 4, 20, 28, 33, 55, 56, 1, 5, 9, 13, 17, 4, 18, 55, 16, 29, 50, 10, 16, 9, 29, 34, 36, 37, 39, 27, 30, 31, 44, 47, 48, 55, 56, 15, 26, 29, 25, 29, 42, 43, 24, 31, 32, 55, 4, 18, 33, 45, 9, 15, 16, 17, 21, 23, 25, 26, 34, 43, 50, 24, 38, 42, 44, 45, 56, 14, 24, 27, 32, 35, 46, 47, 14, 27, 31, 35, 18, 28, 45, 56, 23, 29, 39, 40, 42, 43, 51, 52, 54, 14, 31, 32, 37, 46, 23, 37, 39, 41, 23, 35, 36, 41, 46, 30, 42, 44, 49, 51, 54, 23, 34, 36, 40, 41, 34, 39, 41, 49, 52, 36, 37, 39, 40, 46, 49, 53, 26, 30, 34, 38, 43, 51, 26, 29, 34, 42, 24, 30, 38, 48, 49, 28, 30, 33, 56, 31, 35, 37, 41, 47, 53, 24, 31, 46, 48, 49, 53, 24, 44, 47, 49, 38, 40, 41, 44, 47, 48, 52, 53, 54, 15, 21, 29, 34, 38, 42, 54, 34, 40, 49, 54, 41, 46, 47, 49, 34, 38, 49, 51, 52, 18, 20, 24, 27, 56, 18, 24, 30, 33, 45, 55], "weights": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], "num": [4, 2, 2, 3, 5, 2, 5, 1, 6, 4, 4, 3, 4, 3, 3, 6, 6, 6, 5, 3, 3, 2, 6, 8, 3, 4, 4, 4, 11, 6, 7, 4, 4, 9, 5, 4, 5, 6, 5, 5, 7, 6, 4, 5, 4, 6, 6, 4, 9, 3, 4, 4, 4, 5, 5, 6]}
47 changes: 47 additions & 0 deletions jupyter/car-iar-poisson/stan/bym2_offset_only.stan
@@ -0,0 +1,47 @@
// use for case study NYC data subset
data {
  int<lower=0> N;
  int<lower=0> N_edges;
  array[N_edges] int<lower=1, upper=N> node1; // node1[i] adjacent to node2[i]
  array[N_edges] int<lower=1, upper=N> node2; // and node1[i] < node2[i]

  array[N] int<lower=0> y; // count outcomes
  vector<lower=0>[N] E; // exposure

  real<lower=0> scaling_factor; // scales the variance of the spatial effects
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept

  real<lower=0> sigma; // overall standard deviation
  real<lower=0, upper=1> rho; // proportion of variance that is spatially structured

  vector[N] theta; // heterogeneous effects
  vector[N] phi; // spatial effects
}
transformed parameters {
  vector[N] convolved_re;
  // variance of each component should be approximately equal to 1
  convolved_re = sqrt(1 - rho) * theta + sqrt(rho / scaling_factor) * phi;
}
model {
  y ~ poisson_log(log_E + beta0 + convolved_re * sigma);

  // ICAR prior on phi (up to proportionality)
  target += -0.5 * dot_self(phi[node1] - phi[node2]);

  beta0 ~ normal(0, 1);
  theta ~ normal(0, 1);
  sigma ~ normal(0, 1);
  rho ~ beta(0.5, 0.5);
  // soft sum-to-zero constraint on phi
  sum(phi) ~ normal(0, 0.001 * N); // equivalent to mean(phi) ~ normal(0, 0.001)
}
generated quantities {
  real log_precision = -2.0 * log(sigma);
  real logit_rho = log(rho / (1.0 - rho));
  vector[N] eta = log_E + beta0 + convolved_re * sigma;
  vector[N] mu = exp(eta);
}
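The scaling_factor is supplied as data rather than computed in Stan: following Riebler et al. (2016), it rescales the ICAR component so that its geometric-mean marginal variance is approximately 1, which is what makes rho interpretable as a proportion of variance. A NumPy sketch of one way to compute it, mirroring the case study's R code (the diagonal jitter is an assumption for numerical stability):

import numpy as np

def compute_scaling_factor(node1, node2, N):
    # Graph Laplacian Q = D - A, the (singular) ICAR precision matrix.
    Q = np.zeros((N, N))
    for i, j in zip(node1, node2):
        Q[i - 1, j - 1] -= 1
        Q[j - 1, i - 1] -= 1
        Q[i - 1, i - 1] += 1
        Q[j - 1, j - 1] += 1
    # Small diagonal jitter so the generalized inverse is well behaved.
    Q_pert = Q + np.eye(N) * np.max(Q.diagonal()) * np.sqrt(np.finfo(float).eps)
    Q_inv = np.linalg.pinv(Q_pert)
    # Project onto the sum-to-zero constraint space (what INLA's inla.qinv does).
    A = np.ones((1, N))
    Q_inv_c = Q_inv - Q_inv @ A.T @ np.linalg.inv(A @ Q_inv @ A.T) @ A @ Q_inv
    # Geometric mean of the marginal variances.
    return float(np.exp(np.mean(np.log(np.diag(Q_inv_c)))))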
51 changes: 51 additions & 0 deletions jupyter/car-iar-poisson/stan/bym2_predictor_plus_offset.stan
@@ -0,0 +1,51 @@
// use for Scotland dataset
data {
  int<lower=0> N;
  int<lower=0> N_edges;
  array[N_edges] int<lower=1, upper=N> node1; // node1[i] adjacent to node2[i]
  array[N_edges] int<lower=1, upper=N> node2; // and node1[i] < node2[i]

  array[N] int<lower=0> y; // count outcomes
  vector[N] x; // predictor
  vector<lower=0>[N] E; // exposure

  real<lower=0> scaling_factor; // scales the variance of the spatial effects
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept
  real beta1; // slope

  real<lower=0> sigma; // overall standard deviation
  real<lower=0, upper=1> rho; // proportion of variance that is spatially structured

  vector[N] theta; // heterogeneous effects
  vector[N] phi; // spatial effects
}
transformed parameters {
  vector[N] convolved_re;
  // variance of each component should be approximately equal to 1
  convolved_re = sqrt(1 - rho) * theta + sqrt(rho / scaling_factor) * phi;
}
model {
  y ~ poisson_log(log_E + beta0 + beta1 * x + convolved_re * sigma);

  // This is the prior for phi! (up to proportionality)
  target += -0.5 * dot_self(phi[node1] - phi[node2]);
  // soft sum-to-zero constraint on phi
  sum(phi) ~ normal(0, 0.001 * N); // equivalent to mean(phi) ~ normal(0, 0.001)

  beta0 ~ normal(0.0, 5.0);
  beta1 ~ normal(0.0, 5.0);
  theta ~ normal(0.0, 1.0);
  sigma ~ normal(0, 5);
  rho ~ beta(0.5, 0.5);
}
generated quantities {
  real log_precision = -2.0 * log(sigma);
  real logit_rho = log(rho / (1.0 - rho));
  vector[N] eta = log_E + beta0 + beta1 * x + convolved_re * sigma;
  vector[N] mu = exp(eta);
}
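For reference, a CmdStanPy run of this model might look as follows; the data munging reuses the adj_to_edges and compute_scaling_factor sketches above, and the paths are assumptions:

from cmdstanpy import CmdStanModel

data["scaling_factor"] = compute_scaling_factor(data["node1"], data["node2"], data["N"])

model = CmdStanModel(stan_file="jupyter/car-iar-poisson/stan/bym2_predictor_plus_offset.stan")
fit = model.sample(data=data, chains=4, iter_warmup=1000, iter_sampling=1000)
print(fit.summary().loc[["beta0", "beta1", "sigma", "rho"], :])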
48 changes: 48 additions & 0 deletions jupyter/car-iar-poisson/stan/bym_predictor_plus_offset.stan
@@ -0,0 +1,48 @@
// use for Scotland dataset
data {
  int<lower=0> N;
  int<lower=0> N_edges;
  array[N_edges] int<lower=1, upper=N> node1; // node1[i] adjacent to node2[i]
  array[N_edges] int<lower=1, upper=N> node2; // and node1[i] < node2[i]

  array[N] int<lower=0> y; // count outcomes
  vector[N] x; // predictor
  vector<lower=0>[N] E; // exposure
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept
  real beta1; // slope

  real<lower=0> tau_theta; // precision of heterogeneous effects
  real<lower=0> tau_phi; // precision of spatial effects

  vector[N] theta; // heterogeneous effects
  vector[N] phi; // spatial effects
}
transformed parameters {
  real<lower=0> sigma_theta = inv(sqrt(tau_theta)); // convert precision to sigma
  real<lower=0> sigma_phi = inv(sqrt(tau_phi)); // convert precision to sigma
}
model {
  y ~ poisson_log(log_E + beta0 + beta1 * x + phi * sigma_phi
                  + theta * sigma_theta);

  // ICAR prior on phi, computed on the unit scale (sd = 1)
  target += -0.5 * dot_self(phi[node1] - phi[node2]);
  // soft sum-to-zero constraint on phi
  sum(phi) ~ normal(0, 0.001 * N); // equivalent to mean(phi) ~ normal(0, 0.001)

  beta0 ~ normal(0, 5);
  beta1 ~ normal(0, 5);
  theta ~ normal(0, 1);
  tau_theta ~ gamma(3.2761, 1.81); // Carlin WinBUGS priors
  tau_phi ~ gamma(1, 1); // Carlin WinBUGS priors
}
generated quantities {
  vector[N] mu = exp(log_E + beta0 + beta1 * x + phi * sigma_phi
                     + theta * sigma_theta);
}
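The transformed parameters block converts the WinBUGS-style precisions to the standard deviations used in the non-centered likelihood:

$$\sigma_\theta = \tau_\theta^{-1/2}, \qquad \sigma_\phi = \tau_\phi^{-1/2}.$$

For instance, at the prior mean of the Carlin prior, $\mathbb{E}[\tau_\theta] = 3.2761 / 1.81 \approx 1.81$, this gives $\sigma_\theta \approx 0.74$.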
Binary file added jupyter/car-iar-poisson/stan/pois
Binary file not shown.
19 changes: 19 additions & 0 deletions jupyter/car-iar-poisson/stan/pois.stan
@@ -0,0 +1,19 @@
data {
  int<lower=0> N;
  array[N] int<lower=0> y; // count outcomes
  vector<lower=0>[N] E; // exposure
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept
}
model {
  y ~ poisson_log(log_E + beta0); // intercept only, no covariates
  beta0 ~ normal(0.0, 2.5);
}
generated quantities {
  vector[N] eta = log_E + beta0;
  vector[N] mu = exp(eta);
}
Binary file added jupyter/car-iar-poisson/stan/pois_icar
Binary file not shown.
39 changes: 39 additions & 0 deletions jupyter/car-iar-poisson/stan/pois_icar.stan
@@ -0,0 +1,39 @@
functions {
  real icar_normal_lpdf(vector phi, int N, array[] int node1,
                        array[] int node2) {
    return -0.5 * dot_self(phi[node1] - phi[node2]);
  }
}
data {
  int<lower=0> N;
  int<lower=0> N_edges;
  array[N_edges] int<lower=1, upper=N> node1; // node1[i] adjacent to node2[i]
  array[N_edges] int<lower=1, upper=N> node2; // and node1[i] < node2[i]

  array[N] int<lower=0> y; // count outcomes
  vector<lower=0>[N] x; // predictor
  vector<lower=0>[N] E; // exposure
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept
  real beta1; // slope
  real<lower=0> sigma; // overall standard deviation
  vector[N] phi; // spatial effects
}
model {
  y ~ poisson_log(log_E + beta0 + beta1 * x + phi * sigma);
  beta0 ~ normal(0.0, 1.0);
  beta1 ~ normal(0.0, 1.0);
  sigma ~ normal(0.0, 1.0);
  phi ~ icar_normal(N, node1, node2);
  // soft sum-to-zero constraint on phi
  // more efficient than mean(phi) ~ normal(0, 0.001)
  sum(phi) ~ normal(0, 0.001 * N);
}
generated quantities {
  vector[N] eta = log_E + beta0 + beta1 * x + phi * sigma;
  vector[N] mu = exp(eta);
}
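The icar_normal_lpdf function above implements the pairwise-difference form of the intrinsic CAR prior, up to an additive constant:

$$\log p(\phi) = -\frac{1}{2} \sum_{i \sim j} (\phi_i - \phi_j)^2 + \text{const},$$

where $i \sim j$ ranges over pairs of neighboring regions; dot_self(phi[node1] - phi[node2]) computes the sum once per undirected edge. Because this density is invariant to shifting phi by a constant, it is improper, hence the soft sum-to-zero constraint in the model block.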
Binary file added jupyter/car-iar-poisson/stan/pois_re
Binary file not shown.
23 changes: 23 additions & 0 deletions jupyter/car-iar-poisson/stan/pois_re.stan
@@ -0,0 +1,23 @@
data {
  int<lower=0> N;
  array[N] int<lower=0> y; // count outcomes
  vector<lower=0>[N] E; // exposure
}
transformed data {
  vector[N] log_E = log(E);
}
parameters {
  real beta0; // intercept
  vector[N] theta; // heterogeneous random effects
  real<lower=0> sigma; // scale of random effects (non-centered parameterization)
}
model {
  y ~ poisson_log(log_E + beta0 + theta * sigma);
  beta0 ~ normal(0.0, 2.5);
  theta ~ normal(0, 1);
  sigma ~ normal(0, 5);
}
generated quantities {
  vector[N] eta = log_E + beta0 + theta * sigma;
  vector[N] mu = exp(eta);
}
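As a usage note, the generated quantity mu is the expected count per region, so mu / E is comparable to the raw standardized morbidity ratio y / E. A short sketch reusing the data dict from the earlier sketches (paths and variable names assumed):

import numpy as np
from cmdstanpy import CmdStanModel

model = CmdStanModel(stan_file="jupyter/car-iar-poisson/stan/pois_re.stan")
fit = model.sample(data={"N": data["N"], "y": data["y"], "E": data["E"]})

E = np.array(data["E"])
smr = np.array(data["y"]) / E                        # raw relative risk
rr_hat = (fit.stan_variable("mu") / E).mean(axis=0)  # posterior mean relative risk
print(np.column_stack([smr, rr_hat])[:5])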