-
-
Notifications
You must be signed in to change notification settings - Fork 478
/
19.4_RedundantParameters&IntentionallyNonidentifiableModels.R
148 lines (124 loc) · 4.85 KB
/
19.4_RedundantParameters&IntentionallyNonidentifiableModels.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
library(rstan)
library(ggplot2)
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
mn <- srrs2$state=="MN"
radon <- srrs2$activity[mn]
log.radon <- log (ifelse (radon==0, .1, radon))
floor <- srrs2$floor[mn] # 0 for basement, 1 for first floor
n <- length(radon)
y <- log.radon
x <- floor
# get county index variable
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep (NA, J)
for (i in 1:J){
county[county.name==uniq[i]] <- i
}
# no predictors
ybarbar = mean(y)
sample.size <- as.vector (table (county))
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
## Redundant mean parameters for a simple nested model
# non redundant
dataList.1 <- list(N=n, J=J,y=y,county=county)
radon.sf1 <- stan(file='radon.stan', data=dataList.1, iter=1000, chains=4)
print(radon.sf1, pars = c("eta", "sigma_y", "lp__"))
# redudant -- much faster
radon_redundant.sf1 <- stan(file='radon_redundant.stan', data=dataList.1,
iter=1000, chains=4)
print(radon_redundant.sf1, pars = c("eta", "sigma_y", "lp__"))
#############################################################################################
## Read the data
# Data are at http://www.stat.columbia.edu/~gelman/arm/examples/pilots
# The R codes & data files should be saved in the same directory for
# the source command to work
## Read the pilots data & define variables
# Data are at http://www.stat.columbia.edu/~gelman/arm/examples/pilots
pilots <- read.table ("pilots.dat", header=TRUE)
attach (pilots)
group.names <- as.vector(unique(group))
scenario.names <- as.vector(unique(scenario))
n.group <- length(group.names)
n.scenario <- length(scenario.names)
successes <- NULL
failures <- NULL
group.id <- NULL
scenario.id <- NULL
for (j in 1:n.group){
for (k in 1:n.scenario){
ok <- group==group.names[j] & scenario==scenario.names[k]
successes <- c (successes, sum(recovered[ok]==1,na.rm=T))
failures <- c (failures, sum(recovered[ok]==0,na.rm=T))
group.id <- c (group.id, j)
scenario.id <- c (scenario.id, k)
}
}
y <- successes/(successes+failures)
y.mat <- matrix (y, n.scenario, n.group)
sort.group <- order(apply(y.mat,2,mean))
sort.scenario <- order(apply(y.mat,1,mean))
group.id.new <- sort.group[group.id]
scenario.id.new <- sort.scenario[scenario.id]
y.mat.new <- y.mat[sort.scenario,sort.group]
scenario.abbr <- c("Nagoya", "B'ham", "Detroit", "Ptsbgh", "Roseln", "Chrlt", "Shemya", "Toledo")
## Define variables
treatment <- group.id
airport <- scenario.id
## Fit the 2-model using Bugs
n.treatment <- max(treatment)
n.airport <- max(airport)
n <- length(y)
## Model fit
dataList.2 <- list(N=n,y=y,n_airport=n.airport,
n_treatment=n.treatment,airport=airport,
treatment=treatment)
pilots.sf1 <- stan(file='pilots.stan', data=dataList.2, iter=1000, chains=4)
print(pilots.sf1,pars = c("g","d", "sigma_y", "lp__"))
polls.subset <- read.table ("polls.subset.dat")
attach(polls.subset)
# Load in data for region indicators
# Use "state", an R data file (type ?state from the R command window for info)
#
# Regions: 1=northeast, 2=south, 3=north central, 4=west, 5=d.c.
# We have to insert d.c. (it is the 9th "state" in alphabetical order)
data (state) # "state" is an R data file
state.abbr <- c (state.abb[1:8], "DC", state.abb[9:50])
dc <- 9
not.dc <- c(1:8,10:51)
region <- c(3,4,4,3,4,4,1,1,5,3,3,4,4,2,2,2,2,3,3,1,1,1,2,2,3,2,4,2,4,1,1,4,1,3,2,2,3,4,1,1,3,2,3,3,4,1,3,4,1,2,4)
# define other data summaries
ok <- !is.na(bush)
y <- bush[ok] # 1 if support bush, 0 if support dukakis
n <- length(y) # of survey respondents
n.age <- max(age) # of age categories
n.edu <- max(edu) # of education categories
n.state <- max(state) # of states
n.region <- max(region) # of regions
age.ok <- age[ok]
edu.ok <- edu[ok]
state.ok <- state[ok]
region.ok <- region[ok]
female.ok <- female[ok]
black.ok <- black[ok]
# also include a measure of previous vote as a state-level predictor
library (foreign)
presvote <- read.dta ("presvote.dta")
attach (presvote)
v.prev <- presvote$g76_84pr
age.edu <- n.edu*(age-1) + edu
age.edu.ok <- age.edu[ok]
n.age.edu <- 16
dataList.3 <- list(N=n, n_age=n.age,n_edu=n.edu,n_state=n.state,
n_region=n.region, n_age_edu=n.age.edu,
y=y,female=female.ok,black=black.ok,
age=age.ok,edu=edu.ok, state=state.ok,region=region,
v_prev=v.prev,age_edu=age.edu.ok)
election88.sf1 <- stan(file='election88.stan', data=dataList.3,
iter=1000, chains=4)
print(election88.sf1, pars = c("beta","b_age", "b_edu","b_state","b_region","b_age_edu", "lp__"))