-
-
Notifications
You must be signed in to change notification settings - Fork 478
/
7.3_SimulationForNonLinearPredictions.R
149 lines (122 loc) · 4.55 KB
/
7.3_SimulationForNonLinearPredictions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
library(rstan)
library(ggplot2)
congress <- vector ("list", 49)
for (i in 1:49){
year <- 1896 + 2*(i-1)
file <- paste ("cong3/", year, ".asc", sep="")
data.year <- matrix (scan (file), byrow=TRUE, ncol=5)
data.year <- cbind (rep(year, nrow(data.year)), data.year)
congress[[i]] <- data.year
}
# Note: download all ".asc" files into your R working directory in a file
# named cong3 for the above command to work
i86 <- (1986-1896)/2 + 1
cong86 <- congress[[i86]]
cong88 <- congress[[i86+1]]
cong90 <- congress[[i86+2]]
v86 <- cong86[,5]/(cong86[,5]+cong86[,6])
bad86 <- cong86[,5]==-9 | cong86[,6]==-9
v86[bad86] <- NA
contested86 <- v86>.1 & v86<.9
inc86 <- cong86[,4]
v88 <- cong88[,5]/(cong88[,5]+cong88[,6])
bad88 <- cong88[,5]==-9 | cong88[,6]==-9
v88[bad88] <- NA
contested88 <- v88>.1 & v88<.9
inc88 <- cong88[,4]
v90 <- cong90[,5]/(cong90[,5]+cong90[,6])
bad90 <- cong90[,5]==-9 | cong90[,6]==-9
v90[bad90] <- NA
contested90 <- v90>.1 & v90<.9
inc90 <- cong90[,4]
jitt <- function (x,delta) {x + runif(length(x), -delta, delta)}
## Plot Figure 7.3
v88.hist <- ifelse (v88<.1, .0001, ifelse (v88>.9, .9999, v88))
frame1 = data.frame(x1=v88.hist)
p1 <- ggplot(frame1,aes(x=x1)) +
scale_x_continuous("Democratic Share of the Two-Party Vote") +
labs(title="Congressional Elections in 1988") +
geom_histogram(colour = "black", fill = "white",binwidth=0.05) +
theme_bw()
print(p1)
## Fitting the model (congress.stan)
## lm (vote.88 ~ vote.86 + incumbency.88)
v86.adjusted <- ifelse (v86<.1, .25, ifelse (v86>.9, .75, v86))
vote.86 <- v86.adjusted[contested88]
incumbency.88 <- inc88[contested88]
vote.88 <- v88[contested88]
ok <- !is.na (vote.86+incumbency.88+vote.88)
dataList.1 <- list(N=length(vote.88[ok]), vote_88=vote.88[ok], vote_86=vote.86[ok],incumbency_88=incumbency.88[ok])
congress.sf1 <- stan(file='congress.stan', data=dataList.1,
iter=1000, chains=4)
print(congress.sf1)
fit88.post <- extract(congress.sf1)
## Figure 7.4
# 7.4 (a)
j.v86 <- ifelse (contested86, v86, jitt (v86, .02))
j.v88 <- ifelse (contested88, v88, jitt (v88, .02))
frame1 = data.frame(x1=j.v86[inc88==0],y1=j.v88[inc88==0])
frame2 = data.frame(x2=j.v86[inc88==1],y2=j.v88[inc88==1])
frame3 = data.frame(x3=j.v86[inc88==-1],y3=j.v88[inc88==-1])
dev.new()
p2 <- ggplot() +
geom_point(data=frame1,aes(x=x1,y=y1),shape=1) +
geom_point(data=frame2,aes(x=x2,y=y2),shape=16) +
geom_point(data=frame3,aes(x=x3,y=y3),shape=4) +
scale_y_continuous("Democratic Vote Share in 1988") +
scale_x_continuous("Democratic Vote Share in 1986") +
theme_bw() +
geom_abline(yintercept=0,slope=1) +
labs(title="Raw Data (jittered at 0 and 1)")
print(p2)
# 7.4 (b)
v86.adjusted <- ifelse (v86<.1, .25, ifelse (v86>.9, .75, v86))
vote.86 <- v86.adjusted[contested88]
vote.88 <- v88[contested88]
incumbency.88 <- inc88[contested88]
frame4 = data.frame(x1=vote.86[incumbency.88==0],y1=vote.88[incumbency.88==0])
frame5 = data.frame(x2=vote.86[incumbency.88==1],y2=vote.88[incumbency.88==1])
frame6 = data.frame(x3=vote.86[incumbency.88==-1],y3=vote.88[incumbency.88==-1])
dev.new()
p3 <- ggplot() +
geom_point(data=frame4,aes(x=x1,y=y1),shape=1) +
geom_point(data=frame5,aes(x=x2,y=y2),shape=16) +
geom_point(data=frame6,aes(x=x3,y=y3),shape=4) +
scale_y_continuous("Democratic Vote Share in 1988") +
scale_x_continuous("Democratic Vote Share in 1986") +
theme_bw() +
geom_abline(yintercept=0,slope=1) +
labs(title="Adjusted data (imputing 0's and 1's to .75)")
print(p3)
## Simulation for inferences and predictions of new data points
incumbency.90 <- inc90
vote.88 <- v88
n.tilde <- length (vote.88)
X.tilde <- cbind (rep (1, n.tilde), vote.88, incumbency.90)
n.sims <- 4000
sim.88 <- fit88.post$beta
y.tilde <- array (NA, c(n.sims, n.tilde))
for (s in 1:n.sims){
pred <- X.tilde %*% fit88.post$beta[s,]
ok <- !is.na(pred)
y.tilde[s,ok] <- rnorm (sum(ok), pred[ok], fit88.post$sigma[s])
}
## Predictive simulation for a nonlinear function of new data
y.tilde.new <- ifelse (y.tilde=="NaN", 0, y.tilde)
dems.tilde <- rowSums (y.tilde.new > .5)
# or
dems.tilde <- rep (NA, n.sims)
for (s in 1:n.sims){
dems.tilde[s] <- sum (y.tilde.new[s,] > .5)
}
## Implementation using functions
Pred.88 <- function (X.pred){
pred <- X.tilde %*% t(fit88.post$beta)
ok <- !is.na(pred)
n.pred <- length (pred)
y.pred <- rep (NA, n.pred)
y.pred[ok] <- rnorm (sum(ok), pred[ok], fit88.post$sigma)
return (y.pred)
}
y.tilde <- replicate (1000, Pred.88 (X.tilde))
dems.tilde <- replicate (1000, Pred.88 (X.tilde) > .5)