-
-
Notifications
You must be signed in to change notification settings - Fork 478
/
7.2_SummarizingLinearRegressionUsingSimulation.R
63 lines (50 loc) · 1.93 KB
/
7.2_SummarizingLinearRegressionUsingSimulation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
library(rstan)
library(ggplot2)
source("earnings.data.R", echo = TRUE)
## Simulation to represent predictive uncertainty
# Model of log earnings with interactions (earnings_interactions.stan)
## lm(log.earn ~ height + male + height:male)
dataList.3 <- list(N=N, earnings=earnings, height=height,sex=sex1)
earnings_interactions.sf1 <- stan(file='earnings_interactions.stan',
data=dataList.3,
iter=1000, chains=4)
print(earnings_interactions.sf1)
post <- extract(earnings_interactions.sf1)
# Prediction
log.earn <- log(earnings)
male <- 2 - sex1
earn.logmodel.3 <- lm (log.earn ~ height + male + height:male)
x.new <- data.frame (height=68, male=1)
pred.interval <- predict (earn.logmodel.3, x.new, interval="prediction",
level=.95)
print (exp (pred.interval))
## Constructing the predictive interval using simulation
pred <- exp (rnorm (1000, 9.95, .88))
pred.original.scale <- rnorm (1000, 9.95, .88)
# Histograms (Figure 7.2)
frame1 = data.frame(x1=pred.original.scale)
p1 <- ggplot(frame1,aes(x=x1)) +
scale_x_continuous("log(earnings)") +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
print(p1)
dev.new()
frame2 = data.frame(x1=pred)
p2 <- ggplot(frame1,aes(x=x1)) +
scale_x_continuous("earnings") +
geom_histogram(colour = "black", fill = "white") +
theme_bw()
print(p2)
## Why do we need simulation for predictive inferences?
pred.man <- exp (rnorm (1000, 8.4 + 0.017*68 - 0.079*1 + .007*68*1, .88))
pred.woman <- exp (rnorm (1000, 8.4 + 0.017*68 - 0.079*0 + .007*68*0, .88))
pred.diff <- pred.man - pred.woman
pred.ratio <- pred.man/pred.woman
## Simulation to represent uncertainty in regression coefficients
n.sims <- 1000
height.coef <- post$beta[,2]
mean (height.coef)
sd (height.coef)
quantile (height.coef, c(.025, .975))
height.for.men.coef <- post$beta[,2] + post$beta[,4]
quantile (height.for.men.coef, c(.025, .975))