-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBayesian_MCMC.R
68 lines (59 loc) · 2.06 KB
/
Bayesian_MCMC.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
bigdata = read.csv("/election_data/electoral_votes.csv", header=T)
State<- bigdata[,"State"]
Trump <- numeric (length(State))
for (i in 1:length(State)){
state = State[i]
## create paths to csv files by concatenatings strings
path = paste("poll_by_state/", state, ".csv", sep = "")
data = read.csv(path, header=T)
## we only need 2 columns, samplesize(N) and Trump_voters(X)
smalldata <- data[,c("samplesize","Trump_voters")]
smalldata
### the question is, find the posterior distribution of p given value of X
log_lik = function(th){
result = 0
for (i in 1:nrow(smalldata)){
n <- smalldata[i,"samplesize"]
x <- smalldata[i,"Trump_voters"]
result = result + dbinom(x,n,th,log=TRUE)
return (result)
}
}
### use beta distribution for prior
log_prior = function (th){
### use parameter of beta as .5 and .5 to gives more weight at p =.5
log(dbeta(th, .5 , .5))
}
# log posterior is proportional to the sum of log_prior and log_lik
post = function (th){
log_prior(th)+log_lik(th)
}
### perform Metropolis Hasting
nit = 100000
results = matrix(0,nrow = nit, ncol = 1)
## start at the mean of probability from all polls "th"
## th is registered as the first result
th = mean(data[,"rawpoll_trump"])/100
results [1,1] = th
for (it in 2:nit){
# proose a candidae by add a sample from a Gaussian distribution
cand = th + rnorm(1,0,.001)
if (0<cand && cand <1){
# since we take the log-likelihood, we need to take exponential back
ratio = exp(post(cand)/post(th))
if (runif(1,0,1)<ratio){
th = cand
}
}
results [it,] = th
}
## we collect the mean of all iterations
p = results[,1]
Trump [i] = mean(p)
## the process is long, printing the progress to keep yourself entertained
message = paste ("Done ", State[i], "," , length (State) -i , " states to go", sep = "")
print (message)
}
# write the probabilities into a csv file, ready to use by Python
df = data.frame(State, Trump)
write.csv(df,"election_data/bayesian_data.csv")