-
Notifications
You must be signed in to change notification settings - Fork 0
/
One_Proportion_Obama_Care_Fa16.R
215 lines (146 loc) · 7.01 KB
/
One_Proportion_Obama_Care_Fa16.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
# Bayesian Inferences for θ,
# a related parameter, and future data - A conjugate (Beta) prior analysis
# See pages 51-52 of the Text by CJBH for some of the R commands described below.
# Suppose observed data are summarized as
# 16 favored Obama Care, out of n=48 constituents polled
# NOTE: A sufficient statistic, T=number among n who favor Obama Care, exists in this
# sampling model.
# A priori, theta is believed to be within the interval [0.2 ,0.6] with high probability.
# Fit a prior density from the Beta(a,b) family to match this info - see lecture notes
# old way ( <- )to make assignments to objects in R
a.prior <- 9.2
b.prior <- 13.8
# Current way ( = ) to make assignments to objects in R
a.prior = 9.2
b.prior = 13.8
# you will find both = and <- used interchangeably in R-codes, etc.
# plotting p1(theta) - prior density
theta = seq(0,1,0.01) # Discretize theta using a grid of size 0.01
prr.theta = dbeta(theta,shape1=a.prior,shape2=b.prior)
#print(cbind(theta, prr.theta))
par(mfrow=c(2,2)) # making two rows and two columns for plots
plot(theta,prr.theta, type="l", main=paste("Beta(a=",a.prior,",b=",b.prior,") prior"),
xlab="theta",ylab="Prob",cex.main=0.8)
# Likelihood, p2(tstar|theta),of the actual data from Binomial(n, theta)
n = 48 ## number of people who were polled
tstar = 16 ## actual number who favored Obama Care
lik.theta = dbinom(tstar,size=n,prob=theta)
# Unnormalized Likelihood -kernel
lik.theta = (theta^tstar)*((1-theta)^(n-tstar))
plot(theta,lik.theta, type="l", col="blue", main="Binomial Likelihood",xlab="theta",ylab="",cex.main=0.8)
# Product of Likelihood and Prior, but only
# kernel or unnormalized posterior of theta
unnorm.post.theta = lik.theta*prr.theta
plot(theta,unnorm.post.theta,type="l",main="Likelihood*Prior", col="purple", cex.main=0.8)
# The Posterior is a Beta distribution due to the conjugate structure
# of the Beta prior and Binomial likelihood
# The parameters of the Beta posterior distribution
a.post = tstar + a.prior
b.post = (n-tstar)+ b.prior
a.post
b.post
post.theta = dbeta(theta,shape1=a.post, shape2=b.post)
plot(theta, post.theta, type="l", main=paste("Posterior: Beta distn (a=",
a.post,",b=",b.post,")"),col="red", cex.main=0.8)
# posterior by itself #
# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
plot(theta, post.theta, type="l",main=paste("Posterior: Beta distn (a=",
a.post,",b=",b.post,")"),col="red", cex.main=0.8)
# Exact Posterior mean
post.mean = a.post/(a.post+b.post)
# Exact Posterior mode
post.mode = (a.post-1)/(a.post+b.post-2)
print(c("Post mean=", post.mean, "Post mode=", post.mode))
#
# Approximate inferences (the posterior, point estimates, 95% credible Interval, posterior
#prob)
# for theta by Direct Monte-Carlo Simulation - i.i.d sampler
# pseudo random sample of size N from the posterior distribution of theta
#
N = 1000
# Fix the seed in sampling the posterior
set.seed(12345)
sample.post.theta = rbeta(N,shape1=a.post,shape2=b.post)
# Approx. posterior dist of theta
# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.post.theta, xlab = "theta", ylab = "frequency", main =
"Approximate posterior distribution of theta
using N=1000 draws from the posterior")
# Approximate posterior mean, variance of theta
MC.est.post.mean.theta = mean(sample.post.theta)
MC.est.post.var.theta = var(sample.post.theta)
print(c("MC est of Post mean=", MC.est.post.mean.theta, "MC est of Post variance=",
MC.est.post.var.theta))
# Approx. 95% credible Interval for theta
print(quantile(sample.post.theta,probs = c(0.025,0.975)))
# What is the approx. Posterior probability that theta < 0.50?
post.prob0.5 = mean((sample.post.theta < 0.5))
post.prob0.5
# Approximate inferences (posterior median, 95% credible Interval, posterior prob) for theta by numerical methods
# Posterior median
post.med = qbeta(0.5,shape1=a.post,shape2=b.post)
print(c("approx Post median=", post.med))
#95% credible Interval for theta
print(c(qbeta(0.025,shape1=a.post,shape2=b.post),
qbeta(0.975,shape1=a.post,shape2=b.post) ) )
# approx Posterior probability that theta < 0.50
pbeta(0.5,shape1=a.post,shape2=b.post)
# INFERENCE FOR TAU BEGINS
# Use the pseudo random sample of size N from the posterior distribution of theta
# to get a pseudo random sample of size N from the posterior distribution of tau,
# the odds of favoring Obama Care.
sample.post.tau = sample.post.theta/(1-sample.post.theta)
# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.post.tau, xlab = "tau", ylab = "frequency", main =
"Approximate posterior distribution of tau
using N=1000 draws from the posterior")
#
# Approximate Bayesian Inferences for tau using by Monte-Carlo simulation -
# pseudo random sample of size N from the posterior distribution of tau
#
# A point estimate of tau
MC.approx.post.mean.tau = mean(sample.post.tau)
MC.approx.post.mean.tau
#95% credible Interval for tau
print(quantile(sample.post.tau,probs = c(0.025,0.975)))
# What is the (approximate) Posterior probability that tau >= 1.0?
post.prob1.0 = mean((sample.post.tau >= 1.0))
post.prob1.0
# Approximate predictive Bayesian inferences for W = # of people among the next 40 polled ># who favor Obama Care
# by Monte-Carlo simulation - pseudo random
# sample of size N2 from the posterior predictive distribution of W
m = 40
N2 = length(sample.post.theta)
set.seed(12345)
sample.pred.W = rbinom(n=N2,size=m,prob= sample.post.theta)
# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.pred.W, xlab = "W = # of people among 40 who favor Obama Care ", ylab = "frequency", main =
"Approximate posterior predictive distribution of W
using N=1000 draws from the posterior predictive")
# A point estimate of W
MC.est.pred.mean.W = mean(sample.pred.W)
MC.est.pred.mean.W
# Approx. 95% posterior predictive Interval for W
print(quantile(sample.pred.W,probs = c(0.025,0.975)))
# What is the (approximate) posterior predictive probability that W >= 25?
pred.prob.25 = mean((sample.pred.W >= 25))
pred.prob.25
# Does the sampling model fit the data?
# check if tstar is or is not in the middle of the histogram of the predictive distribution of the
# future datum, W1 = # of people among the next 48 polled who would favor Obama Care
# Approximate predictive Bayesian inferences for W1 using a
# pseudo random sample of size N2 from the posterior predictive distribution of W1
m1 = 48
N2 = length(sample.post.theta)
set.seed(12345)
sample.pred.W1 = rbinom(n=N2,size=m1,prob= sample.post.theta)
# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.pred.W1, xlab = "W1 = # of people among 48 who favor Obama Care", ylab = "frequency", main =
"Approximate posterior predictive distribution of W1
using N=1000 draws from the posterior predictive")
abline(v=tstar, lwd=3, col="red")