One_Proportion_Obama_Care_Fa16.R

# Bayesian Inferences for θ,
# a related parameter, and future data - A conjugate (Beta) prior analysis

# See pages 51-52 of the Text by CJBH for some of the R commands described below.
# Suppose observed data are summarized as
#  16 favored Obama Care, out of n=48 constituents polled
# NOTE: A sufficient statistic, T=number among n who favor Obama Care, exists in this 
# sampling model.

# A priori, theta is believed to be within the interval [0.2 ,0.6] with high probability.
# Fit a prior density from the Beta(a,b) family to match this info - see lecture notes

# old way ( <- )to make assignments to objects in R
a.prior <- 9.2   
b.prior <- 13.8

# Current way ( = ) to make assignments to objects in R
a.prior = 9.2   
b.prior = 13.8

# you will find both = and <- used interchangeably in R-codes, etc.
# plotting p1(theta) - prior density

theta = seq(0,1,0.01)  # Discretize theta using a grid of size 0.01

prr.theta = dbeta(theta,shape1=a.prior,shape2=b.prior)

#print(cbind(theta, prr.theta))

par(mfrow=c(2,2))  # making two rows and two columns for plots

plot(theta,prr.theta, type="l", main=paste("Beta(a=",a.prior,",b=",b.prior,") prior"),
     xlab="theta",ylab="Prob",cex.main=0.8)

# Likelihood, p2(tstar|theta),of the actual data from Binomial(n, theta)
n = 48    ## number of people who were polled
tstar = 16    ##  actual number who favored Obama Care 
lik.theta = dbinom(tstar,size=n,prob=theta)

# Unnormalized Likelihood -kernel
lik.theta = (theta^tstar)*((1-theta)^(n-tstar))

plot(theta,lik.theta, type="l", col="blue", main="Binomial Likelihood",xlab="theta",ylab="",cex.main=0.8)

# Product of Likelihood and Prior, but only 
# kernel or unnormalized posterior of theta
unnorm.post.theta = lik.theta*prr.theta
plot(theta,unnorm.post.theta,type="l",main="Likelihood*Prior", col="purple", cex.main=0.8)

# The Posterior is a Beta distribution due to the conjugate structure
# of the Beta prior and Binomial likelihood
# The parameters of the Beta posterior distribution
a.post = tstar + a.prior   
b.post = (n-tstar)+ b.prior

a.post
b.post

post.theta = dbeta(theta,shape1=a.post, shape2=b.post)

plot(theta, post.theta, type="l", main=paste("Posterior: Beta distn (a=",
                                             a.post,",b=",b.post,")"),col="red", cex.main=0.8)

# posterior by itself #

# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)

plot(theta, post.theta, type="l",main=paste("Posterior: Beta distn (a=",
                                            a.post,",b=",b.post,")"),col="red", cex.main=0.8)

# Exact Posterior mean 
post.mean = a.post/(a.post+b.post)

# Exact Posterior mode  
post.mode = (a.post-1)/(a.post+b.post-2)

print(c("Post mean=", post.mean, "Post mode=", post.mode))

#
# Approximate inferences (the posterior, point estimates, 95% credible Interval, posterior 
#prob) 
# for theta by Direct Monte-Carlo Simulation - i.i.d sampler
# pseudo random sample of size N from the posterior distribution of theta
#

N = 1000
# Fix the seed in sampling the posterior
set.seed(12345)

sample.post.theta = rbeta(N,shape1=a.post,shape2=b.post)

# Approx. posterior dist of theta
# A square region is needed for the next Plot

quartz(width=4,height=4,pointsize=8)
hist(sample.post.theta, xlab = "theta", ylab = "frequency", main =
       "Approximate posterior distribution of theta 
     using N=1000 draws from the posterior")

# Approximate posterior mean, variance of theta

MC.est.post.mean.theta = mean(sample.post.theta)

MC.est.post.var.theta = var(sample.post.theta)

print(c("MC est of Post mean=", MC.est.post.mean.theta, "MC est of Post variance=", 
        MC.est.post.var.theta))

# Approx. 95% credible Interval for theta

print(quantile(sample.post.theta,probs = c(0.025,0.975)))

# What is the approx. Posterior probability that theta < 0.50?

post.prob0.5 = mean((sample.post.theta < 0.5))

post.prob0.5

# Approximate inferences (posterior median, 95% credible Interval, posterior prob) for theta by numerical methods

# Posterior median
post.med = qbeta(0.5,shape1=a.post,shape2=b.post)

print(c("approx Post median=", post.med))

#95% credible Interval for theta

print(c(qbeta(0.025,shape1=a.post,shape2=b.post), 
        qbeta(0.975,shape1=a.post,shape2=b.post) ) )

# approx Posterior probability that theta < 0.50 

pbeta(0.5,shape1=a.post,shape2=b.post)


#  INFERENCE FOR TAU BEGINS
# Use the pseudo random sample of size N from the posterior distribution of theta
# to get a pseudo random sample of size N from the posterior distribution of tau,
# the odds of favoring Obama Care.

sample.post.tau = sample.post.theta/(1-sample.post.theta)

# A square region is needed for the next Plot

quartz(width=4,height=4,pointsize=8)
hist(sample.post.tau, xlab = "tau", ylab = "frequency", main =
       "Approximate posterior distribution of tau 
     using N=1000 draws from the posterior")

#
# Approximate Bayesian Inferences for tau using by Monte-Carlo simulation -
# pseudo random sample of size N from the posterior distribution of tau
#

# A point estimate of tau 
MC.approx.post.mean.tau = mean(sample.post.tau)

MC.approx.post.mean.tau

#95% credible Interval for tau
print(quantile(sample.post.tau,probs = c(0.025,0.975)))

# What is the (approximate) Posterior probability that tau >= 1.0?
post.prob1.0 = mean((sample.post.tau >= 1.0))

post.prob1.0

# Approximate predictive Bayesian inferences for W = # of people among the next 40 polled ># who favor Obama Care 
# by Monte-Carlo simulation - pseudo random 
# sample of size N2 from the posterior predictive distribution of W
m = 40    
N2 = length(sample.post.theta)
set.seed(12345)
sample.pred.W = rbinom(n=N2,size=m,prob= sample.post.theta)

# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.pred.W, xlab = "W = # of people among 40 who favor Obama Care ", ylab = "frequency", main = 
       "Approximate posterior predictive distribution of W 
     using N=1000 draws from the posterior predictive")

# A point estimate of W 
MC.est.pred.mean.W = mean(sample.pred.W)

MC.est.pred.mean.W

# Approx. 95% posterior predictive Interval for W

print(quantile(sample.pred.W,probs = c(0.025,0.975)))

# What is the (approximate) posterior predictive probability that W >= 25?

pred.prob.25 = mean((sample.pred.W >= 25))

pred.prob.25

# Does the sampling model fit the data?
# check if tstar is or is not in the middle of the histogram of the predictive distribution of the 
# future datum,  W1 = # of people among the next 48 polled who would favor Obama Care

# Approximate predictive Bayesian inferences for W1 using a
# pseudo random sample of size N2 from the posterior predictive distribution of W1

m1 = 48    
N2 = length(sample.post.theta)
set.seed(12345)
sample.pred.W1 = rbinom(n=N2,size=m1,prob= sample.post.theta)

# A square region is needed for the next Plot
quartz(width=4,height=4,pointsize=8)
hist(sample.pred.W1, xlab = "W1 = # of people among 48 who favor Obama Care", ylab = "frequency", main =
       "Approximate posterior predictive distribution of W1
     using N=1000 draws from the posterior predictive") 
abline(v=tstar, lwd=3, col="red")