Skip to content

Commit

Permalink
cleaning up and commenting replication files
Browse files Browse the repository at this point in the history
  • Loading branch information
rajitachandak committed Mar 27, 2024
1 parent 1b6a8fe commit fc0912e
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 11 deletions.
14 changes: 5 additions & 9 deletions R/bw_table.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Script for generating table 4 in Cattaneo, Chandak, Jansson and Ma (2022)
start.time <- Sys.time()
# Script for generating table 1 in Cattaneo, Chandak, Jansson and Ma (2022)

#parameters
n = 1000
num_sims = 1000
y = 0.0
x = 0.0
#true density function
true_dens = stats::dnorm(y, mean = 0, sd=1)
true_dens = stats::dnorm(y, mean = 0, sd=1)

#grid for bandwidth range
h_grid = c(0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5)*0.6
Expand Down Expand Up @@ -57,11 +57,7 @@ table_data = table_data[2:nrow(table_data), ]
table_data[, 2] = abs(table_data[,2])
colnames(table_data) = c("BW", "bias", "sd", "rmse", "WBC CE", "RBC CE",
"WBC AL", "RBC AL")
rownames(table_data) = h_grid/0.6
#rownames(table_data) = h_grid/0.6
#print table
table_data
print(xtable(table_data), file='bw_table.tex')

end.time <- Sys.time()
time.taken <- end.time - start.time
time.taken
#print(xtable(table_data), file='bw_table.tex')
13 changes: 11 additions & 2 deletions R/lpcde_comparison.R
Original file line number Diff line number Diff line change
@@ -1,33 +1,42 @@
#Loading dataset
data("iris")
attach(iris)

#loading conditional density estimation packages
library("np")
library("hdrcde")
library(latex2exp)

#selecting variables
y = as.matrix(iris[, 1], ncol=1)
x = as.matrix(iris[, 3], ncol=1)
x_evals = quantile(x)

#scatter plot of data with chosen conditioning values
plot(x,y)
abline(v=x_evals[2], col="green")
abline(v=x_evals[3], col="green")
abline(v=x_evals[4], col="green")


#estimation at first conditional value
lpcde_est_q1 = lpcde::lpcde(x_data=x, y_data=y, x=x_evals[2], bw=0.9, ng=30, nonneg = TRUE, normalize = TRUE)
hdr_est_q1 = cde(x, y, x.margin=x_evals[2], y.margin = lpcde_est_q1$Estimate[,1])
npbws_q1 = npcdensbw(x, y)
np_est_q1 = fitted(npcdens(exdat=rep(x_evals[2], 30), eydat=lpcde_est_q1$Estimate[,1], bws=npbws_q1))

#estimation at second conditional value
lpcde_est_q2 = lpcde::lpcde(x_data=x, y_data=y, x=x_evals[3], ng=30, bw=0.9, nonneg = TRUE, normalize = TRUE)
hdr_est_q2 = cde(x, y, x.margin=x_evals[3], y.margin = lpcde_est_q2$Estimate[,1])
npbws_q2 = npcdensbw(x, y)
np_est_q2 = fitted(npcdens(exdat=rep(x_evals[3], 30), eydat=lpcde_est_q2$Estimate[,1], bws=npbws_q2))

#estimation at third conditional value
lpcde_est_q3 = lpcde::lpcde(x_data=x, y_data=y, x=x_evals[4], ng=30, bw=1, nonneg = TRUE, normalize = TRUE)
hdr_est_q3 = cde(x, y, x.margin=x_evals[4], y.margin = lpcde_est_q3$Estimate[,1])
npbws_q3 = npcdensbw(x, y)
np_est_q3 = fitted(npcdens(exdat=rep(x_evals[4], 30), eydat=lpcde_est_q3$Estimate[,1], bws=npbws_q3))

#plotting all CDEs
plot(1, main=TeX(r'($f(y|x=1.6)$)'), xlab="Sepal length", ylab="density", ylim=c(-0.8, 1.6), xlim=c(4.8, 6.9))
lines(lpcde_est_q1$Estimate[, 1], lpcde_est_q1$Estimate[,3])
lines(hdr_est_q1$y, hdr_est_q1$z, col=2)
Expand All @@ -46,7 +55,7 @@ lines(hdr_est_q3$y, hdr_est_q3$z, col=2)
lines(lpcde_est_q3$Estimate[, 1], np_est_q3, col=3)
legend('topright',lwd=1, legend=c('lpcde', 'hdrcde', 'np'), col=c(1,2,3))

# lpcde estimate with CI
# lpcde estimates with CI
plot(lpcde_est_q1, xlabel="Sepal length", ylabel="density", title=TeX(r'($f(y|x=1.6)$ with confidence bands)')) + ggplot2::ylim(-0.8, 1.6) + ggplot2::xlim(4.8, 6.9) + ggplot2::theme(legend.position='none')
plot(lpcde_est_q2,xlabel="Sepal length", ylabel="density", title=TeX(r'($f(y|x=4.35)$ with confidence bands)'))+ ggplot2::ylim(-0.8, 1.6) + ggplot2::xlim(4.8, 6.9) + ggplot2::theme(legend.position='none')
plot(lpcde_est_q3, xlabel="Sepal length", ylabel="density", title=TeX(r'($f(y|x=5.1)$ with confidence bands)'))+ ggplot2::ylim(-0.8, 1.6) + ggplot2::xlim(4.8, 6.9) + ggplot2::theme(legend.position='none')
Expand Down
55 changes: 55 additions & 0 deletions R/pw_table.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Script for generating table 2 in Cattaneo, Chandak, Jansson and Ma (2022)

#parameters
n = 2000
num_sims = 100
y_points = c(0.0, 0.8, 1.5)
x_points = c(0.0, 0.8, 1.5)

pw_testing = function(s){
print(s)
# simulating data
x_data = matrix(rnorm(n, mean=0, sd=1))
y_data = matrix(rnorm(n, mean=x_data, sd=1))

true_dens = stats::dnorm(y_points, mean = x, sd=1)

#density estimation
est = lpcde::lpcde(y_data=y_data, x_data=x_data, y_grid=y_points, x=x, bw=bw_star, rbc=TRUE)

# extracting values of interest
f_hat = est$Estimate[,3]
f_hat_rbc = est$Estimate[,4]
bias = (f_hat - true_dens)
bias_rbc = (f_hat_rbc - true_dens)
sd = est$Estimate[,5]
sd_rbc = est$Estimate[,6]
rmse = sqrt(sd^2 +bias^2)

# coverage probabilities
ce = 100*ifelse((abs(bias/sd)<=1.96), 1, 0)
ce_rbc = 100*ifelse((abs(bias_rbc/sd_rbc)<=1.96), 1, 0)

#average width
aw_pw = 2*1.96*sd
aw_pw_rbc = 2*1.96*sd_rbc

results = matrix(c(bw_star, bias, sd, rmse, ce, ce_rbc, aw_pw, aw_pw_rbc),
ncol = 8)
}

pw_table = list()
bw_mat = matrix(c(0.51, 0.58, 0.8, 0.6, 0.6, 0.6, 1, 0.8, .9), nrow=3)
# running simulations
for (i in 1:length(x_points)){
x = x_points[i]
bw_star= bw_mat[, i]
object = parallel::mclapply(1:num_sims, pw_testing, mc.cores =8)
avg = Reduce('+', object)/num_sims
avg[, 2] = abs(avg[,2])
colnames(avg) = c("BW", "bias", "sd", "rmse", "WBC CE", "RBC CE",
"WBC AL", "RBC AL")
pw_table[[i]] = avg
}

pw_table

0 comments on commit fc0912e

Please sign in to comment.