Upgrade to v3 of testthat, prepare for covariates
kolesarm committed Mar 7, 2024
1 parent 96fba02 commit f3ee09a
Showing 15 changed files with 311 additions and 297 deletions.
11 changes: 6 additions & 5 deletions DESCRIPTION
@@ -1,6 +1,6 @@
Package: RDHonest
Title: Honest Inference in Regression Discontinuity Designs
Version: 0.4.1
Version: 0.5.0
Authors@R:
c(person(given = "Michal",
family = "Kolesár",
@@ -14,7 +14,7 @@ Authors@R:
Description: Honest and nearly-optimal confidence intervals in fuzzy and sharp
regression discontinuity designs and for inference at a point based on local
linear regression.
Depends: R (>= 4.1.0)
Depends: R (>= 4.3.0)
License: GPL-3
Encoding: UTF-8
LazyData: true
@@ -28,8 +28,9 @@ Suggests:
knitr,
rmarkdown,
formatR
RoxygenNote: 7.3.1
URL: https://github.com/kolesarm/RDHonest
VignetteBuilder: knitr
Config/testthat/edition: 3
Language: en-US
URL: https://github.com/kolesarm/RDHonest
BugReports: https://github.com/kolesarm/RDHonest/issues
RoxygenNote: 7.3.1
VignetteBuilder: knitr
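
The DESCRIPTION changes above opt the package into testthat's third edition via Config/testthat/edition: 3. A minimal sketch of how this field is typically added and what it implies for the test files changed below (assumes the usethis package; not part of the commit):

## Run once inside the package directory to record the edition in DESCRIPTION
usethis::use_testthat(edition = 3)
## Under edition 3, context() calls are deprecated (they are deleted in the
## test files below); testthat::local_edition(2) can restore the old behaviour
## inside a single test if ever needed.
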
4 changes: 2 additions & 2 deletions R/NPRfunctions.R
@@ -12,8 +12,8 @@ LPReg <- function(X, Y, h, K, order=1, se.method=NULL, sigma2, J=3,
Gamma <- crossprod(R, W*R)

if (h==0 || inherits(try(solve(Gamma), silent=TRUE), "try-error"))
return(list(theta=0, sigma2=NA, var=NA, w=0, eff.obs=0))

return(list(theta=0, sigma2=NA*Y, res=NA*Y,
var=NA*stats::var(Y), est_w=W*NA, eff.obs=0))
## weights if we think of the estimator as linear estimator
wgt <- (W*R %*% solve(Gamma))[, 1]
beta <- solve(Gamma, crossprod(R, W*Y))
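
The new early return in LPReg hands back NA placeholders with the same shapes as a successful fit (per-observation sigma2, residuals, and estimation weights) instead of scalars, so callers that index into these components keep working when the local design matrix is singular. A standalone sketch of the pattern, illustrative only and not package code:

## Fall back to NA objects shaped like the data when the weighted Gram matrix
## cannot be inverted (e.g. bandwidth zero or collinear regressors).
safe_lpreg_fallback <- function(R, W, Y) {
  Gamma <- crossprod(R, W * R)
  if (inherits(try(solve(Gamma), silent = TRUE), "try-error"))
    return(list(theta = 0, sigma2 = NA * Y, res = NA * Y,
                var = NA * stats::var(Y), est_w = NA * W, eff.obs = 0))
  beta <- solve(Gamma, crossprod(R, W * Y))
  list(theta = beta[1], eff.obs = sum(W > 0))
}
## Example: a zero column makes Gamma singular, so the fallback is returned
safe_lpreg_fallback(cbind(1, rep(0, 5)), rep(1, 5), rnorm(5))$theta
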
73 changes: 47 additions & 26 deletions R/RDHonest.R
@@ -50,8 +50,8 @@
#'
#' }
#'
#' The methods use conditional variance given by \code{sigmaY2}, if supplied.
#' Otherwise, for the purpose of estimating the optimal bandwidth,
#' The methods use conditional variance given by \code{sigmaY2}, if
#' supplied. Otherwise, for the purpose of estimating the optimal bandwidth,
#' conditional variance is estimated using the method specified by
#' \code{se.initial}.
#' @param beta Determines quantile of excess length to optimize, if bandwidth
@@ -60,19 +60,19 @@
#' @param alpha determines confidence level, \code{1-alpha} for
#' constructing/optimizing confidence intervals.
#' @param M Bound on second derivative of the conditional mean function.
#' @param sclass Smoothness class, either \code{"T"} for Taylor or
#' \code{"H"} for Hölder class.
#' @param sclass Smoothness class, either \code{"T"} for Taylor or \code{"H"}
#' for Hölder class.
#' @param h bandwidth, a scalar parameter. If not supplied, optimal bandwidth is
#' computed according to criterion given by \code{opt.criterion}.
#' @param weights Optional vector of weights to weight the observations
#' (useful for aggregated data). Disregarded if optimal kernel is used.
#' @param weights Optional vector of weights to weight the observations (useful
#' for aggregated data). Disregarded if optimal kernel is used.
#' @param point.inference Do inference at a point determined by \code{cutoff}
#' instead of RD.
#' @param T0 Initial estimate of the treatment effect for calculating the
#' optimal bandwidth. Only relevant for Fuzzy RD.
#' @param sigmaY2 Supply variance of outcome. Ignored when kernel is optimal.
#' @param sigmaD2 Supply variance of treatment (fuzzy RD only).
#' @param sigmaYD Supply covariante of treatment and outcome (fuzzy RD only).
#' @param sigmaYD Supply covariance of treatment and outcome (fuzzy RD only).
#' @param clusterid Cluster id for cluster-robust standard errors
#' @return Returns an object of class \code{"RDResults"}. The function
#' \code{print} can be used to obtain and print a summary of the results. An
@@ -140,7 +140,8 @@
RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
kern="triangular", na.action, opt.criterion="MSE", h,
se.method="nn", alpha=0.05, beta=0.8, J=3, sclass="H",
T0=0, point.inference=FALSE, sigmaY2, sigmaD2, sigmaYD, clusterid) {
T0=0, point.inference=FALSE, sigmaY2, sigmaD2, sigmaYD,
clusterid) {
## construct model frame
cl <- mf <- match.call(expand.dots = FALSE)
m <- match(c("formula", "data", "subset", "weights", "na.action", "sigmaY2",
@@ -156,46 +156,66 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M,
mf <- eval(mf, parent.frame())

## TODO: what are the supported se methods, weighting + optimal kernel etc
if (!(se.method %in% c("nn", "EHW", "supplied.var")))
stop("Unsupported se.method")

if (point.inference) {
method <- "IP"
method <- c("Value of conditional mean"="IP")
} else if (length(formula)[2]==2) {
method <- "FRD"
method <- c("Fuzzy RD parameter"="FRD")
} else {
method <- "SRD"
method <- c("Sharp RD parameter"="SRD")
}

d <- NPRData(mf, cutoff, method)
process_options(M, se.method, method, d, kern)

if (NCOL(d$W)>0 && (missing(M) || missing(h))) {
ret <- NPRHonest(d, MROT(d), kern, h, opt.criterion=opt.criterion,
alpha=alpha, beta=beta, se.method=se.method, J=J,
sclass=sclass, T0=T0)
return(0) # TODO
}

if (missing(M)) {
M <- MROT(d)
message("Using Armstrong & Kolesar (2020) ROT for smoothness constant M")
}
if (kern=="optimal" && method=="SRD") {

if (kern=="optimal") {
ret <- RDTOpt(d, M, opt.criterion, alpha, beta, se.method, J)
} else if (!missing(h)) {
ret <- NPRHonest(d, M, kern, h, alpha=alpha, se.method=se.method, J=J,
sclass=sclass, T0=T0)
} else {
ret <- NPRHonest(d, M, kern, opt.criterion=opt.criterion, alpha=alpha,
beta=beta, se.method=se.method, J=J, sclass=sclass,
T0=T0)
ret <- NPRHonest(d, M, kern, h, opt.criterion=opt.criterion,
alpha=alpha, beta=beta, se.method=se.method, J=J,
sclass=sclass, T0=T0)
}

ret$call <- cl
ret$na.action <- attr(mf, "na.action")

if (is.nan(ret$coefficients$leverage) || ret$coefficients$leverage>0.1)
if (!is.finite(ret$coefficients$leverage) || ret$coefficients$leverage>0.1)
message(paste0("Maximal leverage is large: ",
round(ret$coefficients$leverage, 2),
".\nInference may be inaccurate. ",
"Consider using bigger bandwidth."))
ret$coefficients$term <- switch(method, IP="Value of conditional mean",
SRD="Sharp RD parameter",
"Fuzzy RD parameter")
ret$coefficients$term <- names(method)
ret
}

process_options <- function(M, se.method, method, d, kern) {

[GitHub Actions / lint warning on line 203 in R/RDHonest.R] cyclocomp_linter: Functions should have cyclomatic complexity of less than 15, this has 19.
if (kern=="optimal" && method!="SRD")
stop("Optimal kernel requires sharp RD design.")

if (!missing(M)) {
if (method=="FRD" && (length(M)!=2 || !is.numeric(M)))
stop("M must be a numeric vector of length 2")
if (method!="FRD" && (length(M)!=1 || !is.numeric(M)))
stop("M must be a numeric vector of length 1.")
}
if (!(se.method %in% c("nn", "EHW", "supplied.var"))) {
stop("Unsupported se.method")
}
if (NCOL(d$W)>0 && method=="IP")
stop("Covariates not allowed when method is 'IP'.")
}


## @param d object of class \code{"RDData"}, \code{"FRDData"}, or
## \code{"LPPData"}
@@ -233,7 +254,7 @@ NPRHonest <- function(d, M, kern="triangular", h, opt.criterion, alpha=0.05,
}

## Determine bias
if (nobs==0) {
if (nobs==0 || is.na(nobs)) {
## If bandwidths too small, big bias / sd
bias <- r1$se <- sqrt(.Machine$double.xmax/10)
} else if (sclass=="T") {
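
Taken together, the refactor moves the argument validation into process_options() and routes both the fixed-h and optimal-bandwidth paths through a single NPRHonest() call. A usage sketch consistent with those checks (lee08 and rcp ship with the package; the fuzzy-RD M values below are purely illustrative):

library(RDHonest)
## Sharp RD: omitting M triggers the rule-of-thumb message for the smoothness constant
RDHonest(voteshare ~ margin, data = lee08)
## Fuzzy RD: M must now be a numeric vector of length 2 (one bound per conditional mean)
RDHonest(c ~ retired | elig_year, data = rcp, M = c(0.1, 0.01))
## Rejected by process_options(): the optimal kernel requires a sharp design
## RDHonest(c ~ retired | elig_year, data = rcp, kern = "optimal")
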
17 changes: 10 additions & 7 deletions R/utils.R
@@ -8,20 +8,23 @@ unAsIs <- function(X) {

## Class Constructor
NPRData <- function(d, cutoff, class) {
Xindex <- if (class=="FRD") 3 else 2
Xidx <- if (class=="FRD") 3 else 2
## Unclass in case generated by I()
if (is.unsorted(d[[Xindex]]))
d <- d[sort(unAsIs(d[[Xindex]]), index.return=TRUE)$ix, ]
Y <- if (class=="FRD") cbind(d[[1]], d[[2]]) else d[[1]]
if (is.unsorted(d[[Xidx]]))
d <- d[sort(unAsIs(d[[Xidx]]), index.return=TRUE)$ix, ]
Y <- drop(as.matrix(d[, seq_len(Xidx-1)]))
W <- stats::model.matrix(attr(d, "terms"), d)[, -seq_len(Xidx)]

df <- structure(list(Y=Y, X=d[[Xindex]] - cutoff, orig.cutoff=cutoff,
var.names=names(d)[1:Xindex]), class=class)
df <- structure(list(Y=Y, X=d[[Xidx]] - cutoff, orig.cutoff=cutoff, W=W,
var.names=c(names(d)[1:Xidx], colnames(W))),
class=class)
df$p <- df$X>=0
df$m <- df$X<0
if (class!="FRD") {
df$sigma2 <- d$"(sigmaY2)"
} else if (!is.null(d$"(sigmaY2)")) {
df$sigma2 <- cbind(d$"(sigmaY2)", d$"(sigmaYD)", d$"(sigmaYD)", d$"(sigmaD2)")
df$sigma2 <- cbind(d$"(sigmaY2)", d$"(sigmaYD)", d$"(sigmaYD)",
d$"(sigmaD2)")
}

df$clusterid <- d$"(clusterid)"
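
NPRData now also builds a covariate matrix W from the model-matrix columns that come after the running variable, in preparation for covariate adjustment. A rough illustration of that indexing on made-up data (this is not the package's exact model-frame handling):

d <- data.frame(y = rnorm(20), x = runif(20, -1, 1),
                z1 = rnorm(20), z2 = rnorm(20))
mf <- stats::model.frame(y ~ x + z1 + z2, d)
Xidx <- 2   # outcome in column 1, running variable in column 2 (sharp RD case)
W <- stats::model.matrix(attr(mf, "terms"), mf)[, -seq_len(Xidx)]
colnames(W) # "z1" "z2": the intercept and running-variable columns are dropped
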
14 changes: 7 additions & 7 deletions man/RDHonest.Rd

Some generated files are not rendered by default.

17 changes: 8 additions & 9 deletions tests/testthat/test_cbound.R
@@ -1,22 +1,21 @@
context("Test estimation of bound on C")

test_that("Test estimation of bound on M in Lee data", {
r <- RDHonest(voteshare~margin, data=lee08)
ma <- function(a) max(abs(a))

expect_message(r <- RDHonest(voteshare~margin, data=lee08))
r1 <- RDSmoothnessBound(r, s=100, separate=TRUE, multiple=TRUE, sclass="T")
expect_equal(r1$estimate, c(0.00147446, 0.00016410))
expect_lt(ma(r1$estimate-c(0.00147446, 0.00016410)), 3e-9)
r2 <- RDSmoothnessBound(r, s=100, separate=FALSE, multiple=TRUE, sclass="H")
expect_equal(r2$conf.low, 0.017285255)
expect_lt(ma(r2$conf.low- 0.017285255), 1e-9)
r3 <- RDSmoothnessBound(r, s=100, separate=FALSE, multiple=FALSE,
sclass="H")
expect_equal(r3$conf.low, 0.59509966)
sclass="H")$conf.low
expect_equal(r3, 0.59509966)
## Should fail, s too big
expect_error(RDSmoothnessBound(r, s=10000))
r <- RDHonest(log(earnings)~yearat14, cutoff=1947, data=cghs, M=0.023,
h=3.4)
expect_error(RDSmoothnessBound(r, s=20, separate=TRUE, multiple=FALSE))
r4 <- RDSmoothnessBound(r, s=2, separate=TRUE, multiple=FALSE)
expect_equal(r4$estimate, c(0.015359658, 0L))

expect_lt(ma(r4$estimate- c(0.015359658, 0L)), 1e-9)

## Old implementation
## r2 <- capture.output(print(r, digits=5))
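
The switch from expect_equal() to expect_lt(ma(...), tol) reflects edition 3, where expect_equal() compares doubles through waldo with a default tolerance of roughly sqrt(.Machine$double.eps); the ma() helper makes the intended tolerance explicit instead. The pattern in isolation, with illustrative numbers:

library(testthat)
ma <- function(a) max(abs(a))
est <- c(0.00147446, 0.00016410) + 1e-10  # pretend estimate with tiny numeric noise
expect_lt(ma(est - c(0.00147446, 0.00016410)), 3e-9)  # explicit 3e-9 bound
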
66 changes: 35 additions & 31 deletions tests/testthat/test_clustering.R
@@ -1,27 +1,32 @@
context("Test RD")

test_that("Test clustering formulas", {
## Everyone in their own cluster
s0 <- RDHonest(voteshare~margin, data=lee08, se.method="EHW")
s0c <- RDHonest(voteshare~margin, data=lee08, se.method="EHW",
clusterid=seq_along(lee08$margin))
expect_message(s0 <- RDHonest(voteshare~margin, data=lee08,
se.method="EHW"))
expect_message(s0c <- RDHonest(voteshare~margin, data=lee08,
se.method="EHW",
clusterid=seq_along(lee08$margin)))
expect_equal(s0$coefficients, s0c$coefficients)
rr <- rcp[1:1000, ]
f0 <- RDHonest(c~retired | elig_year, data=rr, se.method="EHW")
f0c <- RDHonest(c~retired | elig_year, data=rr, clusterid=seq_along(rr$c))
expect_message(f0 <- RDHonest(c~retired | elig_year, data=rr,
se.method="EHW"))
expect_message(f0c <- RDHonest(c~retired | elig_year, data=rr,
clusterid=seq_along(rr$c)))
expect_equal(f0$coefficients, f0c$coefficients)
p0 <- RDHonest(c~elig_year, data=rr, se.method="EHW", point.inference=TRUE)
p0c <- RDHonest(c~elig_year, data=rr, clusterid=seq_along(rr$c),
point.inference=TRUE)
expect_message(p0 <- RDHonest(c~elig_year, data=rr,
se.method="EHW", point.inference=TRUE))
expect_message(p0c <- RDHonest(c~elig_year, data=rr,
clusterid=seq_along(rr$c),
point.inference=TRUE))
expect_equal(p0$coefficients, p0c$coefficients)

## Check we match sandwich::vcovCL: uniform + triangular
set.seed(42)
clusterid <- sample(1:50, NROW(lee08), replace=TRUE)
s1h <- RDHonest(clusterid~margin, data=lee08, se.method="EHW", h=5,
kern="uniform")
s1c <- RDHonest(clusterid~margin, data=lee08, se.method="EHW", h=5,
kern="uniform", clusterid=clusterid)
expect_message(s1h <- RDHonest(clusterid~margin, data=lee08,
se.method="EHW", h=5, kern="uniform"))
expect_message(s1c <- RDHonest(clusterid~margin, data=lee08,
se.method="EHW", h=5,
kern="uniform", clusterid=clusterid))
m1 <- lm(clusterid~margin*I(margin>0), data=lee08, subset=abs(margin)<=5)
## Should equal
## sqrt(sum(s1c$data$est_w[s1c$data$est_w!=0]^2*m1$residuals^2))
@@ -30,8 +35,8 @@ test_that("Test clustering formulas", {
## sandwich::vcovCL(m1, type="HC0", cluster=clusterid[abs(lee08$margin)<=5],
## cadjust=FALSE)[3, 3]
expect_equal(as.numeric(s1c$coefficients[3])^2, 6.523026427)
s2c <- RDHonest(clusterid~margin, data=lee08, se.method="EHW",
clusterid=clusterid)
expect_message(s2c <- RDHonest(clusterid~margin, data=lee08,
se.method="EHW", clusterid=clusterid))
h <- s2c$coefficients$bandwidth
m2 <- lm(clusterid~margin*I(margin>0), data=lee08,
subset=abs(margin)<=h,
Expand All @@ -44,10 +49,10 @@ test_that("Test clustering formulas", {

set.seed(42)
clusterid <- sample(1:20, NROW(rr), replace=TRUE)
p1c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid,
point.inference=TRUE, kern="uniform")
p1h <- RDHonest(c~elig_year, data=rr, se.method="EHW", point.inference=TRUE,
kern="uniform") # same bw
expect_message(p1c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid,
point.inference=TRUE, kern="uniform"))
expect_message(p1h <- RDHonest(c~elig_year, data=rr, se.method="EHW",
point.inference=TRUE, kern="uniform"))
m2 <- lm(c~elig_year, data=rr, subset=abs(elig_year)<=5)
expect_equal(p1c$coefficients$estimate, unname(m2$coefficients[1]))
## sqrt(sandwich::vcovHC(m2, type="HC0")[1, 1]) 792.2299439
@@ -56,8 +61,8 @@ test_that("Test clustering formulas", {
expect_equal(p1c$coefficients$std.error^2, 723178.2655)
expect_equal(p1h$coefficients$std.error, 792.2299439)

p2c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid,
point.inference=TRUE, h=8.27778063886)
expect_message(p2c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid,
point.inference=TRUE, h=8.27778063886))
h <- p2c$coefficients$bandwidth
m3 <- lm(c~elig_year, data=rr, subset=abs(elig_year)<=h,
weights = (1 - abs(elig_year/h)))
@@ -67,10 +72,12 @@
expect_equal(as.numeric(p2c$coefficients[2:3]),
c(unname(m3$coefficients[1]), 919.162944))

f1c <- RDHonest(c~retired | elig_year, data=rr, clusterid=clusterid,
kern="uniform")
f2c <- RDHonest(cn~retired | elig_year, data=rr, clusterid=clusterid,
kern="uniform", h=4)
expect_message(f1c <- RDHonest(c~retired | elig_year, data=rr,
clusterid=clusterid,
kern="uniform"))
expect_message(f2c <- RDHonest(cn~retired | elig_year, data=rr,
clusterid=clusterid,
kern="uniform", h=4))
## m3 <- AER::ivreg(c~retired+elig_year+elig_year:I(elig_year>0) |
## elig_year*I(elig_year>0),
## data=rr, subset=abs(elig_year)<=5)
@@ -85,9 +92,6 @@
## Triangular kernel doesn't work with sandwich + ivreg

## Check preliminary bw calculations and rho
dd <- data.frame(y=log(rr$c), x=rr$elig_year, "(clusterid)"=clusterid)
names(dd)[3]<- "(clusterid)"
d <- NPRData(dd, cutoff=0, class="SRD")
r0 <- PrelimVar(d)
expect_equal(r0$rho, -0.0008690793197)
r0 <- RDHonest(log(c)~elig_year, h=Inf, M=1, data=rr, clusterid=clusterid)
expect_equal(PrelimVar(r0$d)$rho, -0.0008690793197)
})
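
For reference, the sandwich::vcovCL cross-check described in the comments above can be run on its own (requires the sandwich package; lee08 ships with RDHonest):

library(RDHonest)
library(sandwich)
set.seed(42)
clusterid <- sample(1:50, NROW(lee08), replace = TRUE)
m1 <- lm(clusterid ~ margin * I(margin > 0), data = lee08,
         subset = abs(margin) <= 5)
## Cluster-robust variance of the jump coefficient; the test expects the squared
## RDHonest standard error to match this value (about 6.523)
vcovCL(m1, type = "HC0", cluster = clusterid[abs(lee08$margin) <= 5],
       cadjust = FALSE)[3, 3]
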