diff --git a/DESCRIPTION b/DESCRIPTION index 2693587..e3f2702 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: RDHonest Title: Honest Inference in Regression Discontinuity Designs -Version: 0.4.1 +Version: 0.5.0 Authors@R: c(person(given = "Michal", family = "Kolesár", @@ -14,7 +14,7 @@ Authors@R: Description: Honest and nearly-optimal confidence intervals in fuzzy and sharp regression discontinuity designs and for inference at a point based on local linear regression. -Depends: R (>= 4.1.0) +Depends: R (>= 4.3.0) License: GPL-3 Encoding: UTF-8 LazyData: true @@ -28,8 +28,9 @@ Suggests: knitr, rmarkdown, formatR -RoxygenNote: 7.3.1 -URL: https://github.com/kolesarm/RDHonest -VignetteBuilder: knitr +Config/testthat/edition: 3 Language: en-US +URL: https://github.com/kolesarm/RDHonest BugReports: https://github.com/kolesarm/RDHonest/issues +RoxygenNote: 7.3.1 +VignetteBuilder: knitr diff --git a/R/NPRfunctions.R b/R/NPRfunctions.R index 4041c8d..cd329d7 100644 --- a/R/NPRfunctions.R +++ b/R/NPRfunctions.R @@ -12,8 +12,8 @@ LPReg <- function(X, Y, h, K, order=1, se.method=NULL, sigma2, J=3, Gamma <- crossprod(R, W*R) if (h==0 || inherits(try(solve(Gamma), silent=TRUE), "try-error")) - return(list(theta=0, sigma2=NA, var=NA, w=0, eff.obs=0)) - + return(list(theta=0, sigma2=NA*Y, res=NA*Y, + var=NA*stats::var(Y), est_w=W*NA, eff.obs=0)) ## weights if we think of the estimator as linear estimator wgt <- (W*R %*% solve(Gamma))[, 1] beta <- solve(Gamma, crossprod(R, W*Y)) diff --git a/R/RDHonest.R b/R/RDHonest.R index 04a0472..4e31da1 100644 --- a/R/RDHonest.R +++ b/R/RDHonest.R @@ -50,8 +50,8 @@ #' #' } #' -#' The methods use conditional variance given by \code{sigmaY2}, if supplied. -#' Otherwise, for the purpose of estimating the optimal bandwidth, +#' The methods use conditional variance given by \code{sigmaY2}, if +#' supplied. Otherwise, for the purpose of estimating the optimal bandwidth, #' conditional variance is estimated using the method specified by #' \code{se.initial}. #' @param beta Determines quantile of excess length to optimize, if bandwidth @@ -60,19 +60,19 @@ #' @param alpha determines confidence level, \code{1-alpha} for #' constructing/optimizing confidence intervals. #' @param M Bound on second derivative of the conditional mean function. -#' @param sclass Smoothness class, either \code{"T"} for Taylor or -#' \code{"H"} for Hölder class. +#' @param sclass Smoothness class, either \code{"T"} for Taylor or \code{"H"} +#' for Hölder class. #' @param h bandwidth, a scalar parameter. If not supplied, optimal bandwidth is #' computed according to criterion given by \code{opt.criterion}. -#' @param weights Optional vector of weights to weight the observations -#' (useful for aggregated data). Disregarded if optimal kernel is used. +#' @param weights Optional vector of weights to weight the observations (useful +#' for aggregated data). Disregarded if optimal kernel is used. #' @param point.inference Do inference at a point determined by \code{cutoff} #' instead of RD. #' @param T0 Initial estimate of the treatment effect for calculating the #' optimal bandwidth. Only relevant for Fuzzy RD. #' @param sigmaY2 Supply variance of outcome. Ignored when kernel is optimal. #' @param sigmaD2 Supply variance of treatment (fuzzy RD only). -#' @param sigmaYD Supply covariante of treatment and outcome (fuzzy RD only). +#' @param sigmaYD Supply covariance of treatment and outcome (fuzzy RD only). #' @param clusterid Cluster id for cluster-robust standard errors #' @return Returns an object of class \code{"RDResults"}. The function #' \code{print} can be used to obtain and print a summary of the results. An @@ -140,7 +140,8 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M, kern="triangular", na.action, opt.criterion="MSE", h, se.method="nn", alpha=0.05, beta=0.8, J=3, sclass="H", - T0=0, point.inference=FALSE, sigmaY2, sigmaD2, sigmaYD, clusterid) { + T0=0, point.inference=FALSE, sigmaY2, sigmaD2, sigmaYD, + clusterid) { ## construct model frame cl <- mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "subset", "weights", "na.action", "sigmaY2", @@ -156,46 +157,66 @@ RDHonest <- function(formula, data, subset, weights, cutoff=0, M, mf <- eval(mf, parent.frame()) ## TODO: what are the supported se methods, weighting + optimal kernel etc - if (!(se.method %in% c("nn", "EHW", "supplied.var"))) - stop("Unsupported se.method") - if (point.inference) { - method <- "IP" + method <- c("Value of conditional mean"="IP") } else if (length(formula)[2]==2) { - method <- "FRD" + method <- c("Fuzzy RD parameter"="FRD") } else { - method <- "SRD" + method <- c("Sharp RD parameter"="SRD") } + d <- NPRData(mf, cutoff, method) + process_options(M, se.method, method, d, kern) + + if (NCOL(d$W)>0 && (missing(M) || missing(h))) { + ret <- NPRHonest(d, MROT(d), kern, h, opt.criterion=opt.criterion, + alpha=alpha, beta=beta, se.method=se.method, J=J, + sclass=sclass, T0=T0) + return(0) # TODO + } if (missing(M)) { M <- MROT(d) message("Using Armstong & Kolesar (2020) ROT for smoothness constant M") } - if (kern=="optimal" && method=="SRD") { + + if (kern=="optimal") { ret <- RDTOpt(d, M, opt.criterion, alpha, beta, se.method, J) - } else if (!missing(h)) { - ret <- NPRHonest(d, M, kern, h, alpha=alpha, se.method=se.method, J=J, - sclass=sclass, T0=T0) } else { - ret <- NPRHonest(d, M, kern, opt.criterion=opt.criterion, alpha=alpha, - beta=beta, se.method=se.method, J=J, sclass=sclass, - T0=T0) + ret <- NPRHonest(d, M, kern, h, opt.criterion=opt.criterion, + alpha=alpha, beta=beta, se.method=se.method, J=J, + sclass=sclass, T0=T0) } + ret$call <- cl ret$na.action <- attr(mf, "na.action") - if (is.nan(ret$coefficients$leverage) || ret$coefficients$leverage>0.1) + if (!is.finite(ret$coefficients$leverage) || ret$coefficients$leverage>0.1) message(paste0("Maximal leverage is large: ", round(ret$coefficients$leverage, 2), ".\nInference may be inaccurate. ", "Consider using bigger bandwidth.")) - ret$coefficients$term <- switch(method, IP="Value of conditional mean", - SRD="Sharp RD parameter", - "Fuzzy RD parameter") + ret$coefficients$term <- names(method) ret } +process_options <- function(M, se.method, method, d, kern) { + if (kern=="optimal" && method!="SRD") + stop("Optimal kernel requires sharp RD design.") + + if (!missing(M)) { + if (method=="FRD" && (length(M)!=2 || !is.numeric(M))) + stop("M must be a numeric vector of length 2") + if (method!="FRD" && (length(M)!=1 || !is.numeric(M))) + stop("M must be a numeric vector of length 1.") + } + if (!(se.method %in% c("nn", "EHW", "supplied.var"))) { + stop("Unsupported se.method") + } + if (NCOL(d$W)>0 && method=="IP") + stop("Covariates not allowed whem method is 'IP'.") +} + ## @param d object of class \code{"RDData"}, \code{"FRDData"}, or ## \code{"LPPData"} @@ -233,7 +254,7 @@ NPRHonest <- function(d, M, kern="triangular", h, opt.criterion, alpha=0.05, } ## Determine bias - if (nobs==0) { + if (nobs==0 || is.na(nobs)) { ## If bandwidths too small, big bias / sd bias <- r1$se <- sqrt(.Machine$double.xmax/10) } else if (sclass=="T") { diff --git a/R/utils.R b/R/utils.R index c2f537d..8a4384f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -8,20 +8,23 @@ unAsIs <- function(X) { ## Class Constructor NPRData <- function(d, cutoff, class) { - Xindex <- if (class=="FRD") 3 else 2 + Xidx <- if (class=="FRD") 3 else 2 ## Unclass in case generated by I() - if (is.unsorted(d[[Xindex]])) - d <- d[sort(unAsIs(d[[Xindex]]), index.return=TRUE)$ix, ] - Y <- if (class=="FRD") cbind(d[[1]], d[[2]]) else d[[1]] + if (is.unsorted(d[[Xidx]])) + d <- d[sort(unAsIs(d[[Xidx]]), index.return=TRUE)$ix, ] + Y <- drop(as.matrix(d[, seq_len(Xidx-1)])) + W <- stats::model.matrix(attr(d, "terms"), d)[, -seq_len(Xidx)] - df <- structure(list(Y=Y, X=d[[Xindex]] - cutoff, orig.cutoff=cutoff, - var.names=names(d)[1:Xindex]), class=class) + df <- structure(list(Y=Y, X=d[[Xidx]] - cutoff, orig.cutoff=cutoff, W=W, + var.names=c(names(d)[1:Xidx], colnames(W))), + class=class) df$p <- df$X>=0 df$m <- df$X<0 if (class!="FRD") { df$sigma2 <- d$"(sigmaY2)" } else if (!is.null(d$"(sigmaY2)")) { - df$sigma2 <- cbind(d$"(sigmaY2)", d$"(sigmaYD)", d$"(sigmaYD)", d$"(sigmaD2)") + df$sigma2 <- cbind(d$"(sigmaY2)", d$"(sigmaYD)", d$"(sigmaYD)", + d$"(sigmaD2)") } df$clusterid <- d$"(clusterid)" diff --git a/man/RDHonest.Rd b/man/RDHonest.Rd index ef71154..e0719fd 100644 --- a/man/RDHonest.Rd +++ b/man/RDHonest.Rd @@ -41,8 +41,8 @@ which the function is called.} \item{subset}{optional vector specifying a subset of observations to be used in the fitting process.} -\item{weights}{Optional vector of weights to weight the observations -(useful for aggregated data). Disregarded if optimal kernel is used.} +\item{weights}{Optional vector of weights to weight the observations (useful +for aggregated data). Disregarded if optimal kernel is used.} \item{cutoff}{specifies the RD cutoff in the running variable. For inference at a point, specifies the point \eqn{x_0} at which to calculate the @@ -77,8 +77,8 @@ contain \code{NA}s. The default is set by the \code{na.action} setting of } - The methods use conditional variance given by \code{sigmaY2}, if supplied. - Otherwise, for the purpose of estimating the optimal bandwidth, + The methods use conditional variance given by \code{sigmaY2}, if + supplied. Otherwise, for the purpose of estimating the optimal bandwidth, conditional variance is estimated using the method specified by \code{se.initial}.} @@ -110,8 +110,8 @@ intervals; otherwise ignored.} \item{J}{Number of nearest neighbors, if "nn" is specified in \code{se.method}.} -\item{sclass}{Smoothness class, either \code{"T"} for Taylor or -\code{"H"} for Hölder class.} +\item{sclass}{Smoothness class, either \code{"T"} for Taylor or \code{"H"} +for Hölder class.} \item{T0}{Initial estimate of the treatment effect for calculating the optimal bandwidth. Only relevant for Fuzzy RD.} @@ -123,7 +123,7 @@ instead of RD.} \item{sigmaD2}{Supply variance of treatment (fuzzy RD only).} -\item{sigmaYD}{Supply covariante of treatment and outcome (fuzzy RD only).} +\item{sigmaYD}{Supply covariance of treatment and outcome (fuzzy RD only).} \item{clusterid}{Cluster id for cluster-robust standard errors} } diff --git a/tests/testthat/test_cbound.R b/tests/testthat/test_cbound.R index dd1a9a1..1be7bb3 100644 --- a/tests/testthat/test_cbound.R +++ b/tests/testthat/test_cbound.R @@ -1,22 +1,21 @@ -context("Test estimation of bound on C") - test_that("Test estimation of bound on M in Lee data", { - r <- RDHonest(voteshare~margin, data=lee08) + ma <- function(a) max(abs(a)) + + expect_message(r <- RDHonest(voteshare~margin, data=lee08)) r1 <- RDSmoothnessBound(r, s=100, separate=TRUE, multiple=TRUE, sclass="T") - expect_equal(r1$estimate, c(0.00147446, 0.00016410)) + expect_lt(ma(r1$estimate-c(0.00147446, 0.00016410)), 3e-9) r2 <- RDSmoothnessBound(r, s=100, separate=FALSE, multiple=TRUE, sclass="H") - expect_equal(r2$conf.low, 0.017285255) + expect_lt(ma(r2$conf.low- 0.017285255), 1e-9) r3 <- RDSmoothnessBound(r, s=100, separate=FALSE, multiple=FALSE, - sclass="H") - expect_equal(r3$conf.low, 0.59509966) + sclass="H")$conf.low + expect_equal(r3, 0.59509966) ## Should fail, s too big expect_error(RDSmoothnessBound(r, s=10000)) r <- RDHonest(log(earnings)~yearat14, cutoff=1947, data=cghs, M=0.023, h=3.4) expect_error(RDSmoothnessBound(r, s=20, separate=TRUE, multiple=FALSE)) r4 <- RDSmoothnessBound(r, s=2, separate=TRUE, multiple=FALSE) - expect_equal(r4$estimate, c(0.015359658, 0L)) - + expect_lt(ma(r4$estimate- c(0.015359658, 0L)), 1e-9) ## Old implementation ## r2 <- capture.output(print(r, digits=5)) diff --git a/tests/testthat/test_clustering.R b/tests/testthat/test_clustering.R index c201002..8d5be02 100644 --- a/tests/testthat/test_clustering.R +++ b/tests/testthat/test_clustering.R @@ -1,27 +1,32 @@ -context("Test RD") - test_that("Test clustering formulas", { ## Everyone in their own cluster - s0 <- RDHonest(voteshare~margin, data=lee08, se.method="EHW") - s0c <- RDHonest(voteshare~margin, data=lee08, se.method="EHW", - clusterid=seq_along(lee08$margin)) + expect_message(s0 <- RDHonest(voteshare~margin, data=lee08, + se.method="EHW")) + expect_message(s0c <- RDHonest(voteshare~margin, data=lee08, + se.method="EHW", + clusterid=seq_along(lee08$margin))) expect_equal(s0$coefficients, s0c$coefficients) rr <- rcp[1:1000, ] - f0 <- RDHonest(c~retired | elig_year, data=rr, se.method="EHW") - f0c <- RDHonest(c~retired | elig_year, data=rr, clusterid=seq_along(rr$c)) + expect_message(f0 <- RDHonest(c~retired | elig_year, data=rr, + se.method="EHW")) + expect_message(f0c <- RDHonest(c~retired | elig_year, data=rr, + clusterid=seq_along(rr$c))) expect_equal(f0$coefficients, f0c$coefficients) - p0 <- RDHonest(c~elig_year, data=rr, se.method="EHW", point.inference=TRUE) - p0c <- RDHonest(c~elig_year, data=rr, clusterid=seq_along(rr$c), - point.inference=TRUE) + expect_message(p0 <- RDHonest(c~elig_year, data=rr, + se.method="EHW", point.inference=TRUE)) + expect_message(p0c <- RDHonest(c~elig_year, data=rr, + clusterid=seq_along(rr$c), + point.inference=TRUE)) expect_equal(p0$coefficients, p0c$coefficients) ## Check we match sandwich::vcovCL: uniform + triangular set.seed(42) clusterid <- sample(1:50, NROW(lee08), replace=TRUE) - s1h <- RDHonest(clusterid~margin, data=lee08, se.method="EHW", h=5, - kern="uniform") - s1c <- RDHonest(clusterid~margin, data=lee08, se.method="EHW", h=5, - kern="uniform", clusterid=clusterid) + expect_message(s1h <- RDHonest(clusterid~margin, data=lee08, + se.method="EHW", h=5, kern="uniform")) + expect_message(s1c <- RDHonest(clusterid~margin, data=lee08, + se.method="EHW", h=5, + kern="uniform", clusterid=clusterid)) m1 <- lm(clusterid~margin*I(margin>0), data=lee08, subset=abs(margin)<=5) ## Should equal ## sqrt(sum(s1c$data$est_w[s1c$data$est_w!=0]^2*m1$residuals^2)) @@ -30,8 +35,8 @@ test_that("Test clustering formulas", { ## sandwich::vcovCL(m1, type="HC0", cluster=clusterid[abs(lee08$margin)<=5], ## cadjust=FALSE)[3, 3] expect_equal(as.numeric(s1c$coefficients[3])^2, 6.523026427) - s2c <- RDHonest(clusterid~margin, data=lee08, se.method="EHW", - clusterid=clusterid) + expect_message(s2c <- RDHonest(clusterid~margin, data=lee08, + se.method="EHW", clusterid=clusterid)) h <- s2c$coefficients$bandwidth m2 <- lm(clusterid~margin*I(margin>0), data=lee08, subset=abs(margin)<=h, @@ -44,10 +49,10 @@ test_that("Test clustering formulas", { set.seed(42) clusterid <- sample(1:20, NROW(rr), replace=TRUE) - p1c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid, - point.inference=TRUE, kern="uniform") - p1h <- RDHonest(c~elig_year, data=rr, se.method="EHW", point.inference=TRUE, - kern="uniform") # same bw + expect_message(p1c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid, + point.inference=TRUE, kern="uniform")) + expect_message(p1h <- RDHonest(c~elig_year, data=rr, se.method="EHW", + point.inference=TRUE, kern="uniform")) m2 <- lm(c~elig_year, data=rr, subset=abs(elig_year)<=5) expect_equal(p1c$coefficients$estimate, unname(m2$coefficients[1])) ## sqrt(sandwich::vcovHC(m2, type="HC0")[1, 1]) 792.2299439 @@ -56,8 +61,8 @@ test_that("Test clustering formulas", { expect_equal(p1c$coefficients$std.error^2, 723178.2655) expect_equal(p1h$coefficients$std.error, 792.2299439) - p2c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid, - point.inference=TRUE, h=8.27778063886) + expect_message(p2c <- RDHonest(c~elig_year, data=rr, clusterid=clusterid, + point.inference=TRUE, h=8.27778063886)) h <- p2c$coefficients$bandwidth m3 <- lm(c~elig_year, data=rr, subset=abs(elig_year)<=h, weights = (1 - abs(elig_year/h))) @@ -67,10 +72,12 @@ test_that("Test clustering formulas", { expect_equal(as.numeric(p2c$coefficients[2:3]), c(unname(m3$coefficients[1]), 919.162944)) - f1c <- RDHonest(c~retired | elig_year, data=rr, clusterid=clusterid, - kern="uniform") - f2c <- RDHonest(cn~retired | elig_year, data=rr, clusterid=clusterid, - kern="uniform", h=4) + expect_message(f1c <- RDHonest(c~retired | elig_year, data=rr, + clusterid=clusterid, + kern="uniform")) + expect_message(f2c <- RDHonest(cn~retired | elig_year, data=rr, + clusterid=clusterid, + kern="uniform", h=4)) ## m3 <- AER::ivreg(c~retired+elig_year+elig_year:I(elig_year>0) | ## elig_year*I(elig_year>0), ## data=rr, subset=abs(elig_year)<=5) @@ -85,9 +92,6 @@ test_that("Test clustering formulas", { ## Triangular kernel doesn't work with sandwich + ivreg ## Check preliminary bw calculations and rho - dd <- data.frame(y=log(rr$c), x=rr$elig_year, "(clusterid)"=clusterid) - names(dd)[3]<- "(clusterid)" - d <- NPRData(dd, cutoff=0, class="SRD") - r0 <- PrelimVar(d) - expect_equal(r0$rho, -0.0008690793197) + r0 <- RDHonest(log(c)~elig_year, h=Inf, M=1, data=rr, clusterid=clusterid) + expect_equal(PrelimVar(r0$d)$rho, -0.0008690793197) }) diff --git a/tests/testthat/test_cv.R b/tests/testthat/test_cv.R index 35904ad..545a96c 100644 --- a/tests/testthat/test_cv.R +++ b/tests/testthat/test_cv.R @@ -1,5 +1,3 @@ -context("Test bounded normal mean") - test_that("Sanity check for CVb for big and small values", { alpha <- 0.07 diff --git a/tests/testthat/test_frd.R b/tests/testthat/test_frd.R index 2dd2431..ed583bb 100644 --- a/tests/testthat/test_frd.R +++ b/tests/testthat/test_frd.R @@ -1,118 +1,94 @@ -context("Test inference under no bias") - test_that("Selected bw is infinite", { - d <- NPRData(cbind(logcn=log(rcp[1:5000, 6]), rcp[1:5000, c(3, 2)]), - cutoff=0, "FRD") - ## Expect using all data - r0 <- NPRHonest(d, M=c(0, 0), kern="triangular", opt.criterion="OCI", - T0=0)$coefficients - r1 <- NPRHonest(d, M=c(0, 0), kern="uniform", opt.criterion="FLCI", - T0=0)$coefficients - r2 <- NPReg(d, Inf, "uniform") - expect_equal(c(r1$std.error, r1$estimate), - c(r2$se, r2$estimate)) - expect_identical(r1$maximum.bias, 0) + r0 <- RDHonest(log(cn)~retired|elig_year, data=rcp[1:5000, ], M=c(0, 0), + opt.criterion="OCI") ## For triangular kernel, maximum bw is given by end of support, so we ## expect to find minimum at boundary - expect_equal(r1$bandwidth, max(abs(d$X))) + expect_gt(r0$coefficients$bandwidth, max(abs(r0$d$X))-2e-6) + r0$d$sigma2 <- NULL + r1 <- NPRHonest(r0$d, M=c(0, 0), kern="uniform", opt.criterion="FLCI", + T0=0)$coefficients + r2 <- NPReg(r0$d, Inf, "uniform") + expect_identical(r1$maximum.bias, 0) - d <- NPRData(lee08, cutoff=0, "SRD") - r1 <- NPRHonest(d, M=0, kern="uniform", opt.criterion="MSE")$coefficients - r2 <- NPReg(d, Inf, "uniform") expect_equal(c(r1$std.error, r1$estimate), - c(r2$se, r2$estimate)) - expect_identical(r1$maximum.bias, 0) -}) + unname(c(r2$se, r2$estimate))) + expect_equal(r1$bandwidth, max(abs(r0$d$X))) -context("Test FRD") + r1 <- RDHonest(voteshare~margin, data=lee08, M=0, kern="uniform", + opt.criterion="MSE") + r2 <- NPReg(r1$d, Inf, "uniform") + expect_equal(c(r1$coefficients$std.error, r1$coefficients$estimate), + unname(c(r2$se, r2$estimate))) + expect_identical(r1$coefficients$maximum.bias, 0) +}) test_that("FRD data example check", { - d <- NPRData(cbind(logcn=log(rcp[, 6]), rcp[, c(3, 2)]), cutoff=0, "FRD") - M <- MROT(d) - r1 <- NPRHonest(d, M, kern="triangular", opt.criterion="MSE", - T0=0)$coefficients - r2 <- NPRHonest(d, M, kern="triangular", opt.criterion="MSE", - T0=r1$estimate)$coefficients - r1a <- NPRHonest(d, M, kern="triangular", opt.criterion="MSE", T0=0, - alpha=r1$p.value)$coefficients + expect_message(r1 <- RDHonest(log(cn)~retired|elig_year, + data=rcp[1:5000, ])) + r1$d$sigma2 <- NULL + M0 <- c(r1$coefficients$M.rf, r1$coefficients$M.fs) + r2 <- NPRHonest(r1$d, M=M0, kern="triangular", opt.criterion="MSE", + T0=r1$coefficients$estimate)$coefficients + r1a <- NPRHonest(r1$d, M=M0, kern="triangular", opt.criterion="MSE", T0=0, + alpha=r1$coefficients$p.value)$coefficients + expect_equal(r1a$conf.high, 0) - expect_equal(max(abs(expect_equal(r1a$conf.high, 0))), 0) ## With positive T0, expect greater effective M, and thus smaller bandwidth - expect_lt(r2$bandwidth, r1$bandwidth) + expect_lt(r2$bandwidth, r1$coefficients$bandwidth) ## On travis, only the first 6 digits match, not sure why - expect_equal(round(r2$estimate, 6), -0.188134) + expect_equal(round(r2$estimate, 6), -0.244582) }) test_that("FRD with almost perfect first stage", { - d <- NPRData(data.frame(y=lee08$voteshare, - d=lee08$margin>0, x=lee08$margin), cutoff=0, "FRD") - M <- MROT(d) - r0 <- NPRHonest(d, M, kern="triangular", opt.criterion="FLCI") - r1 <- NPRHonest(d, M, kern="triangular", opt.criterion="FLCI", + expect_message(r0 <- RDHonest(voteshare~I(margin>0)|margin, data=lee08, + kern="triangular", opt.criterion="FLCI")) + r0$d$sigma2 <- NULL + M0 <- c(r0$coefficients$M.rf, r0$coefficients$M.fs) + r1 <- NPRHonest(r0$d, M0, kern="triangular", opt.criterion="FLCI", T0=r0$coefficients$estimate)$coefficients - r2 <- NPRHonest(NPRData(lee08, cutoff=0, "SRD"), unname(M[1]), - kern="triangular", opt.criterion="FLCI")$coefficients - expect_lt(max(abs(r2[2:6]-r2[2:6])), 3e-7) + r2 <- RDHonest(voteshare~margin, M=M0[1], data=lee08, + kern="triangular", opt.criterion="FLCI")$coefficients + expect_lt(max(abs(r1[2:8]-r2[2:8])), 3e-7) set.seed(42) df <- data.frame(y=lee08$voteshare, d=lee08$margin+rnorm(n=length(lee08$margin), sd=0.1)>0, x=lee08$margin) - d <- NPRData(df, cutoff=0, "FRD") - M <- MROT(d) - r0 <- NPRHonest(d, M, kern="triangular", opt.criterion="MSE") - r1 <- NPRHonest(d, M, kern="triangular", opt.criterion="MSE", + expect_message(r0 <- RDHonest(y~d|x, data=df, + kern="triangular", opt.criterion="MSE")) + r0$d$sigma2 <- NULL + M0 <- c(r0$coefficients$M.rf, r0$coefficients$M.fs) + r1 <- NPRHonest(r0$d, M0, kern="triangular", opt.criterion="MSE", T0=r0$coefficients$estimate)$coefficients - r2 <- NPRHonest(NPRData(lee08, cutoff=0, "SRD"), unname(M[1]), - kern="triangular", opt.criterion="MSE")$coefficients - expect_lt(abs(r1$estimate*r1$first.stage-r2$estimate), 1e-2) - expect_lt(abs(r1$bandwidth-r2$bandwidth), 0.1) - expect_lt(abs(r0$coefficients$conf.low-r1$conf.low), 0.01) + r2 <- RDHonest(voteshare~margin, data=lee08, M=M0[1], kern="triangular", + opt.criterion="MSE")$coefficients + expect_lt(abs(r1$estimate*r1$first.stage-r2$estimate), 2e-3) + expect_lt(abs(r1$bandwidth-r2$bandwidth), 2e-2) + expect_lt(abs(r0$coefficients$conf.low-r1$conf.low), 1e-3) }) test_that("FRD interface", { - d <- NPRData(cbind(logf=log(rcp[1:10000, 6]), rcp[1:10000, c(3, 2)]), - cutoff=0, "FRD") - M <- MROT(d) rcp1 <- rcp[1:10000, ] - r1 <- NPRHonest(d, M, kern="triangular", opt.criterion="OCI", T0=0) - p1 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, M=M, - kern="triangular", opt.criterion="OCI", T0=0) - expect_equal(r1$coefficients$estimate, p1$coefficients$estimate) - p1.1 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, - kern="triangular", T0=0, h=6) + expect_message(p1.1 <- RDHonest(log(cn)~retired | elig_year, + data=rcp1, cutoff=0, + kern="triangular", T0=0, h=6)) expect_lt(abs(p1.1$coefficients$conf.high-0.150118293818), 1e-11) expect_lt(abs(p1.1$coefficients$maximum.bias-0.0980630668382), 1e-11) - p1.2 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0) + expect_message(p1.2 <- RDHonest(log(cn)~retired | elig_year, + data=rcp1, cutoff=0)) expect_lt(abs(p1.2$coefficients$M.fs-0.00626732839127), 1e-11) expect_lt(abs(p1.2$coefficients$M.rf-0.00436022188088), 1e-11) expect_lt(abs(p1.2$coefficients$conf.high- 0.17195578263), 1e-6) expect_lt(abs(p1.2$coefficients$estimate+0.172378084757), 1e-6) - r2 <- NPRHonest(d, M, kern="triangular", opt.criterion="OCI", - T0=r1$coefficients$estimate) - p2 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, M=M, - kern="triangular", opt.criterion="OCI", - T0=p1$coefficients$estimate) - expect_equal(r2$coefficients$estimate, p2$coefficients$estimate) - ## codecov.io check - expect_equal(capture.output(print(r2))[c(5:7, 9:12)], - capture.output(print(p2))[c(11:13, 15:18)]) - expect_equal(capture.output(print(p2, digits=4))[15], - "First stage estimate: 0.3418 ") - expect_equal(capture.output(print(p2, digits=4))[14], - "Maximal leverage for fuzzy RD parameter: 0.00361") - - r3 <- NPRHonest(d, M, kern="triangular", h=7, T0=r1$coefficients$estimate) - p3 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, M=M, - kern="triangular", opt.criterion="OCI", - T0=p1$coefficients$estimate, h=7) - expect_equal(r3$coefficients$estimate, p3$coefficients$estimate) - - r4 <- OptBW(d, M, kern="triangular", opt.criterion="OCI", - T0=r1$coefficients$estimate) - p4 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, M=M, + p2 <- RDHonest(log(cn)~retired | elig_year, data=rcp1, cutoff=0, + M=as.numeric(p1.1$coefficients[16:17]), kern="triangular", opt.criterion="OCI", - T0=p1$coefficients$estimate) - expect_identical(r4, p4$coefficients$bandwidth) + T0=p1.1$coefficients$estimate) + eo <- c(paste0(" Fuzzy RD parameter -0.2021", + " 0.1534 0.09753 (-0.5545, 0.1503)"), + "Maximal leverage for fuzzy RD parameter: 0.003629", + "First stage estimate: 0.3416 ") + expect_equal(capture.output(print(p2, digits=4))[c(9, 14:15)], + eo) }) diff --git a/tests/testthat/test_interface.R b/tests/testthat/test_interface.R new file mode 100644 index 0000000..414f747 --- /dev/null +++ b/tests/testthat/test_interface.R @@ -0,0 +1,10 @@ +test_that("Test inputs", { + expect_error(r2 <- RDHonest(log(cn)~retired|elig_year, data=rcp, M=1, T0=0)) + expect_error(r2 <- RDHonest(log(cn)~elig_year, data=rcp, M=c(1, 1))) + + expect_message(pp <- RDHonest(voteshare~margin, data=lee08, + M=2, h=5, subset=I(margin>0))$coefficients) + + expect_equal(as.numeric(pp[c(2, 3, 11)]), + c(0L, sqrt(.Machine$double.xmax/10), NA)) +}) diff --git a/tests/testthat/test_kernels.R b/tests/testthat/test_kernels.R index 67629db..b432622 100644 --- a/tests/testthat/test_kernels.R +++ b/tests/testthat/test_kernels.R @@ -1,5 +1,3 @@ -context("Equivalent kernels") - test_that("Numeric and analytical equivalent kernels agree", { us <- matrix(c(-2, seq(-0.9, 0.9, by=0.05), 0, 2), nrow = 4) diff --git a/tests/testthat/test_lpp.R b/tests/testthat/test_lpp.R index 6bdea75..0a84dd9 100644 --- a/tests/testthat/test_lpp.R +++ b/tests/testthat/test_lpp.R @@ -1,44 +1,42 @@ -context("Test Inference at a point") - test_that("Inference at point agrees with RD", { - d <- NPRData(lee08, cutoff=0, "SRD") - rde <- NPRHonest(d, h=5, M=2)$coefficients - dp <- NPRData(lee08[lee08$margin>=0, ], cutoff=0, "IP") - dm <- NPRData(lee08[lee08$margin<0, ], cutoff=0, "IP") - p0 <- NPRHonest(dp, h=5, M=2) - pp <- p0$coefficients - mm <- NPRHonest(dm, h=5, M=2)$coefficients - expect_equal(pp$estimate-mm$estimate, rde$estimate) - expect_equal(pp$std.error^2+mm$std.error^2, rde$std.error^2) - expect_equal(pp$maximum.bias+mm$maximum.bias, rde$maximum.bias) + rde <- RDHonest(voteshare~margin, data=lee08, M=2, h=5) + pp <- RDHonest(voteshare~margin, data=lee08, M=2, h=5, + subset=I(margin>=0), point.inference=TRUE) + mm <- RDHonest(voteshare~margin, data=lee08, M=2, h=5, + subset=I(margin<0), point.inference=TRUE) + expect_equal(pp$coefficients[2]-mm$coefficients[2], rde$coefficients[2]) + expect_equal(pp$coefficients[3]^2+mm$coefficients[3]^2, + rde$coefficients[3]^2) + expect_equal(pp$coefficients[4]+mm$coefficients[4], rde$coefficients[4]) - p2 <- RDHonest(voteshare~margin, data=lee08, subset=margin>=0, h=5, M=2, - point.inference=TRUE) - expect_equal(p0$coefficients[-1], p2$coefficients[-1]) - r <- NPRHonest(d, h=7, M=2)$coefficients - rm <- NPRHonest(dp, h=7, M=2)$coefficients - rp <- NPRHonest(dm, h=7, M=2)$coefficients + r <- NPRHonest(rde$d, h=7, M=2)$coefficients + rm <- NPRHonest(mm$d, h=7, M=2)$coefficients + rp <- NPRHonest(pp$d, h=7, M=2)$coefficients expect_equal(r$maximum.bias, rm$maximum.bias+rp$maximum.bias) expect_equal(sqrt(rp$std.error^2+rm$std.error^2), r$std.error) - ## Silverman variance should approximately match }) test_that("MROT matches paper", { - Mh <- RDHonest(mortHS~povrate, data=headst, cutoff=0, h=0)$coefficients$M + suppressMessages(Mh <- RDHonest(mortHS~povrate, data=headst, + cutoff=0, h=0)$coefficients$M) expect_equal(Mh, 0.29939992) - Mp <- RDHonest(mortHS~povrate, data=headst, subset = (povrate>=0), - cutoff=0, h=0, point.inference=TRUE) - Mm <- RDHonest(mortHS~povrate, data=headst, subset = (povrate<0), - h=0, point.inference=TRUE) + suppressMessages(Mp <- RDHonest(mortHS~povrate, data=headst, + subset = (povrate>=0), cutoff=0, h=0, + point.inference=TRUE)) + suppressMessages(Mm <- RDHonest(mortHS~povrate, data=headst, + subset = (povrate<0), h=0, + point.inference=TRUE)) expect_equal(Mp$coefficients$M, Mh) expect_lt(Mm$coefficients$M, Mh) }) test_that("ROT bandwidth check", { ## Interior - d <- NPRData(lee08, cutoff=0, "IP") + r0 <- RDHonest(voteshare~margin, data=lee08, point.inference=TRUE, M=0) + expect_lt(abs(r0$coefficients$bandwidth - 100), 1e-5) + d <- r0$d b1 <- ROTBW(d, kern="uniform") ## f0 using Silverman: @@ -48,32 +46,35 @@ test_that("ROT bandwidth check", { h <- (C*sigma(ll)^2 / (length(d$X)*f0*ll$coefficients[3]^2))^(1/5) expect_equal(b1, unname(h)) - dp <- NPRData(lee08[lee08$margin>0, ], cutoff=0, "IP") - bp1 <- ROTBW(dp, kern="uniform") + r1 <- RDHonest(voteshare~margin, data=lee08, point.inference=TRUE, + subset=margin>0, M=1)$d + bp1 <- ROTBW(r1, kern="uniform") ## f0 using Silverman: f0 <- 0.0079735105 C <- 36 # nu0/(4*mu_2^2) - ll <- lm(dp$Y~dp$X+I(dp$X^2)+I(dp$X^3)+I(dp$X^4)) + ll <- lm(r1$Y~r1$X+I(r1$X^2)+I(r1$X^3)+I(r1$X^4)) der <- unname(ll$coefficients[3]) - h <- (C*sigma(ll)^2 / (length(dp$X)*f0*der^2))^(1/5) + h <- (C*sigma(ll)^2 / (length(r1$X)*f0*der^2))^(1/5) expect_equal(bp1, h) }) test_that("Optimal bandwidth calculations", { - rr <- RDHonest(voteshare ~ margin, data=lee08, subset = (margin>0), - kern="uniform", opt.criterion="FLCI", point.inference=TRUE) + expect_message(rr <- RDHonest(voteshare ~ margin, data=lee08, + subset = (margin>0), + kern="uniform", opt.criterion="FLCI", + point.inference=TRUE)) expect_equal(rr$coefficients$conf.high.onesided, 55.24963853) expect_equal(rr$coefficients$eff.obs, 858) expect_equal(rr$coefficients$eff.obs, sum(lee08$margin<=rr$coefficients$bandwidth & lee08$margin>0)) - dp <- NPRData(lee08[lee08$margin>0, ], cutoff=0, "IP") Mh <- rr$coefficients$M - re <- RDHonest(voteshare ~ margin, data=lee08, subset = (margin>0), - kern="uniform", opt.criterion="FLCI", point.inference=TRUE, - se.method="EHW") + expect_message(re <- RDHonest(voteshare ~ margin, data=lee08, + subset = (margin>0), + kern="uniform", opt.criterion="FLCI", + point.inference=TRUE, se.method="EHW")) ## Should match regression rl <- lm(voteshare ~ margin, data=lee08, subset=margin>0 & margin<=rr$coefficients$bandwidth) @@ -87,13 +88,15 @@ test_that("Optimal bandwidth calculations", { opt.criterion="MSE", point.inference=TRUE) r <- capture.output(print(r1, digits=4)) expect_equal(r[11], "Bandwidth: 13.41, Kernel: triangular") - r2 <- OptBW(dp, M=2*Mh, opt.criterion="MSE") + + re$d$sigma2 <- NULL + r2 <- OptBW(re$d, M=2*Mh, opt.criterion="MSE") expect_identical(r2, r1$coefficients$bandwidth) expect_lt(abs(r2- 13.4109133), 1e-6) ## Make sure we're getting positive worst-case bias - r <- RDHonest(voteshare ~ margin, data=lee08, subset = (margin>0), - cutoff=20, - kern="uniform", opt.criterion="MSE", point.inference=TRUE) - expect_equal(r$coefficients$maximum.bias, 0.2482525) + expect_message(r <- RDHonest(voteshare ~ margin, data=lee08, + subset = (margin>0), cutoff=20, kern="uniform", + opt.criterion="MSE", point.inference=TRUE)) + expect_equal(r$coefficients$maximum.bias, 0.248252496) }) diff --git a/tests/testthat/test_npr.R b/tests/testthat/test_npr.R index 12d7985..50cecc0 100644 --- a/tests/testthat/test_npr.R +++ b/tests/testthat/test_npr.R @@ -1,7 +1,8 @@ -context("Test NPR") - test_that("Test NN variance estimator", { - d <- NPRData(rcp[1:5000, c(6, 3, 2)], cutoff=0, "FRD") + r0 <- RDHonest(cn~retired | elig_year, data=rcp[1:5000, ], + M=c(1, 1), h=10) + d <- r0$d + s0 <- sigmaNN(d$X[d$p], d$Y[d$p, ], J=5) s1 <- sigmaNN(d$X[d$p], d$Y[d$p, 1], J=5) s2 <- sigmaNN(d$X[d$p], d$Y[d$p, 2], J=5) @@ -10,7 +11,10 @@ test_that("Test NN variance estimator", { }) test_that("Test LPreg", { - d <- NPRData(rcp[1:5000, c(6, 3, 2)], cutoff=0, "FRD") + r0 <- RDHonest(cn~retired | elig_year, data=rcp[1:5000, ], + M=c(1, 1), h=10) + d <- r0$d + d$sigma2 <- NA K <- EqKern("triangular", boundary=FALSE, order=0) r0e <- LPReg(d$X[d$m], d$Y[d$m, ], h=10, K, order=2, se.method="EHW") r0n <- LPReg(d$X[d$m], d$Y[d$m, ], h=10, K, order=2, se.method="nn", J=4) @@ -21,11 +25,13 @@ test_that("Test LPreg", { expect_lt(max(abs(r0e$theta- c(r1e$theta, r2e$theta))), 1e-10) expect_lt(max(abs(r0n$theta- c(r1n$theta, r2n$theta))), 1e-10) - expect_equal(r0e$sigma2[, c(1, 4)], cbind(r1e$sigma2, r2e$sigma2)) - expect_equal(r0n$sigma2[, c(1, 4)], cbind(r1n$sigma2, r2n$sigma2)) + expect_equal(unname(r0e$sigma2[, c(1, 4)]), + cbind(r1e$sigma2, r2e$sigma2)) + expect_equal(unname(r0n$sigma2[, c(1, 4)]), + cbind(r1n$sigma2, r2n$sigma2)) - expect_equal(r0e$var[c(1, 4)], c(r1e$var, r2e$var)) - expect_equal(r0n$var[c(1, 4)], c(r1n$var, r2n$var)) + expect_equal(unname(r0e$var[c(1, 4)]), c(r1e$var, r2e$var)) + expect_equal(unname(r0n$var[c(1, 4)]), c(r1n$var, r2n$var)) expect_equal(r0e$w, r1e$w) expect_equal(r0e$w, r1n$w) @@ -36,9 +42,11 @@ test_that("Test LPreg", { test_that("Test NPReg", { ## Replicate Ludwig-Miller lumi <- headst[!is.na(headst$mortHS), c("mortHS", "povrate")] - mort <- NPRData(lumi, cutoff=0, "SRD") - mortm <- NPRData(lumi[lumi$povrate<0, ], cutoff=0, "IP") - mortp <- NPRData(lumi[lumi$povrate>=0, ], cutoff=0, "IP") + mort <- RDHonest(mortHS~povrate, data=lumi, M=1)$d + mortm <- RDHonest(mortHS~povrate, data=lumi, M=1, + subset=povrate<0, point.inference=TRUE)$d + mortp <- RDHonest(mortHS~povrate, data=lumi, M=1, + subset=povrate>=0, point.inference=TRUE)$d t1 <- data.frame() t2 <- data.frame() bws <- c(9, 18, 36) @@ -62,19 +70,19 @@ test_that("Test NPReg", { expect_lt(max(abs(t1-t2)), 1e-11) ## Replicate Battistin et al - df <- NPRData(cbind(rcp[, c(3, 2)]), cutoff=0, "SRD") - dr <- NPRData(cbind(logcn=log(rcp[, 6]), rcp[, 2, drop=FALSE]), - cutoff=0, "SRD") - d <- NPRData(cbind(logcn=log(rcp[, 6]), rcp[, c(3, 2)]), cutoff=0, "FRD") + df <- RDHonest(retired~elig_year, data=rcp, M=1, h=6)$d + dr <- RDHonest(log(cn)~elig_year, data=rcp, M=1, h=6)$d + d <- RDHonest(log(cn)~retired|elig_year, data=rcp, M=c(1, 1), h=6)$d + rf <- NPReg(df, 10, "uniform", order=1, se.method="EHW") rr <- NPReg(dr, 10, "uniform", order=1, se.method="EHW") expect_equal(unname(c(rf$estimate, round(rf$se, 4))), c(0.43148435, 0.0181)) expect_equal(unname(c(rr$estimate, round(rr$se, 4))), - c(-0.0355060, 0.0211)) + c(-0.03550599496, 0.0211)) r1 <- NPReg(d, 10, "uniform", order=1, se.method="EHW") - expect_equal(c(r1$estimate, r1$se), - c(-0.08228802, 0.0483039)) + expect_equal(unname(c(r1$estimate, r1$se)), + c(-0.08228802393, 0.04830389419)) ## Numbers from ## dt <- cbind(logcn=log(rcp[, 6]), rcp[, c(3, 2)], Z=rcp$elig_year>=0) ## r4 <- AER::ivreg(logcn ~ retired + Z:elig_year + elig_year | @@ -87,5 +95,6 @@ test_that("Test NPReg", { ## r3 <- AER::ivreg(logcn~retired | Z, subset=(abs(elig_year)<=5), data=dt) ## summary(r3, vcov = sandwich::sandwich, ## diagnostics = TRUE)$coefficients[2, 1:2] - expect_equal(c(r2$estimate, r2$se), c(-0.14157767, 0.02562096)) + expect_equal(unname(c(r2$estimate, r2$se)), + c(-0.14157767658, 0.02562096397)) }) diff --git a/tests/testthat/test_rd.R b/tests/testthat/test_rd.R index e3c698c..2d69d66 100644 --- a/tests/testthat/test_rd.R +++ b/tests/testthat/test_rd.R @@ -1,21 +1,4 @@ -context("Test RD") - -test_that("Test class constructor sorting", { - d0 <- NPRData(rcp[, c(6, 3, 2)], cutoff=3, "FRD") - d1 <- NPRData(rcp[sort(rcp$elig_year, - index.return=TRUE)$ix, c(6, 3, 2)], cutoff=3, "FRD") - expect_identical(d1, d0) - - d0 <- NPRData(rcp[, c(6, 2)], cutoff=3, "SRD") - d1 <- NPRData(rcp[sort(rcp$elig_year, - index.return=TRUE)$ix, c(6, 2)], cutoff=3, "SRD") - expect_identical(d1, d0) - - d0 <- NPRData(rcp[, c(6, 2)], cutoff=-3, "IP") - d1 <- NPRData(rcp[sort(rcp$elig_year, - index.return=TRUE)$ix, c(6, 2)], cutoff=-3, "IP") - expect_identical(d1, d0) - +test_that("Test I() in formulas", { ## Now test that I() works expect_equal(RDHonest(voteshare ~ margin, data=lee08, M=0, h=2)$coefficients$estimate, @@ -29,23 +12,27 @@ test_that("Test class constructor sorting", { test_that("IK bandwidth calculations", { ## Test IK bandwidth in Lee data, IK Table 1 - d <- NPRData(lee08, cutoff=0, "SRD") + d <- RDHonest(voteshare~margin, data=lee08, h=10, M=0.1)$d + d$sigma2 <- NULL dig <- getOption("digits") options(digits=8) - r1 <- capture.output(IKBW(d, verbose=TRUE)) + r1 <- capture.output(h <- IKBW(d, verbose=TRUE)) + options(digits=dig) expect_equal(r1[c(2, 4, 5, 6, 8)], c(" h1: 14.44507 ", " f(0): 0.0089622411 ", " sigma^2_{+}(0): 12.024424 ^2", " sigma^2_{+}(0): 10.472064 ^2 ", " h_{2, +}: 60.513312 h_{2, -}: 60.993358 ")) - options(digits=dig) expect_equal(IKBW(d), 29.3872649956) - r <- NPReg(d, IKBW(d, kern="uniform"), "uniform") + d0 <- RDHonest(voteshare~margin, data=lee08, h=h, M=0.1)$d + d0$sigma2 <- NULL + + r <- NPReg(d0, IKBW(d0, kern="uniform"), "uniform") expect_equal(r$estimate, 8.0770003749) - d <- PrelimVar(NPRData(lee08, cutoff=0, "SRD"), se.initial="EHW") + d <- PrelimVar(d0, se.initial="EHW") expect_equal(sqrt(mean(d$sigma2[d$p])), 12.58183131) expect_equal(sqrt(mean(d$sigma2[d$m])), 10.79067278) }) @@ -93,8 +80,7 @@ test_that("Honest inference in Lee and LM data", { expect_equal(r2o[10], "Onesided CIs: (-Inf, 4.742), (-7.138, Inf)") expect_equal(r1o[16], "24 observations with missing values dropped") - d <- NPRData(headst[!is.na(headst$mortHS), c("mortHS", "povrate")], - cutoff=0, "SRD") + d <- RDHonest(mortHS~povrate, data=headst, h=10, M=2)$d d <- PrelimVar(d, se.initial="Silverman") es <- function(kern, se.method) { NPRHonest(d, M=0.0076085544, kern=kern, sclass="H", se.method=se.method, @@ -189,7 +175,8 @@ test_that("Honest inference in Lee and LM data", { ## expect_equal(r3$h, 5.0590753991) ## Decrease M, these results are not true minima... - d <- PrelimVar(NPRData(lee08, cutoff=0, "SRD"), se.initial="Silverman") + d <- RDHonest(voteshare~margin, data=lee08, h=1, M=1)$d + d <- PrelimVar(d, se.initial="Silverman") r1 <- NPRHonest(d, M=0.01, kern="uniform", opt.criterion="MSE", beta=0.8, sclass="T")$coefficients r2 <- NPRHonest(d, M=0.01, kern="uniform", opt.criterion="MSE", beta=0.8, @@ -203,15 +190,16 @@ test_that("Honest inference in Lee and LM data", { ## Missing values expect_error(RDHonest(mortHS ~ povrate, data=headst, kern="uniform", h=12, na.action="na.fail")) - r1 <- RDHonest(mortHS ~ povrate, data=headst, kern="uniform", - na.action="na.omit") + expect_message(r1 <- RDHonest(mortHS ~ povrate, data=headst, kern="uniform", + na.action="na.omit")) r1 <- capture.output(print(r1, digits=6)) expect_equal(r1[c(11, 12, 16)], c("Bandwidth: 3.98048, Kernel: uniform", "Number of effective observations: 239", "24 observations with missing values dropped")) - r2 <- RDHonest(mortHS ~ povrate, data=headst, kern="epanechnikov", - point.inference=TRUE, cutoff=4) + expect_message(r2 <- RDHonest(mortHS ~ povrate, data=headst, + kern="epanechnikov", + point.inference=TRUE, cutoff=4)) expect_equal(capture.output(print(r2, digits=6))[c(11, 12, 16)], c("Bandwidth: 9.42625, Kernel: epanechnikov", "Number of effective observations: 373.642", @@ -290,15 +278,15 @@ test_that("Supplied variance", { point.inference=TRUE, cutoff=2) expect_equal(r$coefficients$std.error, r2$coefficients$std.error) - r <- RDHonest(log(cn)~retired | elig_year, data=rcp[1:100, ], cutoff=0, - M=c(0.002, 0.005), T0=0, h=7, kern="uniform") + expect_message(r <- RDHonest(log(cn)~retired | elig_year, data=rcp[1:100, ], + cutoff=0, M=c(0.002, 0.005), T0=0, h=7, + kern="uniform")) ## Data not in order - r2 <- RDHonest(r$data$Y[, 1]~r$data$Y[, 2] | r$data$X, data=rcp[1:100, ], - cutoff=0, M=c(0.002, 0.005), T0=0, h=7, - sigmaY2=r$data$sigma2, kern="uniform") + expect_message(r2 <- RDHonest(r$data$Y[, 1]~r$data$Y[, 2] | r$data$X, + data=rcp[1:100, ], cutoff=0, + M=c(0.002, 0.005), T0=0, h=7, + sigmaY2=r$data$sigma2, kern="uniform")) expect_equal(r$coefficients$std.error, r2$coefficients$std.error) - - }) diff --git a/tests/testthat/test_weights.R b/tests/testthat/test_weights.R index 809847b..7fc7f7e 100644 --- a/tests/testthat/test_weights.R +++ b/tests/testthat/test_weights.R @@ -1,5 +1,3 @@ -context("Test weighted RD") - test_that("Test weighting using cghs", { ## Make 10 groups, estimate within-group variance set.seed(42) @@ -17,10 +15,10 @@ test_that("Test weighting using cghs", { sum(ix)) dd <- rbind(dd, df) } - s0 <- RDHonest(log(earnings)~yearat14, cutoff=1947, h=5, data=cghs, - M=1)$coefficients - s1 <- RDHonest(y~x, h=5, data=dd, M=1, weights=weights, sigmaY2=sigma2, - se.method="supplied.var")$coefficients + s0 <- RDHonest(log(earnings)~yearat14, cutoff=1947, h=5, + data=cghs, M=1)$coefficients + s1 <- RDHonest(y~x, h=5, data=dd, M=1, weights=weights, + sigmaY2=sigma2, se.method="supplied.var")$coefficients expect_equal(s0, s1) ## Don't supply variance @@ -29,10 +27,10 @@ test_that("Test weighting using cghs", { ## SE should be close enough expect_lt(max(abs(s2[c(3, 5, 6)]-s0[c(3, 5, 6)])), 3e-3) ## Estimate M - s0a <- RDHonest(log(earnings)~yearat14, cutoff=1947, h=5, data=cghs, - kern="uniform")$coefficients - s2a <- RDHonest(y~x, h=5, data=dd, weights=weights, - kern="uniform")$coefficients + expect_message(s0a <- RDHonest(log(earnings)~yearat14, cutoff=1947, h=5, + data=cghs, kern="uniform")$coefficients) + expect_message(s2a <- RDHonest(y~x, h=5, data=dd, weights=weights, + kern="uniform")$coefficients) expect_lt(max(abs(s0a[c(2, 4, 9:11)]-s2a[c(2, 4, 9:11)])), 1e-8) ## SE should be close enough expect_lt(max(abs(s2a[c(3, 5, 6)] - s0a[c(3, 5, 6)])), 3e-3) @@ -51,7 +49,8 @@ test_that("Test weighting using cghs", { expect_lt(max(abs(t2[c(3, 5, 6)]-t0[c(3, 5, 6)])), 1.1e-3) ## Collapse data fully - s0 <- RDHonest(log(earnings)~yearat14, cutoff=1947, data=cghs)$coefficients + expect_message(s0 <- RDHonest(log(earnings)~yearat14, + cutoff=1947, data=cghs)$coefficients) dd <- data.frame() for (j in unique(cghs$yearat14)) { ix <- cghs$yearat14==j @@ -60,24 +59,27 @@ test_that("Test weighting using cghs", { sigma2=var(log(cghs$earnings[ix]))/sum(ix)) dd <- rbind(dd, df) } - s1 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, sigmaY2=sigma2, - se.method="supplied.var", h=s0$bandwidth) + expect_message(s1 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, + sigmaY2=sigma2, se.method="supplied.var", + h=s0$bandwidth)) expect_equal(s1$coefficients, s0) ## If we use supplied.var for variance estimation, should match approx - s2 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, sigmaY2=sigma2, - se.method="supplied.var")$coefficients + expect_message(s2 <- RDHonest(y~x, cutoff=1947, data=dd, + weights=weights, sigmaY2=sigma2, + se.method="supplied.var")$coefficients) expect_lt(max(abs(s2[2:9]-s0[2:9])), 4e-3) ## Test that sigmaNN works when J large - s3 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, J=10, - h=s0$bandwidth)$coefficients - s4 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, - J=20, h=s0$bandwidth)$coefficients + expect_message(s3 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, + J=10, h=s0$bandwidth)$coefficients) + expect_message(s4 <- RDHonest(y~x, cutoff=1947, data=dd, weights=weights, + J=20, h=s0$bandwidth)$coefficients) expect_equal(s3, s4) }) ## Fuzzy RD test_that("Test weighting using rcp ", { - r0 <- RDHonest(log(cn)~retired|elig_year, data=rcp, T0=0)$coefficients + expect_message(r0 <- RDHonest(log(cn)~retired|elig_year, + data=rcp, T0=0)$coefficients) dd <- data.frame(x=sort(unique(rcp$elig_year)), y=NA, d=NA, weights=NA, sig11=NA, sig12=NA, sig21=NA, sig22=NA) for (j in seq_len(NROW(dd))) { @@ -85,12 +87,14 @@ test_that("Test weighting using rcp ", { Y <- cbind(log(rcp$cn[ix]), rcp$retired[ix]) dd[j, -1] <- c(colMeans(Y), sum(ix), as.vector(var(Y))/sum(ix)) } - r1 <- RDHonest(y~d|x, data=dd, weights=weights, sigmaY2=sig11, T0=0, - sigmaYD=sig21, sigmaD2=sig22, h=r0$bandwidth, - se.method="supplied.var")$coefficients + expect_message(r1 <- RDHonest(y~d|x, data=dd, weights=weights, + sigmaY2=sig11, T0=0, sigmaYD=sig21, + sigmaD2=sig22, h=r0$bandwidth, + se.method="supplied.var")$coefficients) + rownames(r0) <- rownames(r1) expect_equal(r0, r1) - r2 <- RDHonest(y~d|x, data=dd, weights=weights, T0=0, sigmaY2=sig11, - sigmaYD=sig21, sigmaD2=sig22, - se.method="supplied.var")$coefficients + expect_message(r2 <- RDHonest(y~d|x, data=dd, weights=weights, T0=0, + sigmaY2=sig11, sigmaYD=sig21, sigmaD2=sig22, + se.method="supplied.var")$coefficients) expect_lt(max(abs(r2[2:8]-r0[2:8])), 2e-3) })