From 62b3f0970d3f6b065c11d1738e6f4156b8c156e9 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 18 Jul 2024 17:20:48 +0100 Subject: [PATCH 1/2] Add stats qualification --- R/joinerml-tidiers.R | 2 +- R/mediation-tidiers.R | 10 +++++----- R/mfx-tidiers.R | 4 ++-- R/quantreg-nlrq-tidiers.R | 2 +- R/quantreg-rq-tidiers.R | 2 +- R/stats-mlm-tidiers.R | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/R/joinerml-tidiers.R b/R/joinerml-tidiers.R index 9f4c1076c..e4f1a0f6c 100644 --- a/R/joinerml-tidiers.R +++ b/R/joinerml-tidiers.R @@ -102,7 +102,7 @@ tidy.mjoint <- function(x, component = "survival", conf.int = FALSE, out <- out[, c(5, 1:4)] if (conf.int) { - cv <- qnorm(1 - (1 - conf.level) / 2) + cv <- stats::qnorm(1 - (1 - conf.level) / 2) out$conf.low <- out$estimate - cv * out$std.error out$conf.high <- out$estimate + cv * out$std.error } diff --git a/R/mediation-tidiers.R b/R/mediation-tidiers.R index 2bb9a7661..071619c91 100644 --- a/R/mediation-tidiers.R +++ b/R/mediation-tidiers.R @@ -71,10 +71,10 @@ tidy.mediate <- function(x, conf.int = FALSE, conf.level = .95, ...) { 3 / 2 } a <- top / under - lower.inv <- pnorm(z + (z + qnorm(low)) / (1 - a * (z + qnorm(low)))) - lower2 <- lower <- quantile(theta, lower.inv) - upper.inv <- pnorm(z + (z + qnorm(high)) / (1 - a * (z + qnorm(high)))) - upper2 <- upper <- quantile(theta, upper.inv) + lower.inv <- stats::pnorm(z + (z + stats::qnorm(low)) / (1 - a * (z + stats::qnorm(low)))) + lower2 <- lower <- stats::quantile(theta, lower.inv) + upper.inv <- stats::pnorm(z + (z + stats::qnorm(high)) / (1 - a * (z + stats::qnorm(high)))) + upper2 <- upper <- stats::quantile(theta, upper.inv) return(c(lower, upper)) } ci <- with( @@ -83,7 +83,7 @@ tidy.mediate <- function(x, conf.int = FALSE, conf.level = .95, ...) { ) if (s$boot.ci.type != "bca") { CI <- function(theta) { - return(quantile(theta, c(low, high), na.rm = TRUE)) + return(stats::quantile(theta, c(low, high), na.rm = TRUE)) } ci <- with( x, diff --git a/R/mfx-tidiers.R b/R/mfx-tidiers.R index fc415d6a4..839f02d4a 100644 --- a/R/mfx-tidiers.R +++ b/R/mfx-tidiers.R @@ -82,10 +82,10 @@ tidy.mfx <- x_tidy <- x_tidy %>% dplyr::mutate( - conf.low = estimate - qt(1 - (1 - conf.level) / 2, + conf.low = estimate - stats::qt(1 - (1 - conf.level) / 2, df = x$fit$df.residual ) * std.error, - conf.high = estimate + qt(1 - (1 - conf.level) / 2, + conf.high = estimate + stats::qt(1 - (1 - conf.level) / 2, df = x$fit$df.residual ) * std.error ) diff --git a/R/quantreg-nlrq-tidiers.R b/R/quantreg-nlrq-tidiers.R index ed5bffb0e..f41df50a3 100644 --- a/R/quantreg-nlrq-tidiers.R +++ b/R/quantreg-nlrq-tidiers.R @@ -43,7 +43,7 @@ tidy.nlrq <- function(x, conf.int = FALSE, conf.level = 0.95, ...) { if (conf.int) { x_summary <- summary(x) a <- (1 - conf.level) / 2 - cv <- qt(c(a, 1 - a), df = x_summary[["rdf"]]) + cv <- stats::qt(c(a, 1 - a), df = x_summary[["rdf"]]) ret[["conf.low"]] <- ret[["estimate"]] + (cv[1] * ret[["std.error"]]) ret[["conf.high"]] <- ret[["estimate"]] + (cv[2] * ret[["std.error"]]) } diff --git a/R/quantreg-rq-tidiers.R b/R/quantreg-rq-tidiers.R index 131afea46..6d1d4f989 100644 --- a/R/quantreg-rq-tidiers.R +++ b/R/quantreg-rq-tidiers.R @@ -186,7 +186,7 @@ process_rq <- function(rq_obj, se.type = NULL, } if (conf.int) { a <- (1 - conf.level) / 2 - cv <- qt(c(a, 1 - a), df = rq_obj[["rdf"]]) + cv <- stats::qt(c(a, 1 - a), df = rq_obj[["rdf"]]) co[["conf.low"]] <- co[["estimate"]] + (cv[1] * co[["std.error"]]) co[["conf.high"]] <- co[["estimate"]] + (cv[2] * co[["std.error"]]) } diff --git a/R/stats-mlm-tidiers.R b/R/stats-mlm-tidiers.R index c7d132886..b22622feb 100644 --- a/R/stats-mlm-tidiers.R +++ b/R/stats-mlm-tidiers.R @@ -76,7 +76,7 @@ tidy.mlm <- function(x, confint_mlm <- function(object, level = 0.95, ...) { coef <- as.numeric(coef(object)) alpha <- (1 - level) / 2 - crit_val <- qt(c(alpha, 1 - alpha), object$df.residual) + crit_val <- stats::qt(c(alpha, 1 - alpha), object$df.residual) se <- sqrt(diag(stats::vcov(object))) ci <- as.data.frame(coef + se %o% crit_val) colnames(ci) <- c("conf.low", "conf.high") From 20677bf4f2113e71b9a398454f88c3a5e5d0d604 Mon Sep 17 00:00:00 2001 From: Tom Palmer Date: Thu, 18 Jul 2024 17:21:45 +0100 Subject: [PATCH 2/2] Amend p-values calculations to use lower.tail=FALSE --- NEWS.md | 2 ++ R/lfe-tidiers.R | 2 +- R/spdep-tidiers.R | 4 ++-- R/survival-aareg-tidiers.R | 2 +- R/survival-survdiff-tidiers.R | 2 +- R/survival-survreg-tidiers.R | 2 +- 6 files changed, 8 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index 6ed4f9d21..4badf3e02 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,6 +24,8 @@ * Corrected coefficient values in `tidy.varest()` output (#1174). +* Improved the numerical precision of several p-value calculations. + # broom 1.0.5 * `tidy.coxph()` will now pass its ellipses `...` to `summary()` internally (#1151 by `@ste-tuf`). diff --git a/R/lfe-tidiers.R b/R/lfe-tidiers.R index ec5fa372d..3b02bedf8 100644 --- a/R/lfe-tidiers.R +++ b/R/lfe-tidiers.R @@ -169,7 +169,7 @@ tidy.felm <- function(x, conf.int = FALSE, conf.level = .95, fe = FALSE, se.type rename(estimate = effect, std.error = se) %>% select(contains("response"), dplyr::everything()) %>% mutate(statistic = estimate / std.error) %>% - mutate(p.value = 2 * (1 - stats::pt(statistic, df = N))) + mutate(p.value = 2 * (stats::pt(abs(statistic), df = N, lower.tail = FALSE))) if (conf.int) { crit_val_low <- stats::qnorm(1 - (1 - conf.level) / 2) diff --git a/R/spdep-tidiers.R b/R/spdep-tidiers.R index db44bf107..11c44b874 100644 --- a/R/spdep-tidiers.R +++ b/R/spdep-tidiers.R @@ -77,7 +77,7 @@ tidy.sarlm <- function(x, conf.int = FALSE, conf.level = .95, ...) { estimate = as.numeric(s$rho), std.error = as.numeric(s$rho.se), statistic = as.numeric(estimate / std.error), - p.value = as.numeric(2 * (1 - pnorm(abs(statistic)))) + p.value = as.numeric(2 * (stats::pnorm(abs(statistic), lower.tail = FALSE))) ) ret <- bind_rows(rho, ret) } @@ -89,7 +89,7 @@ tidy.sarlm <- function(x, conf.int = FALSE, conf.level = .95, ...) { estimate = as.numeric(s$lambda), std.error = as.numeric(s$lambda.se), statistic = as.numeric(estimate / std.error), - p.value = as.numeric(2 * (1 - pnorm(abs(statistic)))) + p.value = as.numeric(2 * (stats::pnorm(abs(statistic), lower.tail = FALSE))) ) ret <- bind_rows(ret, lambda) } diff --git a/R/survival-aareg-tidiers.R b/R/survival-aareg-tidiers.R index 879ed216a..7a15b3b4d 100644 --- a/R/survival-aareg-tidiers.R +++ b/R/survival-aareg-tidiers.R @@ -72,7 +72,7 @@ glance.aareg <- function(x, ...) { as_glance_tibble( statistic = chi, - p.value = as.numeric(1 - stats::pchisq(chi, df)), + p.value = as.numeric(stats::pchisq(chi, df, lower.tail = FALSE)), df = df, nobs = stats::nobs(x), na_types = "rrii" diff --git a/R/survival-survdiff-tidiers.R b/R/survival-survdiff-tidiers.R index c8ee7d648..be52c5411 100644 --- a/R/survival-survdiff-tidiers.R +++ b/R/survival-survdiff-tidiers.R @@ -78,6 +78,6 @@ glance.survdiff <- function(x, ...) { statistic = x$chisq, df = (sum(1 * (tmp > 0))) - 1 ) - rval$p.value <- 1 - stats::pchisq(rval$statistic, rval$df) + rval$p.value <- stats::pchisq(rval$statistic, rval$df, lower.tail = FALSE) rval } diff --git a/R/survival-survreg-tidiers.R b/R/survival-survreg-tidiers.R index 9a2f77a76..d9bfb9573 100644 --- a/R/survival-survreg-tidiers.R +++ b/R/survival-survreg-tidiers.R @@ -116,7 +116,7 @@ glance.survreg <- function(x, ...) { BIC = stats::BIC(x), df.residual = stats::df.residual(x), nobs = stats::nobs(x), - p.value = 1 - stats::pchisq(2 * diff(x$loglik), sum(x$df) - x$idf), + p.value = stats::pchisq(2 * diff(x$loglik), sum(x$df) - x$idf, lower.tail = FALSE), na_types = "iirrrriir" ) }