From 272183a9ba9a0f349460ba7ed1f0b37acd665c98 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 19:20:19 +0200 Subject: [PATCH 01/82] Fix check issues --- .Rbuildignore | 7 +++++++ R/phi.R | 10 +++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/.Rbuildignore b/.Rbuildignore index 96c5763c..d124fd41 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,10 @@ ^pkgdown$ ^.github$ \.code-workspace$ +^README\.rmd +^\.git$ +^WIP$ +\.lintr$ +^CRAN-SUBMISSION$ +^cran-comments\.md$ +^LICENSE\.md$ diff --git a/R/phi.R b/R/phi.R index 48a07f1e..fa459dac 100644 --- a/R/phi.R +++ b/R/phi.R @@ -19,19 +19,19 @@ phi.ftable <- function(tab, ...) { #' @export phi.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...) { - terms <- all.vars(formula) - tab <- table(data[[terms[1]]], data[[terms[2]]]) + formula_terms <- all.vars(formula) + tab <- table(data[[formula_terms[1]]], data[[formula_terms[2]]]) method <- match.arg(method) if (is.null(ci.lvl) || is.na(ci.lvl)) { - .cramer(tab) + .cramers_v(tab) } else { - straps <- sjstats::bootstrap(data[terms], n) + straps <- sjstats::bootstrap(data[formula_terms], n) tables <- lapply(straps$strap, function(x) { dat <- as.data.frame(x) table(dat[[1]], dat[[2]]) }) - phis <- sapply(tables, function(x) .phi(x)) + phis <- sapply(tables, .phi) ci <- boot_ci(phis, ci.lvl = ci.lvl, method = method) data_frame( From 20d05ef40a9f964be33ea9c01b3acc93e4a7bf8c Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 19:21:05 +0200 Subject: [PATCH 02/82] don't test on mac --- .github/workflows/R-check.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/R-check.yaml b/.github/workflows/R-check.yaml index 93fd9037..7c504b9b 100644 --- a/.github/workflows/R-check.yaml +++ b/.github/workflows/R-check.yaml @@ -32,8 +32,6 @@ jobs: ## This is because they are already run in `R-CMD-check-strict` workflow. 
## ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - { os: macOS-latest, r: "release" } - - { os: windows-latest, r: "devel" } - { os: windows-latest, r: "release" } From 684c9ede8db87fcc8b314274881e164e1f92b846 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 20:38:17 +0200 Subject: [PATCH 03/82] fixes --- DESCRIPTION | 1 - NAMESPACE | 4 --- R/S3-methods.R | 20 +++++---------- R/anova_stats.R | 64 ++++++++++++++++++++++------------------------ R/boot_ci.R | 13 +++++----- R/cv_error.R | 1 - R/helpfunctions.R | 1 - R/inequ_trends.R | 16 ++++-------- man/inequ_trend.Rd | 7 ++--- 9 files changed, 53 insertions(+), 74 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e25170ae..fa5b8e61 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,6 @@ Imports: parameters, performance, purrr, - rlang, sjlabelled, sjmisc, stats, diff --git a/NAMESPACE b/NAMESPACE index 5a224af0..85fb0ad5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -157,9 +157,6 @@ importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_lgl) importFrom(purrr,walk) -importFrom(rlang,.data) -importFrom(rlang,enquo) -importFrom(rlang,quo_name) importFrom(sjlabelled,as_numeric) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) @@ -198,6 +195,5 @@ importFrom(stats,vcov) importFrom(stats,weighted.mean) importFrom(stats,weights) importFrom(stats,xtabs) -importFrom(tidyr,gather) importFrom(tidyr,nest) importFrom(tidyr,unnest) diff --git a/R/S3-methods.R b/R/S3-methods.R index 434b700a..a7fae7b5 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -297,13 +297,10 @@ print.sj_resample <- function(x, ...) { -#' @importFrom tidyr gather -#' @importFrom rlang .data #' @export plot.sj_inequ_trend <- function(x, ...) { - if (!requireNamespace("ggplot2", quietly = TRUE)) { - stop("Package `ggplot2` required for plotting inequalities trends.", call. = F) - } + .data <- NULL + insight::check_if_installed("ggplot2") # add time indicator x$data$zeit <- seq_len(nrow(x$data)) @@ -312,14 +309,11 @@ plot.sj_inequ_trend <- function(x, ...) { gather.cols1 <- colnames(x$data)[!colnames(x$data) %in% c("zeit", "lo", "hi")] gather.cols2 <- colnames(x$data)[!colnames(x$data) %in% c("zeit", "rr", "rd")] - key_col <- "grp" - value_col <- "y" - # gather data to plot rr and rd - dat1 <- tidyr::gather(x$data, !! key_col, !! value_col, !! gather.cols1) + dat1 <- datawizard::data_to_long(x$data, select = gather.cols1, names_to = "grp", values_to = "y") # gather data for raw prevalences - dat2 <- tidyr::gather(x$data, !! key_col, !! value_col, !! gather.cols2) + dat2 <- datawizard::data_to_long(x$data, select = gather.cols1, names_to = "grp", values_to = "y") # Proper value names, for facet labels dat1$grp[dat1$grp == "rr"] <- "Rate Ratios" @@ -327,7 +321,7 @@ plot.sj_inequ_trend <- function(x, ...) { # plot prevalences gp1 <- ggplot2::ggplot(dat2, ggplot2::aes_string(x = "zeit", y = "y", colour = "grp")) + - ggplot2::geom_smooth(method = "loess", se = F) + + ggplot2::geom_smooth(method = "loess", se = FALSE) + ggplot2::labs(title = "Prevalance Rates for Lower and Higher SES Groups", y = "Prevalances", x = "Time", colour = "") + ggplot2::scale_color_manual(values = c("darkblue", "darkred"), labels = c("High SES", "Low SES")) @@ -335,11 +329,11 @@ plot.sj_inequ_trend <- function(x, ...) 
{ # plot rr and rd gp2 <- ggplot2::ggplot(dat1, ggplot2::aes_string(x = "zeit", y = "y", colour = "grp")) + - ggplot2::geom_smooth(method = "loess", se = F) + + ggplot2::geom_smooth(method = "loess", se = FALSE) + ggplot2::facet_wrap(~grp, ncol = 1, scales = "free") + ggplot2::labs(title = "Proportional Change in Rate Ratios and Rate Differences", colour = NULL, y = NULL, x = "Time") + - ggplot2::guides(colour = FALSE) + ggplot2::guides(colour = "none") suppressMessages(graphics::plot(gp1)) suppressMessages(graphics::plot(gp2)) diff --git a/R/anova_stats.R b/R/anova_stats.R index f7917687..70619b21 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -29,7 +29,7 @@ #' } #' @export anova_stats <- function(model, digits = 3) { - insight::check_if_installed("pwr") + insight::check_if_installed(c("pwr", "sjmisc")) # .Deprecated("effectsize::effectsize()", package = "effectsize") @@ -47,39 +47,38 @@ anova_stats <- function(model, digits = 3) { cohens.f <- sqrt(partial.etasq / (1 - partial.etasq)) # bind as data frame - as <- dplyr::bind_rows( + anov_stat <- rbinb( data.frame(etasq, partial.etasq, omegasq, partial.omegasq, epsilonsq, cohens.f), data.frame(etasq = NA, partial.etasq = NA, omegasq = NA, partial.omegasq = NA, epsilonsq = NA, cohens.f = NA) - ) %>% - sjmisc::add_columns(aov.sum) + ) + anov_stat <- sjmisc::add_columns(anov_stat, aov.sum) # get nr of terms - nt <- nrow(as) - 1 + nt <- nrow(anov_stat) - 1 # finally, compute power - power <- tryCatch( - { - c( - pwr::pwr.f2.test(u = as$df[1:nt], v = as$df[nrow(as)], f2 = as$cohens.f[1:nt]^2)[["power"]], - NA - ) - }, + as_power <- tryCatch( + c( + pwr::pwr.f2.test( + u = anov_stat$df[1:nt], + v = anov_stat$df[nrow(anov_stat)], + f2 = anov_stat$cohens.f[1:nt]^2 + )[["power"]], + NA + ), error = function(x) { NA } ) - out <- sjmisc::add_variables(as, power = power) %>% - sjmisc::round_num(digits = digits) %>% - as.data.frame() + out <- sjmisc::add_variables(anov_stat, power = as_power) + out <- as.data.frame(sjmisc::round_num(out, digits = digits)) class(out) <- c("sj_anova_stat", class(out)) out } - -#' @importFrom rlang .data aov_stat <- function(model, type) { aov.sum <- aov_stat_summary(model) aov.res <- aov_stat_core(aov.sum, type) @@ -165,36 +164,35 @@ aov_stat_core <- function(aov.sum, type) { N <- sum(aov.sum[["df"]]) + 1 - if (type == "omega") { + aovstat <- switch(type, # compute omega squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + omega = unlist(lapply(1:n_terms, function(x) { ss.term <- aov.sum[["sumsq"]][x] df.term <- aov.sum[["df"]][x] (ss.term - df.term * meansq.resid) / (ss.total + meansq.resid) - }) - } else if (type == "pomega") { + })), # compute partial omega squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + pomega = unlist(lapply(1:n_terms, function(x) { df.term <- aov.sum[["df"]][x] meansq.term <- aov.sum[["meansq"]][x] (df.term * (meansq.term - meansq.resid)) / (df.term * meansq.term + (N - df.term) * meansq.resid) - }) - } else if (type == "epsilon") { + })), # compute epsilon squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + epsilon = unlist(lapply(1:n_terms, function(x) { ss.term <- aov.sum[["sumsq"]][x] df.term <- aov.sum[["df"]][x] (ss.term - df.term * meansq.resid) / ss.total - }) - } else if (type == "eta") { + })), # compute eta squared for each model term - aovstat <- - purrr::map_dbl(1:n_terms, ~ aov.sum[["sumsq"]][.x] / sum(aov.sum[["sumsq"]])) - } else if (type %in% c("cohens.f", "peta")) { + eta = 
unlist(lapply(1:n_terms, function(x) { + aov.sum[["sumsq"]][x] / sum(aov.sum[["sumsq"]]) + })), # compute partial eta squared for each model term - aovstat <- - purrr::map_dbl(1:n_terms, ~ aov.sum[["sumsq"]][.x] / (aov.sum[["sumsq"]][.x] + ss.resid)) - } + cohens.f = , + peta = unlist(lapply(1:n_terms, function(x) { + aov.sum[["sumsq"]][x] / (aov.sum[["sumsq"]][x] + ss.resid) + })) + ) # compute Cohen's F if (type == "cohens.f") aovstat <- sqrt(aovstat / (1 - aovstat)) diff --git a/R/boot_ci.R b/R/boot_ci.R index a1ad3bdd..f399df20 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -129,9 +129,8 @@ #' spread_coef(models, append = FALSE) %>% #' # compute the CI for all bootstrapped model coefficients #' boot_ci()} -#' @importFrom rlang .data #' @export -boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = .95) { +boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) { # match arguments method <- match.arg(method) @@ -144,9 +143,9 @@ boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = .95) { # bootstrap values or quantiles if (method == "dist") { # get bootstrap standard error - bootse <- stats::qt((1 + ci.lvl) / 2, df = length(x) - 1) * stats::sd(x, na.rm = T) + bootse <- stats::qt((1 + ci.lvl) / 2, df = length(x) - 1) * stats::sd(x, na.rm = TRUE) # lower and upper confidence interval - ci <- mean(x, na.rm = T) + c(-bootse, bootse) + ci <- mean(x, na.rm = TRUE) + c(-bootse, bootse) } else { # CI based on quantiles of bootstrapped values ci <- stats::quantile(x, probs = c((1 - ci.lvl) / 2, (1 + ci.lvl) / 2)) @@ -168,7 +167,7 @@ boot_se <- function(data, ...) { # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # get bootstrap standard error - se <- stats::sd(x, na.rm = T) + se <- stats::sd(x, na.rm = TRUE) names(se) <- "std.err" se })) @@ -185,7 +184,7 @@ boot_p <- function(data, ...) { # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # compute t-statistic - t.stat <- mean(x, na.rm = T) / stats::sd(x, na.rm = T) + t.stat <- mean(x, na.rm = TRUE) / stats::sd(x, na.rm = TRUE) # compute p-value p <- 2 * stats::pt(abs(t.stat), df = length(x) - 1, lower.tail = FALSE) names(p) <- "p.value" @@ -202,7 +201,7 @@ boot_est <- function(data, ...) 
{ # compute mean for all values (= bootstrapped estimate) transform_boot_result(lapply(.dat, function(x) { - estimate <- mean(x, na.rm = T) + estimate <- mean(x, na.rm = TRUE) names(estimate) <- "estimate" estimate })) diff --git a/R/cv_error.R b/R/cv_error.R index c4880b12..5d999c4f 100644 --- a/R/cv_error.R +++ b/R/cv_error.R @@ -35,7 +35,6 @@ #' @importFrom dplyr mutate summarise #' @importFrom purrr map map2 map_dbl map_df #' @importFrom tidyr unnest -#' @importFrom rlang .data #' @importFrom insight find_response #' @importFrom performance rmse #' @export diff --git a/R/helpfunctions.R b/R/helpfunctions.R index ce36f369..1dc6804a 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -46,7 +46,6 @@ dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) #' @importFrom tidyr nest #' @importFrom dplyr select filter group_vars #' @importFrom stats complete.cases -#' @importFrom rlang .data get_grouped_data <- function(x) { # retain observations that are complete wrt grouping vars, then nest grps <- x %>% diff --git a/R/inequ_trends.R b/R/inequ_trends.R index e4415375..3b94a669 100644 --- a/R/inequ_trends.R +++ b/R/inequ_trends.R @@ -26,7 +26,7 @@ #' in changes in rate differences and rate ratios. The function implements #' the algorithm proposed by \emph{Mackenbach et al. 2015}. #' -#' @examples +#' @examplesIf requireNamespace("ggplot2") #' # This example reproduces Fig. 1 of Mackenbach et al. 2015, p.5 #' #' # 40 simulated time points, with an initial rate ratio of 2 and @@ -47,27 +47,21 @@ #' prev.data <- data.frame(lo, hi) #' #' # print values -#' inequ_trend(prev.data, lo, hi) +#' inequ_trend(prev.data, "lo", "hi") #' #' # plot trends - here we see that the relative inequalities #' # are increasing over time, while the absolute inequalities #' # are first increasing as well, but later are decreasing #' # (while rel. inequ. are still increasing) -#' plot(inequ_trend(prev.data, lo, hi)) +#' plot(inequ_trend(prev.data, "lo", "hi")) #' -#' @importFrom dplyr select -#' @importFrom rlang quo_name enquo .data #' @export inequ_trend <- function(data, prev.low, prev.hi) { # prepare data for prevalence rates for low and hi status groups if (is.null(data) || missing(data)) { dat <- data.frame(prev.low, prev.hi) } else { - # get variable names - # create quosures - low <- rlang::quo_name(rlang::enquo(prev.low)) - high <- rlang::quo_name(rlang::enquo(prev.hi)) - dat <- dplyr::select(data, !! low, !! high) + dat <- data[c(prev.low, prev.hi)] } # ensure common column names @@ -96,7 +90,7 @@ inequ_trend <- function(data, prev.low, prev.hi) { for (t in 2:nrow(dat)) { delta.low <- (dat$lo[t] - dat$lo[t - 1]) / dat$lo[t - 1] delta.hi <- (dat$hi[t] - dat$hi[t - 1]) / dat$hi[t - 1] - dat$rd[t] <- dat$rd[t - 1] + (dat$lo[t - 1 ] * delta.low - dat$hi[t - 1] * delta.hi) + dat$rd[t] <- dat$rd[t - 1] + (dat$lo[t - 1] * delta.low - dat$hi[t - 1] * delta.hi) } # return diff --git a/man/inequ_trend.Rd b/man/inequ_trend.Rd index 5a2bbc9f..25a188ed 100644 --- a/man/inequ_trend.Rd +++ b/man/inequ_trend.Rd @@ -36,6 +36,7 @@ in changes in rate differences and rate ratios. The function implements the algorithm proposed by \emph{Mackenbach et al. 2015}. } \examples{ +\dontshow{if (requireNamespace("ggplot2")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # This example reproduces Fig. 1 of Mackenbach et al. 
2015, p.5 # 40 simulated time points, with an initial rate ratio of 2 and @@ -56,14 +57,14 @@ for (i in 2:n) hi[i] <- hi[i - 1] * .97 prev.data <- data.frame(lo, hi) # print values -inequ_trend(prev.data, lo, hi) +inequ_trend(prev.data, "lo", "hi") # plot trends - here we see that the relative inequalities # are increasing over time, while the absolute inequalities # are first increasing as well, but later are decreasing # (while rel. inequ. are still increasing) -plot(inequ_trend(prev.data, lo, hi)) - +plot(inequ_trend(prev.data, "lo", "hi")) +\dontshow{\}) # examplesIf} } \references{ Mackenbach JP, Martikainen P, Menvielle G, de Gelder R. 2015. The Arithmetic of Reducing Relative and Absolute Inequalities in Health: A Theoretical Analysis Illustrated with European Mortality Data. Journal of Epidemiology and Community Health 70(7): 730-36. \doi{10.1136/jech-2015-207018} From 746e2d5b548bd78cf6b39cfb626a6982d3a5ae8a Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 20:42:08 +0200 Subject: [PATCH 04/82] fix --- R/anova_stats.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/anova_stats.R b/R/anova_stats.R index 70619b21..1569ef5a 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -47,7 +47,7 @@ anova_stats <- function(model, digits = 3) { cohens.f <- sqrt(partial.etasq / (1 - partial.etasq)) # bind as data frame - anov_stat <- rbinb( + anov_stat <- rbind( data.frame(etasq, partial.etasq, omegasq, partial.omegasq, epsilonsq, cohens.f), data.frame(etasq = NA, partial.etasq = NA, omegasq = NA, partial.omegasq = NA, epsilonsq = NA, cohens.f = NA) ) From e54c4ae645ae2df62f3558285245f264c73ecc2f Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 21:53:59 +0200 Subject: [PATCH 05/82] update --- DESCRIPTION | 4 +--- NAMESPACE | 42 ------------------------------------------ R/S3-methods.R | 20 ++------------------ R/boot_ci.R | 2 -- R/confint_ncg.R | 12 +++++------- R/cv.R | 1 - R/cv_error.R | 45 +++++++++++++++++++-------------------------- R/gmd.R | 1 - R/gof.R | 1 - R/helpfunctions.R | 3 +-- R/phi.R | 1 + R/se_ybar.R | 1 - R/svy_median.R | 5 +---- R/svyglmnb.R | 7 +------ R/svyglmzip.R | 13 +------------ R/var_pop.R | 8 ++------ R/weight.R | 1 - R/wtd_mean.R | 2 -- man/var_pop.Rd | 1 - 19 files changed, 34 insertions(+), 136 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fa5b8e61..3515a90a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,14 +25,11 @@ Imports: insight, lme4, magrittr, - MASS, - modelr, parameters, performance, purrr, sjlabelled, sjmisc, - stats, tidyr Suggests: brms, @@ -41,6 +38,7 @@ Suggests: coin, ggplot2, graphics, + MASS, pscl, pwr, sjPlot, diff --git a/NAMESPACE b/NAMESPACE index 85fb0ad5..b1154c27 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -125,34 +125,26 @@ export(weighted_se) export(weighted_ttest) export(wtd_sd) export(xtab_statistics) -importFrom(MASS,glm.nb) importFrom(bayestestR,ci) importFrom(bayestestR,equivalence_test) importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,group_vars) -importFrom(dplyr,mutate) importFrom(dplyr,quos) importFrom(dplyr,select) importFrom(dplyr,select_if) -importFrom(dplyr,summarise) importFrom(insight,export_table) -importFrom(insight,find_formula) -importFrom(insight,find_response) importFrom(insight,format_p) importFrom(insight,format_value) -importFrom(insight,get_data) importFrom(insight,get_response) importFrom(insight,link_inverse) importFrom(insight,print_color) importFrom(lme4,ngrps) importFrom(magrittr,"%>%") -importFrom(modelr,crossv_kfold) 
importFrom(performance,mse) importFrom(performance,rmse) importFrom(purrr,flatten_df) importFrom(purrr,map) -importFrom(purrr,map2) importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_lgl) @@ -160,40 +152,6 @@ importFrom(purrr,walk) importFrom(sjlabelled,as_numeric) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) -importFrom(sjmisc,is_num_fac) importFrom(sjmisc,str_contains) -importFrom(sjmisc,trim) importFrom(sjmisc,typical_value) -importFrom(stats,as.formula) -importFrom(stats,chisq.test) -importFrom(stats,coef) -importFrom(stats,complete.cases) -importFrom(stats,dpois) -importFrom(stats,family) -importFrom(stats,fitted) -importFrom(stats,formula) -importFrom(stats,kruskal.test) -importFrom(stats,lm) -importFrom(stats,model.frame) -importFrom(stats,model.matrix) -importFrom(stats,na.omit) -importFrom(stats,na.pass) -importFrom(stats,nobs) -importFrom(stats,pf) -importFrom(stats,pnorm) -importFrom(stats,predict.glm) -importFrom(stats,pt) -importFrom(stats,qf) -importFrom(stats,qnorm) -importFrom(stats,resid) -importFrom(stats,sd) -importFrom(stats,setNames) -importFrom(stats,terms) -importFrom(stats,update) -importFrom(stats,var) -importFrom(stats,vcov) -importFrom(stats,weighted.mean) -importFrom(stats,weights) -importFrom(stats,xtabs) importFrom(tidyr,nest) -importFrom(tidyr,unnest) diff --git a/R/S3-methods.R b/R/S3-methods.R index a7fae7b5..40fb6e19 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -1,12 +1,9 @@ -#' @importFrom stats model.matrix -#' @importFrom insight get_data #' @export model.matrix.gls <- function(object, ...) { stats::model.matrix(object, data = insight::get_data(object)) } -#' @importFrom stats coef vcov pnorm #' @importFrom dplyr case_when #' @export print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { @@ -33,7 +30,6 @@ print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { -#' @importFrom stats coef vcov pnorm #' @importFrom dplyr case_when #' @export print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { @@ -56,7 +52,6 @@ print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { -#' @importFrom stats qnorm coef pnorm vcov tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -80,7 +75,6 @@ tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { -#' @importFrom stats qnorm coef pnorm vcov tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -142,16 +136,12 @@ formula.svyglm.zip <- function(x, ...) { -#' @importFrom MASS glm.nb -#' @importFrom stats coef setNames predict.glm #' @export predict.svyglm.nb <- function(object, newdata = NULL, type = c("link", "response", "terms"), se.fit = FALSE, dispersion = NULL, terms = NULL, na.action = na.pass, ...) { - - if (!isNamespaceLoaded("survey")) - requireNamespace("survey", quietly = TRUE) + insight::check_if_installed(c("survey", "MASS")) fnb <- MASS::glm.nb( attr(object, "nb.formula", exact = TRUE), @@ -178,9 +168,6 @@ predict.svyglm.nb <- function(object, newdata = NULL, } -#' @importFrom MASS glm.nb -#' @importFrom stats coef setNames predict.glm -#' @importFrom insight get_response #' @export residuals.svyglm.nb <- function(object, ...) { @@ -201,7 +188,6 @@ residuals.svyglm.nb <- function(object, ...) { } -#' @importFrom stats terms formula #' @export terms.svyglm.nb <- function(x, ...) { @@ -235,7 +221,6 @@ deviance.svyglm.nb <- function(object, ...) 
{ } -#' @importFrom insight print_color #' @export print.tidy_stan <- function(x, ...) { insight::print_color("\nSummary Statistics of Stan-Model\n\n", "blue") @@ -261,8 +246,8 @@ print.tidy_stan <- function(x, ...) { } -#' @importFrom sjmisc trim clean_term_name <- function(x) { + insight::check_if_installed("sjmisc") x <- sjmisc::trim(x) format(x, width = max(nchar(x))) } @@ -340,7 +325,6 @@ plot.sj_inequ_trend <- function(x, ...) { } -#' @importFrom stats kruskal.test na.omit #' @export print.sj_mwu <- function(x, ...) { insight::print_color("\n# Mann-Whitney-U-Test\n\n", "blue") diff --git a/R/boot_ci.R b/R/boot_ci.R index f399df20..3d7601ca 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -158,7 +158,6 @@ boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) { #' @rdname boot_ci -#' @importFrom stats sd #' @export boot_se <- function(data, ...) { # evaluate arguments, generate data @@ -175,7 +174,6 @@ boot_se <- function(data, ...) { #' @rdname boot_ci -#' @importFrom stats sd pt #' @export boot_p <- function(data, ...) { # evaluate arguments, generate data diff --git a/R/confint_ncg.R b/R/confint_ncg.R index 6996d7da..abf8a305 100644 --- a/R/confint_ncg.R +++ b/R/confint_ncg.R @@ -3,7 +3,6 @@ # Author: Ken Kelley # License: GPL-3 -#' @importFrom stats pf qf confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = NULL) { alpha.lower <- alpha.upper <- (1 - conf.level) / 2 tol <- 1e-09 @@ -40,12 +39,12 @@ confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = N Diff.2 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = LL.Bounds[2]) - (1 - alpha.lower) > tol Diff.3 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = LL.Bounds[3]) - (1 - alpha.lower) > tol - if (isTRUE(Diff.1) & isTRUE(Diff.2) & !isTRUE(Diff.3)) { + if (isTRUE(Diff.1) && isTRUE(Diff.2) && !isTRUE(Diff.3)) { LL.Bounds <- c(LL.Bounds[2], (LL.Bounds[2] + LL.Bounds[3]) / 2, LL.Bounds[3]) } - if (isTRUE(Diff.1) & !isTRUE(Diff.2) & !isTRUE(Diff.3)) { + if (isTRUE(Diff.1) && !isTRUE(Diff.2) && !isTRUE(Diff.3)) { LL.Bounds <- c(LL.Bounds[1], (LL.Bounds[1] + LL.Bounds[2]) / 2, LL.Bounds[2]) } @@ -89,14 +88,13 @@ confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = N Diff.2 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[2]) - alpha.upper > tol Diff.3 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[3]) - alpha.upper > tol - if (isTRUE(Diff.1) & isTRUE(Diff.2) & !isTRUE(Diff.3)) { + if (isTRUE(Diff.1) && isTRUE(Diff.2) && !isTRUE(Diff.3)) { UL.Bounds <- c(UL.Bounds[2], (UL.Bounds[2] + UL.Bounds[3]) / 2, UL.Bounds[3]) } - if (isTRUE(Diff.1) & !isTRUE(Diff.2) & !isTRUE(Diff.3)) { - UL.Bounds <- c(UL.Bounds[1], (UL.Bounds[1] + - UL.Bounds[2])/2, UL.Bounds[2]) + if (isTRUE(Diff.1) && !isTRUE(Diff.2) && !isTRUE(Diff.3)) { + UL.Bounds <- c(UL.Bounds[1], (UL.Bounds[1] + UL.Bounds[2]) / 2, UL.Bounds[2]) } Diff <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[2]) - alpha.upper diff --git a/R/cv.R b/R/cv.R index 38d6ece0..e21a0fc9 100644 --- a/R/cv.R +++ b/R/cv.R @@ -20,7 +20,6 @@ #' fit <- lm(barthtot ~ c160age + c12hour, data = efc) #' cv(fit) #' -#' @importFrom stats sd #' @export cv <- function(x, ...) 
{ # return value diff --git a/R/cv_error.R b/R/cv_error.R index 5d999c4f..557b8d15 100644 --- a/R/cv_error.R +++ b/R/cv_error.R @@ -31,40 +31,33 @@ #' neg_c_7 ~ barthtot + c12hour #' )) #' -#' @importFrom modelr crossv_kfold -#' @importFrom dplyr mutate summarise -#' @importFrom purrr map map2 map_dbl map_df -#' @importFrom tidyr unnest -#' @importFrom insight find_response -#' @importFrom performance rmse #' @export cv_error <- function(data, formula, k = 5) { - insight::check_if_installed("broom") - - # compute cross validation data - cv_data <- data %>% - modelr::crossv_kfold(k = k) %>% - dplyr::mutate( - trained.models = purrr::map(.data$train, ~ stats::lm(formula, data = .x)), - predicted = purrr::map2(.data$trained.models, .data$test, ~ broom::augment(.x, newdata = .y)), - residuals = purrr::map(.data$predicted, ~.x[[insight::find_response(formula)]] - .x$.fitted), - rmse.train = purrr::map_dbl(.data$trained.models, ~ performance::rmse(.x)) - ) + insight::check_if_installed("datawizard") + # response + resp <- insight::find_response(formula) - # Training error - train.error <- dplyr::summarise(cv_data, train.error = mean(.data$rmse.train, na.rm = TRUE)) + # compute cross validation data + cv_data <- lapply(k, function(i) { + datawizard::data_partition(data, proportion = 0.8) + }) + # get train and test datasets + train_data <- lapply(cv_data, function(cvdat) cvdat[[1]]) + test_data <- lapply(cv_data, function(cvdat) cvdat[[2]]) + # fit models on datasets + trained_models <- lapply(train_data, function(x) stats::lm(formula, data = x)) + test_models <- lapply(test_data, function(x) stats::lm(formula, data = x)) - # Test error - test.error <- cv_data %>% - tidyr::unnest(.data$predicted, .data$residuals) %>% - dplyr::summarise(test.error = sqrt(mean(.data$residuals^2, na.rm = TRUE))) + # RMSE + train_error <- mean(vapply(trained_models, performance::rmse, numeric(1)), na.rm = TRUE) + test_error <- mean(vapply(test_models, performance::rmse, numeric(1)), na.rm = TRUE) data_frame( model = deparse(formula), - train.error = round(train.error[[1]], 4), - test.error = round(test.error[[1]], 4) + train.error = round(train_error, 4), + test.error = round(test_error, 4) ) } @@ -73,5 +66,5 @@ cv_error <- function(data, formula, k = 5) { #' @rdname cv_error #' @export cv_compare <- function(data, formulas, k = 5) { - purrr::map_df(formulas, ~ cv_error(data, formula = .x, k = k)) + do.call(rbind, lapply(formulas, function(f) cv_error(data, formula = f, k = k))) } diff --git a/R/gmd.R b/R/gmd.R index c7c3eaf4..9dab2e96 100644 --- a/R/gmd.R +++ b/R/gmd.R @@ -39,7 +39,6 @@ gmd <- function(x, ...) 
{ } -#' @importFrom stats na.omit gmd_helper <- function(x) { if (!is.numeric(x)) return(NA) diff --git a/R/gof.R b/R/gof.R index 3699a235..9b6d614e 100644 --- a/R/gof.R +++ b/R/gof.R @@ -48,7 +48,6 @@ #' # equal to population #' chisq_gof(efc$e42dep, prop.table(table(efc$e42dep))) #' -#' @importFrom stats na.omit fitted resid formula as.formula lm pnorm chisq.test #' @export chisq_gof <- function(x, prob = NULL, weights = NULL) { if (inherits(x, "glm")) { diff --git a/R/helpfunctions.R b/R/helpfunctions.R index 1dc6804a..32bc546c 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -45,7 +45,6 @@ dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) #' @importFrom tidyr nest #' @importFrom dplyr select filter group_vars -#' @importFrom stats complete.cases get_grouped_data <- function(x) { # retain observations that are complete wrt grouping vars, then nest grps <- x %>% @@ -64,7 +63,7 @@ get_grouped_data <- function(x) { .compact_character <- function(x) { - x[!sapply(x, function(i) is.null(i) || nchar(i) == 0 || is.na(i) || any(i == "NULL", na.rm = TRUE))] + x[!sapply(x, function(i) is.null(i) || !nzchar(i, keepNA = TRUE) || is.na(i) || any(i == "NULL", na.rm = TRUE))] } diff --git a/R/phi.R b/R/phi.R index fa459dac..eb57406c 100644 --- a/R/phi.R +++ b/R/phi.R @@ -44,6 +44,7 @@ phi.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist .phi <- function(tab) { + insight::check_if_installed("MASS") # convert to flat table if (!inherits(tab, "ftable")) tab <- stats::ftable(tab) diff --git a/R/se_ybar.R b/R/se_ybar.R index 0b1bc882..59335f3f 100644 --- a/R/se_ybar.R +++ b/R/se_ybar.R @@ -18,7 +18,6 @@ #' se_ybar(fit) #' } #' @importFrom lme4 ngrps -#' @importFrom stats nobs #' @importFrom purrr map_dbl #' @export se_ybar <- function(fit) { diff --git a/R/svy_median.R b/R/svy_median.R index 963aea2c..d2a14c1f 100644 --- a/R/svy_median.R +++ b/R/svy_median.R @@ -1,11 +1,8 @@ #' @rdname weighted_sd -#' @importFrom stats as.formula #' @export survey_median <- function(x, design) { # check if pkg survey is available - if (!requireNamespace("survey", quietly = TRUE)) { - stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE) - } + insight::check_if_installed("suvey") # deparse v <- stats::as.formula(paste("~", as.character(substitute(x)))) diff --git a/R/svyglmnb.R b/R/svyglmnb.R index 309b8095..26c84d52 100644 --- a/R/svyglmnb.R +++ b/R/svyglmnb.R @@ -65,14 +65,9 @@ utils::globalVariables("scaled.weights") #' # print coefficients and standard errors #' fit #' } -#' @importFrom MASS glm.nb -#' @importFrom stats weights update model.frame coef as.formula family #' @export svyglm.nb <- function(formula, design, ...) { - # check if pkg survey is available - if (!requireNamespace("survey", quietly = TRUE)) { - stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE) - } + insight::check_if_installed(c("survey", "MASS")) # get design weights. we need to scale these weights for the glm.nb() function dw <- stats::weights(design) diff --git a/R/svyglmzip.R b/R/svyglmzip.R index f9b575b9..a56c4e5c 100644 --- a/R/svyglmzip.R +++ b/R/svyglmzip.R @@ -45,19 +45,9 @@ utils::globalVariables("scaled.weights") #' # print coefficients and standard errors #' fit #' } -#' @importFrom insight find_formula -#' @importFrom stats weights update model.frame coef as.formula family #' @export svyglm.zip <- function(formula, design, ...) 
{ - # check if pkg survey is available - if (!requireNamespace("survey", quietly = TRUE)) { - stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE) - } - - if (!requireNamespace("pscl", quietly = TRUE)) { - stop("Package `pscl` needed to for this function to work. Please install it.", call. = FALSE) - } - + insight::check_if_installed(c("survey", "pscl")) # get design weights. we need to scale these weights for the glm.nb() function dw <- stats::weights(design) @@ -94,7 +84,6 @@ svyglm.zip <- function(formula, design, ...) { } -#' @importFrom stats dpois # log-likelihood function used in "svymle()" sjstats_loglik_zip <- function(y, eta, logitp) { mu <- exp(eta) diff --git a/R/var_pop.R b/R/var_pop.R index bfe5367d..a269d763 100644 --- a/R/var_pop.R +++ b/R/var_pop.R @@ -22,12 +22,9 @@ #' sd(efc$c12hour, na.rm = TRUE) #' # population sd #' sd_pop(efc$c12hour) -#' -#' @importFrom stats na.omit var -#' @importFrom sjmisc is_num_fac -#' @importFrom sjlabelled as_numeric #' @export var_pop <- function(x) { + insight::check_if_installed(c("sjmisc", "datawizard")) # check for categorical if (is.factor(x)) { # only allow numeric factors @@ -36,7 +33,7 @@ var_pop <- function(x) { return(NA) } # convert factor to numeric - x <- sjlabelled::as_numeric(x) + x <- datawizard::to_numeric(x, dummy_factors = FALSE) } # remove NA @@ -49,7 +46,6 @@ var_pop <- function(x) { #' @rdname var_pop -#' @importFrom stats na.omit var #' @export sd_pop <- function(x) { # get population variance diff --git a/R/weight.R b/R/weight.R index c36a3c55..43888455 100644 --- a/R/weight.R +++ b/R/weight.R @@ -38,7 +38,6 @@ #' table(x) #' table(weight(x, w)) #' -#' @importFrom stats na.pass xtabs #' @importFrom sjlabelled as_numeric #' @export weight <- function(x, weights, digits = 0) { diff --git a/R/wtd_mean.R b/R/wtd_mean.R index 2b5f369b..6b373600 100644 --- a/R/wtd_mean.R +++ b/R/wtd_mean.R @@ -4,14 +4,12 @@ weighted_mean <- function(x, weights = NULL) { UseMethod("weighted_mean") } -#' @importFrom stats weighted.mean #' @export weighted_mean.default <- function(x, weights = NULL) { if (is.null(weights)) weights <- rep(1, length(x)) stats::weighted.mean(x, w = weights, na.rm = TRUE) } -#' @importFrom stats weighted.mean #' @importFrom purrr map_dbl #' @importFrom dplyr select_if #' @export diff --git a/man/var_pop.Rd b/man/var_pop.Rd index a25c5ac6..79bdc64b 100644 --- a/man/var_pop.Rd +++ b/man/var_pop.Rd @@ -35,5 +35,4 @@ var_pop(efc$c12hour) sd(efc$c12hour, na.rm = TRUE) # population sd sd_pop(efc$c12hour) - } From e3609cbaf669a7c73cae2c8bc9b6d221e1bc3fc2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 22:10:01 +0200 Subject: [PATCH 06/82] cleanup --- NAMESPACE | 7 ---- R/S3-methods.R | 103 ++++++------------------------------------------- R/prop.R | 49 ++++++++++++----------- man/prop.Rd | 29 +++++++------- 4 files changed, 51 insertions(+), 137 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b1154c27..becc1365 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,23 +12,19 @@ S3method(formula,svyglm.nb) S3method(formula,svyglm.zip) S3method(model.frame,svyglm.nb) S3method(model.frame,svyglm.zip) -S3method(model.matrix,gls) S3method(phi,formula) S3method(phi,ftable) S3method(phi,table) S3method(plot,sj_inequ_trend) S3method(predict,svyglm.nb) S3method(print,sj_anova_stat) -S3method(print,sj_check_assump) S3method(print,sj_chi2gof) S3method(print,sj_grpmean) -S3method(print,sj_grpmeans) S3method(print,sj_htest_chi) S3method(print,sj_htest_kw) 
S3method(print,sj_htest_mwu) S3method(print,sj_mwu) S3method(print,sj_outliers) -S3method(print,sj_pval) S3method(print,sj_resample) S3method(print,sj_ttest) S3method(print,sj_wcor) @@ -39,7 +35,6 @@ S3method(print,svyglm.nb) S3method(print,svyglm.zip) S3method(print,tidy_stan) S3method(residuals,svyglm.nb) -S3method(summary,sj_pval) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) @@ -143,12 +138,10 @@ importFrom(lme4,ngrps) importFrom(magrittr,"%>%") importFrom(performance,mse) importFrom(performance,rmse) -importFrom(purrr,flatten_df) importFrom(purrr,map) importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_lgl) -importFrom(purrr,walk) importFrom(sjlabelled,as_numeric) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) diff --git a/R/S3-methods.R b/R/S3-methods.R index 40fb6e19..fe8c8289 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -1,9 +1,3 @@ -#' @export -model.matrix.gls <- function(object, ...) { - stats::model.matrix(object, data = insight::get_data(object)) -} - - #' @importFrom dplyr case_when #' @export print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { @@ -30,19 +24,17 @@ print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { -#' @importFrom dplyr case_when #' @export print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { se <- match.arg(se) sm <- tidy_svyglm.zip(x, digits, v_se = se)[-1, ] - pan <- dplyr::case_when( - sm$p.value < 0.001 ~ "<0.001 ***", - sm$p.value < 0.01 ~ sprintf("%.*f ** ", digits, sm$p.value), - sm$p.value < 0.05 ~ sprintf("%.*f * ", digits, sm$p.value), - sm$p.value < 0.1 ~ sprintf("%.*f . ", digits, sm$p.value), - TRUE ~ sprintf("%.*f ", digits, sm$p.value) - ) + pan <- ifelse(sm$p.value < 0.001, "<0.001 ***", + ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), + ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), + ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), + sprintf("%.*f ", digits, sm$p.value) + )))) sm$p.value <- pan print(sm, ...) @@ -97,20 +89,18 @@ tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { -#' @importFrom dplyr select #' @export model.frame.svyglm.nb <- function(formula, ...) { - pred <- attr(formula, "nb.terms", exact = T) - dplyr::select(formula$design$variables, string_one_of(pattern = pred, x = colnames(formula$design$variables))) + pred <- attr(formula, "nb.terms", exact = TRUE) + formula$design$variables[intersect(pred, colnames(formula$design$variables))] } -#' @importFrom dplyr select #' @export model.frame.svyglm.zip <- function(formula, ...) { - pred <- attr(formula, "zip.terms", exact = T) - dplyr::select(formula$design$variables, string_one_of(pattern = pred, x = colnames(formula$design$variables))) + pred <- attr(formula, "zip.terms", exact = TRUE) + formula$design$variables[intersect(pred, colnames(formula$design$variables))] } @@ -198,14 +188,11 @@ terms.svyglm.nb <- function(x, ...) { } -#' @importFrom purrr map flatten_df #' @export AIC.svyglm.nb <- function(object, ...) { ## FIXME this one just returns the AIC of the underlying glm.nb() model - list(object, ...) %>% - purrr::map(~ getaic(.x)) %>% - purrr::flatten_df() %>% - as.data.frame() + aics <- lapply(list(object, ...), getaic) + as.data.frame(do.call(rbind, aics)) } @@ -477,48 +464,6 @@ print_grpmean <- function(x, digits = NULL, ...) { } -#' @importFrom purrr walk -#' @export -print.sj_grpmeans <- function(x, ...) 
{ - - cat("\n") - purrr::walk(x, function(dat) { - # get grouping title label - grp <- attr(dat, "group", exact = T) - - # print title for grouping - insight::print_color(sprintf("Grouped by:\n%s\n\n", grp), "cyan") - - # print grpmean-table - print_grpmean(dat, digits = attributes(x)$digits, ...) - - cat("\n\n") - }) -} - - -#' @export -print.sj_pval <- function(x, digits = 3, summary = FALSE, ...) { - - if (summary) { - df.kr <- attr(x, "df.kr", exact = TRUE) - t.kr <- attr(x, "t.kr", exact = TRUE) - - if (!is.null(df.kr)) x$df <- df.kr - if (!is.null(t.kr)) x$statistic <- t.kr - } - - x <- purrr::map_if(x, is.numeric, round, digits = digits) - print.data.frame(as.data.frame(x), ..., row.names = TRUE) -} - - -#' @export -summary.sj_pval <- function(object, digits = 3, summary = FALSE, ...) { - print(object, digits, summary = TRUE) -} - - #' @export print.sj_chi2gof <- function(x, ...) { insight::print_color("\n# Chi-squared Goodness-of-Fit Test\n\n", "blue") @@ -540,30 +485,6 @@ print.sj_chi2gof <- function(x, ...) { } -#' @export -print.sj_check_assump <- function(x, ...) { - insight::print_color("\n# Checking Model-Assumptions\n\n", "blue") - cat(sprintf(" Model: %s", attr(x, "formula", exact = TRUE))) - - insight::print_color("\n\n violated statistic\n", "red") - - v1 <- ifelse(x$heteroskedasticity < 0.05, "yes", "no") - v2 <- ifelse(x$multicollinearity > 4, "yes", "no") - v3 <- ifelse(x$non.normal.resid < 0.05, "yes", "no") - v4 <- ifelse(x$autocorrelation < 0.05, "yes", "no") - - s1 <- sprintf("p = %.3f", x$heteroskedasticity) - s2 <- sprintf("vif = %.3f", x$multicollinearity) - s3 <- sprintf("p = %.3f", x$non.normal.resid) - s4 <- sprintf("p = %.3f", x$autocorrelation) - - cat(sprintf(" Heteroskedasticity %8s %11s\n", v1, s1)) - cat(sprintf(" Non-normal residuals %8s %11s\n", v3, s3)) - cat(sprintf(" Autocorrelated residuals%8s %11s\n", v4, s4)) - cat(sprintf(" Multicollinearity %8s %11s\n", v2, s2)) -} - - #' @export print.sj_ttest <- function(x, ...) { insight::print_color(sprintf("\n%s (%s)\n", x$method, x$alternative), "blue") diff --git a/R/prop.R b/R/prop.R index 167b3cdd..fc000815 100644 --- a/R/prop.R +++ b/R/prop.R @@ -35,7 +35,7 @@ #' returns a data frame with one column per group with grouping categories, #' followed by one column with proportions per condition. 
#' -#' @examples +#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("datawizard", quietly = TRUE) #' data(efc) #' #' # proportion of value 1 in e42dep @@ -52,10 +52,10 @@ #' prop(efc, e42dep == 1, e42dep > 2, na.rm = FALSE) #' #' # for factors or character vectors, use quoted or unquoted values -#' library(sjmisc) +#' library(datawizard) #' # convert numeric to factor, using labels as factor levels -#' efc$e16sex <- to_label(efc$e16sex) -#' efc$n4pstu <- to_label(efc$n4pstu) +#' efc$e16sex <- to_factor(efc$e16sex) +#' efc$n4pstu <- to_factor(efc$n4pstu) #' #' # get proportion of female older persons #' prop(efc, e16sex == female) @@ -70,24 +70,23 @@ #' ) #' #' # also works with pipe-chains -#' library(dplyr) -#' efc %>% prop(e17age > 70) -#' efc %>% prop(e17age > 70, e16sex == 1) +#' efc |> prop(e17age > 70) +#' efc |> prop(e17age > 70, e16sex == 1) #' #' # and with group_by -#' efc %>% -#' group_by(e16sex) %>% +#' efc |> +#' data_group(e16sex) |> #' prop(e42dep > 2) #' -#' efc %>% -#' select(e42dep, c161sex, c172code, e16sex) %>% -#' group_by(c161sex, c172code) %>% +#' efc |> +#' data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> +#' data_group(c161sex, c172code) |> #' prop(e42dep > 2, e16sex == 1) #' #' # same for "props()" -#' efc %>% -#' select(e42dep, c161sex, c172code, c12hour, n4pstu) %>% -#' group_by(c161sex, c172code) %>% +#' efc |> +#' data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> +#' data_group(c161sex, c172code) |> #' props( #' e42dep > 2, #' c12hour > 20 & c12hour < 40, @@ -103,7 +102,7 @@ prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { # get dots dots <- match.call(expand.dots = FALSE)$`...` - proportions(data, dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) + .proportions(data, dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) } @@ -118,12 +117,11 @@ props <- function(data, ..., na.rm = TRUE, digits = 4) { # get dots dots <- match.call(expand.dots = FALSE)$`...` - proportions(data, dots, NULL, na.rm, digits, multi_logical = TRUE) + .proportions(data, dots, NULL, na.rm, digits, multi_logical = TRUE) } -#' @importFrom purrr map_df -proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { +.proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { # remember comparisons comparisons <- lapply(dots, function(x) { # to character, and remove spaces and quotes @@ -142,7 +140,7 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { grps <- get_grouped_data(data) # now get proportions for each subset - fr <- purrr::map_df( + fr <- do.call(rbind, lapply( seq_len(nrow(grps)), function(i) { # get data from grouped data frame @@ -156,7 +154,7 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { as.data.frame(t(unlist(result))) } - ) + )) # now we need the values from the groups of the grouped data frame @@ -174,12 +172,13 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { } # add row order, based on values of grouping variables - reihenfolge <- rep(sort(unique(sjlabelled::as_numeric(data[[var.name]]))), length.out = nrow(fr)) %>% - as.data.frame() %>% - dplyr::bind_cols(reihenfolge) + reihenfolge <- cbind( + as.data.frame(rep(sort(unique(sjlabelled::as_numeric(data[[var.name]]))), length.out = nrow(fr))), + reihenfolge + ) # bind values as column - fr <- dplyr::bind_cols(data.frame(val.labels, stringsAsFactors = FALSE), fr) + fr <- cbind(data.frame(val.labels, 
stringsAsFactors = FALSE), fr) } # get column names. we need variable labels as column names diff --git a/man/prop.Rd b/man/prop.Rd index 6f263659..60274772 100644 --- a/man/prop.Rd +++ b/man/prop.Rd @@ -51,6 +51,7 @@ also processes comparisons, which are passed as character vector (see 'Examples'). } \examples{ +\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("sjmisc", quietly = TRUE) && requireNamespace("datawizard", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) # proportion of value 1 in e42dep @@ -67,10 +68,10 @@ props(efc, e17age > 70 & e17age < 80) prop(efc, e42dep == 1, e42dep > 2, na.rm = FALSE) # for factors or character vectors, use quoted or unquoted values -library(sjmisc) +library(datawizard) # convert numeric to factor, using labels as factor levels -efc$e16sex <- to_label(efc$e16sex) -efc$n4pstu <- to_label(efc$n4pstu) +efc$e16sex <- to_factor(efc$e16sex) +efc$n4pstu <- to_factor(efc$n4pstu) # get proportion of female older persons prop(efc, e16sex == female) @@ -85,27 +86,27 @@ props(efc, ) # also works with pipe-chains -library(dplyr) -efc \%>\% prop(e17age > 70) -efc \%>\% prop(e17age > 70, e16sex == 1) +efc |> prop(e17age > 70) +efc |> prop(e17age > 70, e16sex == 1) # and with group_by -efc \%>\% - group_by(e16sex) \%>\% +efc |> + data_group(e16sex) |> prop(e42dep > 2) -efc \%>\% - select(e42dep, c161sex, c172code, e16sex) \%>\% - group_by(c161sex, c172code) \%>\% +efc |> + data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> + data_group(c161sex, c172code) |> prop(e42dep > 2, e16sex == 1) # same for "props()" -efc \%>\% - select(e42dep, c161sex, c172code, c12hour, n4pstu) \%>\% - group_by(c161sex, c172code) \%>\% +efc |> + data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> + data_group(c161sex, c172code) |> props( e42dep > 2, c12hour > 20 & c12hour < 40, n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3' ) +\dontshow{\}) # examplesIf} } From 8a80d97f5024852bfb86b0af4d914325f46fd3bf Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 22:17:06 +0200 Subject: [PATCH 07/82] fixes --- NAMESPACE | 16 +--- R/prop.R | 6 +- R/re-exports.R | 12 +++ R/svy_median.R | 2 +- R/wtd_cor.R | 6 +- R/wtd_mean.R | 20 ----- R/wtd_median.R | 40 --------- R/wtd_mwu.R | 74 ---------------- R/wtd_sd.R | 115 ------------------------- R/wtd_se.R | 85 +++++++++++++++++- R/wtd_ttest.R | 6 +- R/wtd_variance.R | 4 +- man/prop.Rd | 8 +- man/reexports.Rd | 5 ++ man/{weighted_sd.Rd => weighted_se.Rd} | 36 ++------ 15 files changed, 126 insertions(+), 309 deletions(-) delete mode 100644 R/wtd_mean.R delete mode 100644 R/wtd_median.R delete mode 100644 R/wtd_mwu.R delete mode 100644 R/wtd_sd.R rename man/{weighted_sd.Rd => weighted_se.Rd} (88%) diff --git a/NAMESPACE b/NAMESPACE index becc1365..409d0466 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -38,15 +38,6 @@ S3method(residuals,svyglm.nb) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) -S3method(weighted_mannwhitney,default) -S3method(weighted_mannwhitney,formula) -S3method(weighted_mean,data.frame) -S3method(weighted_mean,default) -S3method(weighted_median,data.frame) -S3method(weighted_median,default) -S3method(weighted_sd,data.frame) -S3method(weighted_sd,default) -S3method(weighted_sd,matrix) S3method(weighted_se,data.frame) S3method(weighted_se,default) S3method(weighted_se,matrix) @@ -111,23 +102,22 @@ export(var_pop) export(weight) export(weight2) export(weighted_correlation) 
-export(weighted_mannwhitney) export(weighted_mean) export(weighted_median) -export(weighted_ranktest) export(weighted_sd) export(weighted_se) export(weighted_ttest) -export(wtd_sd) export(xtab_statistics) importFrom(bayestestR,ci) importFrom(bayestestR,equivalence_test) +importFrom(datawizard,weighted_mean) +importFrom(datawizard,weighted_median) +importFrom(datawizard,weighted_sd) importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,group_vars) importFrom(dplyr,quos) importFrom(dplyr,select) -importFrom(dplyr,select_if) importFrom(insight,export_table) importFrom(insight,format_p) importFrom(insight,format_value) diff --git a/R/prop.R b/R/prop.R index fc000815..9c070353 100644 --- a/R/prop.R +++ b/R/prop.R @@ -75,18 +75,18 @@ #' #' # and with group_by #' efc |> -#' data_group(e16sex) |> +#' data_group("e16sex") |> #' prop(e42dep > 2) #' #' efc |> #' data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> -#' data_group(c161sex, c172code) |> +#' data_group(c("c161sex", "c172code")) |> #' prop(e42dep > 2, e16sex == 1) #' #' # same for "props()" #' efc |> #' data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> -#' data_group(c161sex, c172code) |> +#' data_group(c("c161sex", "c172code")) |> #' props( #' e42dep > 2, #' c12hour > 20 & c12hour < 40, diff --git a/R/re-exports.R b/R/re-exports.R index f16d9fb6..3a50abef 100644 --- a/R/re-exports.R +++ b/R/re-exports.R @@ -26,3 +26,15 @@ bayestestR::equivalence_test #' @importFrom insight link_inverse #' @export insight::link_inverse + +#' @importFrom datawizard weighted_sd +#' @export +datawizard::weighted_sd + +#' @importFrom datawizard weighted_mean +#' @export +datawizard::weighted_mean + +#' @importFrom datawizard weighted_median +#' @export +datawizard::weighted_median diff --git a/R/svy_median.R b/R/svy_median.R index d2a14c1f..973cd0cd 100644 --- a/R/svy_median.R +++ b/R/svy_median.R @@ -1,4 +1,4 @@ -#' @rdname weighted_sd +#' @rdname weighted_se #' @export survey_median <- function(x, design) { # check if pkg survey is available diff --git a/R/wtd_cor.R b/R/wtd_cor.R index bcb7e24f..6df781d0 100644 --- a/R/wtd_cor.R +++ b/R/wtd_cor.R @@ -1,11 +1,11 @@ -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_correlation <- function(data, ...) { UseMethod("weighted_correlation") } -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) { if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) @@ -35,7 +35,7 @@ weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) } -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_correlation.formula <- function(formula, data, ci.lvl = .95, ...) 
{ diff --git a/R/wtd_mean.R b/R/wtd_mean.R deleted file mode 100644 index 6b373600..00000000 --- a/R/wtd_mean.R +++ /dev/null @@ -1,20 +0,0 @@ -#' @rdname weighted_sd -#' @export -weighted_mean <- function(x, weights = NULL) { - UseMethod("weighted_mean") -} - -#' @export -weighted_mean.default <- function(x, weights = NULL) { - if (is.null(weights)) weights <- rep(1, length(x)) - stats::weighted.mean(x, w = weights, na.rm = TRUE) -} - -#' @importFrom purrr map_dbl -#' @importFrom dplyr select_if -#' @export -weighted_mean.data.frame <- function(x, weights = NULL) { - if (is.null(weights)) weights <- rep(1, length(x)) - dplyr::select_if(x, is.numeric) %>% - purrr::map_dbl(~ weighted.mean(.x, w = weights)) -} diff --git a/R/wtd_median.R b/R/wtd_median.R deleted file mode 100644 index 0e19d98a..00000000 --- a/R/wtd_median.R +++ /dev/null @@ -1,40 +0,0 @@ -#' @rdname weighted_sd -#' @export -weighted_median <- function(x, weights = NULL) { - UseMethod("weighted_median") -} - -#' @export -weighted_median.default <- function(x, weights = NULL) { - weighted_md_helper(x, w = weights, p = 0.5) -} - -#' @export -weighted_median.data.frame <- function(x, weights = NULL) { - dplyr::select_if(x, is.numeric) %>% - purrr::map_dbl(~ weighted_md_helper(.x, w = weights, p = 0.5)) -} - -weighted_md_helper <- function(x, w, p = .5) { - if (is.null(w)) w <- rep(1, length(x)) - - x[is.na(w)] <- NA - w[is.na(x)] <- NA - - w <- na.omit(w) - x <- na.omit(x) - - order <- order(x) - x <- x[order] - w <- w[order] - - rw <- cumsum(w) / sum(w) - md.values <- min(which(rw >= p)) - - if (rw[md.values] == p) - q <- mean(x[md.values:(md.values + 1)]) - else - q <- x[md.values] - - q -} diff --git a/R/wtd_mwu.R b/R/wtd_mwu.R deleted file mode 100644 index d0e9fa3d..00000000 --- a/R/wtd_mwu.R +++ /dev/null @@ -1,74 +0,0 @@ -#' @rdname weighted_sd -#' @export -weighted_mannwhitney <- function(data, ...) { - UseMethod("weighted_mannwhitney") -} - - -#' @importFrom dplyr select -#' @rdname weighted_sd -#' @export -weighted_mannwhitney.default <- function(data, x, grp, weights, ...) { - x.name <- deparse(substitute(x)) - g.name <- deparse(substitute(grp)) - w.name <- deparse(substitute(weights)) - - # create string with variable names - vars <- c(x.name, g.name, w.name) - - # get data - dat <- suppressMessages(dplyr::select(data, !! vars)) - dat <- na.omit(dat) - - weighted_mannwhitney_helper(dat) -} - - -#' @importFrom dplyr select -#' @rdname weighted_sd -#' @export -weighted_mannwhitney.formula <- function(formula, data, ...) { - vars <- all.vars(formula) - - # get data - dat <- suppressMessages(dplyr::select(data, !! vars)) - dat <- na.omit(dat) - - weighted_mannwhitney_helper(dat) -} - -weighted_mannwhitney_helper <- function(dat, vars) { - # check if pkg survey is available - if (!requireNamespace("survey", quietly = TRUE)) { - stop("Package `survey` needed to for this function to work. Please install it.", call. 
= FALSE) - } - - x.name <- colnames(dat)[1] - group.name <- colnames(dat)[2] - - colnames(dat) <- c("x", "g", "w") - - if (dplyr::n_distinct(dat$g, na.rm = TRUE) > 2) { - m <- "Weighted Kruskal-Wallis test" - method <- "KruskalWallis" - } else { - m <- "Weighted Mann-Whitney-U test" - method <- "wilcoxon" - } - - design <- survey::svydesign(ids = ~0, data = dat, weights = ~w) - mw <- survey::svyranktest(formula = x ~ g, design, test = method) - - attr(mw, "x.name") <- x.name - attr(mw, "group.name") <- group.name - class(mw) <- c("sj_wmwu", "list") - - mw$method <- m - - mw -} - - -#' @rdname weighted_sd -#' @export -weighted_ranktest <- weighted_mannwhitney.formula diff --git a/R/wtd_sd.R b/R/wtd_sd.R deleted file mode 100644 index 95af674d..00000000 --- a/R/wtd_sd.R +++ /dev/null @@ -1,115 +0,0 @@ -#' @title Weighted statistics for tests and variables -#' @name weighted_sd -#' @description \strong{Weighted statistics for variables} -#' \cr \cr -#' \code{weighted_sd()}, \code{weighted_se()}, \code{weighted_mean()} and \code{weighted_median()} -#' compute weighted standard deviation, standard error, mean or median for a -#' variable or for all variables of a data frame. \code{survey_median()} computes the -#' median for a variable in a survey-design (see \code{\link[survey]{svydesign}}). -#' \code{weighted_correlation()} computes a weighted correlation for a two-sided alternative -#' hypothesis. -#' \cr \cr -#' \strong{Weighted tests} -#' \cr \cr -#' \code{weighted_ttest()} computes a weighted t-test, while \code{weighted_mannwhitney()} -#' computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test -#' (for more than two groups). `weighted_ranktest()` is an alias for `weighted_mannwhitney()`. -#' \code{weighted_chisqtest()} computes a weighted Chi-squared test for contingency tables. -#' -#' @param x (Numeric) vector or a data frame. For \code{survey_median()}, \code{weighted_ttest()}, -#' \code{weighted_mannwhitney()} and \code{weighted_chisqtest()} the bare (unquoted) variable -#' name, or a character vector with the variable name. -#' @param weights Bare (unquoted) variable name, or a character vector with -#' the variable name of the numeric vector of weights. If \code{weights = NULL}, -#' unweighted statistic is reported. -#' @param data A data frame. -#' @param formula A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a -#' numeric variable giving the data values and \code{rhs1} a factor with two -#' levels giving the corresponding groups and \code{rhs2} a variable with weights. -#' @param y Optional, bare (unquoted) variable name, or a character vector with -#' the variable name. -#' @param grp Bare (unquoted) name of the cross-classifying variable, where -#' \code{x} is grouped into the categories represented by \code{grp}, -#' or a character vector with the variable name. -#' @param mu A number indicating the true value of the mean (or difference in -#' means if you are performing a two sample test). -#' @param ci.lvl Confidence level of the interval. -#' @param alternative A character string specifying the alternative hypothesis, -#' must be one of \code{"two.sided"} (default), \code{"greater"} or -#' \code{"less"}. You can specify just the initial letter. -#' @param paired Logical, whether to compute a paired t-test. -#' @param ... For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used. -#' For \code{weighted_chisqtest()}, further arguments passed down to -#' \code{\link[stats]{chisq.test}}. 
-#' -#' @inheritParams svyglm.nb -#' -#' @return The weighted (test) statistic. -#' -#' @note \code{weighted_chisq()} is a convenient wrapper for \code{\link{crosstable_statistics}}. -#' For a weighted one-way Anova, use \code{means_by_group()} with -#' \code{weights}-argument. -#' \cr \cr -#' \code{weighted_ttest()} assumes unequal variance between the two groups. -#' -#' @examples -#' # weighted sd and se ---- -#' weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100)) -#' -#' data(efc) -#' weighted_sd(efc[, 1:3], runif(n = nrow(efc))) -#' weighted_se(efc[, 1:3], runif(n = nrow(efc))) -#' -#' # survey_median ---- -#' # median for variables from weighted survey designs -#' if (require("survey")) { -#' data(nhanes_sample) -#' -#' des <- svydesign( -#' id = ~SDMVPSU, -#' strat = ~SDMVSTRA, -#' weights = ~WTINT2YR, -#' nest = TRUE, -#' data = nhanes_sample -#' ) -#' -#' survey_median(total, des) -#' survey_median("total", des) -#' } -#' -#' # weighted t-test ---- -#' efc$weight <- abs(rnorm(nrow(efc), 1, .3)) -#' weighted_ttest(efc, e17age, weights = weight) -#' weighted_ttest(efc, e17age, c160age, weights = weight) -#' weighted_ttest(e17age ~ e16sex + weight, efc) -#' @export -weighted_sd <- function(x, weights = NULL) { - UseMethod("weighted_sd") -} - - -#' @rdname weighted_sd -#' @export -wtd_sd <- weighted_sd - - -#' @export -weighted_sd.data.frame <- function(x, weights = NULL) { - sd_result <- purrr::map_dbl(x, ~ sqrt(weighted_variance(.x, weights))) - names(sd_result) <- colnames(x) - - sd_result -} - -#' @export -weighted_sd.matrix <- function(x, weights = NULL) { - sd_result <- purrr::map_dbl(x, ~ sqrt(weighted_variance(.x, weights))) - names(sd_result) <- colnames(x) - - sd_result -} - -#' @export -weighted_sd.default <- function(x, weights = NULL) { - sqrt(weighted_variance(x, weights)) -} diff --git a/R/wtd_se.R b/R/wtd_se.R index 7fdefcbc..58801f7f 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -1,4 +1,87 @@ -#' @rdname weighted_sd +#' @title Weighted statistics for tests and variables +#' @name weighted_se +#' @description \strong{Weighted statistics for variables} +#' \cr \cr +#' \code{weighted_sd()}, \code{weighted_se()}, \code{weighted_mean()} and \code{weighted_median()} +#' compute weighted standard deviation, standard error, mean or median for a +#' variable or for all variables of a data frame. \code{survey_median()} computes the +#' median for a variable in a survey-design (see \code{\link[survey]{svydesign}}). +#' \code{weighted_correlation()} computes a weighted correlation for a two-sided alternative +#' hypothesis. +#' \cr \cr +#' \strong{Weighted tests} +#' \cr \cr +#' \code{weighted_ttest()} computes a weighted t-test, while \code{weighted_mannwhitney()} +#' computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test +#' (for more than two groups). `weighted_ranktest()` is an alias for `weighted_mannwhitney()`. +#' \code{weighted_chisqtest()} computes a weighted Chi-squared test for contingency tables. +#' +#' @param x (Numeric) vector or a data frame. For \code{survey_median()}, \code{weighted_ttest()}, +#' \code{weighted_mannwhitney()} and \code{weighted_chisqtest()} the bare (unquoted) variable +#' name, or a character vector with the variable name. +#' @param weights Bare (unquoted) variable name, or a character vector with +#' the variable name of the numeric vector of weights. If \code{weights = NULL}, +#' unweighted statistic is reported. +#' @param data A data frame. 
+#' @param formula A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a +#' numeric variable giving the data values and \code{rhs1} a factor with two +#' levels giving the corresponding groups and \code{rhs2} a variable with weights. +#' @param y Optional, bare (unquoted) variable name, or a character vector with +#' the variable name. +#' @param grp Bare (unquoted) name of the cross-classifying variable, where +#' \code{x} is grouped into the categories represented by \code{grp}, +#' or a character vector with the variable name. +#' @param mu A number indicating the true value of the mean (or difference in +#' means if you are performing a two sample test). +#' @param ci.lvl Confidence level of the interval. +#' @param alternative A character string specifying the alternative hypothesis, +#' must be one of \code{"two.sided"} (default), \code{"greater"} or +#' \code{"less"}. You can specify just the initial letter. +#' @param paired Logical, whether to compute a paired t-test. +#' @param ... For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used. +#' For \code{weighted_chisqtest()}, further arguments passed down to +#' \code{\link[stats]{chisq.test}}. +#' +#' @inheritParams svyglm.nb +#' +#' @return The weighted (test) statistic. +#' +#' @note \code{weighted_chisq()} is a convenient wrapper for \code{\link{crosstable_statistics}}. +#' For a weighted one-way Anova, use \code{means_by_group()} with +#' \code{weights}-argument. +#' \cr \cr +#' \code{weighted_ttest()} assumes unequal variance between the two groups. +#' +#' @examples +#' # weighted sd and se ---- +#' weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100)) +#' +#' data(efc) +#' weighted_sd(efc[, 1:3], runif(n = nrow(efc))) +#' weighted_se(efc[, 1:3], runif(n = nrow(efc))) +#' +#' # survey_median ---- +#' # median for variables from weighted survey designs +#' if (require("survey")) { +#' data(nhanes_sample) +#' +#' des <- svydesign( +#' id = ~SDMVPSU, +#' strat = ~SDMVSTRA, +#' weights = ~WTINT2YR, +#' nest = TRUE, +#' data = nhanes_sample +#' ) +#' +#' survey_median(total, des) +#' survey_median("total", des) +#' } +#' +#' # weighted t-test ---- +#' efc$weight <- abs(rnorm(nrow(efc), 1, .3)) +#' weighted_ttest(efc, e17age, weights = weight) +#' weighted_ttest(efc, e17age, c160age, weights = weight) +#' weighted_ttest(e17age ~ e16sex + weight, efc) #' @export weighted_se <- function(x, weights = NULL) { UseMethod("weighted_se") diff --git a/R/wtd_ttest.R b/R/wtd_ttest.R index ed00245b..85459f6b 100644 --- a/R/wtd_ttest.R +++ b/R/wtd_ttest.R @@ -1,10 +1,10 @@ -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_ttest <- function(data, ...) { UseMethod("weighted_ttest") } -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) { @@ -50,7 +50,7 @@ weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = } -#' @rdname weighted_sd +#' @rdname weighted_se #' @export weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) 
{ diff --git a/R/wtd_variance.R b/R/wtd_variance.R index 12f77f28..0bde43a2 100644 --- a/R/wtd_variance.R +++ b/R/wtd_variance.R @@ -4,8 +4,8 @@ weighted_variance <- function(x, w) { x[is.na(w)] <- NA w[is.na(x)] <- NA - w <- na.omit(w) - x <- na.omit(x) + w <- stats::na.omit(w) + x <- stats::na.omit(x) xbar <- sum(w * x) / sum(w) sum(w * ((x - xbar)^2)) / (sum(w) - 1) diff --git a/man/prop.Rd b/man/prop.Rd index 60274772..8aee0669 100644 --- a/man/prop.Rd +++ b/man/prop.Rd @@ -51,7 +51,7 @@ also processes comparisons, which are passed as character vector (see 'Examples'). } \examples{ -\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("sjmisc", quietly = TRUE) && requireNamespace("datawizard", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("datawizard", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) # proportion of value 1 in e42dep @@ -91,18 +91,18 @@ efc |> prop(e17age > 70, e16sex == 1) # and with group_by efc |> - data_group(e16sex) |> + data_group("e16sex") |> prop(e42dep > 2) efc |> data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> - data_group(c161sex, c172code) |> + data_group(c("c161sex", "c172code")) |> prop(e42dep > 2, e16sex == 1) # same for "props()" efc |> data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> - data_group(c161sex, c172code) |> + data_group(c("c161sex", "c172code")) |> props( e42dep > 2, c12hour > 20 & c12hour < 40, diff --git a/man/reexports.Rd b/man/reexports.Rd index 8af864d8..96bdc437 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -10,6 +10,9 @@ \alias{ci} \alias{equivalence_test} \alias{link_inverse} +\alias{weighted_sd} +\alias{weighted_mean} +\alias{weighted_median} \title{Objects exported from other packages} \keyword{internal} \description{ @@ -19,6 +22,8 @@ below to see their documentation. \describe{ \item{bayestestR}{\code{\link[bayestestR]{ci}}, \code{\link[bayestestR]{equivalence_test}}} + \item{datawizard}{\code{\link[datawizard]{weighted_mean}}, \code{\link[datawizard:weighted_mean]{weighted_median}}, \code{\link[datawizard:weighted_mean]{weighted_sd}}} + \item{insight}{\code{\link[insight]{link_inverse}}} \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} diff --git a/man/weighted_sd.Rd b/man/weighted_se.Rd similarity index 88% rename from man/weighted_sd.Rd rename to man/weighted_se.Rd index c553f998..f9dbc9d0 100644 --- a/man/weighted_sd.Rd +++ b/man/weighted_se.Rd @@ -1,19 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/svy_median.R, R/wtd_cor.R, R/wtd_mean.R, -% R/wtd_median.R, R/wtd_mwu.R, R/wtd_sd.R, R/wtd_se.R, R/wtd_ttest.R +% Please edit documentation in R/svy_median.R, R/wtd_cor.R, R/wtd_se.R, +% R/wtd_ttest.R \name{survey_median} \alias{survey_median} \alias{weighted_correlation} \alias{weighted_correlation.default} \alias{weighted_correlation.formula} -\alias{weighted_mean} -\alias{weighted_median} -\alias{weighted_mannwhitney} -\alias{weighted_mannwhitney.default} -\alias{weighted_mannwhitney.formula} -\alias{weighted_ranktest} -\alias{weighted_sd} -\alias{wtd_sd} \alias{weighted_se} \alias{weighted_ttest} \alias{weighted_ttest.default} @@ -28,22 +20,6 @@ weighted_correlation(data, ...) \method{weighted_correlation}{formula}(formula, data, ci.lvl = 0.95, ...) -weighted_mean(x, weights = NULL) - -weighted_median(x, weights = NULL) - -weighted_mannwhitney(data, ...) 
- -\method{weighted_mannwhitney}{default}(data, x, grp, weights, ...) - -\method{weighted_mannwhitney}{formula}(formula, data, ...) - -weighted_ranktest(formula, data, ...) - -weighted_sd(x, weights = NULL) - -wtd_sd(x, weights = NULL) - weighted_se(x, weights = NULL) weighted_ttest(data, ...) @@ -97,10 +73,6 @@ unweighted statistic is reported.} numeric variable giving the data values and \code{rhs1} a factor with two levels giving the corresponding groups and \code{rhs2} a variable with weights.} -\item{grp}{Bare (unquoted) name of the cross-classifying variable, where -\code{x} is grouped into the categories represented by \code{grp}, -or a character vector with the variable name.} - \item{mu}{A number indicating the true value of the mean (or difference in means if you are performing a two sample test).} @@ -109,6 +81,10 @@ means if you are performing a two sample test).} \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. You can specify just the initial letter.} + +\item{grp}{Bare (unquoted) name of the cross-classifying variable, where +\code{x} is grouped into the categories represented by \code{grp}, +or a character vector with the variable name.} } \value{ The weighted (test) statistic. From c7ad6feb9eb7dfc50d6fa53535dd4624ff43fc44 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 22:51:10 +0200 Subject: [PATCH 08/82] fix --- DESCRIPTION | 3 ++- R/wtd_se.R | 3 --- man/weighted_se.Rd | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3515a90a..fb4f1215 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,7 +16,8 @@ Description: Collection of convenient functions for common statistical computati License: GPL-3 Depends: R (>= 3.4), - utils + utils, + stats Imports: bayestestR, datawizard, diff --git a/R/wtd_se.R b/R/wtd_se.R index 58801f7f..37c55e42 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -28,9 +28,6 @@ #' levels giving the corresponding groups and \code{rhs2} a variable with weights. #' @param y Optional, bare (unquoted) variable name, or a character vector with #' the variable name. -#' @param grp Bare (unquoted) name of the cross-classifying variable, where -#' \code{x} is grouped into the categories represented by \code{grp}, -#' or a character vector with the variable name. #' @param mu A number indicating the true value of the mean (or difference in #' means if you are performing a two sample test). #' @param ci.lvl Confidence level of the interval. diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index f9dbc9d0..22c2ee6e 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -81,10 +81,6 @@ means if you are performing a two sample test).} \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. You can specify just the initial letter.} - -\item{grp}{Bare (unquoted) name of the cross-classifying variable, where -\code{x} is grouped into the categories represented by \code{grp}, -or a character vector with the variable name.} } \value{ The weighted (test) statistic. 
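Since the consolidated `weighted_se()` documentation above only exercises `weighted_ttest()` in its roxygen examples, here is a minimal usage sketch for the other weighted tests it describes. This is editorial, not part of the patch: it assumes the `efc` example data and the ad-hoc `weight` column from those examples, and the grouping variables chosen here are only illustrative.

# Not part of the patch: small usage sketch for the weighted test API documented above.
library(sjstats)
data(efc)
# ad-hoc weights, as in the roxygen examples
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))

# weighted standard error (same call as in the package tests)
weighted_se(efc$c12hour, weights = efc$weight)

# formula interface: values ~ two-level group + weights
weighted_ttest(e17age ~ e16sex + weight, efc)

# the weighted Mann-Whitney-U test uses the same formula form
# (requires the `survey` package); weighted_ranktest() is an alias
# for weighted_mannwhitney()
weighted_mannwhitney(c12hour ~ c161sex + weight, efc)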
From 30b633d563bfd070ad963d0f72b9c1a46ca0a782 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 23:18:38 +0200 Subject: [PATCH 09/82] need fixes --- NAMESPACE | 3 -- R/helpfunctions.R | 19 ------- R/prop.R | 132 +++++++++------------------------------------- man/prop.Rd | 20 ------- 4 files changed, 26 insertions(+), 148 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 409d0466..2a5670e5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,8 +114,6 @@ importFrom(datawizard,weighted_mean) importFrom(datawizard,weighted_median) importFrom(datawizard,weighted_sd) importFrom(dplyr,case_when) -importFrom(dplyr,filter) -importFrom(dplyr,group_vars) importFrom(dplyr,quos) importFrom(dplyr,select) importFrom(insight,export_table) @@ -137,4 +135,3 @@ importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) importFrom(sjmisc,str_contains) importFrom(sjmisc,typical_value) -importFrom(tidyr,nest) diff --git a/R/helpfunctions.R b/R/helpfunctions.R index 32bc546c..8a792d1d 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -43,25 +43,6 @@ get_glm_family <- function(fit) { dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) -#' @importFrom tidyr nest -#' @importFrom dplyr select filter group_vars -get_grouped_data <- function(x) { - # retain observations that are complete wrt grouping vars, then nest - grps <- x %>% - dplyr::group_modify(~ dplyr::filter(.x, stats::complete.cases(.y))) %>% - tidyr::nest() - - # arrange data - if (length(dplyr::group_vars(x)) == 1) - reihe <- order(grps[[1]]) - else - reihe <- order(grps[[1]], grps[[2]]) - grps <- grps[reihe, ] - - grps -} - - .compact_character <- function(x) { x[!sapply(x, function(i) is.null(i) || !nzchar(i, keepNA = TRUE) || is.na(i) || any(i == "NULL", na.rm = TRUE))] } diff --git a/R/prop.R b/R/prop.R index 9c070353..834d88e2 100644 --- a/R/prop.R +++ b/R/prop.R @@ -72,37 +72,13 @@ #' # also works with pipe-chains #' efc |> prop(e17age > 70) #' efc |> prop(e17age > 70, e16sex == 1) -#' -#' # and with group_by -#' efc |> -#' data_group("e16sex") |> -#' prop(e42dep > 2) -#' -#' efc |> -#' data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> -#' data_group(c("c161sex", "c172code")) |> -#' prop(e42dep > 2, e16sex == 1) -#' -#' # same for "props()" -#' efc |> -#' data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> -#' data_group(c("c161sex", "c172code")) |> -#' props( -#' e42dep > 2, -#' c12hour > 20 & c12hour < 40, -#' n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3' -#' ) #' @export prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { # check argument if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - - # get dots - dots <- match.call(expand.dots = FALSE)$`...` - - .proportions(data, dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) + .proportions(data, dots = list(...), weight.by = weights, na.rm, digits, multi_logical = FALSE) } @@ -113,11 +89,7 @@ props <- function(data, ..., na.rm = TRUE, digits = 4) { if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - - # get dots - dots <- match.call(expand.dots = FALSE)$`...` - - .proportions(data, dots, NULL, na.rm, digits, multi_logical = TRUE) + .proportions(data, dots = list(...), NULL, na.rm, digits, multi_logical = TRUE) } @@ -130,87 +102,35 @@ props <- function(data, ..., na.rm = TRUE, digits = 4) { x }) - # do we have a grouped data frame? 
if (inherits(data, "grouped_df")) { - - # remember order of values - reihenfolge <- NULL - - # get grouped data - grps <- get_grouped_data(data) - - # now get proportions for each subset - fr <- do.call(rbind, lapply( - seq_len(nrow(grps)), - function(i) { - # get data from grouped data frame - .d <- grps$data[[i]] - - # iterate dots (comparing conditions) - if (multi_logical) - result <- lapply(dots, get_multiple_proportion, .d, na.rm, digits) - else - result <- lapply(dots, get_proportion, .d, weight.by, na.rm, digits) - - as.data.frame(t(unlist(result))) - } - )) - - - # now we need the values from the groups of the grouped data frame - for (i in (ncol(grps) - 1):1) { - # get value label - var.name <- colnames(grps)[i] - val.labels <- suppressWarnings( - rep(sjlabelled::get_labels(data[[var.name]]), length.out = nrow(fr)) - ) - - # if we have no value labels, use values instead - if (is.null(val.labels)) { - val.labels <- - rep(unique(sort(data[[var.name]])), length.out = nrow(fr)) - } - - # add row order, based on values of grouping variables - reihenfolge <- cbind( - as.data.frame(rep(sort(unique(sjlabelled::as_numeric(data[[var.name]]))), length.out = nrow(fr))), - reihenfolge - ) - - # bind values as column - fr <- cbind(data.frame(val.labels, stringsAsFactors = FALSE), fr) - } - - # get column names. we need variable labels as column names - var.names <- colnames(grps)[seq_len(ncol(grps) - 1)] - var.labels <- sjlabelled::get_label(data[, var.names], def.value = var.names) - - # set variable labels and comparisons as colum names - colnames(fr) <- c(var.labels, comparisons) - - # order rows by values of grouping variables - fr <- fr[do.call(order, reihenfolge), ] - - fr - + grps <- attributes(data)$groups + result <- lapply(grps[[".rows"]], function(x) { + .process_prop(data[x, , drop = FALSE], comparisons, dots, multi_logical, na.rm, digits, weight.by) + }) } else { - # iterate dots (comparing conditions) - if (multi_logical) - result <- lapply(dots, get_multiple_proportion, data, na.rm, digits) - else - result <- lapply(dots, get_proportion, data, weight.by, na.rm, digits) + result <- .process_prop(data, comparisons, dots, multi_logical, na.rm, digits, weight.by) + } + result +} - # if we have more than one proportion, return a data frame. this allows us - # to save more information, the condition and the proportion value - if (length(comparisons) > 1) { - return(data_frame( - condition = as.character(unlist(comparisons)), - prop = unlist(result) - )) - } - unlist(result) +.process_prop <- function(data, comparisons, dots, multi_logical, na.rm, digits, weight.by) { + # iterate dots (comparing conditions) + if (multi_logical) + result <- lapply(dots, get_multiple_proportion, data, na.rm, digits) + else + result <- lapply(dots, get_proportion, data, weight.by, na.rm, digits) + + # if we have more than one proportion, return a data frame. 
this allows us + # to save more information, the condition and the proportion value + if (length(comparisons) > 1) { + return(data_frame( + condition = as.character(unlist(comparisons)), + prop = unlist(result) + )) } + + unlist(result) } diff --git a/man/prop.Rd b/man/prop.Rd index 8aee0669..118e69d8 100644 --- a/man/prop.Rd +++ b/man/prop.Rd @@ -88,25 +88,5 @@ props(efc, # also works with pipe-chains efc |> prop(e17age > 70) efc |> prop(e17age > 70, e16sex == 1) - -# and with group_by -efc |> - data_group("e16sex") |> - prop(e42dep > 2) - -efc |> - data_select(c("e42dep", "c161sex", "c172code", "e16sex")) |> - data_group(c("c161sex", "c172code")) |> - prop(e42dep > 2, e16sex == 1) - -# same for "props()" -efc |> - data_select(c("e42dep", "c161sex", "c172code", "c12hour", "n4pstu")) |> - data_group(c("c161sex", "c172code")) |> - props( - e42dep > 2, - c12hour > 20 & c12hour < 40, - n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3' - ) \dontshow{\}) # examplesIf} } From 251a524ee1c548a3f49e622a818818491f4d0267 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 23:24:26 +0200 Subject: [PATCH 10/82] updates --- DESCRIPTION | 1 - NAMESPACE | 2 -- R/auto_prior.R | 16 ++++------------ R/boot_ci.R | 30 +++++++++++++----------------- R/bootstrap.R | 21 ++++++++++----------- R/re-exports.R | 5 ----- man/boot_ci.Rd | 24 +++++++++++++----------- man/bootstrap.Rd | 21 +++++++++++---------- man/reexports.Rd | 3 --- 9 files changed, 51 insertions(+), 72 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fb4f1215..d590506e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,6 @@ Imports: effectsize, insight, lme4, - magrittr, parameters, performance, purrr, diff --git a/NAMESPACE b/NAMESPACE index 2a5670e5..dc23189c 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -43,7 +43,6 @@ S3method(weighted_se,default) S3method(weighted_se,matrix) S3method(weighted_ttest,default) S3method(weighted_ttest,formula) -export("%>%") export(anova_stats) export(auto_prior) export(boot_ci) @@ -123,7 +122,6 @@ importFrom(insight,get_response) importFrom(insight,link_inverse) importFrom(insight,print_color) importFrom(lme4,ngrps) -importFrom(magrittr,"%>%") importFrom(performance,mse) importFrom(performance,rmse) importFrom(purrr,map) diff --git a/R/auto_prior.R b/R/auto_prior.R index 7442c09d..aed7a9b2 100644 --- a/R/auto_prior.R +++ b/R/auto_prior.R @@ -72,9 +72,7 @@ #' auto_prior(mf, efc, FALSE) #' @export auto_prior <- function(formula, data, gaussian, locations = NULL) { - - if (!requireNamespace("brms", quietly = TRUE)) - stop("Package `brms` required.", call. = FALSE) + insight::check_if_installed("brms") scale.b <- 2.5 scale.y <- 10 @@ -82,20 +80,14 @@ auto_prior <- function(formula, data, gaussian, locations = NULL) { pred <- insight::find_predictors(formula, effects = "all", flatten = TRUE) y.name <- insight::find_response(formula, combine = TRUE) - cols <- c(y.name, pred) - - data <- data %>% - dplyr::select(!! cols) %>% - stats::na.omit() %>% - as.data.frame() - + data <- stats::na.omit(data[c(y.name, pred)]) y <- data[[y.name]] # check if response is binary if (missing(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) gaussian <- FALSE if (isTRUE(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) - warning("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.", call. 
= F) + insight::format_alert("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.") # nolint if (gaussian) { @@ -135,7 +127,7 @@ auto_prior <- function(formula, data, gaussian, locations = NULL) { term.names <- c(term.names, i) } - for (i in 1:length(term.names)) { + for (i in seq_along(term.names)) { if (!is.null(locations) && length(locations) >= (i + 1)) location.b <- locations[i + 1] diff --git a/R/boot_ci.R b/R/boot_ci.R index 3d7601ca..d9d23ac1 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -54,7 +54,7 @@ #' #' @seealso \code{\link{bootstrap}} to generate nonparametric bootstrap samples. #' -#' @examples +#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE) #' library(dplyr) #' library(purrr) #' data(efc) @@ -88,12 +88,12 @@ #' #' #' # bootstrap() and boot_ci() work fine within pipe-chains -#' efc %>% -#' bootstrap(100) %>% +#' efc |> +#' bootstrap(100) |> #' mutate( #' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)), #' dependency = map_dbl(models, ~coef(.x)[2]) -#' ) %>% +#' ) |> #' boot_ci(dependency) #' #' # check p-value @@ -105,28 +105,28 @@ #' # bootstrapped statistics like confidence intervals or p-values #' library(dplyr) #' library(sjmisc) -#' efc %>% +#' efc |> #' # generate bootstrap replicates -#' bootstrap(100) %>% +#' bootstrap(100) |> #' # apply lm to all bootstrapped data sets #' mutate( #' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) -#' ) %>% +#' ) |> #' # spread model coefficient for all 100 models -#' spread_coef(models) %>% +#' spread_coef(models) |> #' # compute the CI for all bootstrapped model coefficients #' boot_ci(e42dep, c161sex, c172code) #' #' # or... -#' efc %>% +#' efc |> #' # generate bootstrap replicates -#' bootstrap(100) %>% +#' bootstrap(100) |> #' # apply lm to all bootstrapped data sets #' mutate( #' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) -#' ) %>% +#' ) |> #' # spread model coefficient for all 100 models -#' spread_coef(models, append = FALSE) %>% +#' spread_coef(models, append = FALSE) |> #' # compute the CI for all bootstrapped model coefficients #' boot_ci()} #' @export @@ -209,11 +209,7 @@ boot_est <- function(data, ...) { transform_boot_result <- function(res) { # transform a bit, so we have each estimate in a row, and ci's as columns... - res %>% - as.data.frame() %>% - t() %>% - as.data.frame() %>% - rownames_as_column(var = "term") + rownames_as_column(as.data.frame(t(as.data.frame(res))), var = "term") } diff --git a/R/bootstrap.R b/R/bootstrap.R index c3d3635f..1f8bdbd1 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -40,7 +40,7 @@ #' @seealso \code{\link{boot_ci}} to calculate confidence intervals from #' bootstrap samples. 
#' -#' @examples +#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE) #' data(efc) #' bs <- bootstrap(efc, 5) #' @@ -62,16 +62,15 @@ #' })) #' #' # or as tidyverse-approach -#' if (require("dplyr") && require("purrr")) { -#' bs <- efc %>% -#' bootstrap(100) %>% -#' mutate( -#' c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) -#' ) -#' -#' # bootstrapped standard error -#' boot_se(bs, c12hour) -#' } +#' library(dplyr) +#' library(purrr) +#' bs <- efc |> +#' bootstrap(100) |> +#' mutate( +#' c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) +#' ) +#' # bootstrapped standard error +#' boot_se(bs, c12hour) #' @export bootstrap <- function(data, n, size) { if (!missing(size) && !is.null(size)) { diff --git a/R/re-exports.R b/R/re-exports.R index 3a50abef..aea31ba3 100644 --- a/R/re-exports.R +++ b/R/re-exports.R @@ -1,8 +1,3 @@ -#' @importFrom magrittr %>% -#' @export -magrittr::`%>%` - - #' @importFrom sjmisc typical_value #' @export sjmisc::typical_value diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index c437f137..7f4c252e 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -72,6 +72,7 @@ assuming normal distribution. } } \examples{ +\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} library(dplyr) library(purrr) data(efc) @@ -105,12 +106,12 @@ coef(fit)[2] # bootstrap() and boot_ci() work fine within pipe-chains -efc \%>\% - bootstrap(100) \%>\% +efc |> + bootstrap(100) |> mutate( models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)), dependency = map_dbl(models, ~coef(.x)[2]) - ) \%>\% + ) |> boot_ci(dependency) # check p-value @@ -122,30 +123,31 @@ summary(fit)$coefficients[3, ] # bootstrapped statistics like confidence intervals or p-values library(dplyr) library(sjmisc) -efc \%>\% +efc |> # generate bootstrap replicates - bootstrap(100) \%>\% + bootstrap(100) |> # apply lm to all bootstrapped data sets mutate( models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) \%>\% + ) |> # spread model coefficient for all 100 models - spread_coef(models) \%>\% + spread_coef(models) |> # compute the CI for all bootstrapped model coefficients boot_ci(e42dep, c161sex, c172code) # or... -efc \%>\% +efc |> # generate bootstrap replicates - bootstrap(100) \%>\% + bootstrap(100) |> # apply lm to all bootstrapped data sets mutate( models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) \%>\% + ) |> # spread model coefficient for all 100 models - spread_coef(models, append = FALSE) \%>\% + spread_coef(models, append = FALSE) |> # compute the CI for all bootstrapped model coefficients boot_ci()} +\dontshow{\}) # examplesIf} } \references{ Carpenter J, Bithell J. Bootstrap confdence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164 diff --git a/man/bootstrap.Rd b/man/bootstrap.Rd index ac2e2c63..b54c6396 100644 --- a/man/bootstrap.Rd +++ b/man/bootstrap.Rd @@ -49,6 +49,7 @@ method automatically applies whenever coercion is done because a data frame is required as input. See 'Examples' in \code{\link{boot_ci}}. 
} \examples{ +\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) bs <- bootstrap(efc, 5) @@ -70,16 +71,16 @@ bs$c12hour <- unlist(lapply(bs$strap, function(x) { })) # or as tidyverse-approach -if (require("dplyr") && require("purrr")) { - bs <- efc \%>\% - bootstrap(100) \%>\% - mutate( - c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) - ) - - # bootstrapped standard error - boot_se(bs, c12hour) -} +library(dplyr) +library(purrr) +bs <- efc |> + bootstrap(100) |> + mutate( + c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) + ) +# bootstrapped standard error +boot_se(bs, c12hour) +\dontshow{\}) # examplesIf} } \seealso{ \code{\link{boot_ci}} to calculate confidence intervals from diff --git a/man/reexports.Rd b/man/reexports.Rd index 96bdc437..33c21fa3 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,7 +3,6 @@ \docType{import} \name{reexports} \alias{reexports} -\alias{\%>\%} \alias{typical_value} \alias{mse} \alias{rmse} @@ -26,8 +25,6 @@ below to see their documentation. \item{insight}{\code{\link[insight]{link_inverse}}} - \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} - \item{performance}{\code{\link[performance:performance_mse]{mse}}, \code{\link[performance:performance_rmse]{rmse}}} \item{sjmisc}{\code{\link[sjmisc]{typical_value}}} From defe5300bf8895ff0b2cf06e5b625209a13a55df Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 23:31:52 +0200 Subject: [PATCH 11/82] fix --- DESCRIPTION | 3 +-- NAMESPACE | 4 ---- R/S3-methods.R | 28 ++++++++++++++++------------ R/anova_stats.R | 2 +- R/auto_prior.R | 4 ++-- R/boot_ci.R | 6 +++++- R/gmd.R | 4 ++-- R/weight.R | 5 +++-- R/wtd_cor.R | 18 +++++++++--------- R/wtd_ttest.R | 14 +++----------- R/xtab_statistics.R | 1 + 11 files changed, 43 insertions(+), 46 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d590506e..339ecf1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,14 +21,12 @@ Depends: Imports: bayestestR, datawizard, - dplyr, effectsize, insight, lme4, parameters, performance, purrr, - sjlabelled, sjmisc, tidyr Suggests: @@ -36,6 +34,7 @@ Suggests: broom, car, coin, + dplyr, ggplot2, graphics, MASS, diff --git a/NAMESPACE b/NAMESPACE index dc23189c..283fd85a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -112,9 +112,6 @@ importFrom(bayestestR,equivalence_test) importFrom(datawizard,weighted_mean) importFrom(datawizard,weighted_median) importFrom(datawizard,weighted_sd) -importFrom(dplyr,case_when) -importFrom(dplyr,quos) -importFrom(dplyr,select) importFrom(insight,export_table) importFrom(insight,format_p) importFrom(insight,format_value) @@ -128,7 +125,6 @@ importFrom(purrr,map) importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_lgl) -importFrom(sjlabelled,as_numeric) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) importFrom(sjmisc,str_contains) diff --git a/R/S3-methods.R b/R/S3-methods.R index fe8c8289..4fb00f63 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -1,15 +1,16 @@ -#' @importFrom dplyr case_when #' @export print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) 
{ se <- match.arg(se) sm <- tidy_svyglm.nb(x, digits, v_se = se)[-1, -2] - pan <- dplyr::case_when( - sm$p.value < 0.001 ~ "<0.001 ***", - sm$p.value < 0.01 ~ sprintf("%.*f ** ", digits, sm$p.value), - sm$p.value < 0.05 ~ sprintf("%.*f * ", digits, sm$p.value), - sm$p.value < 0.1 ~ sprintf("%.*f . ", digits, sm$p.value), - TRUE ~ sprintf("%.*f ", digits, sm$p.value) + pan <- ifelse(sm$p.value < 0.001, "<0.001 ***", + ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), # nolint + sprintf("%.*f ", digits, sm$p.value) + ) + ) + ) ) sm$p.value <- pan @@ -30,11 +31,14 @@ print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { sm <- tidy_svyglm.zip(x, digits, v_se = se)[-1, ] pan <- ifelse(sm$p.value < 0.001, "<0.001 ***", - ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), - ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), - ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), - sprintf("%.*f ", digits, sm$p.value) - )))) + ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), # nolint + sprintf("%.*f ", digits, sm$p.value) + ) + ) + ) + ) sm$p.value <- pan print(sm, ...) diff --git a/R/anova_stats.R b/R/anova_stats.R index 1569ef5a..84849c30 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -107,7 +107,7 @@ aov_stat_summary <- function(model) { # for mixed models, add information on residuals if (mm) { res <- stats::residuals(ori.model) - aov.sum <- dplyr::bind_rows( + aov.sum <- rbind( aov.sum, data_frame( term = "Residuals", diff --git a/R/auto_prior.R b/R/auto_prior.R index aed7a9b2..7c17243f 100644 --- a/R/auto_prior.R +++ b/R/auto_prior.R @@ -84,9 +84,9 @@ auto_prior <- function(formula, data, gaussian, locations = NULL) { y <- data[[y.name]] # check if response is binary - if (missing(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) gaussian <- FALSE + if (missing(gaussian) && insight::n_unique(y) == 2) gaussian <- FALSE - if (isTRUE(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) + if (isTRUE(gaussian) && insight::n_unique(y) == 2) insight::format_alert("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.") # nolint diff --git a/R/boot_ci.R b/R/boot_ci.R index d9d23ac1..ba3cf814 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -131,6 +131,7 @@ #' boot_ci()} #' @export boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) { + insight::check_if_installed("dplyr") # match arguments method <- match.arg(method) @@ -160,6 +161,7 @@ boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) { #' @rdname boot_ci #' @export boot_se <- function(data, ...) { + insight::check_if_installed("dplyr") # evaluate arguments, generate data .dat <- get_dot_data(data, dplyr::quos(...)) @@ -176,6 +178,7 @@ boot_se <- function(data, ...) { #' @rdname boot_ci #' @export boot_p <- function(data, ...) { + insight::check_if_installed("dplyr") # evaluate arguments, generate data .dat <- get_dot_data(data, dplyr::quos(...)) @@ -194,6 +197,7 @@ boot_p <- function(data, ...) { #' @rdname boot_ci #' @export boot_est <- function(data, ...) 
{ + insight::check_if_installed("dplyr") # evaluate arguments, generate data .dat <- get_dot_data(data, dplyr::quos(...)) @@ -213,8 +217,8 @@ transform_boot_result <- function(res) { } -#' @importFrom dplyr select get_dot_data <- function(x, qs) { + insight::check_if_installed("dplyr") if (sjmisc::is_empty(qs)) as.data.frame(x) else diff --git a/R/gmd.R b/R/gmd.R index 9dab2e96..2fed09a9 100644 --- a/R/gmd.R +++ b/R/gmd.R @@ -23,12 +23,12 @@ #' gmd(efc$e17age) #' gmd(efc, e17age, c160age, c12hour) #' -#' @importFrom dplyr quos select #' @importFrom purrr map_df #' @importFrom sjmisc is_empty #' @export gmd <- function(x, ...) { - # evaluate dots + insight::check_if_installed("dplyr") + # evaluate dots qs <- dplyr::quos(...) if (!sjmisc::is_empty(qs)) x <- suppressMessages(dplyr::select(x, !!!qs)) diff --git a/R/weight.R b/R/weight.R index 43888455..11423688 100644 --- a/R/weight.R +++ b/R/weight.R @@ -38,7 +38,6 @@ #' table(x) #' table(weight(x, w)) #' -#' @importFrom sjlabelled as_numeric #' @export weight <- function(x, weights, digits = 0) { # remember if x is numeric @@ -75,7 +74,9 @@ weight <- function(x, weights, digits = 0) { # if we have NA values, weighted var is coerced to character. # coerce back to numeric then here - if (!is.numeric(weightedvar) && x.is.num) weightedvar <- sjlabelled::as_numeric(weightedvar) + if (!is.numeric(weightedvar) && x.is.num) { + weightedvar <- datawizard::to_numeric(weightedvar, dummy_factors = FALSE) + } # return result weightedvar diff --git a/R/wtd_cor.R b/R/wtd_cor.R index 6df781d0..f167f730 100644 --- a/R/wtd_cor.R +++ b/R/wtd_cor.R @@ -7,9 +7,9 @@ weighted_correlation <- function(data, ...) { #' @rdname weighted_se #' @export -weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) { - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) - stop("'ci.lvl' must be a single number between 0 and 1") +weighted_correlation.default <- function(data, x, y, weights, ci.lvl = 0.95, ...) { + if (!missing(ci.lvl) && (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) + insight::format_error("'ci.lvl' must be a single number between 0 and 1.") x.name <- deparse(substitute(x)) y.name <- deparse(substitute(y)) @@ -24,8 +24,8 @@ weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) vars <- c(x.name, y.name, w.name) # get data - dat <- suppressMessages(dplyr::select(data, !! vars)) - dat <- na.omit(dat) + dat <- suppressMessages(data[vars]) + dat <- stats::na.omit(dat) xv <- dat[[x.name]] yv <- dat[[y.name]] @@ -37,10 +37,10 @@ weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) #' @rdname weighted_se #' @export -weighted_correlation.formula <- function(formula, data, ci.lvl = .95, ...) { +weighted_correlation.formula <- function(formula, data, ci.lvl = 0.95, ...) { - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) - stop("'ci.lvl' must be a single number between 0 and 1") + if (!missing(ci.lvl) && (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) + insight::format_error("'ci.lvl' must be a single number between 0 and 1.") vars <- all.vars(formula) @@ -50,7 +50,7 @@ weighted_correlation.formula <- function(formula, data, ci.lvl = .95, ...) { } # get data - dat <- suppressMessages(dplyr::select(data, !! 
vars)) + dat <- suppressMessages(data[vars]) dat <- na.omit(dat) xv <- dat[[vars[1]]] diff --git a/R/wtd_ttest.R b/R/wtd_ttest.R index 85459f6b..688cdaa8 100644 --- a/R/wtd_ttest.R +++ b/R/wtd_ttest.R @@ -28,11 +28,11 @@ weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = vars <- c(x.name, y.name, w.name) # get data - dat <- suppressMessages(dplyr::select(data, !! vars)) + dat <- suppressMessages(data[vars]) dat <- na.omit(dat) if (sjmisc::is_empty(dat) || nrow(dat) == 1) { - warning("Too less data to compute t-test.") + insight::format_alert("Too less data to compute t-test.") return(NULL) } @@ -96,15 +96,7 @@ weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl nx <- length(xv) ny <- length(yv) - labs <- sjlabelled::get_labels( - data[[vars[2]]], - attr.only = FALSE, - values = "p", - drop.na = TRUE, - drop.unused = TRUE - ) - - weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, vars[1], vars[2], labs) + weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, vars[1], vars[2], vars[2]) } diff --git a/R/xtab_statistics.R b/R/xtab_statistics.R index 21824194..2342385e 100644 --- a/R/xtab_statistics.R +++ b/R/xtab_statistics.R @@ -106,6 +106,7 @@ #' ) #' @export crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), weights = NULL, ...) { + insight::check_if_installed("dplyr") # match arguments statistics <- match.arg(statistics) From 21339f382fe6d1d2e1a90cdd828bea5b8b0d3977 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 23:53:45 +0200 Subject: [PATCH 12/82] fix? --- NAMESPACE | 1 + R/S3-methods.R | 6 +++--- tests/testthat/test-wtd.R | 9 --------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 283fd85a..3de2e0c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -129,3 +129,4 @@ importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) importFrom(sjmisc,str_contains) importFrom(sjmisc,typical_value) +importFrom(stats,family) diff --git a/R/S3-methods.R b/R/S3-methods.R index 4fb00f63..a09e9891 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -85,8 +85,8 @@ tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { term = substring(names(stats::coef(x)), 5), estimate = round(est, digits), std.error = round(se, digits), - conf.low = round(exp(est - stats::qnorm(.975) * se), digits), - conf.high = round(exp(est + stats::qnorm(.975) * se), digits), + conf.low = round(exp(est - stats::qnorm(0.975) * se), digits), + conf.high = round(exp(est + stats::qnorm(0.975) * se), digits), p.value = round(2 * stats::pnorm(abs(est / se), lower.tail = FALSE), digits) ) } @@ -108,7 +108,7 @@ model.frame.svyglm.zip <- function(formula, ...) { } - +#' @importFrom stats family #' @export family.svyglm.nb <- function(object, ...) 
{ attr(object, "family", exact = TRUE) diff --git a/tests/testthat/test-wtd.R b/tests/testthat/test-wtd.R index 7642253b..f90e309f 100644 --- a/tests/testthat/test-wtd.R +++ b/tests/testthat/test-wtd.R @@ -4,17 +4,8 @@ if (require("testthat") && require("sjstats")) { efc$weight <- abs(rnorm(nrow(efc), 1, .3)) test_that("wtd", { - expect_equal(weighted_sd(efc$c12hour, weights = efc$weight), 51.18224, tolerance = 1e-5) - expect_equal(weighted_sd(efc$c12hour, weights = NULL), 50.80504, tolerance = 1e-5) - - expect_equal(weighted_mean(efc$c12hour, weights = efc$weight), 42.80723, tolerance = 1e-5) - expect_equal(weighted_mean(efc$c12hour, weights = NULL), 42.39911, tolerance = 1e-5) - expect_equal(weighted_se(efc$c12hour, weights = efc$weight), 1.704182, tolerance = 1e-5) expect_equal(weighted_se(efc$c12hour, weights = NULL), 1.691623, tolerance = 1e-5) - - expect_equal(weighted_median(efc$c12hour, weights = efc$weight), 20, tolerance = 1e-5) - expect_equal(weighted_median(efc$c12hour, weights = NULL), 20, tolerance = 1e-5) }) test_that("weighted_ttest", { From 02429bc7968b57f1f66ef88c09ae6141ad60dcaf Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 00:00:28 +0200 Subject: [PATCH 13/82] fix --- NAMESPACE | 2 -- R/boot_ci.R | 23 ++++++++++++++--------- R/gmd.R | 23 +++++++++++------------ man/boot_ci.Rd | 29 ++++++++++++++++------------- man/gmd.Rd | 4 ++-- 5 files changed, 43 insertions(+), 38 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 3de2e0c4..3fe93bc7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -123,9 +123,7 @@ importFrom(performance,mse) importFrom(performance,rmse) importFrom(purrr,map) importFrom(purrr,map_dbl) -importFrom(purrr,map_df) importFrom(purrr,map_lgl) -importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) importFrom(sjmisc,str_contains) importFrom(sjmisc,typical_value) diff --git a/R/boot_ci.R b/R/boot_ci.R index ba3cf814..dd8254e4 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -61,11 +61,14 @@ #' bs <- bootstrap(efc, 100) #' #' # now run models for each bootstrapped sample -#' bs$models <- map(bs$strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)) +#' bs$models <- lapply( +#' bs$strap, +#' function(.x) lm(neg_c_7 ~ e42dep + c161sex, data = .x) +#' ) #' #' # extract coefficient "dependency" and "gender" from each model -#' bs$dependency <- map_dbl(bs$models, ~coef(.x)[2]) -#' bs$gender <- map_dbl(bs$models, ~coef(.x)[3]) +#' bs$dependency <- vapply(bs$models, function(x) coef(x)[2], numeric(1)) +#' bs$gender <- vapply(bs$models, function(x) coef(x)[3], numeric(1)) #' #' # get bootstrapped confidence intervals #' boot_ci(bs$dependency) @@ -76,9 +79,9 @@ #' #' # alternative function calls. 
#' boot_ci(bs$dependency) -#' boot_ci(bs, dependency) -#' boot_ci(bs, dependency, gender) -#' boot_ci(bs, dependency, gender, method = "q") +#' boot_ci(bs, "dependency") +#' boot_ci(bs, c("dependency", "gender")) +#' boot_ci(bs, c("dependency", "gender"), method = "q") #' #' #' # compare coefficients @@ -130,13 +133,15 @@ #' # compute the CI for all bootstrapped model coefficients #' boot_ci()} #' @export -boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) { - insight::check_if_installed("dplyr") +boot_ci <- function(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95) { # match arguments method <- match.arg(method) # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) + if (is.null(select)) + .dat <- as.data.frame(data) + else + .dat <- data[select] # compute confidence intervals for all values transform_boot_result(lapply(.dat, function(x) { diff --git a/R/gmd.R b/R/gmd.R index 2fed09a9..98ad9c53 100644 --- a/R/gmd.R +++ b/R/gmd.R @@ -21,21 +21,20 @@ #' @examples #' data(efc) #' gmd(efc$e17age) -#' gmd(efc, e17age, c160age, c12hour) +#' gmd(efc, c("e17age", "c160age", "c12hour")) #' -#' @importFrom purrr map_df -#' @importFrom sjmisc is_empty #' @export -gmd <- function(x, ...) { - insight::check_if_installed("dplyr") - # evaluate dots - qs <- dplyr::quos(...) - if (!sjmisc::is_empty(qs)) x <- suppressMessages(dplyr::select(x, !!!qs)) - - if (is.data.frame(x)) - purrr::map_df(x, gmd_helper) - else +gmd <- function(x, select = NULL) { + if (is.data.frame(x)) { + do.call(rbind, lapply(select, function(i) { + data.frame( + variable = i, + gmd = gmd_helper(x[[i]]) + ) + })) + } else { gmd_helper(x) + } } diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index 7f4c252e..4d57d310 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -7,7 +7,7 @@ \alias{boot_est} \title{Standard error and confidence intervals for bootstrapped estimates} \usage{ -boot_ci(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95) +boot_ci(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95) boot_se(data, ...) @@ -19,12 +19,6 @@ boot_est(data, ...) \item{data}{A data frame that containts the vector with bootstrapped estimates, or directly the vector (see 'Examples').} -\item{...}{Optional, unquoted names of variables with bootstrapped estimates. -Required, if either \code{data} is a data frame (and no vector), -and only selected variables from \code{data} should be processed. -You may also use functions like \code{:} or tidyselect's -\code{select_helpers()}.} - \item{method}{Character vector, indicating if confidence intervals should be based on bootstrap standard error, multiplied by the value of the quantile function of the t-distribution (default), or on sample @@ -32,6 +26,12 @@ quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.} \item{ci.lvl}{Numeric, the level of the confidence intervals.} + +\item{...}{Optional, unquoted names of variables with bootstrapped estimates. +Required, if either \code{data} is a data frame (and no vector), +and only selected variables from \code{data} should be processed. 
+You may also use functions like \code{:} or tidyselect's +\code{select_helpers()}.} } \value{ A data frame with either bootstrap estimate, @@ -79,11 +79,14 @@ data(efc) bs <- bootstrap(efc, 100) # now run models for each bootstrapped sample -bs$models <- map(bs$strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)) +bs$models <- lapply( + bs$strap, + function(.x) lm(neg_c_7 ~ e42dep + c161sex, data = .x) +) # extract coefficient "dependency" and "gender" from each model -bs$dependency <- map_dbl(bs$models, ~coef(.x)[2]) -bs$gender <- map_dbl(bs$models, ~coef(.x)[3]) +bs$dependency <- vapply(bs$models, function(x) coef(x)[2], numeric(1)) +bs$gender <- vapply(bs$models, function(x) coef(x)[3], numeric(1)) # get bootstrapped confidence intervals boot_ci(bs$dependency) @@ -94,9 +97,9 @@ confint(fit)[2, ] # alternative function calls. boot_ci(bs$dependency) -boot_ci(bs, dependency) -boot_ci(bs, dependency, gender) -boot_ci(bs, dependency, gender, method = "q") +boot_ci(bs, "dependency") +boot_ci(bs, c("dependency", "gender")) +boot_ci(bs, c("dependency", "gender"), method = "q") # compare coefficients diff --git a/man/gmd.Rd b/man/gmd.Rd index 03823df5..f8ca77fa 100644 --- a/man/gmd.Rd +++ b/man/gmd.Rd @@ -4,7 +4,7 @@ \alias{gmd} \title{Gini's Mean Difference} \usage{ -gmd(x, ...) +gmd(x, select = NULL) } \arguments{ \item{x}{A vector or data frame.} @@ -30,7 +30,7 @@ silently removed. \examples{ data(efc) gmd(efc$e17age) -gmd(efc, e17age, c160age, c12hour) +gmd(efc, c("e17age", "c160age", "c12hour")) } \references{ From 2414ba50cf392cc137a43d83e875953a51bfe46d Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 10:31:11 +0200 Subject: [PATCH 14/82] fixes --- DESCRIPTION | 4 +- NAMESPACE | 5 - R/S3-methods.R | 5 +- R/anova_stats.R | 24 +++-- R/boot_ci.R | 165 ++++++++++---------------------- R/gmd.R | 15 ++- R/helpfunctions.R | 8 +- R/is_prime.R | 14 ++- R/prop.R | 21 ++-- R/re-exports.R | 4 - R/var_pop.R | 8 +- R/wtd_cor.R | 2 +- R/wtd_ttest.R | 16 ++-- R/xtab_statistics.R | 2 +- man/boot_ci.Rd | 117 ++++++---------------- man/crosstable_statistics.Rd | 7 +- man/gmd.Rd | 11 +-- man/reexports.Rd | 3 - tests/testthat/test-autoprior.R | 1 - 19 files changed, 154 insertions(+), 278 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 339ecf1d..58b7b27b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,9 +26,7 @@ Imports: lme4, parameters, performance, - purrr, - sjmisc, - tidyr + purrr Suggests: brms, broom, diff --git a/NAMESPACE b/NAMESPACE index 3fe93bc7..74ff537d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -96,7 +96,6 @@ export(survey_median) export(svyglm.nb) export(svyglm.zip) export(table_values) -export(typical_value) export(var_pop) export(weight) export(weight2) @@ -123,8 +122,4 @@ importFrom(performance,mse) importFrom(performance,rmse) importFrom(purrr,map) importFrom(purrr,map_dbl) -importFrom(purrr,map_lgl) -importFrom(sjmisc,is_float) -importFrom(sjmisc,str_contains) -importFrom(sjmisc,typical_value) importFrom(stats,family) diff --git a/R/S3-methods.R b/R/S3-methods.R index a09e9891..c7d94216 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -134,7 +134,7 @@ formula.svyglm.zip <- function(x, ...) { predict.svyglm.nb <- function(object, newdata = NULL, type = c("link", "response", "terms"), se.fit = FALSE, dispersion = NULL, terms = NULL, - na.action = na.pass, ...) { + na.action = stats::na.pass, ...) { insight::check_if_installed(c("survey", "MASS")) fnb <- MASS::glm.nb( @@ -238,8 +238,7 @@ print.tidy_stan <- function(x, ...) 
{ clean_term_name <- function(x) { - insight::check_if_installed("sjmisc") - x <- sjmisc::trim(x) + x <- insight::trim_ws(x) format(x, width = max(nchar(x))) } diff --git a/R/anova_stats.R b/R/anova_stats.R index 84849c30..ae9c083e 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -29,7 +29,7 @@ #' } #' @export anova_stats <- function(model, digits = 3) { - insight::check_if_installed(c("pwr", "sjmisc")) + insight::check_if_installed("pwr") # .Deprecated("effectsize::effectsize()", package = "effectsize") @@ -51,7 +51,7 @@ anova_stats <- function(model, digits = 3) { data.frame(etasq, partial.etasq, omegasq, partial.omegasq, epsilonsq, cohens.f), data.frame(etasq = NA, partial.etasq = NA, omegasq = NA, partial.omegasq = NA, epsilonsq = NA, cohens.f = NA) ) - anov_stat <- sjmisc::add_columns(anov_stat, aov.sum) + anov_stat <- cbind(anov_stat, data.frame(aov.sum)) # get nr of terms nt <- nrow(anov_stat) - 1 @@ -71,8 +71,14 @@ anova_stats <- function(model, digits = 3) { } ) - out <- sjmisc::add_variables(anov_stat, power = as_power) - out <- as.data.frame(sjmisc::round_num(out, digits = digits)) + out <- cbind(anov_stat, data.frame(power = as_power)) + out[] <- lapply(out, function(i) { + if (is.numeric(i)) { + round(i, digits) + } else { + i + } + }) class(out) <- c("sj_anova_stat", class(out)) out @@ -131,8 +137,14 @@ aov_stat_summary <- function(model) { colnames(aov.sum) <- c("term", "df", "sumsq", "meansq", "statistic", "p.value") # for car::Anova, the meansq-column might be missing, so add it manually - if (!obj_has_name(aov.sum, "meansq")) - aov.sum <- sjmisc::add_variables(aov.sum, meansq = aov.sum$sumsq / aov.sum$df, .after = "sumsq") + if (!obj_has_name(aov.sum, "meansq")) { + pos_sumsq <- which(colnames(aov.sum) == "sumsq") + aov.sum <- cbind( + aov.sum[1:pos_sumsq], + data.frame(meansq = aov.sum$sumsq / aov.sum$df), + aov.sum[(pos_sumsq + 1):ncol(aov.sum)] + ) + } intercept <- .which_intercept(aov.sum$term) if (length(intercept) > 0) { diff --git a/R/boot_ci.R b/R/boot_ci.R index dd8254e4..acc63ca5 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -6,57 +6,43 @@ #' replicate estimates. #' #' @param data A data frame that containts the vector with bootstrapped -#' estimates, or directly the vector (see 'Examples'). +#' estimates, or directly the vector (see 'Examples'). #' @param ci.lvl Numeric, the level of the confidence intervals. -#' @param ... Optional, unquoted names of variables with bootstrapped estimates. -#' Required, if either \code{data} is a data frame (and no vector), -#' and only selected variables from \code{data} should be processed. -#' You may also use functions like \code{:} or tidyselect's -#' \code{select_helpers()}. +#' @param Select Optional, unquoted names of variables (as character vector) +#' with bootstrapped estimates. Required, if either `data` is a data frame +#' (and no vector), and only selected variables from `data` should be processed. #' @param method Character vector, indicating if confidence intervals should be -#' based on bootstrap standard error, multiplied by the value of the -#' quantile function of the t-distribution (default), or on sample -#' quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}. -#' May be abbreviated. -#' -#' @return A data frame with either bootstrap estimate, -#' standard error, the lower and upper confidence intervals or the -#' p-value for all bootstrapped estimates. -#' -#' @details The methods require one or more vectors of bootstrap replicate estimates -#' as input. 
-#' \itemize{ -#' \item{ -#' \code{boot_est()} returns the bootstrapped estimate, simply by -#' computing the mean value of all bootstrap estimates. -#' } -#' \item{ -#' \code{boot_se()} computes the nonparametric bootstrap standard -#' error by calculating the standard deviation of the input vector. -#' } -#' \item{ -#' The mean value of the input vector and its standard error is used -#' by \code{boot_ci()} to calculate the lower and upper confidence -#' interval, assuming a t-distribution of bootstrap estimate replicates -#' (for \code{method = "dist"}, the default, which is -#' \code{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for -#' \code{method = "quantile"}, 95\% sample quantiles are used to compute -#' the confidence intervals (\code{quantile(x, probs = c(.025, .975))}). -#' Use \code{ci.lvl} to change the level for the confidence interval. -#' } -#' \item{ -#' P-values from \code{boot_p()} are also based on t-statistics, -#' assuming normal distribution. -#' } -#' } +#' based on bootstrap standard error, multiplied by the value of the quantile +#' function of the t-distribution (default), or on sample quantiles of the +#' bootstrapped values. See 'Details' in `boot_ci()`. May be abbreviated. +#' +#' @return A data frame with either bootstrap estimate, standard error, the +#' lower and upper confidence intervals or the p-value for all bootstrapped +#' estimates. +#' +#' @details The methods require one or more vectors of bootstrap replicate +#' estimates as input. +#' +#' - `boot_est()`: returns the bootstrapped estimate, simply by computing +#' the mean value of all bootstrap estimates. +#' - `boot_se()`: computes the nonparametric bootstrap standard error by +#' calculating the standard deviation of the input vector. +#' - The mean value of the input vector and its standard error is used by +#' `boot_ci()` to calculate the lower and upper confidence interval, +#' assuming a t-distribution of bootstrap estimate replicates (for +#' `method = "dist"`, the default, which is +#' `mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)`); for +#' `method = "quantile"`, 95\% sample quantiles are used to compute the +#' confidence intervals (`quantile(x, probs = c(0.025, 0.975))`). Use +#' `ci.lvl` to change the level for the confidence interval. +#' - P-values from `boot_p()` are also based on t-statistics, assuming normal +#' distribution. #' #' @references Carpenter J, Bithell J. Bootstrap confdence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164 #' -#' @seealso \code{\link{bootstrap}} to generate nonparametric bootstrap samples. +#' @seealso []`bootstrap()`] to generate nonparametric bootstrap samples. 
#' -#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE) -#' library(dplyr) -#' library(purrr) +#' @examplesIf getRversion() >= "4.2.0" #' data(efc) #' bs <- bootstrap(efc, 100) #' @@ -88,60 +74,17 @@ #' mean(bs$dependency) #' boot_est(bs$dependency) #' coef(fit)[2] -#' -#' -#' # bootstrap() and boot_ci() work fine within pipe-chains -#' efc |> -#' bootstrap(100) |> -#' mutate( -#' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)), -#' dependency = map_dbl(models, ~coef(.x)[2]) -#' ) |> -#' boot_ci(dependency) -#' -#' # check p-value -#' boot_p(bs$gender) -#' summary(fit)$coefficients[3, ] -#' -#' \dontrun{ -#' # 'spread_coef()' from the 'sjmisc'-package makes it easy to generate -#' # bootstrapped statistics like confidence intervals or p-values -#' library(dplyr) -#' library(sjmisc) -#' efc |> -#' # generate bootstrap replicates -#' bootstrap(100) |> -#' # apply lm to all bootstrapped data sets -#' mutate( -#' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) -#' ) |> -#' # spread model coefficient for all 100 models -#' spread_coef(models) |> -#' # compute the CI for all bootstrapped model coefficients -#' boot_ci(e42dep, c161sex, c172code) -#' -#' # or... -#' efc |> -#' # generate bootstrap replicates -#' bootstrap(100) |> -#' # apply lm to all bootstrapped data sets -#' mutate( -#' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) -#' ) |> -#' # spread model coefficient for all 100 models -#' spread_coef(models, append = FALSE) |> -#' # compute the CI for all bootstrapped model coefficients -#' boot_ci()} #' @export boot_ci <- function(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95) { # match arguments method <- match.arg(method) # evaluate arguments, generate data - if (is.null(select)) + if (is.null(select)) { .dat <- as.data.frame(data) - else + } else { .dat <- data[select] + } # compute confidence intervals for all values transform_boot_result(lapply(.dat, function(x) { @@ -165,11 +108,13 @@ boot_ci <- function(data, select = NULL, method = c("dist", "quantile"), ci.lvl #' @rdname boot_ci #' @export -boot_se <- function(data, ...) { - insight::check_if_installed("dplyr") +boot_se <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # get bootstrap standard error @@ -182,11 +127,13 @@ boot_se <- function(data, ...) { #' @rdname boot_ci #' @export -boot_p <- function(data, ...) { - insight::check_if_installed("dplyr") +boot_p <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # compute t-statistic @@ -201,11 +148,13 @@ boot_p <- function(data, ...) { #' @rdname boot_ci #' @export -boot_est <- function(data, ...) 
{ - insight::check_if_installed("dplyr") +boot_est <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute mean for all values (= bootstrapped estimate) transform_boot_result(lapply(.dat, function(x) { estimate <- mean(x, na.rm = TRUE) @@ -215,17 +164,7 @@ boot_est <- function(data, ...) { } - transform_boot_result <- function(res) { # transform a bit, so we have each estimate in a row, and ci's as columns... rownames_as_column(as.data.frame(t(as.data.frame(res))), var = "term") } - - -get_dot_data <- function(x, qs) { - insight::check_if_installed("dplyr") - if (sjmisc::is_empty(qs)) - as.data.frame(x) - else - suppressWarnings(dplyr::select(x, !!!qs)) -} diff --git a/R/gmd.R b/R/gmd.R index 98ad9c53..dcfe9678 100644 --- a/R/gmd.R +++ b/R/gmd.R @@ -1,20 +1,19 @@ #' @title Gini's Mean Difference #' @name gmd -#' @description \code{gmd()} computes Gini's mean difference for a numeric vector +#' @description `gmd()` computes Gini's mean difference for a numeric vector #' or for all numeric vectors in a data frame. #' #' @param x A vector or data frame. -#' @param ... Optional, unquoted names of variables that should be selected for -#' further processing. Required, if \code{x} is a data frame (and no vector) -#' and only selected variables from \code{x} should be processed. You may also -#' use functions like \code{:} or tidyselect's \code{select_helpers()}. +#' @param select Optional, names of variables as character vector that should be +#' selected for further processing. Required, if `x` is a data frame (and no vector) +#' and only selected variables from `x` should be processed. #' #' @return For numeric vectors, Gini's mean difference. For non-numeric vectors -#' or vectors of length < 2, returns \code{NA}. +#' or vectors of length < 2, returns `NA`. #' #' @note Gini's mean difference is defined as the mean absolute difference between -#' any two distinct elements of a vector. Missing values from \code{x} are -#' silently removed. +#' any two distinct elements of a vector. Missing values from `x` are silently +#' removed. #' #' @references David HA. Gini's mean difference rediscovered. 
Biometrika 1968(55): 573-575 #' diff --git a/R/helpfunctions.R b/R/helpfunctions.R index 8a792d1d..e3f2a69f 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -17,7 +17,6 @@ is_stan_model <- function(fit) { } -#' @importFrom sjmisc str_contains get_glm_family <- function(fit) { c.f <- class(fit) @@ -34,7 +33,7 @@ get_glm_family <- function(fit) { # create logical for family binom_fam <- fitfam %in% c("binomial", "quasibinomial") poisson_fam <- fitfam %in% c("poisson", "quasipoisson") || - sjmisc::str_contains(fitfam, "negative binomial", ignore.case = TRUE) + grepl("negative binomial", fitfram, ignore.case = TRUE, fixed = TRUE) list(is_bin = binom_fam, is_pois = poisson_fam, is_logit = logit_link) } @@ -85,3 +84,8 @@ dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) ) l10n_info()[["UTF-8"]] && ((win_os && getRversion() >= "4.2") || (!win_os && getRversion() >= "4.0")) } + + +.is_pseudo_numeric <- function(x) { + (is.character(x) && !anyNA(suppressWarnings(as.numeric(stats::na.omit(x[nzchar(x, keepNA = TRUE)]))))) || (is.factor(x) && !anyNA(suppressWarnings(as.numeric(levels(x))))) # nolint +} diff --git a/R/is_prime.R b/R/is_prime.R index 213fcbb5..f5b5ecef 100644 --- a/R/is_prime.R +++ b/R/is_prime.R @@ -2,23 +2,21 @@ #' @name is_prime #' #' @description This functions checks whether a number is, or numbers in a -#' vector are prime numbers. +#' vector are prime numbers. #' #' @param x An integer, or a vector of integers. #' -#' @return \code{TRUE} for each prime number in \code{x}, \code{FALSE} otherwise. +#' @return `TRUE` for each prime number in `x`, `FALSE` otherwise. #' #' @examples #' is_prime(89) #' is_prime(15) #' is_prime(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) #' -#' @importFrom purrr map_lgl -#' @importFrom sjmisc is_float #' @export is_prime <- function(x) { - if (sjmisc::is_float(x)) - stop("`x` needs to be an integer value.", call. = F) - - purrr::map_lgl(x, ~ .x == 2L || all(.x %% 2L:max(2, floor(sqrt(.x))) != 0)) + if (is.numeric(x) && !all(x %% 1 == 0, na.rm = TRUE)) { + insight::format_error("`x` needs to be an integer value.") + } + vapply(x, function(.x) .x == 2L || all(.x %% 2L:max(2, floor(sqrt(.x))) != 0), logical(1)) } diff --git a/R/prop.R b/R/prop.R index 834d88e2..b656f5df 100644 --- a/R/prop.R +++ b/R/prop.R @@ -160,19 +160,16 @@ get_proportion <- function(x, data, weight.by, na.rm, digits) { if (!is.null(weight.by)) f <- weight(f, weights = weight.by) # get proportions - if (x.parts[2] == "==") - dummy <- f == v - else if (x.parts[2] == "!=") - dummy <- f != v - else if (x.parts[2] == "<") - dummy <- f < v - else if (x.parts[2] == ">") - dummy <- f > v - else - dummy <- f == v + dummy <- switch(x.parts[2], + "==" = f == v, + "!=" = f != v, + "<" = f < v, + ">" = f > v, + f == v + ) # remove missings? - if (na.rm) dummy <- na.omit(dummy) + if (na.rm) dummy <- stats::na.omit(dummy) # get proportion round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) @@ -184,7 +181,7 @@ get_multiple_proportion <- function(x, data, na.rm, digits) { dummy <- with(data, eval(parse(text = deparse(x)))) # remove missings? 
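A minimal standalone sketch of the switch() dispatch added just above (values made up): the comparison operator extracted from the expression picks the matching logical vector, with equality as the fallback branch.

    f <- c(2, 5, 7, 1)  # made-up vector
    v <- 4
    op <- "<"
    dummy <- switch(op,
      "==" = f == v,
      "!=" = f != v,
      "<" = f < v,
      ">" = f > v,
      f == v  # default branch: fall back to equality
    )
    dummy  # TRUE FALSE FALSE TRUE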
- if (na.rm) dummy <- na.omit(dummy) + if (na.rm) dummy <- stats::na.omit(dummy) # get proportion round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) diff --git a/R/re-exports.R b/R/re-exports.R index aea31ba3..1b5605ae 100644 --- a/R/re-exports.R +++ b/R/re-exports.R @@ -1,7 +1,3 @@ -#' @importFrom sjmisc typical_value -#' @export -sjmisc::typical_value - #' @importFrom performance mse #' @export performance::mse diff --git a/R/var_pop.R b/R/var_pop.R index a269d763..db53d6b8 100644 --- a/R/var_pop.R +++ b/R/var_pop.R @@ -24,13 +24,13 @@ #' sd_pop(efc$c12hour) #' @export var_pop <- function(x) { - insight::check_if_installed(c("sjmisc", "datawizard")) + insight::check_if_installed("datawizard") # check for categorical if (is.factor(x)) { # only allow numeric factors - if (!sjmisc::is_num_fac(x)) { - warning("`x` must be numeric vector or a factor with numeric levels.", call. = F) - return(NA) + + if (!.is_pseudo_numeric(x)) { + insight::format_error("`x` must be numeric vector or a factor with numeric levels.") } # convert factor to numeric x <- datawizard::to_numeric(x, dummy_factors = FALSE) diff --git a/R/wtd_cor.R b/R/wtd_cor.R index f167f730..19a1d7fb 100644 --- a/R/wtd_cor.R +++ b/R/wtd_cor.R @@ -51,7 +51,7 @@ weighted_correlation.formula <- function(formula, data, ci.lvl = 0.95, ...) { # get data dat <- suppressMessages(data[vars]) - dat <- na.omit(dat) + dat <- stats::na.omit(dat) xv <- dat[[vars[1]]] yv <- dat[[vars[2]]] diff --git a/R/wtd_ttest.R b/R/wtd_ttest.R index 688cdaa8..38652af2 100644 --- a/R/wtd_ttest.R +++ b/R/wtd_ttest.R @@ -8,8 +8,9 @@ weighted_ttest <- function(data, ...) { #' @export weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) { - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) - stop("'ci.lvl' must be a single number between 0 and 1") + if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) { + insight::format_error("'ci.lvl' must be a single number between 0 and 1") + } alternative <- match.arg(alternative) @@ -29,9 +30,9 @@ weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = # get data dat <- suppressMessages(data[vars]) - dat <- na.omit(dat) + dat <- stats::na.omit(dat) - if (sjmisc::is_empty(dat) || nrow(dat) == 1) { + if (insight::is_empty_object(dat) || nrow(dat) == 1) { insight::format_alert("Too less data to compute t-test.") return(NULL) } @@ -54,8 +55,9 @@ weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = #' @export weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) 
{ - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) - stop("'ci.lvl' must be a single number between 0 and 1") + if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) { + insight::format_error("'ci.lvl' must be a single number between 0 and 1") + } alternative <- match.arg(alternative) @@ -66,7 +68,7 @@ weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl if (is.factor(g)) grps <- levels(g) else - grps <- na.omit(sort(unique(g))) + grps <- stats::na.omit(sort(unique(g))) if (length(grps) > 2) stop("Grouping factor has more than two levels.") diff --git a/R/xtab_statistics.R b/R/xtab_statistics.R index 2342385e..4e40732d 100644 --- a/R/xtab_statistics.R +++ b/R/xtab_statistics.R @@ -125,7 +125,7 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au x2 <- gsub("\"", "", x2, fixed = TRUE) weights <- gsub("\"", "", weights, fixed = TRUE) - if (sjmisc::is_empty(weights) || weights == "NULL") + if (insight::is_empty_object(weights) || weights == "NULL") weights <- NULL else weights <- data[[weights]] diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index 4d57d310..9e98fbbb 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -9,34 +9,31 @@ \usage{ boot_ci(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95) -boot_se(data, ...) +boot_se(data, select = NULL) -boot_p(data, ...) +boot_p(data, select = NULL) -boot_est(data, ...) +boot_est(data, select = NULL) } \arguments{ \item{data}{A data frame that containts the vector with bootstrapped estimates, or directly the vector (see 'Examples').} \item{method}{Character vector, indicating if confidence intervals should be -based on bootstrap standard error, multiplied by the value of the -quantile function of the t-distribution (default), or on sample -quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}. -May be abbreviated.} +based on bootstrap standard error, multiplied by the value of the quantile +function of the t-distribution (default), or on sample quantiles of the +bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.} \item{ci.lvl}{Numeric, the level of the confidence intervals.} -\item{...}{Optional, unquoted names of variables with bootstrapped estimates. -Required, if either \code{data} is a data frame (and no vector), -and only selected variables from \code{data} should be processed. -You may also use functions like \code{:} or tidyselect's -\code{select_helpers()}.} +\item{Select}{Optional, unquoted names of variables (as character vector) +with bootstrapped estimates. Required, if either \code{data} is a data frame +(and no vector), and only selected variables from \code{data} should be processed.} } \value{ -A data frame with either bootstrap estimate, -standard error, the lower and upper confidence intervals or the -p-value for all bootstrapped estimates. +A data frame with either bootstrap estimate, standard error, the +lower and upper confidence intervals or the p-value for all bootstrapped +estimates. } \description{ Compute nonparametric bootstrap estimate, standard error, @@ -44,37 +41,27 @@ confidence intervals and p-value for a vector of bootstrap replicate estimates. } \details{ -The methods require one or more vectors of bootstrap replicate estimates -as input. +The methods require one or more vectors of bootstrap replicate +estimates as input. 
\itemize{ -\item{ -\code{boot_est()} returns the bootstrapped estimate, simply by -computing the mean value of all bootstrap estimates. -} -\item{ -\code{boot_se()} computes the nonparametric bootstrap standard -error by calculating the standard deviation of the input vector. -} -\item{ -The mean value of the input vector and its standard error is used -by \code{boot_ci()} to calculate the lower and upper confidence -interval, assuming a t-distribution of bootstrap estimate replicates -(for \code{method = "dist"}, the default, which is -\code{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for -\code{method = "quantile"}, 95\\% sample quantiles are used to compute -the confidence intervals (\code{quantile(x, probs = c(.025, .975))}). -Use \code{ci.lvl} to change the level for the confidence interval. -} -\item{ -P-values from \code{boot_p()} are also based on t-statistics, -assuming normal distribution. -} +\item \code{boot_est()}: returns the bootstrapped estimate, simply by computing +the mean value of all bootstrap estimates. +\item \code{boot_se()}: computes the nonparametric bootstrap standard error by +calculating the standard deviation of the input vector. +\item The mean value of the input vector and its standard error is used by +\code{boot_ci()} to calculate the lower and upper confidence interval, +assuming a t-distribution of bootstrap estimate replicates (for +\code{method = "dist"}, the default, which is +\verb{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for +\code{method = "quantile"}, 95\\% sample quantiles are used to compute the +confidence intervals (\code{quantile(x, probs = c(0.025, 0.975))}). Use +\code{ci.lvl} to change the level for the confidence interval. +\item P-values from \code{boot_p()} are also based on t-statistics, assuming normal +distribution. } } \examples{ -\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} -library(dplyr) -library(purrr) +\dontshow{if (getRversion() >= "4.2.0") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) bs <- bootstrap(efc, 100) @@ -106,55 +93,11 @@ boot_ci(bs, c("dependency", "gender"), method = "q") mean(bs$dependency) boot_est(bs$dependency) coef(fit)[2] - - -# bootstrap() and boot_ci() work fine within pipe-chains -efc |> - bootstrap(100) |> - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)), - dependency = map_dbl(models, ~coef(.x)[2]) - ) |> - boot_ci(dependency) - -# check p-value -boot_p(bs$gender) -summary(fit)$coefficients[3, ] - -\dontrun{ -# 'spread_coef()' from the 'sjmisc'-package makes it easy to generate -# bootstrapped statistics like confidence intervals or p-values -library(dplyr) -library(sjmisc) -efc |> - # generate bootstrap replicates - bootstrap(100) |> - # apply lm to all bootstrapped data sets - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) |> - # spread model coefficient for all 100 models - spread_coef(models) |> - # compute the CI for all bootstrapped model coefficients - boot_ci(e42dep, c161sex, c172code) - -# or... 
-efc |> - # generate bootstrap replicates - bootstrap(100) |> - # apply lm to all bootstrapped data sets - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) |> - # spread model coefficient for all 100 models - spread_coef(models, append = FALSE) |> - # compute the CI for all bootstrapped model coefficients - boot_ci()} \dontshow{\}) # examplesIf} } \references{ Carpenter J, Bithell J. Bootstrap confdence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164 } \seealso{ -\code{\link{bootstrap}} to generate nonparametric bootstrap samples. +[]\code{bootstrap()}] to generate nonparametric bootstrap samples. } diff --git a/man/crosstable_statistics.Rd b/man/crosstable_statistics.Rd index cbea412f..7ae56212 100644 --- a/man/crosstable_statistics.Rd +++ b/man/crosstable_statistics.Rd @@ -65,10 +65,9 @@ frame including lower and upper confidence intervals.} \item{n}{Number of bootstraps to be generated.} \item{method}{Character vector, indicating if confidence intervals should be -based on bootstrap standard error, multiplied by the value of the -quantile function of the t-distribution (default), or on sample -quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}. -May be abbreviated.} +based on bootstrap standard error, multiplied by the value of the quantile +function of the t-distribution (default), or on sample quantiles of the +bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.} \item{x1}{Name of first variable that should be used to compute the contingency table. If \code{data} is a table object, this argument will be diff --git a/man/gmd.Rd b/man/gmd.Rd index f8ca77fa..7c549f8a 100644 --- a/man/gmd.Rd +++ b/man/gmd.Rd @@ -9,10 +9,9 @@ gmd(x, select = NULL) \arguments{ \item{x}{A vector or data frame.} -\item{...}{Optional, unquoted names of variables that should be selected for -further processing. Required, if \code{x} is a data frame (and no vector) -and only selected variables from \code{x} should be processed. You may also -use functions like \code{:} or tidyselect's \code{select_helpers()}.} +\item{select}{Optional, names of variables as character vector that should be +selected for further processing. Required, if \code{x} is a data frame (and no vector) +and only selected variables from \code{x} should be processed.} } \value{ For numeric vectors, Gini's mean difference. For non-numeric vectors @@ -24,8 +23,8 @@ or for all numeric vectors in a data frame. } \note{ Gini's mean difference is defined as the mean absolute difference between -any two distinct elements of a vector. Missing values from \code{x} are -silently removed. +any two distinct elements of a vector. Missing values from \code{x} are silently +removed. } \examples{ data(efc) diff --git a/man/reexports.Rd b/man/reexports.Rd index 33c21fa3..8a7b6105 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -3,7 +3,6 @@ \docType{import} \name{reexports} \alias{reexports} -\alias{typical_value} \alias{mse} \alias{rmse} \alias{ci} @@ -26,7 +25,5 @@ below to see their documentation. 
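Relating to the gmd() documentation changed in this patch: under the pairwise definition quoted there, Gini's mean difference can be checked by brute force. A sketch with made-up numbers, which should agree with gmd() up to floating-point error:

    x <- c(2, 4, 7, 8)                      # made-up data
    d <- abs(outer(x, x, "-"))              # all pairwise absolute differences
    sum(d) / (length(x) * (length(x) - 1))  # mean over ordered pairs i != j, here 3.5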
\item{insight}{\code{\link[insight]{link_inverse}}} \item{performance}{\code{\link[performance:performance_mse]{mse}}, \code{\link[performance:performance_rmse]{rmse}}} - - \item{sjmisc}{\code{\link[sjmisc]{typical_value}}} }} diff --git a/tests/testthat/test-autoprior.R b/tests/testthat/test-autoprior.R index 17f1e459..33431018 100644 --- a/tests/testthat/test-autoprior.R +++ b/tests/testthat/test-autoprior.R @@ -5,7 +5,6 @@ if (.runThisTest) { if (suppressWarnings( require("testthat") && require("sjstats") && - require("sjmisc") && require("brms") )) { context("sjstats, autoprior") From 0924f25355f4f362f963f9a4973d6fca395754ac Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 10:34:43 +0200 Subject: [PATCH 15/82] deps --- DESCRIPTION | 4 +--- NAMESPACE | 3 --- R/se_ybar.R | 8 ++++---- R/select_helpers.R | 3 +-- R/wtd_se.R | 4 ++-- 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 58b7b27b..51af31cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,10 +23,8 @@ Imports: datawizard, effectsize, insight, - lme4, parameters, - performance, - purrr + performance Suggests: brms, broom, diff --git a/NAMESPACE b/NAMESPACE index 74ff537d..46e54f79 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -117,9 +117,6 @@ importFrom(insight,format_value) importFrom(insight,get_response) importFrom(insight,link_inverse) importFrom(insight,print_color) -importFrom(lme4,ngrps) importFrom(performance,mse) importFrom(performance,rmse) -importFrom(purrr,map) -importFrom(purrr,map_dbl) importFrom(stats,family) diff --git a/R/se_ybar.R b/R/se_ybar.R index 59335f3f..a0066fa3 100644 --- a/R/se_ybar.R +++ b/R/se_ybar.R @@ -17,8 +17,6 @@ #' fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy) #' se_ybar(fit) #' } -#' @importFrom lme4 ngrps -#' @importFrom purrr map_dbl #' @export se_ybar <- function(fit) { # get model icc @@ -31,7 +29,7 @@ se_ybar <- function(fit) { tot_var <- sum(tau.00, vars$var.residual) # get number of groups - m.cnt <- lme4::ngrps(fit) + m.cnt <- vapply(fit@flist, nlevels, 1) # compute number of observations per group (level-2-unit) obs <- round(stats::nobs(fit) / m.cnt) @@ -40,7 +38,9 @@ se_ybar <- function(fit) { icc <- tau.00 / tot_var # compute standard error of sample mean - se <- purrr::map_dbl(seq_len(length(m.cnt)), ~ sqrt((tot_var / stats::nobs(fit)) * design_effect(n = obs[.x], icc = icc[.x]))) + se <- unlist(lapply(seq_len(length(m.cnt)), function(.x) { + sqrt((tot_var / stats::nobs(fit)) * design_effect(n = obs[.x], icc = icc[.x])) + })) # give names for se, so user sees, which random effect has what impact names(se) <- names(m.cnt) diff --git a/R/select_helpers.R b/R/select_helpers.R index de7de9b2..72c01f9d 100644 --- a/R/select_helpers.R +++ b/R/select_helpers.R @@ -13,9 +13,8 @@ string_ends_with <- function(pattern, x) { grep(pattern, x, perl = TRUE) } -#' @importFrom purrr map string_one_of <- function(pattern, x) { - m <- unlist(purrr::map(pattern, ~ grep(., x, fixed = TRUE, useBytes = TRUE))) + m <- unlist(lapply(pattern, grep, x = x, fixed = TRUE, useBytes = TRUE)) x[m] } diff --git a/R/wtd_se.R b/R/wtd_se.R index 37c55e42..dde3850c 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -87,7 +87,7 @@ weighted_se <- function(x, weights = NULL) { #' @export weighted_se.data.frame <- function(x, weights = NULL) { - se_result <- purrr::map_dbl(x, ~ weighted_se_helper(.x, weights = weights)) + se_result <- vapply(x, weighted_se_helper, numeric(1), weights = weights) names(se_result) <- colnames(x) se_result @@ -95,7 +95,7 @@ 
weighted_se.data.frame <- function(x, weights = NULL) { #' @export weighted_se.matrix <- function(x, weights = NULL) { - se_result <- purrr::map_dbl(x, ~ weighted_se_helper(.x, weights = weights)) + se_result <- vapply(x, weighted_se_helper, numeric(1), weights = weights) names(se_result) <- colnames(x) se_result From 82417ca9afe91b07ba95afd4a6c4375fd54f45e1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 10:42:51 +0200 Subject: [PATCH 16/82] fix --- DESCRIPTION | 1 - R/bootstrap.R | 15 +++++-------- R/xtab_statistics.R | 53 ++++++++++++++++++++++----------------------- man/bootstrap.Rd | 15 ++++--------- 4 files changed, 35 insertions(+), 49 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 51af31cd..7cb8cb03 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,6 @@ Suggests: broom, car, coin, - dplyr, ggplot2, graphics, MASS, diff --git a/R/bootstrap.R b/R/bootstrap.R index 1f8bdbd1..444896a2 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -40,7 +40,7 @@ #' @seealso \code{\link{boot_ci}} to calculate confidence intervals from #' bootstrap samples. #' -#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE) +#' @examples #' data(efc) #' bs <- bootstrap(efc, 5) #' @@ -61,16 +61,11 @@ #' mean(as.data.frame(x)$c12hour, na.rm = TRUE) #' })) #' -#' # or as tidyverse-approach -#' library(dplyr) -#' library(purrr) -#' bs <- efc |> -#' bootstrap(100) |> -#' mutate( -#' c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) -#' ) #' # bootstrapped standard error -#' boot_se(bs, c12hour) +#' boot_se(bs, "c12hour") +#' +#' # bootstrapped CI +#' boot_ci(bs, "c12hour") #' @export bootstrap <- function(data, n, size) { if (!missing(size) && !is.null(size)) { diff --git a/R/xtab_statistics.R b/R/xtab_statistics.R index 4e40732d..19fc5abf 100644 --- a/R/xtab_statistics.R +++ b/R/xtab_statistics.R @@ -106,7 +106,6 @@ #' ) #' @export crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), weights = NULL, ...) { - insight::check_if_installed("dplyr") # match arguments statistics <- match.arg(statistics) @@ -114,7 +113,20 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au stat.html <- NULL # check if data is a table - if (!is.table(data)) { + if (is.table(data)) { + # 'data' is a table - copy to table object + tab <- data + # check if statistics are possible to compute + if (statistics %in% c("spearman", "kendall", "pearson")) { + stop( + sprintf( + "Need arguments `data`, `x1` and `x2` to compute %s-statistics.", + statistics + ), + call. = FALSE + ) + } + } else { # evaluate unquoted names x1 <- deparse(substitute(x1)) x2 <- deparse(substitute(x2)) @@ -146,19 +158,6 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au } else { tab <- table(data) } - } else { - # 'data' is a table - copy to table object - tab <- data - # check if statistics are possible to compute - if (statistics %in% c("spearman", "kendall", "pearson")) { - stop( - sprintf( - "Need arguments `data`, `x1` and `x2` to compute %s-statistics.", - statistics - ), - call. 
= FALSE - ) - } } # get expected values @@ -218,21 +217,21 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au } # compute method string - method <- dplyr::case_when( - statistics == "kendall" ~ "Kendall's tau", - statistics == "spearman" ~ "Spearman's rho", - statistics == "pearson" ~ "Pearson's r", - statistics == "cramer" ~ "Cramer's V", - statistics == "phi" ~ "Phi" + method <- ifelse(statistics == "kendall", "Kendall's tau", + ifelse(statistics == "spearman", "Spearman's rho", # nolint + ifelse(statistics == "pearson", "Pearson's r", # nolint + ifelse(statistics == "cramer", "Cramer's V", "Phi") # nolint + ) + ) ) # compute method string - method.html <- dplyr::case_when( - statistics == "kendall" ~ "Kendall's τ", - statistics == "spearman" ~ "Spearman's ρ", - statistics == "pearson" ~ "Pearson's r", - statistics == "cramer" ~ "Cramer's V", - statistics == "phi" ~ "φ" + method.html <- ifelse(statistics == "kendall", "Kendall's τ", + ifelse(statistics == "spearman", "Spearman's ρ", # nolint + ifelse(statistics == "pearson", "Pearson's r", # nolint + ifelse(statistics == "cramer", "Cramer's V", "&phi") # nolint + ) + ) ) # return result diff --git a/man/bootstrap.Rd b/man/bootstrap.Rd index b54c6396..e40b7845 100644 --- a/man/bootstrap.Rd +++ b/man/bootstrap.Rd @@ -49,7 +49,6 @@ method automatically applies whenever coercion is done because a data frame is required as input. See 'Examples' in \code{\link{boot_ci}}. } \examples{ -\dontshow{if (getRversion() >= "4.2.0" && requireNamespace("dplyr", quietly = TRUE) && requireNamespace("purrr", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) bs <- bootstrap(efc, 5) @@ -70,17 +69,11 @@ bs$c12hour <- unlist(lapply(bs$strap, function(x) { mean(as.data.frame(x)$c12hour, na.rm = TRUE) })) -# or as tidyverse-approach -library(dplyr) -library(purrr) -bs <- efc |> - bootstrap(100) |> - mutate( - c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) - ) # bootstrapped standard error -boot_se(bs, c12hour) -\dontshow{\}) # examplesIf} +boot_se(bs, "c12hour") + +# bootstrapped CI +boot_ci(bs, "c12hour") } \seealso{ \code{\link{boot_ci}} to calculate confidence intervals from From 70ce5f2d888b0918f59d499d3a0b8ab9b7f0fdcf Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 10:56:59 +0200 Subject: [PATCH 17/82] fixes --- DESCRIPTION | 3 --- NAMESPACE | 1 - R/S3-methods.R | 14 +++++++++----- R/anova_stats.R | 8 +++----- R/auto_prior.R | 22 +++++++--------------- R/boot_ci.R | 2 +- R/cramer.R | 8 ++++---- R/cv.R | 3 +-- R/find_beta.R | 9 ++++----- man/anova_stats.Rd | 4 ++-- man/auto_prior.Rd | 22 ++++++++-------------- man/boot_ci.Rd | 2 -- 12 files changed, 39 insertions(+), 59 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7cb8cb03..013843b5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,17 +27,14 @@ Imports: performance Suggests: brms, - broom, car, coin, ggplot2, - graphics, MASS, pscl, pwr, sjPlot, survey, - rstan, testthat URL: https://strengejacke.github.io/sjstats/ BugReports: https://github.com/strengejacke/sjstats/issues diff --git a/NAMESPACE b/NAMESPACE index 46e54f79..558b8f69 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,7 +114,6 @@ importFrom(datawizard,weighted_sd) importFrom(insight,export_table) importFrom(insight,format_p) importFrom(insight,format_value) -importFrom(insight,get_response) importFrom(insight,link_inverse) importFrom(insight,print_color) importFrom(performance,mse) diff --git a/R/S3-methods.R 
b/R/S3-methods.R index c7d94216..131f3d9d 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -63,8 +63,8 @@ tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { estimate = round(est, digits), irr = round(exp(est), digits), std.error = round(se, digits), - conf.low = round(exp(est - stats::qnorm(.975) * se), digits), - conf.high = round(exp(est + stats::qnorm(.975) * se), digits), + conf.low = round(exp(est - stats::qnorm(0.975) * se), digits), + conf.high = round(exp(est + stats::qnorm(0.975) * se), digits), p.value = round(2 * stats::pnorm(abs(est / se), lower.tail = FALSE), digits) ) } @@ -265,9 +265,13 @@ print.sj_resample <- function(x, ...) { else id10 <- x$id - cat("<", paste0("id's of resample [", prettyNum(nrow(x$data), big.mark = ","), " x ", - prettyNum(ncol(x$data), big.mark = ","), "]"), "> ", - paste(id10, collapse = ", "), "\n", sep = "") + cat("<", paste0( + "id's of resample [", prettyNum(nrow(x$data), big.mark = ","), " x ", + prettyNum(ncol(x$data), big.mark = ","), "]" + ), "> ", + toString(id10), "\n", + sep = "" + ) } diff --git a/R/anova_stats.R b/R/anova_stats.R index ae9c083e..28f360ae 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -15,7 +15,7 @@ #' \cr \cr #' Tippey K, Longnecker MT (2016): An Ad Hoc Method for Computing Pseudo-Effect Size for Mixed Model. #' -#' @examples +#' @examplesIf requireNamespace("car") #' # load sample data #' data(efc) #' @@ -24,9 +24,7 @@ #' c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age, #' data = efc #' ) -#' \dontrun{ #' anova_stats(car::Anova(fit, type = 2)) -#' } #' @export anova_stats <- function(model, digits = 3) { insight::check_if_installed("pwr") @@ -97,7 +95,7 @@ aov_stat <- function(model, type) { aov_stat_summary <- function(model) { - insight::check_if_installed("broom") + insight::check_if_installed("parameters") # check if we have a mixed model mm <- is_merMod(model) ori.model <- model @@ -108,7 +106,7 @@ aov_stat_summary <- function(model) { model <- stats::anova(model) # get summary table - aov.sum <- as.data.frame(broom::tidy(model)) + aov.sum <- insight::standardize_names(as.data.frame(parameters::model_parameters(model)), style = "broom") # for mixed models, add information on residuals if (mm) { diff --git a/R/auto_prior.R b/R/auto_prior.R index 7c17243f..1e036369 100644 --- a/R/auto_prior.R +++ b/R/auto_prior.R @@ -34,20 +34,16 @@ #' formula used in \code{brms::brm()} must be rewritten to something like #' \code{y ~ 0 + intercept ...}, see \code{\link[brms]{set_prior}}. 
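As a side note to the aov_stat_summary() change earlier in this patch: the parameters/insight pair used there can be tried in isolation. This sketch assumes that model_parameters() accepts an anova table and that standardize_names(style = "broom") renames the columns to broom-style names (term, statistic, p.value, and so on).

    m <- aov(mpg ~ factor(cyl) + factor(gear), data = mtcars)
    aov_tab <- insight::standardize_names(
      as.data.frame(parameters::model_parameters(stats::anova(m))),
      style = "broom"
    )
    head(aov_tab)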
#' -#' @examples -#' library(sjmisc) +#' @examplesIf requireNamespace("brms") #' data(efc) #' efc$c172code <- as.factor(efc$c172code) -#' efc$c161sex <- to_label(efc$c161sex) +#' efc$c161sex <- as.factor(efc$c161sex) #' #' mf <- formula(neg_c_7 ~ c161sex + c160age + c172code) -#' -#' if (requireNamespace("brms", quietly = TRUE)) -#' auto_prior(mf, efc, TRUE) +#' auto_prior(mf, efc, TRUE) #' #' ## compare to -#' # library(rstanarm) -#' # m <- stan_glm(mf, data = efc, chains = 2, iter = 200) +#' # m <- rstanarm::stan_glm(mf, data = efc, chains = 2, iter = 200) #' # ps <- prior_summary(m) #' # ps$prior_intercept$adjusted_scale #' # ps$prior$adjusted_scale @@ -59,17 +55,13 @@ #' # add informative priors #' mf <- formula(neg_c_7 ~ c161sex + c172code) #' -#' if (requireNamespace("brms", quietly = TRUE)) { -#' auto_prior(mf, efc, TRUE) + -#' brms::prior(normal(.1554, 40), class = "b", coef = "c160age") -#' } +#' auto_prior(mf, efc, TRUE) + +#' brms::prior(normal(.1554, 40), class = "b", coef = "c160age") #' #' # example with binary response #' efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) #' mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age) -#' -#' if (requireNamespace("brms", quietly = TRUE)) -#' auto_prior(mf, efc, FALSE) +#' auto_prior(mf, efc, FALSE) #' @export auto_prior <- function(formula, data, gaussian, locations = NULL) { insight::check_if_installed("brms") diff --git a/R/boot_ci.R b/R/boot_ci.R index acc63ca5..46450312 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -42,7 +42,7 @@ #' #' @seealso []`bootstrap()`] to generate nonparametric bootstrap samples. #' -#' @examplesIf getRversion() >= "4.2.0" +#' @examples #' data(efc) #' bs <- bootstrap(efc, 100) #' diff --git a/R/cramer.R b/R/cramer.R index a623da44..5c12251b 100644 --- a/R/cramer.R +++ b/R/cramer.R @@ -23,19 +23,19 @@ cramers_v.ftable <- function(tab, ...) { #' @rdname crosstable_statistics #' @export cramers_v.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...) { - terms <- all.vars(formula) - tab <- table(data[[terms[1]]], data[[terms[2]]]) + fterms <- all.vars(formula) + tab <- table(data[[fterms[1]]], data[[fterms[2]]]) method <- match.arg(method) if (is.null(ci.lvl) || is.na(ci.lvl)) { .cramers_v(tab) } else { - straps <- sjstats::bootstrap(data[terms], n) + straps <- sjstats::bootstrap(data[fterms], n) tables <- lapply(straps$strap, function(x) { dat <- as.data.frame(x) table(dat[[1]], dat[[2]]) }) - cramers <- sapply(tables, function(x) .cramers_v(x)) + cramers <- sapply(tables, .cramers_v) ci <- boot_ci(cramers, ci.lvl = ci.lvl, method = method) data_frame( diff --git a/R/cv.R b/R/cv.R index e21a0fc9..c68844e1 100644 --- a/R/cv.R +++ b/R/cv.R @@ -36,9 +36,8 @@ cv <- function(x, ...) 
{ } -#' @importFrom performance rmse -#' @importFrom insight get_response cv_helper <- function(x) { + insight::check_if_installed("performance") # check if we have a fitted linear model if (inherits(x, c("lm", "lmerMod", "lme", "merModLmerTest")) && !inherits(x, "glm")) { # get response diff --git a/R/find_beta.R b/R/find_beta.R index 1ae733cc..66d1529a 100644 --- a/R/find_beta.R +++ b/R/find_beta.R @@ -137,24 +137,24 @@ find_beta2 <- function(x, se, ci, n) { # need to compute proportion x <- x / n - p2 <- .95 + p2 <- 0.95 x2 <- x + bvar } # for standard errors, we assume a 68% quantile if (!missing(se)) { - p2 <- .68 + p2 <- 0.68 x2 <- x + se } # for CI, we assume a 68% quantile if (!missing(ci)) { - p2 <- .95 + p2 <- 0.95 x2 <- ci } # the probability is assumed to be the median - p1 <- .5 + p1 <- 0.5 x1 <- x find_beta(x1, p1, x2, p2) @@ -182,4 +182,3 @@ find_normal <- function(x1, p1, x2, p2) { list(mean = mw, sd = stddev) } - diff --git a/man/anova_stats.Rd b/man/anova_stats.Rd index 77f7e82f..c2811389 100644 --- a/man/anova_stats.Rd +++ b/man/anova_stats.Rd @@ -22,6 +22,7 @@ epsilon-squared statistic or Cohen's F for all terms in an anovas. and power for each term. } \examples{ +\dontshow{if (requireNamespace("car")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # load sample data data(efc) @@ -30,9 +31,8 @@ fit <- aov( c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age, data = efc ) -\dontrun{ anova_stats(car::Anova(fit, type = 2)) -} +\dontshow{\}) # examplesIf} } \references{ Levine TR, Hullett CR (2002): Eta Squared, Partial Eta Squared, and Misreporting of Effect Size in Communication Research. diff --git a/man/auto_prior.Rd b/man/auto_prior.Rd index f7701564..64047399 100644 --- a/man/auto_prior.Rd +++ b/man/auto_prior.Rd @@ -48,19 +48,16 @@ formula used in \code{brms::brm()} must be rewritten to something like \code{y ~ 0 + intercept ...}, see \code{\link[brms]{set_prior}}. } \examples{ -library(sjmisc) +\dontshow{if (requireNamespace("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) efc$c172code <- as.factor(efc$c172code) -efc$c161sex <- to_label(efc$c161sex) +efc$c161sex <- as.factor(efc$c161sex) mf <- formula(neg_c_7 ~ c161sex + c160age + c172code) - -if (requireNamespace("brms", quietly = TRUE)) - auto_prior(mf, efc, TRUE) +auto_prior(mf, efc, TRUE) ## compare to -# library(rstanarm) -# m <- stan_glm(mf, data = efc, chains = 2, iter = 200) +# m <- rstanarm::stan_glm(mf, data = efc, chains = 2, iter = 200) # ps <- prior_summary(m) # ps$prior_intercept$adjusted_scale # ps$prior$adjusted_scale @@ -72,15 +69,12 @@ if (requireNamespace("brms", quietly = TRUE)) # add informative priors mf <- formula(neg_c_7 ~ c161sex + c172code) -if (requireNamespace("brms", quietly = TRUE)) { - auto_prior(mf, efc, TRUE) + - brms::prior(normal(.1554, 40), class = "b", coef = "c160age") -} +auto_prior(mf, efc, TRUE) + + brms::prior(normal(.1554, 40), class = "b", coef = "c160age") # example with binary response efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age) - -if (requireNamespace("brms", quietly = TRUE)) - auto_prior(mf, efc, FALSE) +auto_prior(mf, efc, FALSE) +\dontshow{\}) # examplesIf} } diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index 9e98fbbb..eec0835b 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -61,7 +61,6 @@ distribution. 
} } \examples{ -\dontshow{if (getRversion() >= "4.2.0") (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) bs <- bootstrap(efc, 100) @@ -93,7 +92,6 @@ boot_ci(bs, c("dependency", "gender"), method = "q") mean(bs$dependency) boot_est(bs$dependency) coef(fit)[2] -\dontshow{\}) # examplesIf} } \references{ Carpenter J, Bithell J. Bootstrap confdence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164 From 8cfa72e32e63066d64ea4263d24fe5b33dfc3462 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:03:30 +0200 Subject: [PATCH 18/82] fixes --- DESCRIPTION | 2 +- NAMESPACE | 11 --- R/S3-methods.R | 185 --------------------------------------------- R/boot_ci.R | 2 +- R/helpfunctions.R | 2 +- R/se_ybar.R | 8 +- R/select_helpers.R | 2 +- man/boot_ci.Rd | 8 +- man/se_ybar.Rd | 8 +- 9 files changed, 15 insertions(+), 213 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 013843b5..5198098e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,10 +30,10 @@ Suggests: car, coin, ggplot2, + lme4, MASS, pscl, pwr, - sjPlot, survey, testthat URL: https://strengejacke.github.io/sjstats/ diff --git a/NAMESPACE b/NAMESPACE index 558b8f69..3e933bbc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,21 +19,15 @@ S3method(plot,sj_inequ_trend) S3method(predict,svyglm.nb) S3method(print,sj_anova_stat) S3method(print,sj_chi2gof) -S3method(print,sj_grpmean) S3method(print,sj_htest_chi) S3method(print,sj_htest_kw) S3method(print,sj_htest_mwu) -S3method(print,sj_mwu) -S3method(print,sj_outliers) S3method(print,sj_resample) S3method(print,sj_ttest) S3method(print,sj_wcor) -S3method(print,sj_wmwu) S3method(print,sj_xtab_stat) -S3method(print,sj_xtab_stat2) S3method(print,svyglm.nb) S3method(print,svyglm.zip) -S3method(print,tidy_stan) S3method(residuals,svyglm.nb) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) @@ -111,11 +105,6 @@ importFrom(bayestestR,equivalence_test) importFrom(datawizard,weighted_mean) importFrom(datawizard,weighted_median) importFrom(datawizard,weighted_sd) -importFrom(insight,export_table) -importFrom(insight,format_p) -importFrom(insight,format_value) importFrom(insight,link_inverse) -importFrom(insight,print_color) importFrom(performance,mse) importFrom(performance,rmse) -importFrom(stats,family) diff --git a/R/S3-methods.R b/R/S3-methods.R index 131f3d9d..cff58b44 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -24,7 +24,6 @@ print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { } - #' @export print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { se <- match.arg(se) @@ -47,7 +46,6 @@ print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { } - tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -70,7 +68,6 @@ tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { } - tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -92,7 +89,6 @@ tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { } - #' @export model.frame.svyglm.nb <- function(formula, ...) { pred <- attr(formula, "nb.terms", exact = TRUE) @@ -100,7 +96,6 @@ model.frame.svyglm.nb <- function(formula, ...) { } - #' @export model.frame.svyglm.zip <- function(formula, ...) { pred <- attr(formula, "zip.terms", exact = TRUE) @@ -108,28 +103,24 @@ model.frame.svyglm.zip <- function(formula, ...) 
{ } -#' @importFrom stats family #' @export family.svyglm.nb <- function(object, ...) { attr(object, "family", exact = TRUE) } - #' @export formula.svyglm.nb <- function(x, ...) { attr(x, "nb.formula", exact = TRUE) } - #' @export formula.svyglm.zip <- function(x, ...) { attr(x, "zip.formula", exact = TRUE) } - #' @export predict.svyglm.nb <- function(object, newdata = NULL, type = c("link", "response", "terms"), @@ -212,51 +203,18 @@ deviance.svyglm.nb <- function(object, ...) { } -#' @export -print.tidy_stan <- function(x, ...) { - insight::print_color("\nSummary Statistics of Stan-Model\n\n", "blue") - digits <- attr(x, "digits") - - for (i in x) { - insight::print_color(paste0("# ", attr(i, "main_title")), "blue") - cat(" ") - insight::print_color(attr(i, "sub_title"), "red") - cat("\n\n") - rem <- which(colnames(i) %in% c("Parameter", "Group", "Response", "Function")) - i <- i[, -rem] - colnames(i)[1] <- "Parameter" - i$ESS <- as.character(i$ESS) - i$pd <- sprintf("%.1f%%", 100 * i$pd) - i[] <- lapply(i, function(.j) { - if (is.numeric(.j)) .j <- sprintf("%.*f", digits, .j) - .j - }) - print.data.frame(i, quote = FALSE, row.names = FALSE) - cat("\n\n") - } -} - - -clean_term_name <- function(x) { - x <- insight::trim_ws(x) - format(x, width = max(nchar(x))) -} - - #' @export as.integer.sj_resample <- function(x, ...) { x$id } - #' @export as.data.frame.sj_resample <- function(x, ...) { x$data[x$id, , drop = FALSE] } - #' @export print.sj_resample <- function(x, ...) { n <- length(x$id) @@ -275,7 +233,6 @@ print.sj_resample <- function(x, ...) { } - #' @export plot.sj_inequ_trend <- function(x, ...) { .data <- NULL @@ -319,78 +276,6 @@ plot.sj_inequ_trend <- function(x, ...) { } -#' @export -print.sj_mwu <- function(x, ...) { - insight::print_color("\n# Mann-Whitney-U-Test\n\n", "blue") - # get data - .dat <- x$df - # print to console - for (i in seq_len(nrow(.dat))) { - # get value labels - l1 <- .dat[i, "grp1.label"] - l2 <- .dat[i, "grp2.label"] - # do we have value labels? 
- if (!is.null(l1) && !is.na(l1) %% !is.null(l2) && !is.na(l2)) { - insight::print_color( - sprintf( - "Groups %i = %s (n = %i) | %i = %s (n = %i):\n", - .dat[i, "grp1"], - l1, - .dat[i, "grp1.n"], - .dat[i, "grp2"], - l2, - .dat[i, "grp2.n"] - ), "cyan" - ) - } else { - insight::print_color( - sprintf("Groups (%i|%i), n = %i/%i:\n", - .dat[i, "grp1"], .dat[i, "grp2"], - .dat[i, "grp1.n"], .dat[i, "grp2.n"]), - "cyan" - ) - } - - cat(sprintf( - " U = %.3f, W = %.3f, %s, Z = %.3f\n", - .dat[i, "u"], .dat[i, "w"], insight::format_p(.dat[i, "p"]), .dat[i, "z"] - )) - - string_es <- "effect-size r" - string_r <- sprintf("%.3f", .dat[i, "r"]) - string_group1 <- sprintf("rank-mean(%i)", .dat[i, "grp1"]) - string_group2 <- sprintf("rank-mean(%i)", .dat[i, "grp2"]) - string_rm1 <- sprintf("%.2f", .dat[i, "rank.mean.grp1"]) - string_rm2 <- sprintf("%.2f", .dat[i, "rank.mean.grp2"]) - - space1 <- max(nchar(c(string_es, string_group1, string_group2))) - space2 <- max(nchar(c(string_r, string_rm1, string_rm2))) - - cat( - sprintf(" %*s = %*s\n", space1, string_es, space2 + 1, string_r), - sprintf(" %*s = %*s\n", space1, string_group1, space2, string_rm1), - sprintf(" %*s = %*s\n\n", space1, string_group2, space2, string_rm2) - ) - } - - # if we have more than 2 groups, also perfom kruskal-wallis-test - if (length(unique(stats::na.omit(x$data$grp))) > 2) { - insight::print_color("# Kruskal-Wallis-Test\n\n", "blue") - kw <- stats::kruskal.test(x$data$dv, x$data$grp) - cat(sprintf("chi-squared = %.3f\n", kw$statistic)) - cat(sprintf("df = %i\n", kw$parameter)) - cat(paste(insight::format_p(kw$p.value, stars = TRUE), "\n")) - } -} - - -#' @export -print.sj_outliers <- function(x, ...) { - print(x$result, ...) -} - - -#' @importFrom insight format_p #' @export print.sj_xtab_stat <- function(x, ...) { # get length of method name, to align output @@ -414,63 +299,6 @@ print.sj_xtab_stat <- function(x, ...) { } - -#' @export -print.sj_xtab_stat2 <- function(x, ...) { - # get length of method name, to align output - l <- max(nchar(c(x$stat.name, "p-value", "Observations"))) - - # headline - insight::print_color(paste0("\n# ", x$method, "\n\n"), "blue") - - # print test statistic - cat(sprintf(" %*s: %.4f\n", l, x$stat.name, x$estimate)) - cat(sprintf(" %*s: %g\n", l, "df", x$df)) - cat(sprintf(" %*s: %s\n", l, "p-value", insight::format_p(x$p.value, stars = TRUE, name = NULL))) - cat(sprintf(" %*s: %g\n", l, "Observations", x$n_obs)) -} - - - -#' @export -print.sj_grpmean <- function(x, ...) { - cat("\n") - print_grpmean(x, digits = attributes(x)$digits, ...) -} - - -#' @importFrom insight export_table print_color format_value format_p -print_grpmean <- function(x, digits = NULL, ...) 
{ - # headline - insight::print_color(sprintf( - "# Grouped Means for %s by %s\n\n", - attr(x, "dv.label", exact = TRUE), - attr(x, "grp.label", exact = TRUE) - ), "blue") - - if (is.null(digits)) { - digits <- 2 - } - - x$mean <- insight::format_value(x$mean, digits = digits) - x$std.dev <- insight::format_value(x$std.dev, digits = digits) - x$std.error <- insight::format_value(x$std.error, digits = digits) - x$p.value <- insight::format_p(x$p.value, name = NULL) - - colnames(x) <- c("Category", "Mean", "N", "SD", "SE", "p") - cat(insight::export_table(x)) - - # statistics - cat(sprintf( - "\nAnova: R2=%.3f; adj.R2=%.3f; F=%.3f; p=%.3f\n", - attr(x, "r2", exact = TRUE), - attr(x, "adj.r2", exact = TRUE), - attr(x, "fstat", exact = TRUE), - attr(x, "p.value", exact = TRUE) - )) -} - - #' @export print.sj_chi2gof <- function(x, ...) { insight::print_color("\n# Chi-squared Goodness-of-Fit Test\n\n", "blue") @@ -538,18 +366,6 @@ print.sj_ttest <- function(x, ...) { } -#' @export -print.sj_wmwu <- function(x, ...) { - insight::print_color(sprintf("\n# %s\n", x$method), "blue") - - group <- attr(x, "group.name", exact = TRUE) - xn <- attr(x, "x.name", exact = TRUE) - - insight::print_color(sprintf("\n comparison of %s by %s\n", xn, group), "cyan") - cat(sprintf(" Chisq=%.2f df=%i p-value=%.3f\n\n", x$statistic, as.integer(x$parameter), x$p.value)) -} - - #' @export print.sj_wcor <- function(x, ...) { insight::print_color(sprintf("\nWeighted %s\n\n", x$method), "blue") @@ -565,7 +381,6 @@ print.sj_wcor <- function(x, ...) { } -#' @importFrom insight export_table #' @export print.sj_anova_stat <- function(x, digits = 3, ...) { x$p.value <- insight::format_p(x$p.value, name = NULL) diff --git a/R/boot_ci.R b/R/boot_ci.R index 46450312..33b8b9f3 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -8,7 +8,7 @@ #' @param data A data frame that containts the vector with bootstrapped #' estimates, or directly the vector (see 'Examples'). #' @param ci.lvl Numeric, the level of the confidence intervals. -#' @param Select Optional, unquoted names of variables (as character vector) +#' @param select Optional, unquoted names of variables (as character vector) #' with bootstrapped estimates. Required, if either `data` is a data frame #' (and no vector), and only selected variables from `data` should be processed. #' @param method Character vector, indicating if confidence intervals should be diff --git a/R/helpfunctions.R b/R/helpfunctions.R index e3f2a69f..21d54f66 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -33,7 +33,7 @@ get_glm_family <- function(fit) { # create logical for family binom_fam <- fitfam %in% c("binomial", "quasibinomial") poisson_fam <- fitfam %in% c("poisson", "quasipoisson") || - grepl("negative binomial", fitfram, ignore.case = TRUE, fixed = TRUE) + grepl("negative binomial", fitfam, ignore.case = TRUE, fixed = TRUE) list(is_bin = binom_fam, is_pois = poisson_fam, is_logit = logit_link) } diff --git a/R/se_ybar.R b/R/se_ybar.R index a0066fa3..642dc856 100644 --- a/R/se_ybar.R +++ b/R/se_ybar.R @@ -12,11 +12,9 @@ #' #' @references Gelman A, Hill J. 2007. Data analysis using regression and multilevel/hierarchical models. 
Cambridge, New York: Cambridge University Press #' -#' @examples -#' if (require("lme4")) { -#' fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy) -#' se_ybar(fit) -#' } +#' @examplesIf require("lme4") +#' fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy) +#' se_ybar(fit) #' @export se_ybar <- function(fit) { # get model icc diff --git a/R/select_helpers.R b/R/select_helpers.R index 72c01f9d..87d6104c 100644 --- a/R/select_helpers.R +++ b/R/select_helpers.R @@ -31,5 +31,5 @@ obj_has_name <- function(x, name) { } obj_has_rownames <- function(x) { - !identical(as.character(1:nrow(x)), rownames(x)) + !identical(as.character(seq_len(nrow(x))), rownames(x)) } diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd index eec0835b..893080e8 100644 --- a/man/boot_ci.Rd +++ b/man/boot_ci.Rd @@ -19,16 +19,16 @@ boot_est(data, select = NULL) \item{data}{A data frame that containts the vector with bootstrapped estimates, or directly the vector (see 'Examples').} +\item{select}{Optional, unquoted names of variables (as character vector) +with bootstrapped estimates. Required, if either \code{data} is a data frame +(and no vector), and only selected variables from \code{data} should be processed.} + \item{method}{Character vector, indicating if confidence intervals should be based on bootstrap standard error, multiplied by the value of the quantile function of the t-distribution (default), or on sample quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.} \item{ci.lvl}{Numeric, the level of the confidence intervals.} - -\item{Select}{Optional, unquoted names of variables (as character vector) -with bootstrapped estimates. Required, if either \code{data} is a data frame -(and no vector), and only selected variables from \code{data} should be processed.} } \value{ A data frame with either bootstrap estimate, standard error, the diff --git a/man/se_ybar.Rd b/man/se_ybar.Rd index 3b976fc2..5a16bafb 100644 --- a/man/se_ybar.Rd +++ b/man/se_ybar.Rd @@ -19,10 +19,10 @@ May be used as part of the multilevel power calculation for cluster sampling (see \cite{Gelman and Hill 2007, 447ff}). } \examples{ -if (require("lme4")) { - fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy) - se_ybar(fit) -} +\dontshow{if (require("lme4")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy) +se_ybar(fit) +\dontshow{\}) # examplesIf} } \references{ Gelman A, Hill J. 2007. Data analysis using regression and multilevel/hierarchical models. Cambridge, New York: Cambridge University Press From 626492cc058806c2071495ab4365b0fcebe9bcd9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:09:13 +0200 Subject: [PATCH 19/82] fix? 
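The se_ybar() documentation touched above computes the standard error of the sample mean through a design effect. A hand-worked sketch of that calculation, with invented numbers and assuming the Kish formula deff = 1 + (n - 1) * icc is what design_effect() implements:

    icc <- 0.3        # assumed intraclass correlation
    n_per_grp <- 25   # assumed average observations per cluster
    tot_var <- 4      # assumed total variance (random intercept + residual)
    n_obs <- 500      # assumed total number of observations
    deff <- 1 + (n_per_grp - 1) * icc    # design effect (Kish)
    sqrt((tot_var / n_obs) * deff)       # SE of the sample mean, about 0.256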
--- DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5198098e..72ca8194 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,15 +16,15 @@ Description: Collection of convenient functions for common statistical computati License: GPL-3 Depends: R (>= 3.4), - utils, - stats + utils Imports: bayestestR, datawizard, effectsize, insight, parameters, - performance + performance, + stats Suggests: brms, car, From 05757c24911d63bcf11da6eef1ac88b2ea6240e1 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:19:11 +0200 Subject: [PATCH 20/82] Update prop.R --- R/prop.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/prop.R b/R/prop.R index b656f5df..6c980541 100644 --- a/R/prop.R +++ b/R/prop.R @@ -78,7 +78,8 @@ prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - .proportions(data, dots = list(...), weight.by = weights, na.rm, digits, multi_logical = FALSE) + dots <- match.call(expand.dots = FALSE)[["..."]] + .proportions(data, dots = dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) } @@ -89,7 +90,8 @@ props <- function(data, ..., na.rm = TRUE, digits = 4) { if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - .proportions(data, dots = list(...), NULL, na.rm, digits, multi_logical = TRUE) + dots <- match.call(expand.dots = FALSE)[["..."]] + .proportions(data, dots = dots, NULL, na.rm, digits, multi_logical = TRUE) } From cfd04470e9633d7424d9a9c4dcee8dc32002c51a Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:22:57 +0200 Subject: [PATCH 21/82] fix --- NAMESPACE | 1 + R/S3-methods.R | 1 + 2 files changed, 2 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 3e933bbc..652bad97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -108,3 +108,4 @@ importFrom(datawizard,weighted_sd) importFrom(insight,link_inverse) importFrom(performance,mse) importFrom(performance,rmse) +importFrom(stats,family) diff --git a/R/S3-methods.R b/R/S3-methods.R index cff58b44..568d22a6 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -103,6 +103,7 @@ model.frame.svyglm.zip <- function(formula, ...) { } +#' @importFrom stats family #' @export family.svyglm.nb <- function(object, ...) { attr(object, "family", exact = TRUE) From fbba58095629f6b3cdc5922cf1584c04492f0ac9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:33:51 +0200 Subject: [PATCH 22/82] fix --- R/wtd_se.R | 61 +++++++++++++++++----------------------------- man/weighted_se.Rd | 44 +++++++++------------------------ 2 files changed, 33 insertions(+), 72 deletions(-) diff --git a/R/wtd_se.R b/R/wtd_se.R index dde3850c..bf6265bf 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -1,60 +1,43 @@ #' @title Weighted statistics for tests and variables #' @name weighted_se -#' @description \strong{Weighted statistics for variables} -#' \cr \cr -#' \code{weighted_sd()}, \code{weighted_se()}, \code{weighted_mean()} and \code{weighted_median()} -#' compute weighted standard deviation, standard error, mean or median for a -#' variable or for all variables of a data frame. \code{survey_median()} computes the -#' median for a variable in a survey-design (see \code{\link[survey]{svydesign}}). -#' \code{weighted_correlation()} computes a weighted correlation for a two-sided alternative -#' hypothesis. 
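Patch 20 above moves prop() and props() from list(...) to match.call(); the point is that the dots are captured unevaluated. A minimal sketch with a hypothetical function f and made-up expressions:

    f <- function(data, ...) match.call(expand.dots = FALSE)[["..."]]
    f(mtcars, am == 1, cyl > 4)
    # returns the unevaluated calls `am == 1` and `cyl > 4`,
    # which can later be evaluated inside `data`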
-#' \cr \cr -#' \strong{Weighted tests} -#' \cr \cr -#' \code{weighted_ttest()} computes a weighted t-test, while \code{weighted_mannwhitney()} -#' computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test -#' (for more than two groups). `weighted_ranktest()` is an alias for `weighted_mannwhitney()`. -#' \code{weighted_chisqtest()} computes a weighted Chi-squared test for contingency tables. +#' @description +#' `weighted_se()` computes weighted standard errors of a variable or for +#' all variables of a data frame. `survey_median()` computes the median for +#' a variable in a survey-design (see [`survey::svydesign()]`). +#' `weighted_correlation()` computes a weighted correlation for a two-sided +#' alternative hypothesis. #' -#' @param x (Numeric) vector or a data frame. For \code{survey_median()}, \code{weighted_ttest()}, -#' \code{weighted_mannwhitney()} and \code{weighted_chisqtest()} the bare (unquoted) variable -#' name, or a character vector with the variable name. +#' @param x (Numeric) vector or a data frame. For `survey_median()` or `weighted_ttest()`, +#' the bare (unquoted) variable name, or a character vector with the variable name. #' @param weights Bare (unquoted) variable name, or a character vector with -#' the variable name of the numeric vector of weights. If \code{weights = NULL}, -#' unweighted statistic is reported. +#' the variable name of the numeric vector of weights. If `weights = NULL`, +#' unweighted statistic is reported. #' @param data A data frame. -#' @param formula A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a -#' numeric variable giving the data values and \code{rhs1} a factor with two -#' levels giving the corresponding groups and \code{rhs2} a variable with weights. +#' @param formula A formula of the form `lhs ~ rhs1 + rhs2` where `lhs` is a +#' numeric variable giving the data values and `rhs1` a factor with two +#' levels giving the corresponding groups and `rhs2` a variable with weights. #' @param y Optional, bare (unquoted) variable name, or a character vector with -#' the variable name. +#' the variable name. #' @param mu A number indicating the true value of the mean (or difference in -#' means if you are performing a two sample test). +#' means if you are performing a two sample test). #' @param ci.lvl Confidence level of the interval. #' @param alternative A character string specifying the alternative hypothesis, -#' must be one of \code{"two.sided"} (default), \code{"greater"} or -#' \code{"less"}. You can specify just the initial letter. +#' must be one of `"two.sided"` (default), `"greater"` or `"less"`. You can +#' specify just the initial letter. #' @param paired Logical, whether to compute a paired t-test. -#' @param ... For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used. -#' For \code{weighted_chisqtest()}, further arguments passed down to -#' \code{\link[stats]{chisq.test}}. +#' @param ... Currently not used. #' #' @inheritParams svyglm.nb #' #' @return The weighted (test) statistic. #' -#' @note \code{weighted_chisq()} is a convenient wrapper for \code{\link{crosstable_statistics}}. -#' For a weighted one-way Anova, use \code{means_by_group()} with -#' \code{weights}-argument. +#' @note `weighted_chisq()` is a convenient wrapper for `\link{crosstable_statistics`}. +#' For a weighted one-way Anova, use `means_by_group()` with +#' `weights`-argument. #' \cr \cr -#' \code{weighted_ttest()} assumes unequal variance between the two groups. 
+#' `weighted_ttest()` assumes unequal variance between the two groups. #' #' @examples -#' # weighted sd and se ---- -#' weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100)) -#' -#' data(efc) -#' weighted_sd(efc[, 1:3], runif(n = nrow(efc))) #' weighted_se(efc[, 1:3], runif(n = nrow(efc))) #' #' # survey_median ---- diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index 22c2ee6e..806da87a 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -47,18 +47,15 @@ weighted_ttest(data, ...) ) } \arguments{ -\item{x}{(Numeric) vector or a data frame. For \code{survey_median()}, \code{weighted_ttest()}, -\code{weighted_mannwhitney()} and \code{weighted_chisqtest()} the bare (unquoted) variable -name, or a character vector with the variable name.} +\item{x}{(Numeric) vector or a data frame. For \code{survey_median()} or \code{weighted_ttest()}, +the bare (unquoted) variable name, or a character vector with the variable name.} \item{design}{An object of class \code{\link[survey]{svydesign}}, providing a specification of the survey design.} \item{data}{A data frame.} -\item{...}{For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used. -For \code{weighted_chisqtest()}, further arguments passed down to -\code{\link[stats]{chisq.test}}.} +\item{...}{Currently not used.} \item{y}{Optional, bare (unquoted) variable name, or a character vector with the variable name.} @@ -79,42 +76,23 @@ means if you are performing a two sample test).} \item{paired}{Logical, whether to compute a paired t-test.} \item{alternative}{A character string specifying the alternative hypothesis, -must be one of \code{"two.sided"} (default), \code{"greater"} or -\code{"less"}. You can specify just the initial letter.} +must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. You can +specify just the initial letter.} } \value{ The weighted (test) statistic. } \description{ -\strong{Weighted statistics for variables} -\cr \cr -\code{weighted_sd()}, \code{weighted_se()}, \code{weighted_mean()} and \code{weighted_median()} -compute weighted standard deviation, standard error, mean or median for a -variable or for all variables of a data frame. \code{survey_median()} computes the -median for a variable in a survey-design (see \code{\link[survey]{svydesign}}). -\code{weighted_correlation()} computes a weighted correlation for a two-sided alternative -hypothesis. -\cr \cr -\strong{Weighted tests} -\cr \cr -\code{weighted_ttest()} computes a weighted t-test, while \code{weighted_mannwhitney()} -computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test -(for more than two groups). \code{weighted_ranktest()} is an alias for \code{weighted_mannwhitney()}. -\code{weighted_chisqtest()} computes a weighted Chi-squared test for contingency tables. +\code{weighted_se()} computes weighted standard errors of a variable or for +all variables of a data frame. \code{survey_median()} computes the median for +a variable in a survey-design (see [\verb{survey::svydesign()]}). +\code{weighted_correlation()} computes a weighted correlation for a two-sided +alternative hypothesis. } \note{ -\code{weighted_chisq()} is a convenient wrapper for \code{\link{crosstable_statistics}}. -For a weighted one-way Anova, use \code{means_by_group()} with -\code{weights}-argument. -\cr \cr -\code{weighted_ttest()} assumes unequal variance between the two groups. +\code{weighted_chisq()} is a convenient wrapper for \verb{\link{crosstable_statistics`}. 
For a weighted one-way Anova, use }means_by_group()\code{with}weights\verb{-argument. \\cr \\cr }weighted_ttest()` assumes unequal variance between the two groups. } \examples{ -# weighted sd and se ---- -weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100)) - -data(efc) -weighted_sd(efc[, 1:3], runif(n = nrow(efc))) weighted_se(efc[, 1:3], runif(n = nrow(efc))) # survey_median ---- From cc4449bd919b9f7fc4098a88d72dd1a0bb47bfb9 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:36:16 +0200 Subject: [PATCH 23/82] fix warning --- R/svyglmzip.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/svyglmzip.R b/R/svyglmzip.R index a56c4e5c..ba6dd5f8 100644 --- a/R/svyglmzip.R +++ b/R/svyglmzip.R @@ -56,7 +56,7 @@ svyglm.zip <- function(formula, design, ...) { design <- stats::update(design, scaled.weights = dw / mean(dw, na.rm = TRUE)) # fit ZIP model, with scaled design weights - mod <- pscl::zeroinfl(formula, data = stats::model.frame(design), weights = scaled.weights, ...) + mod <- suppressWarnings(pscl::zeroinfl(formula, data = stats::model.frame(design), weights = scaled.weights, ...)) ff <- insight::find_formula(mod) # fit survey model, using maximum likelihood estimation From 984900481235423ea6450c7a375cd68993e951e2 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 11:41:39 +0200 Subject: [PATCH 24/82] fix --- NAMESPACE | 4 ++-- R/wtd_se.R | 7 +------ man/svyglm.nb.Rd | 2 +- man/svyglm.zip.Rd | 2 +- man/weighted_se.Rd | 4 +--- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 652bad97..433968b4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,8 @@ S3method(print,sj_xtab_stat) S3method(print,svyglm.nb) S3method(print,svyglm.zip) S3method(residuals,svyglm.nb) +S3method(svyglm,nb) +S3method(svyglm,zip) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) @@ -87,8 +89,6 @@ export(se) export(se_ybar) export(smpsize_lmm) export(survey_median) -export(svyglm.nb) -export(svyglm.zip) export(table_values) export(var_pop) export(weight) diff --git a/R/wtd_se.R b/R/wtd_se.R index bf6265bf..c87203bc 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -31,13 +31,8 @@ #' #' @return The weighted (test) statistic. #' -#' @note `weighted_chisq()` is a convenient wrapper for `\link{crosstable_statistics`}. -#' For a weighted one-way Anova, use `means_by_group()` with -#' `weights`-argument. -#' \cr \cr -#' `weighted_ttest()` assumes unequal variance between the two groups. -#' #' @examples +#' data(efc) #' weighted_se(efc[, 1:3], runif(n = nrow(efc))) #' #' # survey_median ---- diff --git a/man/svyglm.nb.Rd b/man/svyglm.nb.Rd index 42b3c141..768f9ae5 100644 --- a/man/svyglm.nb.Rd +++ b/man/svyglm.nb.Rd @@ -4,7 +4,7 @@ \alias{svyglm.nb} \title{Survey-weighted negative binomial generalised linear model} \usage{ -svyglm.nb(formula, design, ...) +\method{svyglm}{nb}(formula, design, ...) } \arguments{ \item{formula}{An object of class \code{formula}, i.e. a symbolic description diff --git a/man/svyglm.zip.Rd b/man/svyglm.zip.Rd index 95c201d0..0756a015 100644 --- a/man/svyglm.zip.Rd +++ b/man/svyglm.zip.Rd @@ -4,7 +4,7 @@ \alias{svyglm.zip} \title{Survey-weighted zero-inflated Poisson model} \usage{ -svyglm.zip(formula, design, ...) +\method{svyglm}{zip}(formula, design, ...) } \arguments{ \item{formula}{An object of class \code{formula}, i.e. 
a symbolic description diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index 806da87a..ca87d91b 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -89,10 +89,8 @@ a variable in a survey-design (see [\verb{survey::svydesign()]}). \code{weighted_correlation()} computes a weighted correlation for a two-sided alternative hypothesis. } -\note{ -\code{weighted_chisq()} is a convenient wrapper for \verb{\link{crosstable_statistics`}. For a weighted one-way Anova, use }means_by_group()\code{with}weights\verb{-argument. \\cr \\cr }weighted_ttest()` assumes unequal variance between the two groups. -} \examples{ +data(efc) weighted_se(efc[, 1:3], runif(n = nrow(efc))) # survey_median ---- From 512278652abf9cfde35de5d74df368a04633fc9c Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 13:01:46 +0200 Subject: [PATCH 25/82] draft --- NAMESPACE | 2 + R/helpfunctions.R | 82 +++++++---- R/mann_whitney_test.R | 64 +-------- R/t_test.R | 299 +++++++++++++++++++++++++++++++++++++++ R/wtd_ttest.R | 4 +- man/mann_whitney_test.Rd | 8 +- man/t_test.Rd | 64 +++++++++ 7 files changed, 427 insertions(+), 96 deletions(-) create mode 100644 R/t_test.R create mode 100644 man/t_test.Rd diff --git a/NAMESPACE b/NAMESPACE index 433968b4..8bfe01ac 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -22,6 +22,7 @@ S3method(print,sj_chi2gof) S3method(print,sj_htest_chi) S3method(print,sj_htest_kw) S3method(print,sj_htest_mwu) +S3method(print,sj_htest_t) S3method(print,sj_resample) S3method(print,sj_ttest) S3method(print,sj_wcor) @@ -89,6 +90,7 @@ export(se) export(se_ybar) export(smpsize_lmm) export(survey_median) +export(t_test) export(table_values) export(var_pop) export(weight) diff --git a/R/helpfunctions.R b/R/helpfunctions.R index 21d54f66..134ad328 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -12,36 +12,6 @@ is_merMod <- function(fit) { } -is_stan_model <- function(fit) { - inherits(fit, c("stanreg", "stanfit", "brmsfit")) -} - - -get_glm_family <- function(fit) { - c.f <- class(fit) - - # do we have glm? if so, get link family. 
make exceptions - # for specific models that don't have family function - if (any(c.f %in% c("lme", "plm"))) { - fitfam <- "" - logit_link <- FALSE - } else { - fitfam <- stats::family(fit)$family - logit_link <- stats::family(fit)$link == "logit" - } - - # create logical for family - binom_fam <- fitfam %in% c("binomial", "quasibinomial") - poisson_fam <- fitfam %in% c("poisson", "quasipoisson") || - grepl("negative binomial", fitfam, ignore.case = TRUE, fixed = TRUE) - - list(is_bin = binom_fam, is_pois = poisson_fam, is_logit = logit_link) -} - -# return names of objects passed as ellipses argument -dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) - - .compact_character <- function(x) { x[!sapply(x, function(i) is.null(i) || !nzchar(i, keepNA = TRUE) || is.na(i) || any(i == "NULL", na.rm = TRUE))] } @@ -89,3 +59,55 @@ dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) .is_pseudo_numeric <- function(x) { (is.character(x) && !anyNA(suppressWarnings(as.numeric(stats::na.omit(x[nzchar(x, keepNA = TRUE)]))))) || (is.factor(x) && !anyNA(suppressWarnings(as.numeric(levels(x))))) # nolint } + + +.misspelled_string <- function(source, searchterm, default_message = NULL) { + if (is.null(searchterm) || length(searchterm) < 1) { + return(default_message) + } + # used for many matches + more_found <- "" + # init default + msg <- "" + # remove matching strings + same <- intersect(source, searchterm) + searchterm <- setdiff(searchterm, same) + source <- setdiff(source, same) + # guess the misspelled string + possible_strings <- unlist(lapply(searchterm, function(s) { + source[.fuzzy_grep(source, s)] # nolint + }), use.names = FALSE) + if (length(possible_strings)) { + msg <- "Did you mean " + if (length(possible_strings) > 1) { + # make sure we don't print dozens of alternatives for larger data frames + if (length(possible_strings) > 5) { + more_found <- sprintf( + " We even found %i more possible matches, not shown here.", + length(possible_strings) - 5 + ) + possible_strings <- possible_strings[1:5] + } + msg <- paste0(msg, "one of ", toString(paste0("\"", possible_strings, "\""))) + } else { + msg <- paste0(msg, "\"", possible_strings, "\"") + } + msg <- paste0(msg, "?", more_found) + } else { + msg <- default_message + } + # no double white space + insight::trim_ws(msg) +} + + +.fuzzy_grep <- function(x, pattern, precision = NULL) { + if (is.null(precision)) { + precision <- round(nchar(pattern) / 3) + } + if (precision > nchar(pattern)) { + return(NULL) + } + p <- sprintf("(%s){~%i}", pattern, precision) + grep(pattern = p, x = x, ignore.case = FALSE) +} diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index d16f1788..67ebf83a 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -1,9 +1,9 @@ -#' @title Mann-Whitney-Test +#' @title Mann-Whitney test #' @name mann_whitney_test -#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank +#' @description This function performs a Mann-Whitney test (or Wilcoxon rank #' sum test for _unpaired_ samples. #' -#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +#' A Mann-Whitney test is a non-parametric test for the null hypothesis that two #' independent samples have identical continuous distributions. It can be used #' when the two continuous variables are not normally distributed. #' @@ -42,7 +42,7 @@ #' #' @examples #' data(efc) -#' # Mann-Whitney-U-Tests for elder's age by elder's sex. 
+#' # Mann-Whitney-U tests for elder's age by elder's sex. #' mann_whitney_test(efc, "e17age", by = "e16sex") #' #' # when data is in wide-format, specify all relevant continuous @@ -238,67 +238,11 @@ mann_whitney_test <- function(data, # helper ---------------------------------------------------------------------- -.misspelled_string <- function(source, searchterm, default_message = NULL) { - if (is.null(searchterm) || length(searchterm) < 1) { - return(default_message) - } - # used for many matches - more_found <- "" - # init default - msg <- "" - # remove matching strings - same <- intersect(source, searchterm) - searchterm <- setdiff(searchterm, same) - source <- setdiff(source, same) - # guess the misspelled string - possible_strings <- unlist(lapply(searchterm, function(s) { - source[.fuzzy_grep(source, s)] # nolint - }), use.names = FALSE) - if (length(possible_strings)) { - msg <- "Did you mean " - if (length(possible_strings) > 1) { - # make sure we don't print dozens of alternatives for larger data frames - if (length(possible_strings) > 5) { - more_found <- sprintf( - " We even found %i more possible matches, not shown here.", - length(possible_strings) - 5 - ) - possible_strings <- possible_strings[1:5] - } - msg <- paste0(msg, "one of ", toString(paste0("\"", possible_strings, "\""))) - } else { - msg <- paste0(msg, "\"", possible_strings, "\"") - } - msg <- paste0(msg, "?", more_found) - } else { - msg <- default_message - } - # no double white space - insight::trim_ws(msg) -} - - -.fuzzy_grep <- function(x, pattern, precision = NULL) { - if (is.null(precision)) { - precision <- round(nchar(pattern) / 3) - } - if (precision > nchar(pattern)) { - return(NULL) - } - p <- sprintf("(%s){~%i}", pattern, precision) - grep(pattern = p, x = x, ignore.case = FALSE) -} - - .sanitize_htest_input <- function(data, select, by, weights) { # check if arguments are NULL if (is.null(select)) { insight::format_error("Argument `select` is missing.") } - # `by` is only allowed to be NULL if `select` specifies more than one variable - if (is.null(by) && length(select) == 1) { - insight::format_error("Arguments `by` is missing.") - } # check if arguments have correct length or are of correct type if (!is.character(select)) { diff --git a/R/t_test.R b/R/t_test.R new file mode 100644 index 00000000..d20e86ad --- /dev/null +++ b/R/t_test.R @@ -0,0 +1,299 @@ +#' @title Student's t test +#' @name t_test +#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank +#' sum test for _unpaired_ samples. +#' +#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +#' independent samples have identical continuous distributions. It can be used +#' when the two continuous variables are not normally distributed. +#' +#' @param data A data frame. +#' @param select Name of the dependent variable (as string) to be used for the +#' test. `select` can also be a character vector, specifying the names of +#' multiple continuous variables. In this case, `by` is ignored and variables +#' specified in `select` are used to compute the test. This can be useful if +#' the data is in wide-format and no grouping variable is available. +#' @param by Name of the grouping variable to be used for the test. If `by` is +#' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if +#' `probabilities` is provided, `by` must be `NULL`. +#' @param weights Name of an (optional) weighting variable to be used for the test. 
+#' @param alternative A character string specifying the alternative hypothesis, +#' must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test()`. +#' +#' @return A data frame with test results. The function returns p and Z-values +#' as well as effect size r and group-rank-means. +#' +#' @examples +#' data(efc) +#' # Mann-Whitney-U-Tests for elder's age by elder's sex. +#' t_test(efc, "e17age", by = "e16sex") +#' +#' # when data is in wide-format, specify all relevant continuous +#' # variables in `select` and omit `by` +#' set.seed(123) +#' wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) +#' t_test(wide_data, select = c("scale1", "scale2")) +#' +#' # same as if we had data in long format, with grouping variable +#' long_data <- data.frame( +#' scales = c(wide_data$scale1, wide_data$scale2), +#' groups = rep(c("A", "B"), each = 20) +#' ) +#' t_test(long_data, select = "scales", by = "groups") +#' @export +t_test <- function(data, + select = NULL, + by = NULL, + weights = NULL, + paired = FALSE, + mu = 0, + alternative = "two.sided") { + insight::check_if_installed("datawizard") + alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) + + # sanity checks + .sanitize_htest_input(data, select, by, weights) + + # does select indicate more than one variable? + if (length(select) > 1) { + # sanity check - may only specify two variable names + if (length(select) > 2) { + insight::format_error("You may only specify two variables for Student's t test.") + } + if (!is.null(by)) { + insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") + } + # we convert the data into long format, and create a grouping variable + data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") + by <- select[2] + select <- select[1] + # after converting to long, we have the "grouping" variable first in the data + colnames(data) <- c(by, select) + } + + # get data + dv <- data[[select]] + + # for two-sample t-test... 
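# Small, self-contained illustration of the reshaping step above (toy data,
# invented for this sketch, not taken from the package), assuming `datawizard`
# is installed: two wide columns are stacked into a names/values pair that the
# two-sample branch then treats as grouping variable and outcome.
wide <- data.frame(scale1 = c(1, 2, 3), scale2 = c(4, 5, 6))
datawizard::data_to_long(wide[c("scale1", "scale2")], names_to = "group", values_to = "scale")
# the "group" column ("scale1"/"scale2") comes first, followed by the stacked values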
+ if (!is.null(by)) { + grp <- data[[by]] + # coerce to factor + grp <- datawizard::to_factor(grp) + # only two groups allowed + if (insight::n_unique(grp) > 2) { + insight::format_error("Only two groups are allowed for Student's t test.") # nolint + } + # value labels + group_labels <- names(attr(data[[by]], "labels", exact = TRUE)) + if (is.null(group_labels)) { + group_labels <- levels(droplevels(grp)) + } + data_name <- paste(select, "by", by) + } else { + group_labels <- select + data_name <- select + } + + if (is.null(weights)) { + .calculate_ttest(dv, grp, mu, paired, alternative, group_labels, data_name) + } else { + .calculate_weighted_ttest(dv, grp, mu, paired, alternative, data[[weights]], group_labels, data_name) + } +} + + +# Mann-Whitney-Test for two groups -------------------------------------------- + +.calculate_ttest <- function(dv, grp, mu, alternative, group_labels, data_name) { + insight::check_if_installed("effectsize") + # prepare data + if (is.null(grp)) { + tdat <- data.frame(dv) + t_formula <- as.formula("dv ~ 1") + } else { + tdat <- data.frame(dv, grp) + t_formula <- as.formula("dv ~ grp") + } + # perfom wilcox test + htest <- stats::t.test( + t_formula, + data = tdat, + alternative = alternative, + mu = mu, + paired = paired + ) + test_statistic <- htest$statistic + if (nrow(tdat) > 20) { + effect_size <- stats::setNames( + effectsize::cohens_d( + t_formula, + data = tdat, + alternative = alternative, + mu = mu, + paired = paired + )$Cohens_d, + "Cohens_d" + ) + } else { + effect_size <- stats::setNames( + effectsize::hedges_g( + t_formula, + data = tdat, + alternative = alternative, + mu = mu, + paired = paired + )$Hedges_g, + "Hedges_g" + ) + } + + # return result + out <- data.frame( + data = data_name, + statistic_name = "t", + statistic = test_statistic, + effect_size_name = names(effect_size), + effect_size = as.numeric(effect_size), + p = as.numeric(htest$p.value), + df = as.numeric(htest$parameter), + method = htest$method, + alternative = alternative, + stringsAsFactors = FALSE + ) + class(out) <- c("sj_htest_t", "data.frame") + attr(out, "means") <- stats::setNames(htest$estimate, c("Mean Group 1", "Mean Group 2")) + attr(out, "n_groups") <- stats::setNames( + c(as.numeric(table(grp))), + c("N Group 1", "N Group 2") + ) + attr(out, "paired") <- isTRUE(paired) + attr(out, "weighted") <- FALSE + out +} + + +# Weighted Mann-Whitney-Test for two groups ---------------------------------- + +.calculate_weighted_ttest <- function(dv, grp, mu, paired, alternative, weights, group_labels, data_name) { + insight::check_if_installed("datawizard") + if (is.null(grp)) { + x_values <- dv + x_weights <- weights + y_values <- NULL + } else { + dat <- stats::na.omit(data.frame(dv, grp, weights)) + colnames(dat) <- c("y", "g", "w") + # unique groups + groups <- unique(dat$grp) + # values for sample 1 + x_values <- dat$y[dat$g == groups[1]] + x_weights <- dat$w[dat$g == groups[1]] + # values for sample 2 + y_values <- dat$y[dat$g == groups[2]] + y_weights <- dat$w[dat$g == groups[2]] + # paired t-test? 
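# Toy two-group illustration (invented numbers, not package code) of how the
# weighted group means and standard errors computed below combine into the
# test statistic, assuming `datawizard` is available:
x <- c(4.1, 5.3, 2.8, 6.0); wx <- c(1, 2, 1, 0.5)
y <- c(3.0, 3.9, 2.5, 4.4); wy <- c(1, 1, 2, 1)
se_x <- sqrt(datawizard::weighted_sd(x, wx)^2 / length(x))
se_y <- sqrt(datawizard::weighted_sd(y, wy)^2 / length(y))
(stats::weighted.mean(x, wx) - stats::weighted.mean(y, wy)) / sqrt(se_x^2 + se_y^2)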
+ if (paired) { + x_values <- x_values - y_values + y_values <- NULL + } + } + + mu_x <- stats::weighted.mean(x_values, x_weights) + var_x <- datawizard::weighted_sd(x_values, x_weights)^2 + se_x <- sqrt(var_x / length(x_values)) + + if (paired || is.null(y_values)) { + # paired + se <- se_x + dof <- length(x_values) - 1 + test_statistic <- (mu_x - mu) / se + estimate <- stats::setNames(mu_x, if (paired) "mean of the differences" else "mean of x") + method <- if (paired) "Paired t-test" else "One Sample t-test" + } else { + # unpaired t-test + mu_y <- stats::weighted.mean(y_values, y_weights) + var_y <- datawizard::weighted_sd(y_values, y_weights)^2 + se_y <- sqrt(var_y / length(y_values)) + + se <- sqrt(se_x^2 + se_y^2) + dof <- se^4 / (se_x^4 / (length(x_values) - 1) + se_y^4 / (length(y_values) - 1)) + test_statistic <- (mu_x - mu_y - mu) / se + + estimate <- c(mu_x, mu_y) + names(estimate) <- c("mean of x", "mean of y") + method <- "Two-Sample t-test" + } + + if (alternative == "less") { + pval <- stats::pt(test_statistic, dof) + } else if (alternative == "greater") { + pval <- stats::pt(test_statistic, dof, lower.tail = FALSE) + } else { + pval <- 2 * stats::pt(-abs(test_statistic), dof) + } + + # return result + out <- data.frame( + data = data_name, + statistic_name = "t", + statistic = test_statistic, + effect_size_name = names(effect_size), + effect_size = as.numeric(effect_size), + p = pval, + df = dof, + method = method, + alternative = alternative, + stringsAsFactors = FALSE + ) + class(out) <- c("sj_htest_t", "data.frame") + attr(out, "means") <- stats::setNames(htest$estimate, c("Mean Group 1", "Mean Group 2")) + attr(out, "n_groups") <- stats::setNames( + c(as.numeric(table(grp))), + c("N Group 1", "N Group 2") + ) + attr(out, "paired") <- isTRUE(paired) + attr(out, "weighted") <- FALSE + out +} + + +# methods --------------------------------------------------------------------- + +#' @export +print.sj_htest_t <- function(x, ...) 
{ + # fetch attributes + group_labels <- attributes(x)$group_labels + rank_means <- attributes(x)$rank_means + n_groups <- attributes(x)$n_groups + weighted <- attributes(x)$weighted + + if (weighted) { + weight_string <- " (weighted)" + } else { + weight_string <- "" + } + + # same width + group_labels <- format(group_labels) + + # header + insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue") + + # group-1-info + insight::print_color( + sprintf( + " Group 1: %s (n = %i, rank mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(rank_means[1], protect_integers = TRUE) + ), "cyan" + ) + + # group-2-info + insight::print_color( + sprintf( + " Group 2: %s (n = %i, rank mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(rank_means[2], protect_integers = TRUE) + ), "cyan" + ) + + cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) +} diff --git a/R/wtd_ttest.R b/R/wtd_ttest.R index 38652af2..c6c6ee2a 100644 --- a/R/wtd_ttest.R +++ b/R/wtd_ttest.R @@ -109,12 +109,12 @@ weighted_ttest_helper <- function(xv, yv, wx, wy, nx, ny, mu, paired, alternativ } mu.x.w <- stats::weighted.mean(xv, wx) - var.x.w <- weighted_sd(xv, wx)^2 + var.x.w <- datawizard::weighted_sd(xv, wx)^2 se.x <- sqrt(var.x.w / nx) if (!is.null(yv)) { mu.y.w <- stats::weighted.mean(yv, wy) - var.y.w <- weighted_sd(yv, wy)^2 + var.y.w <- datawizard::weighted_sd(yv, wy)^2 se.y <- sqrt(var.y.w / ny) se <- sqrt(se.x^2 + se.y^2) diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index ab376af7..4d9b6f2c 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/mann_whitney_test.R \name{mann_whitney_test} \alias{mann_whitney_test} -\title{Mann-Whitney-Test} +\title{Mann-Whitney test} \usage{ mann_whitney_test( data, @@ -36,10 +36,10 @@ A data frame with test results. The function returns p and Z-values as well as effect size r and group-rank-means. } \description{ -This function performs a Mann-Whitney-Test (or Wilcoxon rank +This function performs a Mann-Whitney test (or Wilcoxon rank sum test for \emph{unpaired} samples. -A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +A Mann-Whitney test is a non-parametric test for the null hypothesis that two independent samples have identical continuous distributions. It can be used when the two continuous variables are not normally distributed. } @@ -62,7 +62,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: } \examples{ data(efc) -# Mann-Whitney-U-Tests for elder's age by elder's sex. +# Mann-Whitney-U tests for elder's age by elder's sex. mann_whitney_test(efc, "e17age", by = "e16sex") # when data is in wide-format, specify all relevant continuous diff --git a/man/t_test.Rd b/man/t_test.Rd new file mode 100644 index 00000000..a35abd03 --- /dev/null +++ b/man/t_test.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/t_test.R +\name{t_test} +\alias{t_test} +\title{Student's t test} +\usage{ +t_test( + data, + select = NULL, + by = NULL, + weights = NULL, + paired = FALSE, + mu = 0, + alternative = "two.sided" +) +} +\arguments{ +\item{data}{A data frame.} + +\item{select}{Name of the dependent variable (as string) to be used for the +test. \code{select} can also be a character vector, specifying the names of +multiple continuous variables. 
In this case, \code{by} is ignored and variables +specified in \code{select} are used to compute the test. This can be useful if +the data is in wide-format and no grouping variable is available.} + +\item{by}{Name of the grouping variable to be used for the test. If \code{by} is +not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if +\code{probabilities} is provided, \code{by} must be \code{NULL}.} + +\item{weights}{Name of an (optional) weighting variable to be used for the test.} + +\item{alternative}{A character string specifying the alternative hypothesis, +must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test()}.} +} +\value{ +A data frame with test results. The function returns p and Z-values +as well as effect size r and group-rank-means. +} +\description{ +This function performs a Mann-Whitney-Test (or Wilcoxon rank +sum test for \emph{unpaired} samples. + +A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +independent samples have identical continuous distributions. It can be used +when the two continuous variables are not normally distributed. +} +\examples{ +data(efc) +# Mann-Whitney-U-Tests for elder's age by elder's sex. +t_test(efc, "e17age", by = "e16sex") + +# when data is in wide-format, specify all relevant continuous +# variables in `select` and omit `by` +set.seed(123) +wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) +t_test(wide_data, select = c("scale1", "scale2")) + +# same as if we had data in long format, with grouping variable +long_data <- data.frame( + scales = c(wide_data$scale1, wide_data$scale2), + groups = rep(c("A", "B"), each = 20) +) +t_test(long_data, select = "scales", by = "groups") +} From 3cd21a587f612300ae9f112d20307707c62bcf37 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 13:54:59 +0200 Subject: [PATCH 26/82] fix --- NAMESPACE | 4 ++-- man/svyglm.nb.Rd | 2 +- man/svyglm.zip.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8bfe01ac..b06a0cda 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,8 +30,6 @@ S3method(print,sj_xtab_stat) S3method(print,svyglm.nb) S3method(print,svyglm.zip) S3method(residuals,svyglm.nb) -S3method(svyglm,nb) -S3method(svyglm,zip) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) @@ -90,6 +88,8 @@ export(se) export(se_ybar) export(smpsize_lmm) export(survey_median) +export(svyglm.nb) +export(svyglm.zip) export(t_test) export(table_values) export(var_pop) diff --git a/man/svyglm.nb.Rd b/man/svyglm.nb.Rd index 768f9ae5..42b3c141 100644 --- a/man/svyglm.nb.Rd +++ b/man/svyglm.nb.Rd @@ -4,7 +4,7 @@ \alias{svyglm.nb} \title{Survey-weighted negative binomial generalised linear model} \usage{ -\method{svyglm}{nb}(formula, design, ...) +svyglm.nb(formula, design, ...) } \arguments{ \item{formula}{An object of class \code{formula}, i.e. a symbolic description diff --git a/man/svyglm.zip.Rd b/man/svyglm.zip.Rd index 0756a015..95c201d0 100644 --- a/man/svyglm.zip.Rd +++ b/man/svyglm.zip.Rd @@ -4,7 +4,7 @@ \alias{svyglm.zip} \title{Survey-weighted zero-inflated Poisson model} \usage{ -\method{svyglm}{zip}(formula, design, ...) +svyglm.zip(formula, design, ...) } \arguments{ \item{formula}{An object of class \code{formula}, i.e. 
a symbolic description From 858612a88f4aa4dbf4f9383fc6a864c220d7edfb Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 13:56:03 +0200 Subject: [PATCH 27/82] fix --- R/t_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/t_test.R b/R/t_test.R index d20e86ad..61b372c6 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -104,7 +104,7 @@ t_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_ttest <- function(dv, grp, mu, alternative, group_labels, data_name) { +.calculate_ttest <- function(dv, grp, mu, paired, alternative, group_labels, data_name) { insight::check_if_installed("effectsize") # prepare data if (is.null(grp)) { From cf96c2e0d37aeea58424ac9e1b8260fd508adc50 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 13:58:31 +0200 Subject: [PATCH 28/82] fix --- R/t_test.R | 2 ++ _pkgdown.yml | 1 + 2 files changed, 3 insertions(+) diff --git a/R/t_test.R b/R/t_test.R index 61b372c6..bcc20738 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -177,6 +177,8 @@ t_test <- function(data, .calculate_weighted_ttest <- function(dv, grp, mu, paired, alternative, weights, group_labels, data_name) { insight::check_if_installed("datawizard") if (is.null(grp)) { + dat <- stats::na.omit(data.frame(dv, weights)) + colnames(dat) <- c("y", "w") x_values <- dv x_weights <- weights y_values <- NULL diff --git a/_pkgdown.yml b/_pkgdown.yml index 2fb1d7b8..8335357c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -33,6 +33,7 @@ reference: - chi_squared_test - kruskal_wallis_test - mann_whitney_test + - t_test - var_pop - title: "Tools for Regression Models" From 716e9d26def601200020184347f660aa69414b16 Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 14:24:07 +0200 Subject: [PATCH 29/82] update --- R/t_test.R | 151 ++++++++++++++++++++++++++++++++++---------------- man/t_test.Rd | 18 +++--- 2 files changed, 114 insertions(+), 55 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index bcc20738..2bbbfd36 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -1,27 +1,18 @@ #' @title Student's t test #' @name t_test -#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank -#' sum test for _unpaired_ samples. +#' @description This function performs a Student's t test for two independent +#' samples, for paired samples, or for one sample. #' -#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two -#' independent samples have identical continuous distributions. It can be used -#' when the two continuous variables are not normally distributed. -#' -#' @param data A data frame. -#' @param select Name of the dependent variable (as string) to be used for the -#' test. `select` can also be a character vector, specifying the names of -#' multiple continuous variables. In this case, `by` is ignored and variables -#' specified in `select` are used to compute the test. This can be useful if -#' the data is in wide-format and no grouping variable is available. -#' @param by Name of the grouping variable to be used for the test. If `by` is -#' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if -#' `probabilities` is provided, `by` must be `NULL`. -#' @param weights Name of an (optional) weighting variable to be used for the test. +#' @inheritParams mann_whitney_test #' @param alternative A character string specifying the alternative hypothesis, #' must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test()`. 
+#' @param paired Logical, whether to compute a paired t-test. +#' @param mu The hypothesized difference in means. If `paired = TRUE`, or for a +#' one-sample t-test, this is the hypothesized true mean value. If +#' `paired = FALSE`, this is the hypothesized difference in means between the +#' two groups. #' -#' @return A data frame with test results. The function returns p and Z-values -#' as well as effect size r and group-rank-means. +#' @return A data frame with test results. #' #' @examples #' data(efc) @@ -90,6 +81,7 @@ t_test <- function(data, } data_name <- paste(select, "by", by) } else { + grp <- NULL group_labels <- select data_name <- select } @@ -161,13 +153,16 @@ t_test <- function(data, stringsAsFactors = FALSE ) class(out) <- c("sj_htest_t", "data.frame") - attr(out, "means") <- stats::setNames(htest$estimate, c("Mean Group 1", "Mean Group 2")) - attr(out, "n_groups") <- stats::setNames( - c(as.numeric(table(grp))), - c("N Group 1", "N Group 2") - ) + attr(out, "means") <- as.numeric(htest$estimate) attr(out, "paired") <- isTRUE(paired) + attr(out, "one_sample") <- is.null(grp) attr(out, "weighted") <- FALSE + if (!is.null(gpr)) { + attr(out, "n_groups") <- stats::setNames( + c(as.numeric(table(grp))), + c("N Group 1", "N Group 2") + ) + } out } @@ -209,23 +204,21 @@ t_test <- function(data, se <- se_x dof <- length(x_values) - 1 test_statistic <- (mu_x - mu) / se - estimate <- stats::setNames(mu_x, if (paired) "mean of the differences" else "mean of x") + estimate <- mu_x method <- if (paired) "Paired t-test" else "One Sample t-test" } else { # unpaired t-test mu_y <- stats::weighted.mean(y_values, y_weights) var_y <- datawizard::weighted_sd(y_values, y_weights)^2 se_y <- sqrt(var_y / length(y_values)) - se <- sqrt(se_x^2 + se_y^2) dof <- se^4 / (se_x^4 / (length(x_values) - 1) + se_y^4 / (length(y_values) - 1)) test_statistic <- (mu_x - mu_y - mu) / se - estimate <- c(mu_x, mu_y) - names(estimate) <- c("mean of x", "mean of y") method <- "Two-Sample t-test" } + # p-values if (alternative == "less") { pval <- stats::pt(test_statistic, dof) } else if (alternative == "greater") { @@ -234,6 +227,38 @@ t_test <- function(data, pval <- 2 * stats::pt(-abs(test_statistic), dof) } + # effect size + dat$y <- dat$y * dat$w + if (is.null(y_values)) { + t_formula <- as.formula("y ~ 1") + } else { + t_formula <- as.formula("y ~ g") + } + + if (nrow(dat) > 20) { + effect_size <- stats::setNames( + effectsize::cohens_d( + t_formula, + data = dat, + alternative = alternative, + mu = mu, + paired = FALSE + )$Cohens_d, + "Cohens_d" + ) + } else { + effect_size <- stats::setNames( + effectsize::hedges_g( + t_formula, + data = dat, + alternative = alternative, + mu = mu, + paired = FALSE + )$Hedges_g, + "Hedges_g" + ) + } + # return result out <- data.frame( data = data_name, @@ -248,12 +273,15 @@ t_test <- function(data, stringsAsFactors = FALSE ) class(out) <- c("sj_htest_t", "data.frame") - attr(out, "means") <- stats::setNames(htest$estimate, c("Mean Group 1", "Mean Group 2")) - attr(out, "n_groups") <- stats::setNames( - c(as.numeric(table(grp))), - c("N Group 1", "N Group 2") - ) + attr(out, "means") <- estimate + if (!is.null(grp)) { + attr(out, "n_groups") <- stats::setNames( + as.numeric(as.table(round(stats::xtabs(dat[[3]] ~ dat[[1]] + dat[[2]])))), + c("N Group 1", "N Group 2") + ) + } attr(out, "paired") <- isTRUE(paired) + attr(out, "one_sample") <- is.null(y_values) && !isTRUE(paired) attr(out, "weighted") <- FALSE out } @@ -265,9 +293,11 @@ t_test <- function(data, 
print.sj_htest_t <- function(x, ...) { # fetch attributes group_labels <- attributes(x)$group_labels - rank_means <- attributes(x)$rank_means + means <- attributes(x)$means n_groups <- attributes(x)$n_groups weighted <- attributes(x)$weighted + paired <- isTRUE(attributes(x)$paired) + one_sample <- isTRUE(attributes(x)$one_sample) if (weighted) { weight_string <- " (weighted)" @@ -279,23 +309,50 @@ print.sj_htest_t <- function(x, ...) { group_labels <- format(group_labels) # header - insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue") + insight::print_color(sprintf("# %s%s\n\n", x$method, weight_string), "blue") # group-1-info - insight::print_color( - sprintf( - " Group 1: %s (n = %i, rank mean = %s)\n", - group_labels[1], n_groups[1], insight::format_value(rank_means[1], protect_integers = TRUE) - ), "cyan" - ) + if (is.null(n_groups)) { + insight::print_color( + sprintf( + " Group 1: %s (mean = %s)\n", + group_labels[1], insight::format_value(means[1], protect_integers = TRUE) + ), "cyan" + ) + } else { + insight::print_color( + sprintf( + " Group 1: %s (n = %i, mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(means[1], protect_integers = TRUE) + ), "cyan" + ) + } # group-2-info - insight::print_color( - sprintf( - " Group 2: %s (n = %i, rank mean = %s)\n", - group_labels[2], n_groups[2], insight::format_value(rank_means[2], protect_integers = TRUE) - ), "cyan" - ) + if (length(group_labels) > 1) { + if (is.null(n_groups)) { + insight::print_color( + sprintf( + " Group 2: %s (mean = %s)\n", + group_labels[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } else { + insight::print_color( + sprintf( + " Group 2: %s (n = %i, rank mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } + } - cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) + cat(sprintf( + "\n t = %.3f, %s = %.3f, df = %.1f, %s\n\n", + x$statistic, + x$effect_size_name, + x$effect_size, + x$df, + insight::format_p(x$p) + )) } diff --git a/man/t_test.Rd b/man/t_test.Rd index a35abd03..86b554e4 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -29,20 +29,22 @@ not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if \item{weights}{Name of an (optional) weighting variable to be used for the test.} +\item{paired}{Logical, whether to compute a paired t-test.} + +\item{mu}{The hypothesized difference in means. If \code{paired = TRUE}, or for a +one-sample t-test, this is the hypothesized true mean value. If +\code{paired = FALSE}, this is the hypothesized difference in means between the +two groups.} + \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test()}.} } \value{ -A data frame with test results. The function returns p and Z-values -as well as effect size r and group-rank-means. +A data frame with test results. } \description{ -This function performs a Mann-Whitney-Test (or Wilcoxon rank -sum test for \emph{unpaired} samples. - -A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two -independent samples have identical continuous distributions. It can be used -when the two continuous variables are not normally distributed. +This function performs a Student's t test for two independent +samples, for paired samples, or for one sample. 
} \examples{ data(efc) From 15344fb864b1a4b1c6d7e4b3ae716f516cc7719a Mon Sep 17 00:00:00 2001 From: Daniel Date: Fri, 10 May 2024 15:07:27 +0200 Subject: [PATCH 30/82] Update t_test.R --- R/t_test.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index 2bbbfd36..c074248b 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -101,10 +101,10 @@ t_test <- function(data, # prepare data if (is.null(grp)) { tdat <- data.frame(dv) - t_formula <- as.formula("dv ~ 1") + t_formula <- stats::as.formula("dv ~ 1") } else { tdat <- data.frame(dv, grp) - t_formula <- as.formula("dv ~ grp") + t_formula <- stats::as.formula("dv ~ grp") } # perfom wilcox test htest <- stats::t.test( @@ -230,9 +230,9 @@ t_test <- function(data, # effect size dat$y <- dat$y * dat$w if (is.null(y_values)) { - t_formula <- as.formula("y ~ 1") + t_formula <- stats::as.formula("y ~ 1") } else { - t_formula <- as.formula("y ~ g") + t_formula <- stats::as.formula("y ~ g") } if (nrow(dat) > 20) { From 26632d16c3ccc09aeaaa983a3b058b7caeaa85ce Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 09:15:33 +0200 Subject: [PATCH 31/82] update --- R/mann_whitney_test.R | 16 +++++++-------- R/t_test.R | 42 +++++++++++++++++++++----------------- man/chi_squared_test.Rd | 16 +++++++-------- man/kruskal_wallis_test.Rd | 16 +++++++-------- man/mann_whitney_test.Rd | 16 +++++++-------- man/t_test.Rd | 16 +++++++-------- 6 files changed, 63 insertions(+), 59 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 67ebf83a..55c6c374 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -8,14 +8,14 @@ #' when the two continuous variables are not normally distributed. #' #' @param data A data frame. -#' @param select Name of the dependent variable (as string) to be used for the -#' test. `select` can also be a character vector, specifying the names of -#' multiple continuous variables. In this case, `by` is ignored and variables -#' specified in `select` are used to compute the test. This can be useful if -#' the data is in wide-format and no grouping variable is available. -#' @param by Name of the grouping variable to be used for the test. If `by` is -#' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if -#' `probabilities` is provided, `by` must be `NULL`. +#' @param select One or more name of the continuous variable (as character +#' vector) to be used as samples for the test. If `select` only specified one +#' variable, a one-sample test is carried out (only applicable for `t_test()`). +#' Else, `by` must be provided to indicate the groups of comparison. +#' @param by Name of the variable indicating the groups. Required if `select` +#' specifies only one variable that contains all samples to be compared in the +#' test. If `by` is not a factor, it will be coerced to a factor. For +#' `chi_squared_test()`, if `probabilities` is provided, `by` must be `NULL`. #' @param weights Name of an (optional) weighting variable to be used for the test. #' @param distribution Indicates how the null distribution of the test statistic #' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"` diff --git a/R/t_test.R b/R/t_test.R index c074248b..fe062ab6 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -44,6 +44,7 @@ t_test <- function(data, # sanity checks .sanitize_htest_input(data, select, by, weights) + data_name <- NULL # does select indicate more than one variable? 
if (length(select) > 1) { @@ -54,12 +55,21 @@ t_test <- function(data, if (!is.null(by)) { insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") } - # we convert the data into long format, and create a grouping variable - data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") - by <- select[2] - select <- select[1] - # after converting to long, we have the "grouping" variable first in the data - colnames(data) <- c(by, select) + # paired? + if (paired) { + # subtract the two variables for paired t-test, and set by to NULL + data[[select[1]]] <- data[[select[1]]] - data[[select[2]]] + data_name <- paste(select[1], "by", select[2]) + select <- select[1] + by <- NULL + } else { + # we convert the data into long format, and create a grouping variable + data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") + by <- select[2] + select <- select[1] + # after converting to long, we have the "grouping" variable first in the data + colnames(data) <- c(by, select) + } } # get data @@ -83,7 +93,9 @@ t_test <- function(data, } else { grp <- NULL group_labels <- select - data_name <- select + if (is.null(data_name)) { + data_name <- select + } } if (is.null(weights)) { @@ -111,8 +123,7 @@ t_test <- function(data, t_formula, data = tdat, alternative = alternative, - mu = mu, - paired = paired + mu = mu ) test_statistic <- htest$statistic if (nrow(tdat) > 20) { @@ -121,8 +132,7 @@ t_test <- function(data, t_formula, data = tdat, alternative = alternative, - mu = mu, - paired = paired + mu = mu )$Cohens_d, "Cohens_d" ) @@ -132,8 +142,7 @@ t_test <- function(data, t_formula, data = tdat, alternative = alternative, - mu = mu, - paired = paired + mu = mu )$Hedges_g, "Hedges_g" ) @@ -188,11 +197,6 @@ t_test <- function(data, # values for sample 2 y_values <- dat$y[dat$g == groups[2]] y_weights <- dat$w[dat$g == groups[2]] - # paired t-test? - if (paired) { - x_values <- x_values - y_values - y_values <- NULL - } } mu_x <- stats::weighted.mean(x_values, x_weights) @@ -340,7 +344,7 @@ print.sj_htest_t <- function(x, ...) { } else { insight::print_color( sprintf( - " Group 2: %s (n = %i, rank mean = %s)\n", + " Group 2: %s (n = %i, mean = %s)\n", group_labels[2], n_groups[2], insight::format_value(means[2], protect_integers = TRUE) ), "cyan" ) diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index f6dd5d77..49ba7783 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -17,15 +17,15 @@ chi_squared_test( \arguments{ \item{data}{A data frame.} -\item{select}{Name of the dependent variable (as string) to be used for the -test. \code{select} can also be a character vector, specifying the names of -multiple continuous variables. In this case, \code{by} is ignored and variables -specified in \code{select} are used to compute the test. This can be useful if -the data is in wide-format and no grouping variable is available.} +\item{select}{One or more name of the continuous variable (as character +vector) to be used as samples for the test. If \code{select} only specified one +variable, a one-sample test is carried out (only applicable for \code{t_test()}). +Else, \code{by} must be provided to indicate the groups of comparison.} -\item{by}{Name of the grouping variable to be used for the test. If \code{by} is -not a factor, it will be coerced to a factor. 
For \code{chi_squared_test()}, if -\code{probabilities} is provided, \code{by} must be \code{NULL}.} +\item{by}{Name of the variable indicating the groups. Required if \code{select} +specifies only one variable that contains all samples to be compared in the +test. If \code{by} is not a factor, it will be coerced to a factor. For +\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.} \item{probabilities}{A numeric vector of probabilities for each cell in the contingency table. The length of the vector must match the number of cells diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index d3033c42..bd0909c8 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -9,15 +9,15 @@ kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL) \arguments{ \item{data}{A data frame.} -\item{select}{Name of the dependent variable (as string) to be used for the -test. \code{select} can also be a character vector, specifying the names of -multiple continuous variables. In this case, \code{by} is ignored and variables -specified in \code{select} are used to compute the test. This can be useful if -the data is in wide-format and no grouping variable is available.} +\item{select}{One or more name of the continuous variable (as character +vector) to be used as samples for the test. If \code{select} only specified one +variable, a one-sample test is carried out (only applicable for \code{t_test()}). +Else, \code{by} must be provided to indicate the groups of comparison.} -\item{by}{Name of the grouping variable to be used for the test. If \code{by} is -not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if -\code{probabilities} is provided, \code{by} must be \code{NULL}.} +\item{by}{Name of the variable indicating the groups. Required if \code{select} +specifies only one variable that contains all samples to be compared in the +test. If \code{by} is not a factor, it will be coerced to a factor. For +\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.} \item{weights}{Name of an (optional) weighting variable to be used for the test.} } diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 4d9b6f2c..55c3aff6 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -15,15 +15,15 @@ mann_whitney_test( \arguments{ \item{data}{A data frame.} -\item{select}{Name of the dependent variable (as string) to be used for the -test. \code{select} can also be a character vector, specifying the names of -multiple continuous variables. In this case, \code{by} is ignored and variables -specified in \code{select} are used to compute the test. This can be useful if -the data is in wide-format and no grouping variable is available.} +\item{select}{One or more name of the continuous variable (as character +vector) to be used as samples for the test. If \code{select} only specified one +variable, a one-sample test is carried out (only applicable for \code{t_test()}). +Else, \code{by} must be provided to indicate the groups of comparison.} -\item{by}{Name of the grouping variable to be used for the test. If \code{by} is -not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if -\code{probabilities} is provided, \code{by} must be \code{NULL}.} +\item{by}{Name of the variable indicating the groups. Required if \code{select} +specifies only one variable that contains all samples to be compared in the +test. 
If \code{by} is not a factor, it will be coerced to a factor. For +\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.} \item{weights}{Name of an (optional) weighting variable to be used for the test.} diff --git a/man/t_test.Rd b/man/t_test.Rd index 86b554e4..7d33e7b8 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -17,15 +17,15 @@ t_test( \arguments{ \item{data}{A data frame.} -\item{select}{Name of the dependent variable (as string) to be used for the -test. \code{select} can also be a character vector, specifying the names of -multiple continuous variables. In this case, \code{by} is ignored and variables -specified in \code{select} are used to compute the test. This can be useful if -the data is in wide-format and no grouping variable is available.} +\item{select}{One or more name of the continuous variable (as character +vector) to be used as samples for the test. If \code{select} only specified one +variable, a one-sample test is carried out (only applicable for \code{t_test()}). +Else, \code{by} must be provided to indicate the groups of comparison.} -\item{by}{Name of the grouping variable to be used for the test. If \code{by} is -not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if -\code{probabilities} is provided, \code{by} must be \code{NULL}.} +\item{by}{Name of the variable indicating the groups. Required if \code{select} +specifies only one variable that contains all samples to be compared in the +test. If \code{by} is not a factor, it will be coerced to a factor. For +\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.} \item{weights}{Name of an (optional) weighting variable to be used for the test.} From f7fafc692680b065aa3ebf68d98aea04c237f927 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 09:19:54 +0200 Subject: [PATCH 32/82] fix --- R/t_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/t_test.R b/R/t_test.R index fe062ab6..1b55661f 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -166,7 +166,7 @@ t_test <- function(data, attr(out, "paired") <- isTRUE(paired) attr(out, "one_sample") <- is.null(grp) attr(out, "weighted") <- FALSE - if (!is.null(gpr)) { + if (!is.null(grp)) { attr(out, "n_groups") <- stats::setNames( c(as.numeric(table(grp))), c("N Group 1", "N Group 2") From f432f65bb57252fde51f95a8340c0c1f9c515ffd Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 09:29:06 +0200 Subject: [PATCH 33/82] fixes --- R/t_test.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index 1b55661f..2eb31d6b 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -162,6 +162,7 @@ t_test <- function(data, stringsAsFactors = FALSE ) class(out) <- c("sj_htest_t", "data.frame") + attr(out, "group_labels") <- group_labels attr(out, "means") <- as.numeric(htest$estimate) attr(out, "paired") <- isTRUE(paired) attr(out, "one_sample") <- is.null(grp) @@ -284,6 +285,7 @@ t_test <- function(data, c("N Group 1", "N Group 2") ) } + attr(out, "group_labels") <- group_labels attr(out, "paired") <- isTRUE(paired) attr(out, "one_sample") <- is.null(y_values) && !isTRUE(paired) attr(out, "weighted") <- FALSE @@ -315,6 +317,9 @@ print.sj_htest_t <- function(x, ...) 
{ # header insight::print_color(sprintf("# %s%s\n\n", x$method, weight_string), "blue") + # data + insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") + # group-1-info if (is.null(n_groups)) { insight::print_color( @@ -352,11 +357,11 @@ print.sj_htest_t <- function(x, ...) { } cat(sprintf( - "\n t = %.3f, %s = %.3f, df = %.1f, %s\n\n", + "\n t = %.3f, %s = %.3f, df = %s, %s\n\n", x$statistic, x$effect_size_name, x$effect_size, - x$df, + insight::format_value(x$df, digits = 1, protect_integers = TRUE), insight::format_p(x$p) )) } From 1cccd11f90406de6e64b890d1ebe2e1ad6353aa8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 09:55:50 +0200 Subject: [PATCH 34/82] fix --- R/t_test.R | 62 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index 2eb31d6b..a45547e6 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -59,7 +59,7 @@ t_test <- function(data, if (paired) { # subtract the two variables for paired t-test, and set by to NULL data[[select[1]]] <- data[[select[1]]] - data[[select[2]]] - data_name <- paste(select[1], "by", select[2]) + data_name <- paste(select[1], "and", select[2]) select <- select[1] by <- NULL } else { @@ -157,7 +157,7 @@ t_test <- function(data, effect_size = as.numeric(effect_size), p = as.numeric(htest$p.value), df = as.numeric(htest$parameter), - method = htest$method, + method = ifelse(paired, "Paired t-test", htest$method), alternative = alternative, stringsAsFactors = FALSE ) @@ -317,43 +317,53 @@ print.sj_htest_t <- function(x, ...) { # header insight::print_color(sprintf("# %s%s\n\n", x$method, weight_string), "blue") - # data - insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") - - # group-1-info - if (is.null(n_groups)) { - insight::print_color( - sprintf( - " Group 1: %s (mean = %s)\n", - group_labels[1], insight::format_value(means[1], protect_integers = TRUE) - ), "cyan" - ) + # print for paired t-test + if (paired) { + # data + insight::print_color(sprintf( + " Data: %s (mean difference = %s)\n", + x$data, + insight::format_value(means[1], protect_integers = TRUE) + ), "cyan") } else { - insight::print_color( - sprintf( - " Group 1: %s (n = %i, mean = %s)\n", - group_labels[1], n_groups[1], insight::format_value(means[1], protect_integers = TRUE) - ), "cyan" - ) - } + # data + insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") - # group-2-info - if (length(group_labels) > 1) { + # group-1-info if (is.null(n_groups)) { insight::print_color( sprintf( - " Group 2: %s (mean = %s)\n", - group_labels[2], insight::format_value(means[2], protect_integers = TRUE) + " Group 1: %s (mean = %s)\n", + group_labels[1], insight::format_value(means[1], protect_integers = TRUE) ), "cyan" ) } else { insight::print_color( sprintf( - " Group 2: %s (n = %i, mean = %s)\n", - group_labels[2], n_groups[2], insight::format_value(means[2], protect_integers = TRUE) + " Group 1: %s (n = %i, mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(means[1], protect_integers = TRUE) ), "cyan" ) } + + # group-2-info + if (length(group_labels) > 1) { + if (is.null(n_groups)) { + insight::print_color( + sprintf( + " Group 2: %s (mean = %s)\n", + group_labels[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } else { + insight::print_color( + sprintf( + " Group 2: %s (n = %i, mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } + } } cat(sprintf( From 
2c3b61d41e4cec24d662ff11817addcf714ae5d6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 09:57:02 +0200 Subject: [PATCH 35/82] fix --- R/t_test.R | 24 ++++++++++-------------- man/t_test.Rd | 28 ++++++++++++---------------- 2 files changed, 22 insertions(+), 30 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index a45547e6..3cf440f1 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -15,22 +15,18 @@ #' @return A data frame with test results. #' #' @examples -#' data(efc) -#' # Mann-Whitney-U-Tests for elder's age by elder's sex. -#' t_test(efc, "e17age", by = "e16sex") +#' data(sleep) +#' # one-sample t-test +#' t_test(sleep, "extra") +#' t.test(extra ~ 1, data = sleep) #' -#' # when data is in wide-format, specify all relevant continuous -#' # variables in `select` and omit `by` -#' set.seed(123) -#' wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) -#' t_test(wide_data, select = c("scale1", "scale2")) +#' # two-sample t-test, by group +#' t.test(mpg ~ am, data = mtcars) +#' t_test(mtcars, "mpg", by = "am") #' -#' # same as if we had data in long format, with grouping variable -#' long_data <- data.frame( -#' scales = c(wide_data$scale1, wide_data$scale2), -#' groups = rep(c("A", "B"), each = 20) -#' ) -#' t_test(long_data, select = "scales", by = "groups") +#' # paired t-test +#' t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) +#' t_test(mtcars, c("mpg", "hp"), paired = TRUE) #' @export t_test <- function(data, select = NULL, diff --git a/man/t_test.Rd b/man/t_test.Rd index 7d33e7b8..17d0cfd0 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -47,20 +47,16 @@ This function performs a Student's t test for two independent samples, for paired samples, or for one sample. } \examples{ -data(efc) -# Mann-Whitney-U-Tests for elder's age by elder's sex. -t_test(efc, "e17age", by = "e16sex") - -# when data is in wide-format, specify all relevant continuous -# variables in `select` and omit `by` -set.seed(123) -wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) -t_test(wide_data, select = c("scale1", "scale2")) - -# same as if we had data in long format, with grouping variable -long_data <- data.frame( - scales = c(wide_data$scale1, wide_data$scale2), - groups = rep(c("A", "B"), each = 20) -) -t_test(long_data, select = "scales", by = "groups") +data(sleep) +# one-sample t-test +t_test(sleep, "extra") +t.test(extra ~ 1, data = sleep) + +# two-sample t-test, by group +t.test(mpg ~ am, data = mtcars) +t_test(mtcars, "mpg", by = "am") + +# paired t-test +t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) +t_test(mtcars, c("mpg", "hp"), paired = TRUE) } From 2e09ef8d8a1a87b3369f10a765a3263817b1dd65 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 10:31:02 +0200 Subject: [PATCH 36/82] fix --- R/mann_whitney_test.R | 2 +- R/t_test.R | 2 +- man/mann_whitney_test.Rd | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 55c6c374..b84aff05 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -40,7 +40,7 @@ #' r = |Z| / sqrt(n1 + n2) #' ``` #' -#' @examples +#' @examplesIf requireNamespace("coin", quietly = TRUE) && requireNamespace("survey", quietly = TRUE) #' data(efc) #' # Mann-Whitney-U tests for elder's age by elder's sex. 
#' mann_whitney_test(efc, "e17age", by = "e16sex") diff --git a/R/t_test.R b/R/t_test.R index 3cf440f1..6a0a9786 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -35,7 +35,7 @@ t_test <- function(data, paired = FALSE, mu = 0, alternative = "two.sided") { - insight::check_if_installed("datawizard") + insight::check_if_installed(c("datawizard", "effectsize")) alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) # sanity checks diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 55c3aff6..cfbfacfe 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -61,6 +61,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: }\if{html}{\out{}} } \examples{ +\dontshow{if (requireNamespace("coin", quietly = TRUE) && requireNamespace("survey", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) # Mann-Whitney-U tests for elder's age by elder's sex. mann_whitney_test(efc, "e17age", by = "e16sex") @@ -77,4 +78,5 @@ long_data <- data.frame( groups = rep(c("A", "B"), each = 20) ) mann_whitney_test(long_data, select = "scales", by = "groups") +\dontshow{\}) # examplesIf} } From 3e59ed4c66d6e25dfeba5cd3ac396c6f29ed3147 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 10:31:14 +0200 Subject: [PATCH 37/82] fix --- R/t_test.R | 2 +- man/t_test.Rd | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/t_test.R b/R/t_test.R index 6a0a9786..222726d4 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -14,7 +14,7 @@ #' #' @return A data frame with test results. #' -#' @examples +#' @examplesIf requireNamespace("effectsize", quietly = TRUE) #' data(sleep) #' # one-sample t-test #' t_test(sleep, "extra") diff --git a/man/t_test.Rd b/man/t_test.Rd index 17d0cfd0..3e776fc7 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -47,6 +47,7 @@ This function performs a Student's t test for two independent samples, for paired samples, or for one sample. } \examples{ +\dontshow{if (requireNamespace("effectsize", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) # one-sample t-test t_test(sleep, "extra") @@ -59,4 +60,5 @@ t_test(mtcars, "mpg", by = "am") # paired t-test t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) t_test(mtcars, c("mpg", "hp"), paired = TRUE) +\dontshow{\}) # examplesIf} } From 4821418512e977e71708baefd925971b07395457 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:16:40 +0200 Subject: [PATCH 38/82] fixes --- R/anova_stats.R | 2 +- R/samplesize_mixed.R | 10 ++++++++-- R/t_test.R | 4 ++-- man/anova_stats.Rd | 2 +- man/samplesize_mixed.Rd | 2 ++ man/t_test.Rd | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/R/anova_stats.R b/R/anova_stats.R index 28f360ae..d1e6e640 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -15,7 +15,7 @@ #' \cr \cr #' Tippey K, Longnecker MT (2016): An Ad Hoc Method for Computing Pseudo-Effect Size for Mixed Model. #' -#' @examplesIf requireNamespace("car") +#' @examplesIf requireNamespace("car") && requireNamespace("pwr") #' # load sample data #' data(efc) #' diff --git a/R/samplesize_mixed.R b/R/samplesize_mixed.R index 66f170cf..42521118 100644 --- a/R/samplesize_mixed.R +++ b/R/samplesize_mixed.R @@ -36,7 +36,7 @@ #' additionally include repeated measures (three-level-designs) may work #' as well, however, the computed sample size may be less accurate. 
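# Editor's note (illustrative aside, not part of the patch): a hedged sketch of
# the textbook two-level design-effect adjustment that sample-size
# approximations like the ones in the examples below typically rely on. This is
# only an assumption for illustration and may not be the exact formula that
# `samplesize_mixed()` uses internally.
n_standard <- 2 * ceiling(pwr::pwr.t.test(d = 0.3, power = 0.8)$n) # total N for a standard two-group design
deff <- 1 + (27 - 1) * 0.05 # design effect for 27 subjects per cluster, ICC = 0.05
ceiling(n_standard * deff)  # close to the roughly 800 observations quoted in the examples below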
#' -#' @examples +#' @examplesIf requireNamespace("pwr") #' # Sample size for multilevel model with 30 cluster groups and a small to #' # medium effect size (Cohen's d) of 0.3. 27 subjects per cluster and #' # hence a total sample size of about 802 observations is needed. @@ -47,7 +47,13 @@ #' # hence a total sample size of about 107 observations is needed. #' samplesize_mixed(eff.size = .2, df.n = 5, k = 20, power = .9) #' @export -samplesize_mixed <- function(eff.size, df.n = NULL, power = .8, sig.level = .05, k, n, icc = 0.05) { +samplesize_mixed <- function(eff.size, + df.n = NULL, + power = 0.8, + sig.level = 0.05, + k, + n, + icc = 0.05) { if (!requireNamespace("pwr", quietly = TRUE)) { stop("Package `pwr` needed for this function to work. Please install it.", call. = FALSE) } diff --git a/R/t_test.R b/R/t_test.R index 222726d4..5c0aa3b2 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -14,7 +14,7 @@ #' #' @return A data frame with test results. #' -#' @examplesIf requireNamespace("effectsize", quietly = TRUE) +#' @examplesIf requireNamespace("effectsize") #' data(sleep) #' # one-sample t-test #' t_test(sleep, "extra") @@ -176,7 +176,7 @@ t_test <- function(data, # Weighted Mann-Whitney-Test for two groups ---------------------------------- .calculate_weighted_ttest <- function(dv, grp, mu, paired, alternative, weights, group_labels, data_name) { - insight::check_if_installed("datawizard") + insight::check_if_installed(c("datawizard", "effectsize")) if (is.null(grp)) { dat <- stats::na.omit(data.frame(dv, weights)) colnames(dat) <- c("y", "w") diff --git a/man/anova_stats.Rd b/man/anova_stats.Rd index c2811389..a64263e9 100644 --- a/man/anova_stats.Rd +++ b/man/anova_stats.Rd @@ -22,7 +22,7 @@ epsilon-squared statistic or Cohen's F for all terms in an anovas. and power for each term. } \examples{ -\dontshow{if (requireNamespace("car")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("car") && requireNamespace("pwr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # load sample data data(efc) diff --git a/man/samplesize_mixed.Rd b/man/samplesize_mixed.Rd index 39e18690..9035ea9d 100644 --- a/man/samplesize_mixed.Rd +++ b/man/samplesize_mixed.Rd @@ -65,6 +65,7 @@ additionally include repeated measures (three-level-designs) may work as well, however, the computed sample size may be less accurate. } \examples{ +\dontshow{if (requireNamespace("pwr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} # Sample size for multilevel model with 30 cluster groups and a small to # medium effect size (Cohen's d) of 0.3. 27 subjects per cluster and # hence a total sample size of about 802 observations is needed. @@ -74,6 +75,7 @@ samplesize_mixed(eff.size = .3, k = 30) # to large effect size for linear models of 0.2. Five subjects per cluster and # hence a total sample size of about 107 observations is needed. samplesize_mixed(eff.size = .2, df.n = 5, k = 20, power = .9) +\dontshow{\}) # examplesIf} } \references{ Cohen J. 1988. Statistical power analysis for the behavioral sciences (2nd ed.). Hillsdale,NJ: Lawrence Erlbaum. diff --git a/man/t_test.Rd b/man/t_test.Rd index 3e776fc7..6810181b 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -47,7 +47,7 @@ This function performs a Student's t test for two independent samples, for paired samples, or for one sample. 
} \examples{ -\dontshow{if (requireNamespace("effectsize", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) # one-sample t-test t_test(sleep, "extra") From 8d8660153f990fa1e4e0a7e9775fa13ecf5c597d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:28:09 +0200 Subject: [PATCH 39/82] fix --- R/mann_whitney_test.R | 25 +++++++++++++++---------- R/t_test.R | 7 +++++-- man/mann_whitney_test.Rd | 22 ++++++++++------------ man/t_test.Rd | 7 +++++-- 4 files changed, 35 insertions(+), 26 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index b84aff05..9ce468b1 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -17,9 +17,8 @@ #' test. If `by` is not a factor, it will be coerced to a factor. For #' `chi_squared_test()`, if `probabilities` is provided, `by` must be `NULL`. #' @param weights Name of an (optional) weighting variable to be used for the test. -#' @param distribution Indicates how the null distribution of the test statistic -#' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"` -#' (default). See [`coin::wilcox_test()`] for details. +#' @param ... Additional arguments passed to `wilcox.test()` (for unweighted +#' tests, i.e. when `weights = NULL`). #' #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. @@ -44,25 +43,30 @@ #' data(efc) #' # Mann-Whitney-U tests for elder's age by elder's sex. #' mann_whitney_test(efc, "e17age", by = "e16sex") +#' # base R equivalent +#' wilcox.test(e17age ~ e16sex, data = efc) #' #' # when data is in wide-format, specify all relevant continuous #' # variables in `select` and omit `by` #' set.seed(123) #' wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) #' mann_whitney_test(wide_data, select = c("scale1", "scale2")) -#' +#' # base R equivalent +#' wilcox.test(wide_data$scale1, wide_data$scale2) #' # same as if we had data in long format, with grouping variable #' long_data <- data.frame( #' scales = c(wide_data$scale1, wide_data$scale2), -#' groups = rep(c("A", "B"), each = 20) +#' groups = as.factor(rep(c("A", "B"), each = 20)) #' ) #' mann_whitney_test(long_data, select = "scales", by = "groups") +#' # base R equivalent +#' wilcox.test(scales ~ groups, long_data) #' @export mann_whitney_test <- function(data, select = NULL, by = NULL, weights = NULL, - distribution = "asymptotic") { + ...) { insight::check_if_installed("datawizard") # sanity checks @@ -104,7 +108,7 @@ mann_whitney_test <- function(data, } if (is.null(weights)) { - .calculate_mwu(dv, grp, distribution, group_labels) + .calculate_mwu(dv, grp, distribution, group_labels, ...) } else { .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) } @@ -113,7 +117,7 @@ mann_whitney_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_mwu <- function(dv, grp, distribution, group_labels) { +.calculate_mwu <- function(dv, grp, distribution, group_labels, ...) 
{ insight::check_if_installed("coin") # prepare data wcdat <- data.frame(dv, grp) @@ -126,9 +130,10 @@ mann_whitney_test <- function(data, # compute statistics u <- as.numeric(coin::statistic(wt, type = "linear")) z <- as.numeric(coin::statistic(wt, type = "standardized")) - p <- coin::pvalue(wt) r <- abs(z / sqrt(length(dv))) - w <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat)$statistic) + htest <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat, ...)) + w <- htest$statistic + p <- htest$p.value # group means dat_gr1 <- stats::na.omit(dv[grp == group_levels[1]]) diff --git a/R/t_test.R b/R/t_test.R index 5c0aa3b2..5cabd4a8 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -18,15 +18,18 @@ #' data(sleep) #' # one-sample t-test #' t_test(sleep, "extra") +#' # base R equivalent #' t.test(extra ~ 1, data = sleep) #' #' # two-sample t-test, by group -#' t.test(mpg ~ am, data = mtcars) #' t_test(mtcars, "mpg", by = "am") +#' # base R equivalent +#' t.test(mpg ~ am, data = mtcars) #' #' # paired t-test -#' t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) #' t_test(mtcars, c("mpg", "hp"), paired = TRUE) +#' # base R equivalent +#' t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) #' @export t_test <- function(data, select = NULL, diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index cfbfacfe..ff3ae98b 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -4,13 +4,7 @@ \alias{mann_whitney_test} \title{Mann-Whitney test} \usage{ -mann_whitney_test( - data, - select = NULL, - by = NULL, - weights = NULL, - distribution = "asymptotic" -) +mann_whitney_test(data, select = NULL, by = NULL, weights = NULL, ...) } \arguments{ \item{data}{A data frame.} @@ -27,9 +21,8 @@ test. If \code{by} is not a factor, it will be coerced to a factor. For \item{weights}{Name of an (optional) weighting variable to be used for the test.} -\item{distribution}{Indicates how the null distribution of the test statistic -should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{"asymptotic"} -(default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.} +\item{...}{Additional arguments passed to \code{wilcox.test()} (for unweighted +tests, i.e. when \code{weights = NULL}).} } \value{ A data frame with test results. The function returns p and Z-values @@ -65,18 +58,23 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: data(efc) # Mann-Whitney-U tests for elder's age by elder's sex. mann_whitney_test(efc, "e17age", by = "e16sex") +# base R equivalent +wilcox.test(e17age ~ e16sex, data = efc) # when data is in wide-format, specify all relevant continuous # variables in `select` and omit `by` set.seed(123) wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) mann_whitney_test(wide_data, select = c("scale1", "scale2")) - +# base R equivalent +wilcox.test(wide_data$scale1, wide_data$scale2) # same as if we had data in long format, with grouping variable long_data <- data.frame( scales = c(wide_data$scale1, wide_data$scale2), - groups = rep(c("A", "B"), each = 20) + groups = as.factor(rep(c("A", "B"), each = 20)) ) mann_whitney_test(long_data, select = "scales", by = "groups") +# base R equivalent +wilcox.test(scales ~ groups, long_data) \dontshow{\}) # examplesIf} } diff --git a/man/t_test.Rd b/man/t_test.Rd index 6810181b..e3010a92 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -51,14 +51,17 @@ samples, for paired samples, or for one sample. 
data(sleep) # one-sample t-test t_test(sleep, "extra") +# base R equivalent t.test(extra ~ 1, data = sleep) # two-sample t-test, by group -t.test(mpg ~ am, data = mtcars) t_test(mtcars, "mpg", by = "am") +# base R equivalent +t.test(mpg ~ am, data = mtcars) # paired t-test -t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) t_test(mtcars, c("mpg", "hp"), paired = TRUE) +# base R equivalent +t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) \dontshow{\}) # examplesIf} } From 1621eb7eba943da3552714679ee5e3f0c7a1683e Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:31:49 +0200 Subject: [PATCH 40/82] fix --- R/mann_whitney_test.R | 16 +++++++++++++--- R/t_test.R | 2 -- man/mann_whitney_test.Rd | 13 ++++++++++++- man/t_test.Rd | 3 ++- 4 files changed, 27 insertions(+), 7 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 9ce468b1..719f9890 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -17,6 +17,9 @@ #' test. If `by` is not a factor, it will be coerced to a factor. For #' `chi_squared_test()`, if `probabilities` is provided, `by` must be `NULL`. #' @param weights Name of an (optional) weighting variable to be used for the test. +#' @param alternative A character string specifying the alternative hypothesis, +#' must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test` +#' and `?wilcox.test`. #' @param ... Additional arguments passed to `wilcox.test()` (for unweighted #' tests, i.e. when `weights = NULL`). #' @@ -66,12 +69,19 @@ mann_whitney_test <- function(data, select = NULL, by = NULL, weights = NULL, + alternative = "two.sided", ...) { insight::check_if_installed("datawizard") + alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) # sanity checks .sanitize_htest_input(data, select, by, weights) + # alternative only if weights are NULL + if (!is.null(weights) && alternative != "two.sided") { + insight::format_error("Argument `alternative` must be `two.sided` if `weights` are specified.") + } + # does select indicate more than one variable? if (length(select) > 1) { # sanity check - may only specify two variable names @@ -108,7 +118,7 @@ mann_whitney_test <- function(data, } if (is.null(weights)) { - .calculate_mwu(dv, grp, distribution, group_labels, ...) + .calculate_mwu(dv, grp, distribution, group_labels, alternative, ...) } else { .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) } @@ -117,7 +127,7 @@ mann_whitney_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_mwu <- function(dv, grp, distribution, group_labels, ...) { +.calculate_mwu <- function(dv, grp, distribution, group_labels, alternative, ...) { insight::check_if_installed("coin") # prepare data wcdat <- data.frame(dv, grp) @@ -131,7 +141,7 @@ mann_whitney_test <- function(data, u <- as.numeric(coin::statistic(wt, type = "linear")) z <- as.numeric(coin::statistic(wt, type = "standardized")) r <- abs(z / sqrt(length(dv))) - htest <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat, ...)) + htest <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat, alternative = alternative, ...)) w <- htest$statistic p <- htest$p.value diff --git a/R/t_test.R b/R/t_test.R index 5cabd4a8..b22eb070 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -4,8 +4,6 @@ #' samples, for paired samples, or for one sample. 
#' #' @inheritParams mann_whitney_test -#' @param alternative A character string specifying the alternative hypothesis, -#' must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test()`. #' @param paired Logical, whether to compute a paired t-test. #' @param mu The hypothesized difference in means. If `paired = TRUE`, or for a #' one-sample t-test, this is the hypothesized true mean value. If diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index ff3ae98b..6d62a992 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -4,7 +4,14 @@ \alias{mann_whitney_test} \title{Mann-Whitney test} \usage{ -mann_whitney_test(data, select = NULL, by = NULL, weights = NULL, ...) +mann_whitney_test( + data, + select = NULL, + by = NULL, + weights = NULL, + alternative = "two.sided", + ... +) } \arguments{ \item{data}{A data frame.} @@ -21,6 +28,10 @@ test. If \code{by} is not a factor, it will be coerced to a factor. For \item{weights}{Name of an (optional) weighting variable to be used for the test.} +\item{alternative}{A character string specifying the alternative hypothesis, +must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test} +and \code{?wilcox.test}.} + \item{...}{Additional arguments passed to \code{wilcox.test()} (for unweighted tests, i.e. when \code{weights = NULL}).} } diff --git a/man/t_test.Rd b/man/t_test.Rd index e3010a92..142478b4 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -37,7 +37,8 @@ one-sample t-test, this is the hypothesized true mean value. If two groups.} \item{alternative}{A character string specifying the alternative hypothesis, -must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test()}.} +must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test} +and \code{?wilcox.test}.} } \value{ A data frame with test results. From 54d3fd2142654ed4cf95e60b7f96ec22cf394965 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:33:52 +0200 Subject: [PATCH 41/82] fix --- R/mann_whitney_test.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 719f9890..85a7861b 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -118,7 +118,7 @@ mann_whitney_test <- function(data, } if (is.null(weights)) { - .calculate_mwu(dv, grp, distribution, group_labels, alternative, ...) + .calculate_mwu(dv, grp, alternative, group_labels, ...) } else { .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) } @@ -127,12 +127,12 @@ mann_whitney_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_mwu <- function(dv, grp, distribution, group_labels, alternative, ...) { +.calculate_mwu <- function(dv, grp, alternative, group_labels, ...) 
{ insight::check_if_installed("coin") # prepare data wcdat <- data.frame(dv, grp) # perfom wilcox test - wt <- coin::wilcox_test(dv ~ grp, data = wcdat, distribution = distribution) + wt <- coin::wilcox_test(dv ~ grp, data = wcdat) # for rank mean group_levels <- levels(grp) From 4b0cfdaedc1cf49e3ea0600b675acf4db7141b79 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:49:56 +0200 Subject: [PATCH 42/82] fix --- R/t_test.R | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/R/t_test.R b/R/t_test.R index b22eb070..69a6a03c 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -156,6 +156,7 @@ t_test <- function(data, df = as.numeric(htest$parameter), method = ifelse(paired, "Paired t-test", htest$method), alternative = alternative, + mu = mu, stringsAsFactors = FALSE ) class(out) <- c("sj_htest_t", "data.frame") @@ -272,6 +273,7 @@ t_test <- function(data, df = dof, method = method, alternative = alternative, + mu = mu, stringsAsFactors = FALSE ) class(out) <- c("sj_htest_t", "data.frame") @@ -324,7 +326,7 @@ print.sj_htest_t <- function(x, ...) { ), "cyan") } else { # data - insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") + insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") # group-1-info if (is.null(n_groups)) { @@ -363,6 +365,21 @@ print.sj_htest_t <- function(x, ...) { } } + # alternative hypothesis + alt_string <- switch(x$alternative, + two.sided = "not equal to", + less = "less than", + greater = "greater than" + ) + if (one_sample) { + alt_string <- paste("true mean is", alt_string, x$mu) + } else if (paired) { + alt_string <- paste("true mean difference is", alt_string, x$mu) + } else { + alt_string <- paste("true difference in means is", alt_string, x$mu) + } + insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") + cat(sprintf( "\n t = %.3f, %s = %.3f, df = %s, %s\n\n", x$statistic, From d32390a8eb019d7202c834aaf30c3de17b4c9e00 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:54:17 +0200 Subject: [PATCH 43/82] fix --- R/mann_whitney_test.R | 18 +++++++++++++++++- R/t_test.R | 4 ---- man/mann_whitney_test.Rd | 4 ++++ man/t_test.Rd | 6 ++---- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 85a7861b..b2317520 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -20,6 +20,8 @@ #' @param alternative A character string specifying the alternative hypothesis, #' must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test` #' and `?wilcox.test`. +#' @param mu The hypothesized difference in means (for `t_test()`) or location +#' shift (for `mann_whitney_test()`). The default is 0. #' @param ... Additional arguments passed to `wilcox.test()` (for unweighted #' tests, i.e. when `weights = NULL`). #' @@ -69,6 +71,7 @@ mann_whitney_test <- function(data, select = NULL, by = NULL, weights = NULL, + mu = 0, alternative = "two.sided", ...) { insight::check_if_installed("datawizard") @@ -164,7 +167,9 @@ mann_whitney_test <- function(data, w = w, z = z, r = r, - p = as.numeric(p) + p = as.numeric(p), + mu = mu, + alternative = alternative, ) attr(out, "rank_means") <- stats::setNames( c(rank_mean_1, rank_mean_2), @@ -333,5 +338,16 @@ print.sj_htest_mwu <- function(x, ...) 
{ ), "cyan" ) + # alternative hypothesis + if (!is.null(x$alternative) && !is.null(x$mu)) { + alt_string <- switch(x$alternative, + two.sided = "not equal to", + less = "less than", + greater = "greater than" + ) + alt_string <- paste("true location shift is", alt_string, x$mu) + insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") + } + cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) } diff --git a/R/t_test.R b/R/t_test.R index 69a6a03c..367e4f4f 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -5,10 +5,6 @@ #' #' @inheritParams mann_whitney_test #' @param paired Logical, whether to compute a paired t-test. -#' @param mu The hypothesized difference in means. If `paired = TRUE`, or for a -#' one-sample t-test, this is the hypothesized true mean value. If -#' `paired = FALSE`, this is the hypothesized difference in means between the -#' two groups. #' #' @return A data frame with test results. #' diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 6d62a992..111f60c2 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -9,6 +9,7 @@ mann_whitney_test( select = NULL, by = NULL, weights = NULL, + mu = 0, alternative = "two.sided", ... ) @@ -28,6 +29,9 @@ test. If \code{by} is not a factor, it will be coerced to a factor. For \item{weights}{Name of an (optional) weighting variable to be used for the test.} +\item{mu}{The hypothesized difference in means (for \code{t_test()}) or location +shift (for \code{mann_whitney_test()}). The default is 0.} + \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test} and \code{?wilcox.test}.} diff --git a/man/t_test.Rd b/man/t_test.Rd index 142478b4..3c730e0f 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -31,10 +31,8 @@ test. If \code{by} is not a factor, it will be coerced to a factor. For \item{paired}{Logical, whether to compute a paired t-test.} -\item{mu}{The hypothesized difference in means. If \code{paired = TRUE}, or for a -one-sample t-test, this is the hypothesized true mean value. If -\code{paired = FALSE}, this is the hypothesized difference in means between the -two groups.} +\item{mu}{The hypothesized difference in means (for \code{t_test()}) or location +shift (for \code{mann_whitney_test()}). The default is 0.} \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test} From af43804b3d7ad6a99d0dd8d5f988456cc8706ea8 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 11:56:15 +0200 Subject: [PATCH 44/82] docs --- R/kruskal_wallis_test.R | 4 +++- R/mann_whitney_test.R | 4 +++- R/t_test.R | 4 +++- man/kruskal_wallis_test.Rd | 4 +++- man/mann_whitney_test.Rd | 4 +++- man/t_test.Rd | 4 +++- 6 files changed, 18 insertions(+), 6 deletions(-) diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index a66e3c15..1b330892 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -2,7 +2,9 @@ #' @name kruskal_wallis_test #' @description This function performs a Kruskal-Wallis rank sum test, to test #' the null hypothesis that the population median of all of the groups are -#' equal. The alternative is that they differ in at least one. +#' equal. The alternative is that they differ in at least one. 
Unlike the +#' underlying base R function `kruskal.test()`, this function allows for +#' weighted tests. #' #' @inheritParams mann_whitney_test #' diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index b2317520..80363725 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -1,7 +1,9 @@ #' @title Mann-Whitney test #' @name mann_whitney_test #' @description This function performs a Mann-Whitney test (or Wilcoxon rank -#' sum test for _unpaired_ samples. +#' sum test for _unpaired_ samples. Unlike the underlying base R function +#' `wilcox.test()`, this function allows for weighted tests and automatically +#' calculates effect sizes. #' #' A Mann-Whitney test is a non-parametric test for the null hypothesis that two #' independent samples have identical continuous distributions. It can be used diff --git a/R/t_test.R b/R/t_test.R index 367e4f4f..45649015 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -1,7 +1,9 @@ #' @title Student's t test #' @name t_test #' @description This function performs a Student's t test for two independent -#' samples, for paired samples, or for one sample. +#' samples, for paired samples, or for one sample. Unlike the underlying +#' base R function `t.test()`, this function allows for weighted tests and +#' automatically calculates effect sizes. #' #' @inheritParams mann_whitney_test #' @param paired Logical, whether to compute a paired t-test. diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index bd0909c8..f2efefc0 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -27,7 +27,9 @@ A data frame with test results. \description{ This function performs a Kruskal-Wallis rank sum test, to test the null hypothesis that the population median of all of the groups are -equal. The alternative is that they differ in at least one. +equal. The alternative is that they differ in at least one. Unlike the +underlying base R function \code{kruskal.test()}, this function allows for +weighted tests. } \details{ The function simply is a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 111f60c2..96073bc4 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -45,7 +45,9 @@ as well as effect size r and group-rank-means. } \description{ This function performs a Mann-Whitney test (or Wilcoxon rank -sum test for \emph{unpaired} samples. +sum test for \emph{unpaired} samples. Unlike the underlying base R function +\code{wilcox.test()}, this function allows for weighted tests and automatically +calculates effect sizes. A Mann-Whitney test is a non-parametric test for the null hypothesis that two independent samples have identical continuous distributions. It can be used diff --git a/man/t_test.Rd b/man/t_test.Rd index 3c730e0f..9f089cec 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -43,7 +43,9 @@ A data frame with test results. } \description{ This function performs a Student's t test for two independent -samples, for paired samples, or for one sample. +samples, for paired samples, or for one sample. Unlike the underlying +base R function \code{t.test()}, this function allows for weighted tests and +automatically calculates effect sizes. 
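# Editor's note (illustrative aside, not part of the patch): the revised
# descriptions above say these wrappers "automatically calculate effect sizes".
# A minimal sketch of the difference for t_test(), assuming the effectsize
# package as the backend (it is added to the required packages elsewhere in
# this series); the data set is only an example:
t.test(mpg ~ am, data = mtcars)               # base R reports no effect size
effectsize::cohens_d(mpg ~ am, data = mtcars) # Cohen's d, one such effect size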
} \examples{ \dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} From 0a7c837211e1f18663a52c296c1616ce624c69e4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 12:42:18 +0200 Subject: [PATCH 45/82] fix --- R/mann_whitney_test.R | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 80363725..6030a612 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -123,7 +123,7 @@ mann_whitney_test <- function(data, } if (is.null(weights)) { - .calculate_mwu(dv, grp, alternative, group_labels, ...) + .calculate_mwu(dv, grp, alternative, mu, group_labels, ...) } else { .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) } @@ -132,7 +132,7 @@ mann_whitney_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_mwu <- function(dv, grp, alternative, group_labels, ...) { +.calculate_mwu <- function(dv, grp, alternative, mu, group_labels, ...) { insight::check_if_installed("coin") # prepare data wcdat <- data.frame(dv, grp) @@ -146,7 +146,13 @@ mann_whitney_test <- function(data, u <- as.numeric(coin::statistic(wt, type = "linear")) z <- as.numeric(coin::statistic(wt, type = "standardized")) r <- abs(z / sqrt(length(dv))) - htest <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat, alternative = alternative, ...)) + htest <- suppressWarnings(stats::wilcox.test( + dv ~ grp, + data = wcdat, + alternative = alternative, + mu = mu, + ... + )) w <- htest$statistic p <- htest$p.value From 090fc67ea512e736416a811a893b7fb192a761db Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 12:48:05 +0200 Subject: [PATCH 46/82] refactor --- R/kruskal_wallis_test.R | 5 +---- R/mann_whitney_test.R | 20 ++++++++++++++------ R/t_test.R | 9 +-------- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 1b330892..49206f26 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -43,13 +43,10 @@ kruskal_wallis_test <- function(data, insight::check_if_installed("datawizard") # sanity checks - .sanitize_htest_input(data, select, by, weights) + .sanitize_htest_input(data, select, by, weights, test = "kruskal_wallis_test") # does select indicate more than one variable? if (length(select) > 1) { - if (!is.null(by)) { - insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") - } # we convert the data into long format, and create a grouping variable data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") by <- select[2] diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 6030a612..3b39a731 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -80,7 +80,7 @@ mann_whitney_test <- function(data, alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) # sanity checks - .sanitize_htest_input(data, select, by, weights) + .sanitize_htest_input(data, select, by, weights, test = "mann_whitney_test") # alternative only if weights are NULL if (!is.null(weights) && alternative != "two.sided") { @@ -89,10 +89,6 @@ mann_whitney_test <- function(data, # does select indicate more than one variable? 
if (length(select) > 1) { - # sanity check - may only specify two variable names - if (length(select) > 2) { - insight::format_error("You may only specify two variables for Mann-Whitney test.") - } if (!is.null(by)) { insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") } @@ -266,11 +262,23 @@ mann_whitney_test <- function(data, # helper ---------------------------------------------------------------------- -.sanitize_htest_input <- function(data, select, by, weights) { +.sanitize_htest_input <- function(data, select, by, weights, test = NULL) { # check if arguments are NULL if (is.null(select)) { insight::format_error("Argument `select` is missing.") } + # sanity check - may only specify two variable names + if (identical(test, "mann_whitney_test") && length(select) > 2) { + insight::format_error("You may only specify two variables for Mann-Whitney test.") + } + + # sanity check - may only specify two variable names + if (identical(test, "t_test") && length(select) > 2) { + insight::format_error("You may only specify two variables for Student's t test.") + } + if ((identical(test, "t_test") || identical(test, "kruskal_wallis_test")) && length(select) > 1 && !is.null(by)) { + insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") + } # check if arguments have correct length or are of correct type if (!is.character(select)) { diff --git a/R/t_test.R b/R/t_test.R index 45649015..3afd0afb 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -38,18 +38,11 @@ t_test <- function(data, alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) # sanity checks - .sanitize_htest_input(data, select, by, weights) + .sanitize_htest_input(data, select, by, weights, test = "t_test") data_name <- NULL # does select indicate more than one variable? if (length(select) > 1) { - # sanity check - may only specify two variable names - if (length(select) > 2) { - insight::format_error("You may only specify two variables for Student's t test.") - } - if (!is.null(by)) { - insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") - } # paired? 
if (paired) { # subtract the two variables for paired t-test, and set by to NULL From e2bda19612f28a4baf2e937b4e8dd4e56bfacbd0 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 12:49:23 +0200 Subject: [PATCH 47/82] fix --- R/mann_whitney_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 3b39a731..a388a874 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -287,7 +287,7 @@ mann_whitney_test <- function(data, if (!is.null(by) && (length(by) != 1 || !is.character(by))) { insight::format_error("Argument `by` must be a character string with the name of a single variable.") } - if (!is.null(weights) && length(weights) != 1) { + if (!is.null(weights) && (length(weights) != 1 || !is.character(weights))) { insight::format_error("Argument `weights` must be a character string with the name of a single variable.") } From 0b7853c583bebc4b9159d06a042b754819209911 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 12:50:58 +0200 Subject: [PATCH 48/82] docs --- R/kruskal_wallis_test.R | 2 ++ man/kruskal_wallis_test.Rd | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 49206f26..0f1e9735 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -35,6 +35,8 @@ #' groups = rep(c("A", "B", "C"), each = 20) #' ) #' kruskal_wallis_test(long_data, select = "scales", by = "groups") +#' # base R equivalent +#' kruskal.test(scales ~ groups, data = long_data) #' @export kruskal_wallis_test <- function(data, select = NULL, diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index f2efefc0..480e567d 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -57,4 +57,6 @@ long_data <- data.frame( groups = rep(c("A", "B", "C"), each = 20) ) kruskal_wallis_test(long_data, select = "scales", by = "groups") +# base R equivalent +kruskal.test(scales ~ groups, data = long_data) } From 8f966fc2cf3c67cc39fd26701e97b7a277a9a11d Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 12:54:35 +0200 Subject: [PATCH 49/82] fix --- R/mann_whitney_test.R | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index a388a874..9719006a 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -89,9 +89,6 @@ mann_whitney_test <- function(data, # does select indicate more than one variable? 
if (length(select) > 1) { - if (!is.null(by)) { - insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") - } # we convert the data into long format, and create a grouping variable data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") by <- select[2] @@ -276,7 +273,7 @@ mann_whitney_test <- function(data, if (identical(test, "t_test") && length(select) > 2) { insight::format_error("You may only specify two variables for Student's t test.") } - if ((identical(test, "t_test") || identical(test, "kruskal_wallis_test")) && length(select) > 1 && !is.null(by)) { + if ((!is.null(test) && test %in% c("t_test", "kruskal_wallis_test", "mann_whitney_test")) && length(select) > 1 && !is.null(by)) { # nolint insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") } From 519f66a6c9406f83a5851f6efc1c2409b22729b3 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 13:02:44 +0200 Subject: [PATCH 50/82] fix --- R/mann_whitney_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 9719006a..d1da31d5 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -170,7 +170,7 @@ mann_whitney_test <- function(data, r = r, p = as.numeric(p), mu = mu, - alternative = alternative, + alternative = alternative ) attr(out, "rank_means") <- stats::setNames( c(rank_mean_1, rank_mean_2), From 0607131e9889fcf915a87328f6aa05bd0b6bfd55 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 14:46:04 +0200 Subject: [PATCH 51/82] try --- R/chi_squared_test.R | 2 +- R/mann_whitney_test.R | 2 +- R/t_test.R | 2 +- man/chi_squared_test.Rd | 2 ++ man/mann_whitney_test.Rd | 2 +- man/t_test.Rd | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 426883a6..9573dafc 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -38,7 +38,7 @@ #' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. #' \doi{10.3390/math11091982} #' -#' @examples +#' @examplesIf require("effectsize") #' data(efc) #' efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) #' diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index d1da31d5..c8157c34 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -46,7 +46,7 @@ #' r = |Z| / sqrt(n1 + n2) #' ``` #' -#' @examplesIf requireNamespace("coin", quietly = TRUE) && requireNamespace("survey", quietly = TRUE) +#' @examplesIf require("coin") && require("survey") #' data(efc) #' # Mann-Whitney-U tests for elder's age by elder's sex. #' mann_whitney_test(efc, "e17age", by = "e16sex") diff --git a/R/t_test.R b/R/t_test.R index 3afd0afb..a247298f 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -10,7 +10,7 @@ #' #' @return A data frame with test results. #' -#' @examplesIf requireNamespace("effectsize") +#' @examplesIf require("effectsize") #' data(sleep) #' # one-sample t-test #' t_test(sleep, "extra") diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 49ba7783..14a2a316 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -67,6 +67,7 @@ The weighted version of the chi-squared test is based on the a weighted table, using \code{\link[=xtabs]{xtabs()}} as input for \code{chisq.test()}. 
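# Editor's note (illustrative aside, not part of the patch): a minimal sketch
# of the mechanism stated above for the weighted chi-squared test -- build a
# weighted table with xtabs() and pass it to chisq.test(). Data and variable
# names are made up for illustration:
set.seed(123)
d <- data.frame(
  sex   = sample(c("male", "female"), 200, replace = TRUE),
  group = sample(c("low", "high"), 200, replace = TRUE),
  w     = runif(200, 0.5, 1.5)
)
weighted_tab <- xtabs(w ~ sex + group, data = d) # weighted contingency table
chisq.test(weighted_tab)                         # chi-squared test on the weighted table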
} \examples{ +\dontshow{if (require("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) @@ -78,6 +79,7 @@ chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") # Chi-squared test for given probabilities chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) +\dontshow{\}) # examplesIf} } \references{ Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 96073bc4..1ebc10dd 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -71,7 +71,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: }\if{html}{\out{}} } \examples{ -\dontshow{if (requireNamespace("coin", quietly = TRUE) && requireNamespace("survey", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("coin") && require("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) # Mann-Whitney-U tests for elder's age by elder's sex. mann_whitney_test(efc, "e17age", by = "e16sex") diff --git a/man/t_test.Rd b/man/t_test.Rd index 9f089cec..7f33ba97 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -48,7 +48,7 @@ base R function \code{t.test()}, this function allows for weighted tests and automatically calculates effect sizes. } \examples{ -\dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (require("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) # one-sample t-test t_test(sleep, "extra") From 913b59869905a6c66aeaaa3d7371918960e4a16b Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 14:49:06 +0200 Subject: [PATCH 52/82] draft wilcox --- NAMESPACE | 2 + R/mann_whitney_test.R | 5 +- R/wilcoxon_test.R | 236 +++++++++++++++++++++++++++++++++++++++ _pkgdown.yml | 1 + man/mann_whitney_test.Rd | 5 +- man/wilcoxon_test.Rd | 50 +++++++++ 6 files changed, 295 insertions(+), 4 deletions(-) create mode 100644 R/wilcoxon_test.R create mode 100644 man/wilcoxon_test.Rd diff --git a/NAMESPACE b/NAMESPACE index b06a0cda..7fe9e77b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ S3method(print,sj_htest_chi) S3method(print,sj_htest_kw) S3method(print,sj_htest_mwu) S3method(print,sj_htest_t) +S3method(print,sj_htest_wilcox) S3method(print,sj_resample) S3method(print,sj_ttest) S3method(print,sj_wcor) @@ -101,6 +102,7 @@ export(weighted_median) export(weighted_sd) export(weighted_se) export(weighted_ttest) +export(wilcoxon_test) export(xtab_statistics) importFrom(bayestestR,ci) importFrom(bayestestR,equivalence_test) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index c8157c34..8c6deaa8 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -3,10 +3,11 @@ #' @description This function performs a Mann-Whitney test (or Wilcoxon rank #' sum test for _unpaired_ samples. Unlike the underlying base R function #' `wilcox.test()`, this function allows for weighted tests and automatically -#' calculates effect sizes. +#' calculates effect sizes. For _paired_ (dependent) samples, or for one-sample +#' tests, please use the `wilcoxon_test()` function. #' #' A Mann-Whitney test is a non-parametric test for the null hypothesis that two -#' independent samples have identical continuous distributions. It can be used +#' _independent_ samples have identical continuous distributions. 
It can be used #' when the two continuous variables are not normally distributed. #' #' @param data A data frame. diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R new file mode 100644 index 00000000..1d999a26 --- /dev/null +++ b/R/wilcoxon_test.R @@ -0,0 +1,236 @@ +#' @title Wilcoxon rank sum test +#' @name wilcoxon_test +#' @description This function performs Wilcoxon rank sum tests for one sample +#' or for two _paired_ (dependent) samples. For _unpaired_ (independent) +#' samples, please use the `mann_whitney_test()` function. +#' +#' @inheritParams mann_whitney_test +#' +#' @return A data frame with test results. The function returns p and Z-values +#' as well as effect size r and group-rank-means. +#' +#' @export +wilcoxon_test <- function(data, + select = NULL, + by = NULL, + weights = NULL, + mu = 0, + alternative = "two.sided", + ...) { + insight::check_if_installed("datawizard") + alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater")) + + # sanity checks + .sanitize_htest_input(data, select, by, weights, test = "wilcoxon_test") + + # alternative only if weights are NULL + if (!is.null(weights) && alternative != "two.sided") { + insight::format_error("Argument `alternative` must be `two.sided` if `weights` are specified.") + } + + # for paired two-sample, do groups all have same length? + group_len <- as.numeric(table(data[[by]])) + if (!is.null(by)) { + if (!all(group_len == group_len[1])) { + insight::format_error("For paired two-sample Wilcoxon test, all groups specified in `by` must have the same length.") # nolint + } + # convert to wide format + data <- datawizard::data_to_wide(data, values_from = select, names_from = by) + select <- colnames(data) + } + + # value labels + group_labels <- select + + x <- data[[select[1]]] + if (length(select) > 1) { + y <- data[[select[2]]] + } else { + y <- NULL + } + + if (is.null(weights)) { + .calculate_wilcox(x, y, alternative, mu, group_labels, select, ...) + } else { + .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) + } +} + + +# Mann-Whitney-Test for two groups -------------------------------------------- + +.calculate_wilcox <- function(x, y, alternative, mu, group_labels, select, ...) 
{ + # for paired Wilcoxon test, we have effect sizes + if (!is.null(y)) { + # prepare data + wcdat <- data.frame(x, y) + # perfom wilcox test + wt <- coin::wilcoxsign_test(x ~ y, data = wcdat) + # compute statistics + u <- as.numeric(coin::statistic(wt, type = "linear")) + z <- as.numeric(coin::statistic(wt, type = "standardized")) + r <- abs(z / sqrt(length(dv))) + } else { + wt <- u <- z <- r <- NULL + } + + # prepare data + if (is.null(y)) { + dv <- x + } else { + dv <- x - y + } + htest <- suppressWarnings(stats::wilcox.test(dv ~ 1, alternative = alternative, mu = mu, ...)) + w <- htest$statistic + p <- htest$p.value + + one_sample <- length(select) > 1 + + out <- data.frame( + group1 = group_levels[1], + group2 = group_levels[2], + estimate = rank_mean_1 - rank_mean_2, + u = u, + w = w, + z = z, + r = r, + p = as.numeric(p), + mu = mu, + alternative = alternative + ) + attr(out, "rank_means") <- stats::setNames( + c(rank_mean_1, rank_mean_2), + c("Mean Group 1", "Mean Group 2") + ) + attr(out, "n_groups") <- stats::setNames( + c(n_grp1, n_grp2), + c("N Group 1", "N Group 2") + ) + attr(out, "group_labels") <- group_labels + attr(out, "method") <- "wilcoxon" + attr(out, "weighted") <- FALSE + class(out) <- c("sj_htest_wilcox", "data.frame") + + out +} + + +# Weighted Mann-Whitney-Test for two groups ---------------------------------- + +.calculate_weighted_mwu <- function(dv, grp, weights, group_labels) { + # check if pkg survey is available + insight::check_if_installed("survey") + + dat <- stats::na.omit(data.frame(dv, grp, weights)) + colnames(dat) <- c("x", "g", "w") + + design <- survey::svydesign(ids = ~0, data = dat, weights = ~w) + result <- survey::svyranktest(formula = x ~ g, design, test = "wilcoxon") + + # for rank mean + group_levels <- levels(droplevels(grp)) + # subgroups + dat_gr1 <- dat[dat$g == group_levels[1], ] + dat_gr2 <- dat[dat$g == group_levels[2], ] + dat_gr1$rank_x <- rank(dat_gr1$x) + dat_gr2$rank_x <- rank(dat_gr2$x) + + # rank means + design_mean1 <- survey::svydesign( + ids = ~0, + data = dat_gr1, + weights = ~w + ) + rank_mean_1 <- survey::svymean(~rank_x, design_mean1) + + design_mean2 <- survey::svydesign( + ids = ~0, + data = dat_gr2, + weights = ~w + ) + rank_mean_2 <- survey::svymean(~rank_x, design_mean2) + + # group Ns + n_grp1 <- round(sum(dat_gr1$w)) + n_grp2 <- round(sum(dat_gr2$w)) + + # statistics and effect sizes + z <- result$statistic + r <- abs(z / sqrt(sum(n_grp1, n_grp2))) + + out <- data.frame( + group1 = group_levels[1], + group2 = group_levels[2], + estimate = result$estimate, + z = z, + r = r, + p = as.numeric(result$p.value) + ) + + attr(out, "rank_means") <- stats::setNames( + c(rank_mean_1, rank_mean_2), + c("Mean Group 1", "Mean Group 2") + ) + attr(out, "n_groups") <- stats::setNames( + c(n_grp1, n_grp2), + c("N Group 1", "N Group 2") + ) + attr(out, "group_labels") <- group_labels + attr(out, "weighted") <- TRUE + class(out) <- c("sj_htest_wilcox", "data.frame") + + out +} + + +# methods --------------------------------------------------------------------- + +#' @export +print.sj_htest_wilcox <- function(x, ...) 
{ + # fetch attributes + group_labels <- attributes(x)$group_labels + rank_means <- attributes(x)$rank_means + n_groups <- attributes(x)$n_groups + weighted <- attributes(x)$weighted + + if (weighted) { + weight_string <- " (weighted)" + } else { + weight_string <- "" + } + + # same width + group_labels <- format(group_labels) + + # header + insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue") + + # group-1-info + insight::print_color( + sprintf( + " Group 1: %s (n = %i, rank mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(rank_means[1], protect_integers = TRUE) + ), "cyan" + ) + + # group-2-info + insight::print_color( + sprintf( + " Group 2: %s (n = %i, rank mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(rank_means[2], protect_integers = TRUE) + ), "cyan" + ) + + # alternative hypothesis + if (!is.null(x$alternative) && !is.null(x$mu)) { + alt_string <- switch(x$alternative, + two.sided = "not equal to", + less = "less than", + greater = "greater than" + ) + alt_string <- paste("true location shift is", alt_string, x$mu) + insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") + } + + cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) +} diff --git a/_pkgdown.yml b/_pkgdown.yml index 8335357c..4a992ea1 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -34,6 +34,7 @@ reference: - kruskal_wallis_test - mann_whitney_test - t_test + - wilcoxon_test - var_pop - title: "Tools for Regression Models" diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 1ebc10dd..404736b2 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -47,10 +47,11 @@ as well as effect size r and group-rank-means. This function performs a Mann-Whitney test (or Wilcoxon rank sum test for \emph{unpaired} samples. Unlike the underlying base R function \code{wilcox.test()}, this function allows for weighted tests and automatically -calculates effect sizes. +calculates effect sizes. For \emph{paired} (dependent) samples, or for one-sample +tests, please use the \code{wilcoxon_test()} function. A Mann-Whitney test is a non-parametric test for the null hypothesis that two -independent samples have identical continuous distributions. It can be used +\emph{independent} samples have identical continuous distributions. It can be used when the two continuous variables are not normally distributed. } \details{ diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd new file mode 100644 index 00000000..7d68b1e4 --- /dev/null +++ b/man/wilcoxon_test.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wilcoxon_test.R +\name{wilcoxon_test} +\alias{wilcoxon_test} +\title{Wilcoxon rank sum test} +\usage{ +wilcoxon_test( + data, + select = NULL, + by = NULL, + weights = NULL, + mu = 0, + alternative = "two.sided", + ... +) +} +\arguments{ +\item{data}{A data frame.} + +\item{select}{One or more name of the continuous variable (as character +vector) to be used as samples for the test. If \code{select} only specified one +variable, a one-sample test is carried out (only applicable for \code{t_test()}). +Else, \code{by} must be provided to indicate the groups of comparison.} + +\item{by}{Name of the variable indicating the groups. Required if \code{select} +specifies only one variable that contains all samples to be compared in the +test. If \code{by} is not a factor, it will be coerced to a factor. 
For
+\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.}
+
+\item{weights}{Name of an (optional) weighting variable to be used for the test.}
+
+\item{mu}{The hypothesized difference in means (for \code{t_test()}) or location
+shift (for \code{mann_whitney_test()}). The default is 0.}
+
+\item{alternative}{A character string specifying the alternative hypothesis,
+must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test}
+and \code{?wilcox.test}.}
+
+\item{...}{Additional arguments passed to \code{wilcox.test()} (for unweighted
+tests, i.e. when \code{weights = NULL}).}
+}
+\value{
+A data frame with test results. The function returns p and Z-values
+as well as effect size r and group-rank-means.
+}
+\description{
+This function performs Wilcoxon rank sum tests for one sample
+or for two \emph{paired} (dependent) samples. For \emph{unpaired} (independent)
+samples, please use the \code{mann_whitney_test()} function.
+}
From e0773fe1354e831cf89c1123befb3e49016010e0 Mon Sep 17 00:00:00 2001
From: Daniel 
Date: Sat, 11 May 2024 19:07:17 +0200
Subject: [PATCH 53/82] fix

---
 R/chi_squared_test.R     |   2 +-
 R/mann_whitney_test.R    |   7 +--
 R/t_test.R               |   8 +--
 R/wilcoxon_test.R        | 111 ++++++++++++++++-----------------------
 man/chi_squared_test.Rd  |   2 +-
 man/mann_whitney_test.Rd |   2 +-
 man/t_test.Rd            |   2 +-
 7 files changed, 58 insertions(+), 76 deletions(-)

diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R
index 9573dafc..70d1ec37 100644
--- a/R/chi_squared_test.R
+++ b/R/chi_squared_test.R
@@ -38,7 +38,7 @@
 #' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
 #' \doi{10.3390/math11091982}
 #'
-#' @examplesIf require("effectsize")
+#' @examplesIf requireNamespace("effectsize")
 #' data(efc)
 #' efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
 #'
diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R
index 8c6deaa8..86e7a4d7 100644
--- a/R/mann_whitney_test.R
+++ b/R/mann_whitney_test.R
@@ -47,7 +47,7 @@
 #' r = |Z| / sqrt(n1 + n2)
 #' ```
 #'
-#' @examplesIf require("coin") && require("survey")
+#' @examplesIf requireNamespace("coin") && requireNamespace("survey")
 #' data(efc)
 #' # Mann-Whitney-U tests for elder's age by elder's sex.
 #' mann_whitney_test(efc, "e17age", by = "e16sex")
@@ -233,13 +233,14 @@ mann_whitney_test <- function(data,
   z <- result$statistic
   r <- abs(z / sqrt(sum(n_grp1, n_grp2)))
 
-  out <- data.frame(
+  out <- data_frame(
     group1 = group_levels[1],
     group2 = group_levels[2],
     estimate = result$estimate,
     z = z,
     r = r,
-    p = as.numeric(result$p.value)
+    p = as.numeric(result$p.value),
+    alternative = "two.sided"
   )
 
   attr(out, "rank_means") <- stats::setNames(
diff --git a/R/t_test.R b/R/t_test.R
index a247298f..b487b47e 100644
--- a/R/t_test.R
+++ b/R/t_test.R
@@ -10,7 +10,7 @@
 #'
 #' @return A data frame with test results.
 #'
-#' @examplesIf require("effectsize")
+#' @examplesIf requireNamespace("effectsize")
 #' data(sleep)
 #' # one-sample t-test
 #' t_test(sleep, "extra")
@@ -41,11 +41,12 @@ t_test <- function(data,
   .sanitize_htest_input(data, select, by, weights, test = "t_test")
   data_name <- NULL
 
-  # does select indicate more than one variable?
+  # does select indicate more than one variable? We then reshape the data
+  # to have one continuous scale and one grouping variable
  if (length(select) > 1) {
     # paired?
if (paired) { - # subtract the two variables for paired t-test, and set by to NULL + # subtract the two variables for paired t-test, and "set" by to NULL data[[select[1]]] <- data[[select[1]]] - data[[select[2]]] data_name <- paste(select[1], "and", select[2]) select <- select[1] @@ -79,6 +80,7 @@ t_test <- function(data, } data_name <- paste(select, "by", by) } else { + # one-sample t-test... grp <- NULL group_labels <- select if (is.null(data_name)) { diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 1d999a26..0b9447cb 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -50,7 +50,7 @@ wilcoxon_test <- function(data, } if (is.null(weights)) { - .calculate_wilcox(x, y, alternative, mu, group_labels, select, ...) + .calculate_wilcox(x, y, alternative, mu, group_labels, ...) } else { .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) } @@ -59,7 +59,7 @@ wilcoxon_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- -.calculate_wilcox <- function(x, y, alternative, mu, group_labels, select, ...) { +.calculate_wilcox <- function(x, y, alternative, mu, group_labels, ...) { # for paired Wilcoxon test, we have effect sizes if (!is.null(y)) { # prepare data @@ -69,7 +69,7 @@ wilcoxon_test <- function(data, # compute statistics u <- as.numeric(coin::statistic(wt, type = "linear")) z <- as.numeric(coin::statistic(wt, type = "standardized")) - r <- abs(z / sqrt(length(dv))) + r <- abs(z / sqrt(nrow(wcdat))) } else { wt <- u <- z <- r <- NULL } @@ -80,35 +80,36 @@ wilcoxon_test <- function(data, } else { dv <- x - y } - htest <- suppressWarnings(stats::wilcox.test(dv ~ 1, alternative = alternative, mu = mu, ...)) - w <- htest$statistic + htest <- suppressWarnings(stats::wilcox.test( + dv ~ 1, + alternative = alternative, + mu = mu, + ... + )) + v <- htest$statistic p <- htest$p.value - one_sample <- length(select) > 1 - out <- data.frame( - group1 = group_levels[1], - group2 = group_levels[2], - estimate = rank_mean_1 - rank_mean_2, - u = u, - w = w, - z = z, - r = r, + group1 = group_labels[1], + v = v, p = as.numeric(p), mu = mu, alternative = alternative ) - attr(out, "rank_means") <- stats::setNames( - c(rank_mean_1, rank_mean_2), - c("Mean Group 1", "Mean Group 2") - ) - attr(out, "n_groups") <- stats::setNames( - c(n_grp1, n_grp2), - c("N Group 1", "N Group 2") - ) + # two groups? 
+ if (length(group_labels) > 1) { + out$group2 <- group_labels[2] + } + # add effectsizes, when we have + if (!is.null(wt)) { + out$u <- u + out$z <- z + out$r <- r + } attr(out, "group_labels") <- group_labels attr(out, "method") <- "wilcoxon" attr(out, "weighted") <- FALSE + attr(out, "one_sample") <- length(group_labels) > 1 class(out) <- c("sj_htest_wilcox", "data.frame") out @@ -117,66 +118,44 @@ wilcoxon_test <- function(data, # Weighted Mann-Whitney-Test for two groups ---------------------------------- -.calculate_weighted_mwu <- function(dv, grp, weights, group_labels) { +.calculate_weighted_wilcox <- function(x, y, weights, group_labels) { # check if pkg survey is available insight::check_if_installed("survey") - dat <- stats::na.omit(data.frame(dv, grp, weights)) - colnames(dat) <- c("x", "g", "w") - - design <- survey::svydesign(ids = ~0, data = dat, weights = ~w) - result <- survey::svyranktest(formula = x ~ g, design, test = "wilcoxon") - - # for rank mean - group_levels <- levels(droplevels(grp)) - # subgroups - dat_gr1 <- dat[dat$g == group_levels[1], ] - dat_gr2 <- dat[dat$g == group_levels[2], ] - dat_gr1$rank_x <- rank(dat_gr1$x) - dat_gr2$rank_x <- rank(dat_gr2$x) - - # rank means - design_mean1 <- survey::svydesign( - ids = ~0, - data = dat_gr1, - weights = ~w - ) - rank_mean_1 <- survey::svymean(~rank_x, design_mean1) + # prepare data + if (is.null(y)) { + dv <- x + } else { + dv <- x - y + } - design_mean2 <- survey::svydesign( - ids = ~0, - data = dat_gr2, - weights = ~w - ) - rank_mean_2 <- survey::svymean(~rank_x, design_mean2) + dat <- stats::na.omit(data.frame(dv, weights)) + colnames(dat) <- c("y", "w") - # group Ns - n_grp1 <- round(sum(dat_gr1$w)) - n_grp2 <- round(sum(dat_gr2$w)) + design <- survey::svydesign(ids = ~0, data = dat, weights = ~w) + result <- survey::svyranktest(formula = y ~ 1, design, test = "wilcoxon") # statistics and effect sizes z <- result$statistic - r <- abs(z / sqrt(sum(n_grp1, n_grp2))) + r <- abs(z / sqrt(nrow(dat))) - out <- data.frame( - group1 = group_levels[1], - group2 = group_levels[2], + out <- data_frame( + group1 = group_labels[1], estimate = result$estimate, z = z, r = r, - p = as.numeric(result$p.value) + p = as.numeric(result$p.value), + alternative = "two.sided" ) + # two groups? + if (length(group_labels) > 1) { + out$group2 <- select[2] + } - attr(out, "rank_means") <- stats::setNames( - c(rank_mean_1, rank_mean_2), - c("Mean Group 1", "Mean Group 2") - ) - attr(out, "n_groups") <- stats::setNames( - c(n_grp1, n_grp2), - c("N Group 1", "N Group 2") - ) attr(out, "group_labels") <- group_labels attr(out, "weighted") <- TRUE + attr(out, "one_sample") <- length(group_labels) > 1 + attr(out, "method") <- "wilcoxon" class(out) <- c("sj_htest_wilcox", "data.frame") out diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 14a2a316..93d687f2 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -67,7 +67,7 @@ The weighted version of the chi-squared test is based on the a weighted table, using \code{\link[=xtabs]{xtabs()}} as input for \code{chisq.test()}. 
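A minimal sketch of the weighted approach described above, using the same example data as elsewhere in these patches (the `weight` column is created ad hoc here, exactly as in the package examples, and is not part of the shipped data set):

data(efc, package = "sjstats")
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
# build a weighted contingency table and hand it to the base R test
weighted_table <- xtabs(weight ~ c161sex + e16sex, data = efc)
chisq.test(weighted_table)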
} \examples{ -\dontshow{if (require("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 404736b2..69050b0a 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -72,7 +72,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: }\if{html}{\out{}} } \examples{ -\dontshow{if (require("coin") && require("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("coin") && requireNamespace("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) # Mann-Whitney-U tests for elder's age by elder's sex. mann_whitney_test(efc, "e17age", by = "e16sex") diff --git a/man/t_test.Rd b/man/t_test.Rd index 7f33ba97..9f089cec 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -48,7 +48,7 @@ base R function \code{t.test()}, this function allows for weighted tests and automatically calculates effect sizes. } \examples{ -\dontshow{if (require("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} +\dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) # one-sample t-test t_test(sleep, "extra") From 038ba7c15ba183d49b1d3071db622fbef27b1d2f Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 19:09:09 +0200 Subject: [PATCH 54/82] fix --- R/wilcoxon_test.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 0b9447cb..739c357e 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -29,8 +29,8 @@ wilcoxon_test <- function(data, } # for paired two-sample, do groups all have same length? - group_len <- as.numeric(table(data[[by]])) if (!is.null(by)) { + group_len <- as.numeric(table(data[[by]])) if (!all(group_len == group_len[1])) { insight::format_error("For paired two-sample Wilcoxon test, all groups specified in `by` must have the same length.") # nolint } From 43ce6d0c7384513760b52c4462a70f066307e7f4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sat, 11 May 2024 19:17:57 +0200 Subject: [PATCH 55/82] fixes --- R/wilcoxon_test.R | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 739c357e..023f19e0 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -30,7 +30,7 @@ wilcoxon_test <- function(data, # for paired two-sample, do groups all have same length? if (!is.null(by)) { - group_len <- as.numeric(table(data[[by]])) + group_len <- as.numeric(table(as.vector(data[[by]]))) if (!all(group_len == group_len[1])) { insight::format_error("For paired two-sample Wilcoxon test, all groups specified in `by` must have the same length.") # nolint } @@ -109,7 +109,7 @@ wilcoxon_test <- function(data, attr(out, "group_labels") <- group_labels attr(out, "method") <- "wilcoxon" attr(out, "weighted") <- FALSE - attr(out, "one_sample") <- length(group_labels) > 1 + attr(out, "one_sample") <- length(group_labels) == 1 class(out) <- c("sj_htest_wilcox", "data.frame") out @@ -145,6 +145,7 @@ wilcoxon_test <- function(data, z = z, r = r, p = as.numeric(result$p.value), + mu = 0, alternative = "two.sided" ) # two groups? 
@@ -154,7 +155,7 @@ wilcoxon_test <- function(data, attr(out, "group_labels") <- group_labels attr(out, "weighted") <- TRUE - attr(out, "one_sample") <- length(group_labels) > 1 + attr(out, "one_sample") <- length(group_labels) == 1 attr(out, "method") <- "wilcoxon" class(out) <- c("sj_htest_wilcox", "data.frame") @@ -168,9 +169,8 @@ wilcoxon_test <- function(data, print.sj_htest_wilcox <- function(x, ...) { # fetch attributes group_labels <- attributes(x)$group_labels - rank_means <- attributes(x)$rank_means - n_groups <- attributes(x)$n_groups weighted <- attributes(x)$weighted + one_sample <- attributes(x)$one_sample if (weighted) { weight_string <- " (weighted)" @@ -178,27 +178,21 @@ print.sj_htest_wilcox <- function(x, ...) { weight_string <- "" } + if (one_sample) { + onesample_string <- "One Sample " + } else { + onesample_string <- "Paired " + } + # same width group_labels <- format(group_labels) # header - insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue") - - # group-1-info - insight::print_color( - sprintf( - " Group 1: %s (n = %i, rank mean = %s)\n", - group_labels[1], n_groups[1], insight::format_value(rank_means[1], protect_integers = TRUE) - ), "cyan" - ) - - # group-2-info - insight::print_color( - sprintf( - " Group 2: %s (n = %i, rank mean = %s)\n", - group_labels[2], n_groups[2], insight::format_value(rank_means[2], protect_integers = TRUE) - ), "cyan" - ) + insight::print_color(sprintf( + "# %sWilcoxon signed rank test%s\n\n", + onesample_string, + weight_string + ), "blue") # alternative hypothesis if (!is.null(x$alternative) && !is.null(x$mu)) { @@ -210,6 +204,9 @@ print.sj_htest_wilcox <- function(x, ...) { alt_string <- paste("true location shift is", alt_string, x$mu) insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") } - - cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) + if (!is.null(x[["r"]])) { + cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) + } else { + cat(sprintf("\n V = %i, %s\n\n", round(x$v), insight::format_p(x$p))) + } } From bc9b641e524ae3e53a218a6e032b24350d85e791 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 09:55:29 +0200 Subject: [PATCH 56/82] fixes --- R/mann_whitney_test.R | 7 ++++++- R/wilcoxon_test.R | 32 +++++++++++++++++++++++++------- man/wilcoxon_test.Rd | 12 ++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 86e7a4d7..3aedab99 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -364,5 +364,10 @@ print.sj_htest_mwu <- function(x, ...) { insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") } - cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) + if (!is.null(x$w)) { + w_stat <- paste("W =", insight::format_value(x$w, protect_integers = TRUE), ", ") + } else { + w_stat <- "" + } + cat(sprintf("\n %sr = %.3f, Z = %.3f, %s\n\n", w_stat, x$r, x$z, insight::format_p(x$p))) } diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 023f19e0..1c8df0dc 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -9,6 +9,17 @@ #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. 
#' +#' @examples +#' data(mtcars) +#' # one-sample test +#' wilcoxon_test(mtcars, "mpg") +#' # base R equivalent +#' wilcox.test(mtcars$mpg ~ 1) +#' +#' # paired test +#' wilcoxon_test(mtcars, c("mpg", "hp")) +#' # base R equivalent +#' wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) #' @export wilcoxon_test <- function(data, select = NULL, @@ -52,7 +63,7 @@ wilcoxon_test <- function(data, if (is.null(weights)) { .calculate_wilcox(x, y, alternative, mu, group_labels, ...) } else { - .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels) + .calculate_weighted_mwu(x, y, data[[weights]], group_labels) } } @@ -150,7 +161,7 @@ wilcoxon_test <- function(data, ) # two groups? if (length(group_labels) > 1) { - out$group2 <- select[2] + out$group2 <- group_labels[2] } attr(out, "group_labels") <- group_labels @@ -179,9 +190,9 @@ print.sj_htest_wilcox <- function(x, ...) { } if (one_sample) { - onesample_string <- "One Sample " + onesample_string <- "One Sample" } else { - onesample_string <- "Paired " + onesample_string <- "Paired" } # same width @@ -189,7 +200,7 @@ print.sj_htest_wilcox <- function(x, ...) { # header insight::print_color(sprintf( - "# %sWilcoxon signed rank test%s\n\n", + "# %s Wilcoxon signed rank test%s\n\n", onesample_string, weight_string ), "blue") @@ -204,9 +215,16 @@ print.sj_htest_wilcox <- function(x, ...) { alt_string <- paste("true location shift is", alt_string, x$mu) insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") } + + if (!is.null(x[["v"]])) { + v_stat <- sprintf("V = %i, ", round(x$v)) + } else { + v_stat <- "" + } + if (!is.null(x[["r"]])) { - cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) + cat(sprintf("\n %sr = %.3f, Z = %.3f, %s\n\n", v_stat, x$r, x$z, insight::format_p(x$p))) } else { - cat(sprintf("\n V = %i, %s\n\n", round(x$v), insight::format_p(x$p))) + cat(sprintf("\n %s%s\n\n", v_stat, insight::format_p(x$p))) } } diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index 7d68b1e4..ff17cdc1 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -48,3 +48,15 @@ This function performs Wilcoxon rank sum tests for one sample or for two \emph{paired} (dependent) samples. For \emph{unpaired} (independent) samples, please use the \code{mann_whitney_test()} function. } +\examples{ +data(mtcars) +# one-sample test +wilcoxon_test(mtcars, "mpg") +# base R equivalent +wilcox.test(mtcars$mpg ~ 1) + +# paired test +wilcoxon_test(mtcars, c("mpg", "hp")) +# base R equivalent +wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) +} From ca5485092caa114c96d767120a66a12c331ecef6 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 11:18:09 +0200 Subject: [PATCH 57/82] xref --- R/chi_squared_test.R | 2 ++ R/kruskal_wallis_test.R | 1 + R/mann_whitney_test.R | 11 +++++++- R/t_test.R | 4 ++- R/wilcoxon_test.R | 21 ++++++++++++++-- man/chi_squared_test.Rd | 9 +++++++ man/kruskal_wallis_test.Rd | 9 +++++++ man/mann_whitney_test.Rd | 13 +++++++++- man/t_test.Rd | 12 ++++++++- man/wilcoxon_test.Rd | 24 ++++++++++++++++++ sjstats.code-workspace | 51 ++++++++++++++++++++++++++++++++++++-- 11 files changed, 149 insertions(+), 8 deletions(-) diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 70d1ec37..40bb598e 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -17,6 +17,8 @@ #' @param ... Additional arguments passed down to [`chisq.test()`]. #' @inheritParams mann_whitney_test #' +#' @inherit mann_whitney_test seealso +#' #' @return A data frame with test results. 
The returned effects sizes are #' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) #' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 0f1e9735..1305cac8 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -7,6 +7,7 @@ #' weighted tests. #' #' @inheritParams mann_whitney_test +#' @inherit mann_whitney_test seealso #' #' @return A data frame with test results. #' diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 3aedab99..0414d88a 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -8,7 +8,9 @@ #' #' A Mann-Whitney test is a non-parametric test for the null hypothesis that two #' _independent_ samples have identical continuous distributions. It can be used -#' when the two continuous variables are not normally distributed. +#' for ordinal scales or when the two continuous variables are not normally +#' distributed. For large samples, or approximately normally distributed variables, +#' the `t_test()` function can be used. #' #' @param data A data frame. #' @param select One or more name of the continuous variable (as character @@ -28,6 +30,13 @@ #' @param ... Additional arguments passed to `wilcox.test()` (for unweighted #' tests, i.e. when `weights = NULL`). #' +#' @seealso +#' - [`mann_whitney_test()`] for unpaired (independent) samples. +#' - [`t_test()`] for parametric t-tests. +#' - [`kruskal_wallis_test()`] for non-parametric ANOVA (i.e. more than two samples). +#' - [`wilcoxon_test()`] for Wilcoxon rank sum tests for paired (dependent) samples. +#' - [`chi_squared_test()`] for chi-squared tests (two categorical variables). +#' #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. #' diff --git a/R/t_test.R b/R/t_test.R index b487b47e..e5c39c5f 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -6,7 +6,9 @@ #' automatically calculates effect sizes. #' #' @inheritParams mann_whitney_test -#' @param paired Logical, whether to compute a paired t-test. +#' @param paired Logical, whether to compute a paired t-test for dependent +#' samples. +#' @inherit mann_whitney_test seealso #' #' @return A data frame with test results. #' diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 1c8df0dc..4289b1f4 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -4,12 +4,23 @@ #' or for two _paired_ (dependent) samples. For _unpaired_ (independent) #' samples, please use the `mann_whitney_test()` function. #' +#' A Wilcoxon rank sum test is a non-parametric test for the null hypothesis +#' that two samples have identical continuous distributions. The implementation +#' in `wilcoxon_test()` is only used for _paired_, i.e. _dependent_ samples. For +#' independent (unpaired) samples, use `mann_whitney_test()`. +#' +#' `wilcoxon_test()` can be used for ordinal scales or when the continuous +#' variables are not normally distributed. For large samples, or approximately +#' normally distributed variables, the `t_test()` function can be used (with +#' `paired = TRUE`). +#' #' @inheritParams mann_whitney_test +#' @inherit mann_whitney_test seealso #' #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. 
#' -#' @examples +#' @examples #' data(mtcars) #' # one-sample test #' wilcoxon_test(mtcars, "mpg") @@ -20,6 +31,11 @@ #' wilcoxon_test(mtcars, c("mpg", "hp")) #' # base R equivalent #' wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) +#' +#' # when `by` is specified, each group must be of same length +#' data(iris) +#' d <- iris[iris$Species != "setosa", ] +#' wilcoxon_test(d, "Sepal.Width", by = "Species") #' @export wilcoxon_test <- function(data, select = NULL, @@ -46,7 +62,8 @@ wilcoxon_test <- function(data, insight::format_error("For paired two-sample Wilcoxon test, all groups specified in `by` must have the same length.") # nolint } # convert to wide format - data <- datawizard::data_to_wide(data, values_from = select, names_from = by) + out <- split(data[select], as.character(data[[by]])) + data <- stats::setNames(do.call(cbind, out), names(out)) select <- colnames(data) } diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 93d687f2..4e0ccedc 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -87,3 +87,12 @@ Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. \doi{10.3390/math11091982} } +\seealso{ +\itemize{ +\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. +\item \code{\link[=t_test]{t_test()}} for parametric t-tests. +\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples). +\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon rank sum tests for paired (dependent) samples. +\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables). +} +} diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 480e567d..5b5ba09e 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -60,3 +60,12 @@ kruskal_wallis_test(long_data, select = "scales", by = "groups") # base R equivalent kruskal.test(scales ~ groups, data = long_data) } +\seealso{ +\itemize{ +\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. +\item \code{\link[=t_test]{t_test()}} for parametric t-tests. +\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples). +\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon rank sum tests for paired (dependent) samples. +\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables). +} +} diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 69050b0a..218abece 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -52,7 +52,9 @@ tests, please use the \code{wilcoxon_test()} function. A Mann-Whitney test is a non-parametric test for the null hypothesis that two \emph{independent} samples have identical continuous distributions. It can be used -when the two continuous variables are not normally distributed. +for ordinal scales or when the two continuous variables are not normally +distributed. For large samples, or approximately normally distributed variables, +the \code{t_test()} function can be used. 
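A minimal sketch of this choice, using the package's `efc` example data and the same variables as in the examples further below; both functions share the `select`/`by` interface, so switching between the rank-based and the parametric test is a one-line change:

data(efc, package = "sjstats")
# rank-based test for two independent groups
mann_whitney_test(efc, "e17age", by = "e16sex")
# parametric counterpart for approximately normally distributed outcomes
t_test(efc, "e17age", by = "e16sex")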
} \details{ This function is based on \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}} @@ -96,3 +98,12 @@ mann_whitney_test(long_data, select = "scales", by = "groups") wilcox.test(scales ~ groups, long_data) \dontshow{\}) # examplesIf} } +\seealso{ +\itemize{ +\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. +\item \code{\link[=t_test]{t_test()}} for parametric t-tests. +\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples). +\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon rank sum tests for paired (dependent) samples. +\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables). +} +} diff --git a/man/t_test.Rd b/man/t_test.Rd index 9f089cec..06790711 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -29,7 +29,8 @@ test. If \code{by} is not a factor, it will be coerced to a factor. For \item{weights}{Name of an (optional) weighting variable to be used for the test.} -\item{paired}{Logical, whether to compute a paired t-test.} +\item{paired}{Logical, whether to compute a paired t-test for dependent +samples.} \item{mu}{The hypothesized difference in means (for \code{t_test()}) or location shift (for \code{mann_whitney_test()}). The default is 0.} @@ -66,3 +67,12 @@ t_test(mtcars, c("mpg", "hp"), paired = TRUE) t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) \dontshow{\}) # examplesIf} } +\seealso{ +\itemize{ +\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. +\item \code{\link[=t_test]{t_test()}} for parametric t-tests. +\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples). +\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon rank sum tests for paired (dependent) samples. +\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables). +} +} diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index ff17cdc1..b702bfee 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -47,6 +47,16 @@ as well as effect size r and group-rank-means. This function performs Wilcoxon rank sum tests for one sample or for two \emph{paired} (dependent) samples. For \emph{unpaired} (independent) samples, please use the \code{mann_whitney_test()} function. + +A Wilcoxon rank sum test is a non-parametric test for the null hypothesis +that two samples have identical continuous distributions. The implementation +in \code{wilcoxon_test()} is only used for \emph{paired}, i.e. \emph{dependent} samples. For +independent (unpaired) samples, use \code{mann_whitney_test()}. + +\code{wilcoxon_test()} can be used for ordinal scales or when the continuous +variables are not normally distributed. For large samples, or approximately +normally distributed variables, the \code{t_test()} function can be used (with +\code{paired = TRUE}). } \examples{ data(mtcars) @@ -59,4 +69,18 @@ wilcox.test(mtcars$mpg ~ 1) wilcoxon_test(mtcars, c("mpg", "hp")) # base R equivalent wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) + +# when `by` is specified, each group must be of same length +data(iris) +d <- iris[iris$Species != "setosa", ] +wilcoxon_test(d, "Sepal.Width", by = "Species") +} +\seealso{ +\itemize{ +\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. 
+\item \code{\link[=t_test]{t_test()}} for parametric t-tests. +\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples). +\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon rank sum tests for paired (dependent) samples. +\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables). +} } diff --git a/sjstats.code-workspace b/sjstats.code-workspace index 362d7c25..c3045df9 100644 --- a/sjstats.code-workspace +++ b/sjstats.code-workspace @@ -3,5 +3,52 @@ { "path": "." } - ] -} \ No newline at end of file + ], + "launch": { + "version": "0.2.0", + "configurations": [ + { + "type": "R-Debugger", + "name": "Launch R-Workspace", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}" + }, + { + "type": "R-Debugger", + "name": "Debug R-File", + "request": "launch", + "debugMode": "file", + "workingDirectory": "${workspaceFolder}", + "file": "${file}" + }, + { + "type": "R-Debugger", + "name": "Debug R-Function", + "request": "launch", + "debugMode": "function", + "workingDirectory": "${workspaceFolder}", + "file": "${file}", + "mainFunction": "main", + "allowGlobalDebugging": false + }, + { + "type": "R-Debugger", + "name": "Debug R-Package", + "request": "launch", + "debugMode": "workspace", + "workingDirectory": "${workspaceFolder}", + "includePackageScopes": true, + "loadPackages": [ + "." + ] + }, + { + "type": "R-Debugger", + "request": "attach", + "name": "Attach to R process", + "splitOverwrittenOutput": true + } + ] + } +} From 32622de42fdaf7c9cde0d844838d65df173be816 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 11:31:22 +0200 Subject: [PATCH 58/82] fix --- R/chi_squared_test.R | 2 +- R/kruskal_wallis_test.R | 2 +- R/mann_whitney_test.R | 2 +- R/t_test.R | 10 +++++----- R/wilcoxon_test.R | 2 +- tests/testthat.R | 7 ------- tests/testthat/test-chi_squared_test.R | 10 ++++++++++ tests/testthat/test-wtd.R | 20 +++++--------------- 8 files changed, 24 insertions(+), 31 deletions(-) create mode 100644 tests/testthat/test-chi_squared_test.R diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 40bb598e..4df6ca08 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -263,7 +263,7 @@ print.sj_htest_chi <- function(x, ...) { stat_symbol <- .format_symbols(x$statistic_name) cat(sprintf( - "\n %s = %.4f, %s = %.4f, df = %i, %s\n\n", + "\n %s = %.3f, %s = %.3f, df = %i, %s\n\n", stat_symbol, x$statistic, eff_symbol, x$effect_size, round(x$df), insight::format_p(x$p) )) } diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 1305cac8..9318b96a 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -184,7 +184,7 @@ print.sj_htest_kw <- function(x, ...) { stat_symbol <- .format_symbols("Chi2") cat(sprintf( - "\n %s = %.3f, df = %i, %s\n\n", + "\n %s = %.2f, df = %i, %s\n\n", stat_symbol, x$Chi2, round(x$df), insight::format_p(x$p) )) } diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 0414d88a..43452f36 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -378,5 +378,5 @@ print.sj_htest_mwu <- function(x, ...) 
{ } else { w_stat <- "" } - cat(sprintf("\n %sr = %.3f, Z = %.3f, %s\n\n", w_stat, x$r, x$z, insight::format_p(x$p))) + cat(sprintf("\n %sr = %.2f, Z = %.2f, %s\n\n", w_stat, x$r, x$z, insight::format_p(x$p))) } diff --git a/R/t_test.R b/R/t_test.R index e5c39c5f..dcf60826 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -177,8 +177,8 @@ t_test <- function(data, if (is.null(grp)) { dat <- stats::na.omit(data.frame(dv, weights)) colnames(dat) <- c("y", "w") - x_values <- dv - x_weights <- weights + x_values <- dat$y + x_weights <- dat$w y_values <- NULL } else { dat <- stats::na.omit(data.frame(dv, grp, weights)) @@ -193,7 +193,7 @@ t_test <- function(data, y_weights <- dat$w[dat$g == groups[2]] } - mu_x <- stats::weighted.mean(x_values, x_weights) + mu_x <- stats::weighted.mean(x_values, x_weights, na.rm = TRUE) var_x <- datawizard::weighted_sd(x_values, x_weights)^2 se_x <- sqrt(var_x / length(x_values)) @@ -376,9 +376,9 @@ print.sj_htest_t <- function(x, ...) { insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") cat(sprintf( - "\n t = %.3f, %s = %.3f, df = %s, %s\n\n", + "\n t = %.2f, %s = %.2f, df = %s, %s\n\n", x$statistic, - x$effect_size_name, + gsub("_", " ", x$effect_size_name, fixed = TRUE), x$effect_size, insight::format_value(x$df, digits = 1, protect_integers = TRUE), insight::format_p(x$p) diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 4289b1f4..1c6c16e4 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -240,7 +240,7 @@ print.sj_htest_wilcox <- function(x, ...) { } if (!is.null(x[["r"]])) { - cat(sprintf("\n %sr = %.3f, Z = %.3f, %s\n\n", v_stat, x$r, x$z, insight::format_p(x$p))) + cat(sprintf("\n %sr = %.2f, Z = %.2f, %s\n\n", v_stat, x$r, x$z, insight::format_p(x$p))) } else { cat(sprintf("\n %s%s\n\n", v_stat, insight::format_p(x$p))) } diff --git a/tests/testthat.R b/tests/testthat.R index f19230e3..43bae94d 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -1,10 +1,3 @@ library(testthat) library(sjstats) - -if (length(strsplit(packageDescription("sjstats")$Version, "\\.")[[1]]) > 3) { - Sys.setenv("RunAllsjstatsTests" = "yes") -} else { - Sys.setenv("RunAllsjstatsTests" = "no") -} - test_check("sjstats") diff --git a/tests/testthat/test-chi_squared_test.R b/tests/testthat/test-chi_squared_test.R new file mode 100644 index 00000000..c0c0bc54 --- /dev/null +++ b/tests/testthat/test-chi_squared_test.R @@ -0,0 +1,10 @@ +skip_if_not_installed("effectsize") +test_that("chi_squared_test", { + data(efc) + set.seed(123) + efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) + chi_squared_test(efc, "c161sex", by = "e16sex") + chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") + chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) + +}) diff --git a/tests/testthat/test-wtd.R b/tests/testthat/test-wtd.R index f90e309f..7e452000 100644 --- a/tests/testthat/test-wtd.R +++ b/tests/testthat/test-wtd.R @@ -1,17 +1,7 @@ -if (require("testthat") && require("sjstats")) { +test_that("wtd", { data(efc) set.seed(123) - efc$weight <- abs(rnorm(nrow(efc), 1, .3)) - - test_that("wtd", { - expect_equal(weighted_se(efc$c12hour, weights = efc$weight), 1.704182, tolerance = 1e-5) - expect_equal(weighted_se(efc$c12hour, weights = NULL), 1.691623, tolerance = 1e-5) - }) - - test_that("weighted_ttest", { - weighted_ttest(efc, e17age, weights = weight) - weighted_ttest(efc, e17age, c160age, weights = weight) - weighted_ttest(e17age ~ e16sex + weight, efc) - weighted_ttest(efc, e17age, c160age, weights = weight, ci.lvl = .8) - }) -} + 
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) + expect_equal(weighted_se(efc$c12hour, weights = efc$weight), 1.704182, tolerance = 1e-5) + expect_equal(weighted_se(efc$c12hour, weights = NULL), 1.691623, tolerance = 1e-5) +}) From 9d7dcf5206d5f13fb56232aee406617b4413750a Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 14:30:46 +0200 Subject: [PATCH 59/82] fixes --- tests/testthat/test-chi_squared_test.R | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-chi_squared_test.R b/tests/testthat/test-chi_squared_test.R index c0c0bc54..2823e7af 100644 --- a/tests/testthat/test-chi_squared_test.R +++ b/tests/testthat/test-chi_squared_test.R @@ -3,8 +3,22 @@ test_that("chi_squared_test", { data(efc) set.seed(123) efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) - chi_squared_test(efc, "c161sex", by = "e16sex") - chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") - chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) + out1 <- chi_squared_test(efc, "c161sex", by = "e16sex") + out2 <- chisq.test(efc$c161sex, efc$e16sex) + expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + out <- chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") + expect_equal(out$statistic, 2.415755, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$effect_size, 0.05448519, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$p, 0.1201201, tolerance = 1e-4, ignore_attr = TRUE) + + out1 <- chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) + out2 <- chisq.test(table(efc$c161sex), p = c(0.3, 0.7)) + expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + + out <- chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7), weights = "weight") + expect_equal(out$statistic, 20.07379, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$effect_size, 0.0974456, tolerance = 1e-4, ignore_attr = TRUE) }) From 5e83f05f1b556516786d5d57c24e1e30cb66111a Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 20:11:25 +0200 Subject: [PATCH 60/82] add tests --- tests/testthat/test-chi_squared_test.R | 2 ++ tests/testthat/test-mann_whitney_test.R | 28 +++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/testthat/test-mann_whitney_test.R diff --git a/tests/testthat/test-chi_squared_test.R b/tests/testthat/test-chi_squared_test.R index 2823e7af..38310348 100644 --- a/tests/testthat/test-chi_squared_test.R +++ b/tests/testthat/test-chi_squared_test.R @@ -1,4 +1,6 @@ skip_if_not_installed("effectsize") +skip_if_not_installed("datawizard") + test_that("chi_squared_test", { data(efc) set.seed(123) diff --git a/tests/testthat/test-mann_whitney_test.R b/tests/testthat/test-mann_whitney_test.R new file mode 100644 index 00000000..96e3f87f --- /dev/null +++ b/tests/testthat/test-mann_whitney_test.R @@ -0,0 +1,28 @@ +skip_if_not_installed("coin") +skip_if_not_installed("survey") +skip_if_not_installed("datawizard") + +test_that("mann_whitney_test", { + data(efc) + set.seed(123) + efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) + out1 <- mann_whitney_test(efc, "e17age", by = "e16sex") + out2 <- wilcox.test(e17age ~ e16sex, data = efc) + expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + 
expect_equal(out1$estimate, -1561, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$r, 0.2571254, tolerance = 1e-4, ignore_attr = TRUE) + + set.seed(123) + wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) + out1 <- mann_whitney_test(wide_data, select = c("scale1", "scale2")) + out2 <- wilcox.test(wide_data$scale1, wide_data$scale2) + expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$r, 0.05132394, tolerance = 1e-4, ignore_attr = TRUE) + + out <- mann_whitney_test(efc, "e17age", by = "e16sex", weights = "weight") + expect_equal(out$p, 1.976729e-14, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$estimate, 0.1594972, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$r, 0.2599877, tolerance = 1e-4, ignore_attr = TRUE) +}) From c00243d02a3c468cb42700e75ab4edd7d6adf774 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 21:23:53 +0200 Subject: [PATCH 61/82] tests --- R/mann_whitney_test.R | 3 +++ tests/testthat/test-chi_squared_test.R | 11 +++++++++++ tests/testthat/test-mann_whitney_test.R | 8 ++++++++ 3 files changed, 22 insertions(+) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 43452f36..5f177ebf 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -279,6 +279,9 @@ mann_whitney_test <- function(data, if (identical(test, "mann_whitney_test") && length(select) > 2) { insight::format_error("You may only specify two variables for Mann-Whitney test.") } + if (identical(test, "mann_whitney_test") && length(select) == 1 && is.null(by)) { + insight::format_error("Only one variable provided in `select`, but none in `by`. You need to specify a second continuous variable in `select`, or a grouping variable in `by` for Mann-Whitney test.") # nolint + } # sanity check - may only specify two variable names if (identical(test, "t_test") && length(select) > 2) { diff --git a/tests/testthat/test-chi_squared_test.R b/tests/testthat/test-chi_squared_test.R index 38310348..3778644f 100644 --- a/tests/testthat/test-chi_squared_test.R +++ b/tests/testthat/test-chi_squared_test.R @@ -23,4 +23,15 @@ test_that("chi_squared_test", { out <- chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7), weights = "weight") expect_equal(out$statistic, 20.07379, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$effect_size, 0.0974456, tolerance = 1e-4, ignore_attr = TRUE) + + set.seed(1234) + d <- data.frame( + survey_1 = sample(c("Approve", "Disapprove"), size = 1000, replace = TRUE, prob = c(0.45, 0.55)), + survey_2 = sample(c("Approve", "Disapprove"), size = 1000, replace = TRUE, prob = c(0.42, 0.58)) + ) + out1 <- chi_squared_test(d, "survey_1", "survey_2", paired = TRUE) + out2 <- mcnemar.test(table(d)) + expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$effect_size, 0.03170437, tolerance = 1e-4, ignore_attr = TRUE) }) diff --git a/tests/testthat/test-mann_whitney_test.R b/tests/testthat/test-mann_whitney_test.R index 96e3f87f..e079cb93 100644 --- a/tests/testthat/test-mann_whitney_test.R +++ b/tests/testthat/test-mann_whitney_test.R @@ -26,3 +26,11 @@ test_that("mann_whitney_test", { expect_equal(out$estimate, 0.1594972, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$r, 0.2599877, tolerance = 1e-4, ignore_attr = TRUE) }) + +test_that("mann_whitney_test, sanity checks", { + 
data(efc) + expect_errpr(mann_whitney_test(efc, "e17age", by = "c172code"), regex = "Only two groups are") + expect_errpr(mann_whitney_test(efc, c("e17age", "c172code", "e16sex")), regex = "You may only specify") + expect_errpr(mann_whitney_test(efc, c("e17age", "c172code"), by = "e17age"), regex = "If `select` specifies more") + expect_errpr(mann_whitney_test(efc, "e17age"), regex = "Only one variable provided") +}) From 4b4e2fe41a5b2e469d9b1ca25475a5e0e01ddec5 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 21:33:01 +0200 Subject: [PATCH 62/82] fix --- tests/testthat/test-kruskal_wallis_test.R | 49 +++++++++++++++++++++++ tests/testthat/test-mann_whitney_test.R | 8 ++-- 2 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 tests/testthat/test-kruskal_wallis_test.R diff --git a/tests/testthat/test-kruskal_wallis_test.R b/tests/testthat/test-kruskal_wallis_test.R new file mode 100644 index 00000000..9320816d --- /dev/null +++ b/tests/testthat/test-kruskal_wallis_test.R @@ -0,0 +1,49 @@ +skip_if_not_installed("survey") +skip_if_not_installed("datawizard") + +test_that("kruskal_wallis_test", { +#' data(efc) +#' # Kruskal-Wallis test for elder's age by education +#' kruskal_wallis_test(efc, "e17age", by = "c172code") +#' +#' # when data is in wide-format, specify all relevant continuous +#' # variables in `select` and omit `by` +#' set.seed(123) +#' wide_data <- data.frame( +#' scale1 = runif(20), +#' scale2 = runif(20), +#' scale3 = runif(20) +#' ) +#' kruskal_wallis_test(wide_data, select = c("scale1", "scale2", "scale3")) +#' +#' # same as if we had data in long format, with grouping variable +#' long_data <- data.frame( +#' scales = c(wide_data$scale1, wide_data$scale2, wide_data$scale3), +#' groups = rep(c("A", "B", "C"), each = 20) +#' ) +#' kruskal_wallis_test(long_data, select = "scales", by = "groups") +#' # base R equivalent +#' kruskal.test(scales ~ groups, data = long_data) + data(efc) + set.seed(123) + efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) + out1 <- mann_whitney_test(efc, "e17age", by = "e16sex") + out2 <- wilcox.test(e17age ~ e16sex, data = efc) + expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$estimate, -1561, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$r, 0.2571254, tolerance = 1e-4, ignore_attr = TRUE) + + set.seed(123) + wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) + out1 <- mann_whitney_test(wide_data, select = c("scale1", "scale2")) + out2 <- wilcox.test(wide_data$scale1, wide_data$scale2) + expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$r, 0.05132394, tolerance = 1e-4, ignore_attr = TRUE) + + out <- mann_whitney_test(efc, "e17age", by = "e16sex", weights = "weight") + expect_equal(out$p, 1.976729e-14, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$estimate, 0.1594972, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out$r, 0.2599877, tolerance = 1e-4, ignore_attr = TRUE) +}) diff --git a/tests/testthat/test-mann_whitney_test.R b/tests/testthat/test-mann_whitney_test.R index e079cb93..846cae03 100644 --- a/tests/testthat/test-mann_whitney_test.R +++ b/tests/testthat/test-mann_whitney_test.R @@ -29,8 +29,8 @@ test_that("mann_whitney_test", { test_that("mann_whitney_test, sanity checks", { data(efc) - expect_errpr(mann_whitney_test(efc, "e17age", by = 
"c172code"), regex = "Only two groups are") - expect_errpr(mann_whitney_test(efc, c("e17age", "c172code", "e16sex")), regex = "You may only specify") - expect_errpr(mann_whitney_test(efc, c("e17age", "c172code"), by = "e17age"), regex = "If `select` specifies more") - expect_errpr(mann_whitney_test(efc, "e17age"), regex = "Only one variable provided") + expect_error(mann_whitney_test(efc, "e17age", by = "c172code"), regex = "Only two groups are") + expect_error(mann_whitney_test(efc, c("e17age", "c172code", "e16sex")), regex = "You may only specify") + expect_error(mann_whitney_test(efc, c("e17age", "c172code"), by = "e17age"), regex = "If `select` specifies more") + expect_error(mann_whitney_test(efc, "e17age"), regex = "Only one variable provided") }) From 070d9dfeb46a2d79e33c77e5e311056a74944fae Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 21:51:07 +0200 Subject: [PATCH 63/82] add tests --- R/kruskal_wallis_test.R | 12 ++-- tests/testthat/_snaps/chi_squared_test.md | 66 ++++++++++++++++++++ tests/testthat/_snaps/kruskal_wallis_test.md | 24 +++++++ tests/testthat/_snaps/mann_whitney_test.md | 41 ++++++++++++ tests/testthat/test-chi_squared_test.R | 5 ++ tests/testthat/test-kruskal_wallis_test.R | 60 +++++++----------- tests/testthat/test-mann_whitney_test.R | 5 +- 7 files changed, 169 insertions(+), 44 deletions(-) create mode 100644 tests/testthat/_snaps/chi_squared_test.md create mode 100644 tests/testthat/_snaps/kruskal_wallis_test.md create mode 100644 tests/testthat/_snaps/mann_whitney_test.md diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 9318b96a..d60bcc81 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -70,16 +70,16 @@ kruskal_wallis_test <- function(data, insight::format_error("At least two groups are required, i.e. 
data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint } if (is.null(weights)) { - .calculate_kw(dv, grp) + .calculate_kw(dv, grp, group_labels = c(select, by)) } else { - .calculate_weighted_kw(dv, grp, data[[weights]]) + .calculate_weighted_kw(dv, grp, data[[weights]], group_labels = c(select, by)) } } # Kruskal-Wallis-Test -------------------------------------------- -.calculate_kw <- function(dv, grp, paired = FALSE) { +.calculate_kw <- function(dv, grp, paired = FALSE, group_labels = NULL) { # prepare data wcdat <- data.frame(dv, grp) if (paired) { @@ -97,7 +97,7 @@ kruskal_wallis_test <- function(data, ) out <- data.frame( - data = wt$data.name, + data = paste(group_labels[1], "by", group_labels[2]), Chi2 = wt$statistic, df = wt$parameter, p = as.numeric(wt$p.value), @@ -115,7 +115,7 @@ kruskal_wallis_test <- function(data, # Weighted Mann-Whitney-Test for two groups ---------------------------------- -.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE) { +.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE, group_labels = NULL) { # check if pkg survey is available insight::check_if_installed("survey") @@ -135,7 +135,7 @@ kruskal_wallis_test <- function(data, } out <- data.frame( - data = paste(dv, "by", grp), + data = paste(group_labels[1], "by", group_labels[2]), Chi2 = result$statistic, df = result$parameter, p = as.numeric(result$p.value), diff --git a/tests/testthat/_snaps/chi_squared_test.md b/tests/testthat/_snaps/chi_squared_test.md new file mode 100644 index 00000000..ef7d7bab --- /dev/null +++ b/tests/testthat/_snaps/chi_squared_test.md @@ -0,0 +1,66 @@ +# chi_squared_test + + Code + print(out1) + Output + + # Chi-squared test for contingency tables + + Data: c161sex by e16sex (n = 900) + + χ² = 2.233, ϕ = 0.053, df = 1, p = 0.135 + + +--- + + Code + print(out) + Output + + # Chi-squared test for contingency tables (weighted) + + Data: c161sex by e16sex (n = 904) + + χ² = 2.416, ϕ = 0.054, df = 1, p = 0.120 + + +--- + + Code + print(out1) + Output + + # Chi-squared test for given probabilities + + Data: c161sex against probabilities 30% and 70% (n = 901) + + χ² = 16.162, פ‎ = 0.088, df = 1, p < .001 + + +--- + + Code + print(out) + Output + + # Chi-squared test for given probabilities (weighted) + + Data: c161sex against probabilities 30% and 70% (n = 906) + + χ² = 20.074, פ‎ = 0.097, df = 1, p < .001 + + +--- + + Code + print(out1) + Output + + # Chi-squared test for contingency tables + (using McNemar's test for paired data) + + Data: survey_1 by survey_2 (n = 1000) + + χ² = 10.868, ϕ = 0.032, df = 1, p < .001 + + diff --git a/tests/testthat/_snaps/kruskal_wallis_test.md b/tests/testthat/_snaps/kruskal_wallis_test.md new file mode 100644 index 00000000..d0589424 --- /dev/null +++ b/tests/testthat/_snaps/kruskal_wallis_test.md @@ -0,0 +1,24 @@ +# kruskal_wallis_test + + Code + print(out1) + Output + # Kruskal-Wallis test + + Data: e17age by c172code (3 groups, n = 506, 180 and 156) + + χ² = 4.05, df = 2, p = 0.132 + + +--- + + Code + print(out1) + Output + # Kruskal-Wallis test + + Data: scale1 by scale2 (3 groups, n = 20, 20 and 20) + + χ² = 4.86, df = 2, p = 0.088 + + diff --git a/tests/testthat/_snaps/mann_whitney_test.md b/tests/testthat/_snaps/mann_whitney_test.md new file mode 100644 index 00000000..ed7526fb --- /dev/null +++ b/tests/testthat/_snaps/mann_whitney_test.md @@ -0,0 +1,41 @@ +# mann_whitney_test + + Code + print(out1) + Output + # Mann-Whitney test + + Group 1: male (n = 294, rank mean = 
147.50) + Group 2: female (n = 596, rank mean = 298.50) + Alternative hypothesis: true location shift is not equal to 0 + + W = 59684 , r = 0.26, Z = -7.75, p < .001 + + +--- + + Code + print(out1) + Output + # Mann-Whitney test + + Group 1: scale1 (n = 20, rank mean = 10.50) + Group 2: scale2 (n = 20, rank mean = 10.50) + Alternative hypothesis: true location shift is not equal to 0 + + W = 188 , r = 0.05, Z = -0.32, p = 0.758 + + +--- + + Code + print(out) + Output + # Mann-Whitney test (weighted) + + Group 1: male (n = 296, rank mean = 147.58) + Group 2: female (n = 600, rank mean = 299.42) + + r = 0.26, Z = 7.78, p < .001 + + diff --git a/tests/testthat/test-chi_squared_test.R b/tests/testthat/test-chi_squared_test.R index 3778644f..3031b3a8 100644 --- a/tests/testthat/test-chi_squared_test.R +++ b/tests/testthat/test-chi_squared_test.R @@ -9,20 +9,24 @@ test_that("chi_squared_test", { out2 <- chisq.test(efc$c161sex, efc$e16sex) expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) out <- chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") expect_equal(out$statistic, 2.415755, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$effect_size, 0.05448519, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$p, 0.1201201, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out)) out1 <- chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) out2 <- chisq.test(table(efc$c161sex), p = c(0.3, 0.7)) expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) out <- chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7), weights = "weight") expect_equal(out$statistic, 20.07379, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$effect_size, 0.0974456, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out)) set.seed(1234) d <- data.frame( @@ -34,4 +38,5 @@ test_that("chi_squared_test", { expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$effect_size, 0.03170437, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) }) diff --git a/tests/testthat/test-kruskal_wallis_test.R b/tests/testthat/test-kruskal_wallis_test.R index 9320816d..9bf98b6c 100644 --- a/tests/testthat/test-kruskal_wallis_test.R +++ b/tests/testthat/test-kruskal_wallis_test.R @@ -2,48 +2,34 @@ skip_if_not_installed("survey") skip_if_not_installed("datawizard") test_that("kruskal_wallis_test", { -#' data(efc) -#' # Kruskal-Wallis test for elder's age by education -#' kruskal_wallis_test(efc, "e17age", by = "c172code") -#' -#' # when data is in wide-format, specify all relevant continuous -#' # variables in `select` and omit `by` -#' set.seed(123) -#' wide_data <- data.frame( -#' scale1 = runif(20), -#' scale2 = runif(20), -#' scale3 = runif(20) -#' ) -#' kruskal_wallis_test(wide_data, select = c("scale1", "scale2", "scale3")) -#' -#' # same as if we had data in long format, with grouping variable -#' long_data <- data.frame( -#' scales = c(wide_data$scale1, wide_data$scale2, wide_data$scale3), -#' groups = rep(c("A", "B", "C"), each = 20) -#' ) -#' kruskal_wallis_test(long_data, select = "scales", by = "groups") -#' # base R equivalent -#' kruskal.test(scales ~ groups, data = 
long_data) data(efc) set.seed(123) efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) - out1 <- mann_whitney_test(efc, "e17age", by = "e16sex") - out2 <- wilcox.test(e17age ~ e16sex, data = efc) - expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + out1 <- kruskal_wallis_test(efc, "e17age", by = "c172code") + out2 <- kruskal.test(e17age ~ c172code, data = efc) + expect_equal(out1$Chi2, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out1$estimate, -1561, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out1$r, 0.2571254, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) set.seed(123) - wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) - out1 <- mann_whitney_test(wide_data, select = c("scale1", "scale2")) - out2 <- wilcox.test(wide_data$scale1, wide_data$scale2) - expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out1$r, 0.05132394, tolerance = 1e-4, ignore_attr = TRUE) + wide_data <- data.frame( + scale1 = runif(20), + scale2 = runif(20), + scale3 = runif(20) + ) + long_data <- data.frame( + scales = c(wide_data$scale1, wide_data$scale2, wide_data$scale3), + groups = as.factor(rep(c("A", "B", "C"), each = 20)), + stringsAsFactors = FALSE + ) + out1 <- kruskal_wallis_test(wide_data, select = c("scale1", "scale2", "scale3")) + out2 <- kruskal_wallis_test(long_data, select = "scales", by = "groups") + out3 <- kruskal.test(scales ~ groups, data = long_data) + expect_equal(out1$Chi2, out2$Chi2, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$Chi2, out3$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out3$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) - out <- mann_whitney_test(efc, "e17age", by = "e16sex", weights = "weight") - expect_equal(out$p, 1.976729e-14, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out$estimate, 0.1594972, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out$r, 0.2599877, tolerance = 1e-4, ignore_attr = TRUE) + out1 <- kruskal_wallis_test(efc, "e17age", by = "c172code", weights = "weight") }) diff --git a/tests/testthat/test-mann_whitney_test.R b/tests/testthat/test-mann_whitney_test.R index 846cae03..aade0b01 100644 --- a/tests/testthat/test-mann_whitney_test.R +++ b/tests/testthat/test-mann_whitney_test.R @@ -10,8 +10,9 @@ test_that("mann_whitney_test", { out2 <- wilcox.test(e17age ~ e16sex, data = efc) expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) - expect_equal(out1$estimate, -1561, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$estimate, -151, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$r, 0.2571254, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) set.seed(123) wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20)) @@ -20,11 +21,13 @@ test_that("mann_whitney_test", { expect_equal(out1$w, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$r, 0.05132394, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) out <- mann_whitney_test(efc, "e17age", by = "e16sex", weights = "weight") expect_equal(out$p, 
1.976729e-14, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$estimate, 0.1594972, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out$r, 0.2599877, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out)) }) test_that("mann_whitney_test, sanity checks", { From 8d4822102009fc0e870b1448ed738875b865af85 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 22:57:08 +0200 Subject: [PATCH 64/82] updates --- DESCRIPTION | 2 +- R/chi_squared_test.R | 31 ++++++++++++++++++++--- R/t_test.R | 25 +++++++++++++++--- man/chi_squared_test.Rd | 6 ++++- man/t_test.Rd | 4 +++ tests/testthat/_snaps/chi_squared_test.md | 10 ++++---- 6 files changed, 65 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 72ca8194..358379d0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,7 +20,7 @@ Depends: Imports: bayestestR, datawizard, - effectsize, + effectsize (>= 0.8.8), insight, parameters, performance, diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 4df6ca08..c1009982 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -1,6 +1,6 @@ #' @title Chi-Squared test #' @name chi_squared_test -#' @description This function performs a \eqn{chi}^2 test for contingency +#' @description This function performs a \eqn{chi^2} test for contingency #' tables or tests for given probabilities. The returned effects sizes are #' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) #' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against @@ -35,6 +35,10 @@ #' The weighted version of the chi-squared test is based on the a weighted #' table, using [`xtabs()`] as input for `chisq.test()`. #' +#' Interpretation of effect sizes are based on rules described in +#' [`effectsize::interpret_phi()`], [`effectsize::interpret_cramers_v()`], +#' and [`effectsize::interpret_fei()`]. +#' #' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., #' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data #' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. @@ -262,8 +266,29 @@ print.sj_htest_chi <- function(x, ...) { eff_symbol <- .format_symbols(x$effect_size_name) stat_symbol <- .format_symbols(x$statistic_name) + # string for effectsizes + eff_string <- switch(x$effect_size_name, + Fei = sprintf( + "%s = %.3f (%s effect)", + eff_symbol, + x$effect_size, + effectsize::interpret_fei(x$effect_size) + ), + Phi = sprintf( + "%s = %.3f (%s effect)", + eff_symbol, + x$effect_size, + effectsize::interpret_phi(x$effect_size) + ), + sprintf( + "Cramer's V = %.3f (%s effect)", + x$effect_size, + effectsize::interpret_cramers_v(x$effect_size) + ) + ) + cat(sprintf( - "\n %s = %.3f, %s = %.3f, df = %i, %s\n\n", - stat_symbol, x$statistic, eff_symbol, x$effect_size, round(x$df), insight::format_p(x$p) + "\n %s = %.3f, %s, df = %i, %s\n\n", + stat_symbol, x$statistic, eff_string, round(x$df), insight::format_p(x$p) )) } diff --git a/R/t_test.R b/R/t_test.R index dcf60826..809eed10 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -10,6 +10,9 @@ #' samples. #' @inherit mann_whitney_test seealso #' +#' @details Interpretation of effect sizes are based on rules described in +#' [`effectsize::interpret_cohens_d()`] and [`effectsize::interpret_hedges_g()`]. +#' #' @return A data frame with test results. #' #' @examplesIf requireNamespace("effectsize") @@ -291,6 +294,8 @@ t_test <- function(data, #' @export print.sj_htest_t <- function(x, ...) 
{ + insight::check_if_installed("effectsize") + # fetch attributes group_labels <- attributes(x)$group_labels means <- attributes(x)$means @@ -375,11 +380,25 @@ print.sj_htest_t <- function(x, ...) { } insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") + # string for effectsizes + if (x$effect_size_name == "Cohens_d") { + eff_string <- sprintf( + "Cohen's d = %.2f (%s effect)", + x$effect_size, + effectsize::interpret_cohens_d(x$effect_size) + ) + } else { + eff_string <- sprintf( + "Hedges' g = %.2f (%s effect)", + x$effect_size, + effectsize::interpret_hedges_g(x$effect_size) + ) + } + cat(sprintf( - "\n t = %.2f, %s = %.2f, df = %s, %s\n\n", + "\n t = %.2f, %s, df = %s, %s\n\n", x$statistic, - gsub("_", " ", x$effect_size_name, fixed = TRUE), - x$effect_size, + eff_string, insight::format_value(x$df, digits = 1, protect_integers = TRUE), insight::format_p(x$p) )) diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 4e0ccedc..3a7d2425 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -48,7 +48,7 @@ for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against given probabilities. } \description{ -This function performs a \eqn{chi}^2 test for contingency +This function performs a \eqn{chi^2} test for contingency tables or tests for given probabilities. The returned effects sizes are Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against @@ -65,6 +65,10 @@ a McNemar test (see \code{\link[=mcnemar.test]{mcnemar.test()}}) is conducted. The weighted version of the chi-squared test is based on the a weighted table, using \code{\link[=xtabs]{xtabs()}} as input for \code{chisq.test()}. + +Interpretation of effect sizes are based on rules described in +\code{\link[effectsize:interpret_r]{effectsize::interpret_phi()}}, \code{\link[effectsize:interpret_r]{effectsize::interpret_cramers_v()}}, +and \code{\link[effectsize:interpret_r]{effectsize::interpret_fei()}}. } \examples{ \dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/man/t_test.Rd b/man/t_test.Rd index 06790711..b48cee8f 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -48,6 +48,10 @@ samples, for paired samples, or for one sample. Unlike the underlying base R function \code{t.test()}, this function allows for weighted tests and automatically calculates effect sizes. } +\details{ +Interpretation of effect sizes are based on rules described in +\code{\link[effectsize:interpret_cohens_d]{effectsize::interpret_cohens_d()}} and \code{\link[effectsize:interpret_cohens_d]{effectsize::interpret_hedges_g()}}. 
+} \examples{ \dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) diff --git a/tests/testthat/_snaps/chi_squared_test.md b/tests/testthat/_snaps/chi_squared_test.md index ef7d7bab..5c4478f2 100644 --- a/tests/testthat/_snaps/chi_squared_test.md +++ b/tests/testthat/_snaps/chi_squared_test.md @@ -8,7 +8,7 @@ Data: c161sex by e16sex (n = 900) - χ² = 2.233, ϕ = 0.053, df = 1, p = 0.135 + χ² = 2.233, ϕ = 0.053 (very small effect), df = 1, p = 0.135 --- @@ -21,7 +21,7 @@ Data: c161sex by e16sex (n = 904) - χ² = 2.416, ϕ = 0.054, df = 1, p = 0.120 + χ² = 2.416, ϕ = 0.054 (very small effect), df = 1, p = 0.120 --- @@ -34,7 +34,7 @@ Data: c161sex against probabilities 30% and 70% (n = 901) - χ² = 16.162, פ‎ = 0.088, df = 1, p < .001 + χ² = 16.162, פ‎ = 0.088 (very small effect), df = 1, p < .001 --- @@ -47,7 +47,7 @@ Data: c161sex against probabilities 30% and 70% (n = 906) - χ² = 20.074, פ‎ = 0.097, df = 1, p < .001 + χ² = 20.074, פ‎ = 0.097 (very small effect), df = 1, p < .001 --- @@ -61,6 +61,6 @@ Data: survey_1 by survey_2 (n = 1000) - χ² = 10.868, ϕ = 0.032, df = 1, p < .001 + χ² = 10.868, ϕ = 0.032 (tiny effect), df = 1, p < .001 From e909494fb68d46b014884a5a6cfb9e32222e7e00 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:24:28 +0200 Subject: [PATCH 65/82] fix --- R/svy_median.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/svy_median.R b/R/svy_median.R index 973cd0cd..9a0d68f2 100644 --- a/R/svy_median.R +++ b/R/svy_median.R @@ -2,7 +2,7 @@ #' @export survey_median <- function(x, design) { # check if pkg survey is available - insight::check_if_installed("suvey") + insight::check_if_installed("survey") # deparse v <- stats::as.formula(paste("~", as.character(substitute(x)))) From 52b272ff500b70f00b79d3c91d5d1aeb17892678 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:25:38 +0200 Subject: [PATCH 66/82] docs --- R/wilcoxon_test.R | 3 ++- man/wilcoxon_test.Rd | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 1c6c16e4..7e75fd15 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -20,7 +20,7 @@ #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. #' -#' @examples +#' @examplesIf requireNamespace("coin") #' data(mtcars) #' # one-sample test #' wilcoxon_test(mtcars, "mpg") @@ -88,6 +88,7 @@ wilcoxon_test <- function(data, # Mann-Whitney-Test for two groups -------------------------------------------- .calculate_wilcox <- function(x, y, alternative, mu, group_labels, ...) { + insight::check_if_installed("coin") # for paired Wilcoxon test, we have effect sizes if (!is.null(y)) { # prepare data diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index b702bfee..19a3118d 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -59,6 +59,7 @@ normally distributed variables, the \code{t_test()} function can be used (with \code{paired = TRUE}). 
} \examples{ +\dontshow{if (requireNamespace("coin")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(mtcars) # one-sample test wilcoxon_test(mtcars, "mpg") @@ -74,6 +75,7 @@ wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) data(iris) d <- iris[iris$Species != "setosa", ] wilcoxon_test(d, "Sepal.Width", by = "Species") +\dontshow{\}) # examplesIf} } \seealso{ \itemize{ From f02d6d1fc10e5e3fd679e5a487af1af4b1aeab33 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:29:33 +0200 Subject: [PATCH 67/82] add tests --- tests/testthat/_snaps/wilcoxon_test.md | 36 ++++++++++++++++++++++++++ tests/testthat/test-wilcoxon_test.R | 23 ++++++++++++++++ 2 files changed, 59 insertions(+) create mode 100644 tests/testthat/_snaps/wilcoxon_test.md create mode 100644 tests/testthat/test-wilcoxon_test.R diff --git a/tests/testthat/_snaps/wilcoxon_test.md b/tests/testthat/_snaps/wilcoxon_test.md new file mode 100644 index 00000000..4649a8eb --- /dev/null +++ b/tests/testthat/_snaps/wilcoxon_test.md @@ -0,0 +1,36 @@ +# wilcoxon_test + + Code + print(out1) + Output + # One Sample Wilcoxon signed rank test + + Alternative hypothesis: true location shift is not equal to 0 + + V = 528, p < .001 + + +--- + + Code + print(out1) + Output + # Paired Wilcoxon signed rank test + + Alternative hypothesis: true location shift is not equal to 0 + + V = 0, r = 0.87, Z = -4.94, p < .001 + + +--- + + Code + print(out) + Output + # Paired Wilcoxon signed rank test + + Alternative hypothesis: true location shift is not equal to 0 + + V = 247, r = 0.39, Z = -2.76, p = 0.006 + + diff --git a/tests/testthat/test-wilcoxon_test.R b/tests/testthat/test-wilcoxon_test.R new file mode 100644 index 00000000..f3cdee67 --- /dev/null +++ b/tests/testthat/test-wilcoxon_test.R @@ -0,0 +1,23 @@ +skip_if_not_installed("survey") +skip_if_not_installed("datawizard") +skip_if_not_installed("coin") + +test_that("wilcoxon_test", { + data(mtcars) + out1 <- wilcoxon_test(mtcars, "mpg") + out2 <- suppressWarnings(wilcox.test(mtcars$mpg ~ 1)) + expect_equal(out1$v, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) + + out1 <- wilcoxon_test(mtcars, c("mpg", "hp")) + out2 <- suppressWarnings(wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE)) + expect_equal(out1$v, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) + expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) + expect_snapshot(print(out1)) + + data(iris) + d <- iris[iris$Species != "setosa", ] + out <- wilcoxon_test(d, "Sepal.Width", by = "Species") + expect_snapshot(print(out)) +}) From c4bff8f5e2711d9e663fa5c1721aba22ed367c78 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:49:59 +0200 Subject: [PATCH 68/82] fixes --- R/t_test.R | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index 809eed10..988b9a7a 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -46,6 +46,9 @@ t_test <- function(data, .sanitize_htest_input(data, select, by, weights, test = "t_test") data_name <- NULL + # filter and remove NA + data <- stats::na.omit(data[c(select, by, weights)]) + # does select indicate more than one variable? 
We than reshape the data # to have one continous scale and one grouping variable if (length(select) > 1) { @@ -58,11 +61,16 @@ t_test <- function(data, by <- NULL } else { # we convert the data into long format, and create a grouping variable - data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") + data <- datawizard::data_to_long( + data[c(select, weights)], + select = select, + names_to = "group", + values_to = "scale" + ) by <- select[2] select <- select[1] # after converting to long, we have the "grouping" variable first in the data - colnames(data) <- c(by, select) + colnames(data) <- c(weights, by, select) } } @@ -183,17 +191,24 @@ t_test <- function(data, x_values <- dat$y x_weights <- dat$w y_values <- NULL + # group N's + n_groups <- stats::setNames(length(x_values), "N Group 1") } else { dat <- stats::na.omit(data.frame(dv, grp, weights)) colnames(dat) <- c("y", "g", "w") # unique groups - groups <- unique(dat$grp) + groups <- unique(dat$g) # values for sample 1 x_values <- dat$y[dat$g == groups[1]] x_weights <- dat$w[dat$g == groups[1]] # values for sample 2 y_values <- dat$y[dat$g == groups[2]] y_weights <- dat$w[dat$g == groups[2]] + # group N's + n_groups <- stats::setNames( + c(length(x_values), length(y_values)), + c("N Group 1", "N Group 2") + ) } mu_x <- stats::weighted.mean(x_values, x_weights, na.rm = TRUE) @@ -285,7 +300,7 @@ t_test <- function(data, attr(out, "group_labels") <- group_labels attr(out, "paired") <- isTRUE(paired) attr(out, "one_sample") <- is.null(y_values) && !isTRUE(paired) - attr(out, "weighted") <- FALSE + attr(out, "weighted") <- TRUE out } From 941c0196f422b8a969dd52cbe233fdf4359c488c Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:52:41 +0200 Subject: [PATCH 69/82] fix --- R/wtd_se.R | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/R/wtd_se.R b/R/wtd_se.R index c87203bc..5bf0eb36 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -31,32 +31,23 @@ #' #' @return The weighted (test) statistic. 
#' -#' @examples +#' @examplesIf requireNamespace("survey") #' data(efc) -#' weighted_se(efc[, 1:3], runif(n = nrow(efc))) +#' weighted_se(efc$c12hour, abs(runif(n = nrow(efc)))) #' #' # survey_median ---- #' # median for variables from weighted survey designs -#' if (require("survey")) { -#' data(nhanes_sample) +#' data(nhanes_sample) #' -#' des <- svydesign( -#' id = ~SDMVPSU, -#' strat = ~SDMVSTRA, -#' weights = ~WTINT2YR, -#' nest = TRUE, -#' data = nhanes_sample -#' ) -#' -#' survey_median(total, des) -#' survey_median("total", des) -#' } -#' -#' # weighted t-test ---- -#' efc$weight <- abs(rnorm(nrow(efc), 1, .3)) -#' weighted_ttest(efc, e17age, weights = weight) -#' weighted_ttest(efc, e17age, c160age, weights = weight) -#' weighted_ttest(e17age ~ e16sex + weight, efc) +#' des <- survey::svydesign( +#' id = ~SDMVPSU, +#' strat = ~SDMVSTRA, +#' weights = ~WTINT2YR, +#' nest = TRUE, +#' data = nhanes_sample +#' ) +#' survey_median(total, des) +#' survey_median("total", des) #' @export weighted_se <- function(x, weights = NULL) { UseMethod("weighted_se") From d2cd58d1ac6f7865a003a09d9e3fa0690ae39261 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:52:56 +0200 Subject: [PATCH 70/82] docs --- man/weighted_se.Rd | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index ca87d91b..600172a9 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -90,29 +90,22 @@ a variable in a survey-design (see [\verb{survey::svydesign()]}). alternative hypothesis. } \examples{ +\dontshow{if (requireNamespace("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) -weighted_se(efc[, 1:3], runif(n = nrow(efc))) +weighted_se(efc$c12hour, abs(runif(n = nrow(efc)))) # survey_median ---- # median for variables from weighted survey designs -if (require("survey")) { - data(nhanes_sample) - - des <- svydesign( - id = ~SDMVPSU, - strat = ~SDMVSTRA, - weights = ~WTINT2YR, - nest = TRUE, - data = nhanes_sample - ) - - survey_median(total, des) - survey_median("total", des) -} - -# weighted t-test ---- -efc$weight <- abs(rnorm(nrow(efc), 1, .3)) -weighted_ttest(efc, e17age, weights = weight) -weighted_ttest(efc, e17age, c160age, weights = weight) -weighted_ttest(e17age ~ e16sex + weight, efc) +data(nhanes_sample) + +des <- survey::svydesign( + id = ~SDMVPSU, + strat = ~SDMVSTRA, + weights = ~WTINT2YR, + nest = TRUE, + data = nhanes_sample +) +survey_median(total, des) +survey_median("total", des) +\dontshow{\}) # examplesIf} } From 827e3000d629255c62c3b5ae2ee4db28734326a4 Mon Sep 17 00:00:00 2001 From: Daniel Date: Sun, 12 May 2024 23:54:06 +0200 Subject: [PATCH 71/82] remove weighted_ttest --- NAMESPACE | 3 - R/wtd_ttest.R | 176 --------------------------------------------- man/weighted_se.Rd | 34 +-------- 3 files changed, 3 insertions(+), 210 deletions(-) delete mode 100644 R/wtd_ttest.R diff --git a/NAMESPACE b/NAMESPACE index 7fe9e77b..e410b8f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,8 +37,6 @@ S3method(weighted_correlation,formula) S3method(weighted_se,data.frame) S3method(weighted_se,default) S3method(weighted_se,matrix) -S3method(weighted_ttest,default) -S3method(weighted_ttest,formula) export(anova_stats) export(auto_prior) export(boot_ci) @@ -101,7 +99,6 @@ export(weighted_mean) export(weighted_median) export(weighted_sd) export(weighted_se) -export(weighted_ttest) export(wilcoxon_test) export(xtab_statistics) importFrom(bayestestR,ci) diff --git 
a/R/wtd_ttest.R b/R/wtd_ttest.R deleted file mode 100644 index c6c6ee2a..00000000 --- a/R/wtd_ttest.R +++ /dev/null @@ -1,176 +0,0 @@ -#' @rdname weighted_se -#' @export -weighted_ttest <- function(data, ...) { - UseMethod("weighted_ttest") -} - -#' @rdname weighted_se -#' @export -weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) { - - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) { - insight::format_error("'ci.lvl' must be a single number between 0 and 1") - } - - alternative <- match.arg(alternative) - - x.name <- deparse(substitute(x)) - y.name <- deparse(substitute(y)) - w.name <- deparse(substitute(weights)) - - if (y.name == "NULL") y.name <- NULL - - if (w.name == "NULL") { - w.name <- "weights" - data$weights <- 1 - } - - # create string with variable names - vars <- c(x.name, y.name, w.name) - - # get data - dat <- suppressMessages(data[vars]) - dat <- stats::na.omit(dat) - - if (insight::is_empty_object(dat) || nrow(dat) == 1) { - insight::format_alert("Too less data to compute t-test.") - return(NULL) - } - - xv <- dat[[x.name]] - wx <- wy <- dat[[w.name]] - - if (!is.null(y.name)) - yv <- dat[[y.name]] - else - yv <- NULL - - nx <- ny <- nrow(dat) - - weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, x.name, y.name, NULL) -} - - -#' @rdname weighted_se -#' @export -weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) { - - if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1)) { - insight::format_error("'ci.lvl' must be a single number between 0 and 1") - } - - alternative <- match.arg(alternative) - - vars <- all.vars(formula) - - g <- data[[vars[2]]] - - if (is.factor(g)) - grps <- levels(g) - else - grps <- stats::na.omit(sort(unique(g))) - - if (length(grps) > 2) - stop("Grouping factor has more than two levels.") - - if (length(vars) < 3) { - vars <- c(vars, "weights") - data$weights <- 1 - } - - x <- data[[vars[1]]] - y <- data[[vars[2]]] - w <- data[[vars[3]]] - - xv <- x[y == grps[1]] - yv <- x[y == grps[2]] - wx <- w[y == grps[1]] - wy <- w[y == grps[2]] - - mxv <- is.na(xv) - xv <- xv[!mxv] - wx <- wx[!mxv] - - myv <- is.na(yv) - yv <- yv[!myv] - wy <- wy[!myv] - - nx <- length(xv) - ny <- length(yv) - - weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, vars[1], vars[2], vars[2]) -} - - -weighted_ttest_helper <- function(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, x.name, y.name, group.name) { - if (paired) { - xv <- xv - yv - yv <- NULL - } - - mu.x.w <- stats::weighted.mean(xv, wx) - var.x.w <- datawizard::weighted_sd(xv, wx)^2 - se.x <- sqrt(var.x.w / nx) - - if (!is.null(yv)) { - mu.y.w <- stats::weighted.mean(yv, wy) - var.y.w <- datawizard::weighted_sd(yv, wy)^2 - se.y <- sqrt(var.y.w / ny) - - se <- sqrt(se.x^2 + se.y^2) - df <- se^4 / (se.x^4 / (nx - 1) + se.y^4 / (ny - 1)) - tstat <- (mu.x.w - mu.y.w - mu) / se - - estimate <- c(mu.x.w, mu.y.w) - names(estimate) <- c("mean of x", "mean of y") - method <- "Two-Sample t-test" - } else { - se <- se.x - df <- nx - 1 - tstat <- (mu.x.w - mu) / se - - estimate <- stats::setNames(mu.x.w, if (paired) "mean of the differences" else "mean of x") - method <- if (paired) "Paired t-test" else "One Sample t-test" - } - - - - if (alternative == "less") { - pval <- 
stats::pt(tstat, df) - cint <- c(-Inf, tstat + stats::qt(ci.lvl, df)) - } else if (alternative == "greater") { - pval <- stats::pt(tstat, df, lower.tail = FALSE) - cint <- c(tstat - stats::qt(ci.lvl, df), Inf) - } else { - pval <- 2 * stats::pt(-abs(tstat), df) - alpha <- 1 - ci.lvl - cint <- stats::qt(1 - alpha / 2, df) - cint <- tstat + c(-cint, cint) - } - - cint <- mu + cint * se - - names(tstat) <- "t" - names(df) <- "df" - names(mu) <- if (paired || !is.null(yv)) "difference in means" else "mean" - - - tt <- structure( - class = "sj_ttest", - list( - estimate = estimate, - statistic = tstat, - df = df, - p.value = pval, - ci = cint, - alternative = alternative, - method = method - ) - ) - - attr(tt, "x.name") <- x.name - attr(tt, "y.name") <- y.name - attr(tt, "group.name") <- group.name - - tt -} diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index 600172a9..27044bc4 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -1,15 +1,11 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/svy_median.R, R/wtd_cor.R, R/wtd_se.R, -% R/wtd_ttest.R +% Please edit documentation in R/svy_median.R, R/wtd_cor.R, R/wtd_se.R \name{survey_median} \alias{survey_median} \alias{weighted_correlation} \alias{weighted_correlation.default} \alias{weighted_correlation.formula} \alias{weighted_se} -\alias{weighted_ttest} -\alias{weighted_ttest.default} -\alias{weighted_ttest.formula} \title{Weighted statistics for tests and variables} \usage{ survey_median(x, design) @@ -21,30 +17,6 @@ weighted_correlation(data, ...) \method{weighted_correlation}{formula}(formula, data, ci.lvl = 0.95, ...) weighted_se(x, weights = NULL) - -weighted_ttest(data, ...) - -\method{weighted_ttest}{default}( - data, - x, - y = NULL, - weights, - mu = 0, - paired = FALSE, - ci.lvl = 0.95, - alternative = c("two.sided", "less", "greater"), - ... -) - -\method{weighted_ttest}{formula}( - formula, - data, - mu = 0, - paired = FALSE, - ci.lvl = 0.95, - alternative = c("two.sided", "less", "greater"), - ... -) } \arguments{ \item{x}{(Numeric) vector or a data frame. For \code{survey_median()} or \code{weighted_ttest()}, @@ -73,11 +45,11 @@ levels giving the corresponding groups and \code{rhs2} a variable with weights.} \item{mu}{A number indicating the true value of the mean (or difference in means if you are performing a two sample test).} -\item{paired}{Logical, whether to compute a paired t-test.} - \item{alternative}{A character string specifying the alternative hypothesis, must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. You can specify just the initial letter.} + +\item{paired}{Logical, whether to compute a paired t-test.} } \value{ The weighted (test) statistic. 
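The patch above retires weighted_ttest() in favour of the refactored test functions introduced earlier in this series. A minimal sketch of the replacement calls, assembled from the tests and examples added in these patches; the library() name is assumed from the repository layout and the weights column is illustrative:

library(sjstats)                              # assumed package name for this repo
data(efc)
set.seed(123)
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))   # illustrative weights, as used in the tests

# previously weighted_ttest(e17age ~ e16sex + weight, efc); the weighted
# two-sample comparison now runs through t_test()
t_test(efc, "e17age", "e16sex", weights = "weight")

# one-sample and paired weighted variants use the same interface
t_test(efc, "e17age", weights = "weight")
t_test(efc, c("e17age", "c160age"), weights = "weight", paired = TRUE)
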
From 3d6f052212dce35f03d5fc130ec29879307ac681 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:01:16 +0200 Subject: [PATCH 72/82] fix --- R/t_test.R | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/R/t_test.R b/R/t_test.R index 988b9a7a..3236d330 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -192,7 +192,7 @@ t_test <- function(data, x_weights <- dat$w y_values <- NULL # group N's - n_groups <- stats::setNames(length(x_values), "N Group 1") + n_groups <- stats::setNames(sum(x_values * x_weights), "N Group 1") } else { dat <- stats::na.omit(data.frame(dv, grp, weights)) colnames(dat) <- c("y", "g", "w") @@ -206,7 +206,7 @@ t_test <- function(data, y_weights <- dat$w[dat$g == groups[2]] # group N's n_groups <- stats::setNames( - c(length(x_values), length(y_values)), + c(sum(x_values * x_weights), length(y_values * y_weights)), c("N Group 1", "N Group 2") ) } @@ -291,12 +291,8 @@ t_test <- function(data, ) class(out) <- c("sj_htest_t", "data.frame") attr(out, "means") <- estimate - if (!is.null(grp)) { - attr(out, "n_groups") <- stats::setNames( - as.numeric(as.table(round(stats::xtabs(dat[[3]] ~ dat[[1]] + dat[[2]])))), - c("N Group 1", "N Group 2") - ) - } + attr(out, "n_groups") <- n_groups + attr(out, "means") <- estimate attr(out, "group_labels") <- group_labels attr(out, "paired") <- isTRUE(paired) attr(out, "one_sample") <- is.null(y_values) && !isTRUE(paired) From 83135140b57943378a6a27b246bba0b2eeef76db Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:07:25 +0200 Subject: [PATCH 73/82] update docs+tests --- R/t_test.R | 4 +- R/wtd_se.R | 6 -- man/weighted_se.Rd | 9 --- tests/testthat/_snaps/t_test.md | 114 ++++++++++++++++++++++++++++++++ tests/testthat/test-t_test.R | 17 +++++ 5 files changed, 133 insertions(+), 17 deletions(-) create mode 100644 tests/testthat/_snaps/t_test.md create mode 100644 tests/testthat/test-t_test.R diff --git a/R/t_test.R b/R/t_test.R index 3236d330..ac33ecbf 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -192,7 +192,7 @@ t_test <- function(data, x_weights <- dat$w y_values <- NULL # group N's - n_groups <- stats::setNames(sum(x_values * x_weights), "N Group 1") + n_groups <- stats::setNames(round(sum(x_weights)), "N Group 1") } else { dat <- stats::na.omit(data.frame(dv, grp, weights)) colnames(dat) <- c("y", "g", "w") @@ -206,7 +206,7 @@ t_test <- function(data, y_weights <- dat$w[dat$g == groups[2]] # group N's n_groups <- stats::setNames( - c(sum(x_values * x_weights), length(y_values * y_weights)), + c(round(sum(x_weights)), round(sum(y_weights))), c("N Group 1", "N Group 2") ) } diff --git a/R/wtd_se.R b/R/wtd_se.R index 5bf0eb36..3e1bf72d 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -18,13 +18,7 @@ #' levels giving the corresponding groups and `rhs2` a variable with weights. #' @param y Optional, bare (unquoted) variable name, or a character vector with #' the variable name. -#' @param mu A number indicating the true value of the mean (or difference in -#' means if you are performing a two sample test). #' @param ci.lvl Confidence level of the interval. -#' @param alternative A character string specifying the alternative hypothesis, -#' must be one of `"two.sided"` (default), `"greater"` or `"less"`. You can -#' specify just the initial letter. -#' @param paired Logical, whether to compute a paired t-test. #' @param ... Currently not used. 
#' #' @inheritParams svyglm.nb diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index 27044bc4..71ed17bc 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -41,15 +41,6 @@ unweighted statistic is reported.} \item{formula}{A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a numeric variable giving the data values and \code{rhs1} a factor with two levels giving the corresponding groups and \code{rhs2} a variable with weights.} - -\item{mu}{A number indicating the true value of the mean (or difference in -means if you are performing a two sample test).} - -\item{alternative}{A character string specifying the alternative hypothesis, -must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. You can -specify just the initial letter.} - -\item{paired}{Logical, whether to compute a paired t-test.} } \value{ The weighted (test) statistic. diff --git a/tests/testthat/_snaps/t_test.md b/tests/testthat/_snaps/t_test.md new file mode 100644 index 00000000..cae87ef3 --- /dev/null +++ b/tests/testthat/_snaps/t_test.md @@ -0,0 +1,114 @@ +# t_test + + Code + t_test(efc, "e17age") + Output + # One Sample t-test + + Data: e17age + Group 1: e17age (mean = 79.12) + Alternative hypothesis: true mean is not equal to 0 + + t = 291.78, Cohen's d = 9.77 (large effect), df = 890, p < .001 + + +--- + + Code + t_test(efc, "e17age", "e16sex") + Output + # Welch Two Sample t-test + + Data: e17age by e16sex + Group 1: 1 (n = 294, mean = 76.16) + Group 2: 2 (n = 596, mean = 80.57) + Alternative hypothesis: true difference in means is not equal to 0 + + t = -8.05, Cohen's d = -0.56 (medium effect), df = 610.8, p < .001 + + +--- + + Code + t_test(efc, c("e17age", "c160age")) + Output + # Welch Two Sample t-test + + Data: e17age by c160age + Group 1: c160age (n = 890, mean = 53.42) + Group 2: e17age (n = 890, mean = 79.12) + Alternative hypothesis: true difference in means is not equal to 0 + + t = -49.22, Cohen's d = -2.33 (large effect), df = 1468.1, p < .001 + + +--- + + Code + t_test(efc, c("e17age", "c160age"), paired = TRUE) + Output + # Paired t-test + + Data: e17age and c160age (mean difference = 25.70) + Alternative hypothesis: true mean is not equal to 0 + + t = 54.11, Cohen's d = 1.81 (large effect), df = 889, p < .001 + + +--- + + Code + t_test(efc, "e17age", weights = "weight") + Output + # One Sample t-test (weighted) + + Data: e17age + Group 1: e17age (n = 897, mean = 79.17) + Alternative hypothesis: true mean is not equal to 0 + + t = 291.31, Cohen's d = 3.17 (large effect), df = 890, p < .001 + + +--- + + Code + t_test(efc, "e17age", "e16sex", weights = "weight") + Output + # Two-Sample t-test (weighted) + + Data: e17age by e16sex + Group 1: 1 (n = 600, mean = 80.63) + Group 2: 2 (n = 296, mean = 76.19) + Alternative hypothesis: true difference in means is not equal to 0 + + t = 8.03, Cohen's d = -0.17 (very small effect), df = 604.5, p < .001 + + +--- + + Code + t_test(efc, c("e17age", "c160age"), weights = "weight") + Output + # Two-Sample t-test (weighted) + + Data: e17age by c160age + Group 1: c160age (n = 896, mean = 79.17) + Group 2: e17age (n = 896, mean = 53.40) + Alternative hypothesis: true difference in means is not equal to 0 + + t = 49.31, Cohen's d = -1.12 (large effect), df = 1470.0, p < .001 + + +--- + + Code + t_test(efc, c("e17age", "c160age"), weights = "weight", paired = TRUE) + Output + # Paired t-test (weighted) + + Data: e17age and c160age (mean difference = 25.77) + Alternative hypothesis: true mean difference is not equal to 0 + + 
t = 54.37, Cohen's d = 1.54 (large effect), df = 889, p < .001 + + diff --git a/tests/testthat/test-t_test.R b/tests/testthat/test-t_test.R new file mode 100644 index 00000000..03f38110 --- /dev/null +++ b/tests/testthat/test-t_test.R @@ -0,0 +1,17 @@ +skip_if_not_installed("datawizard") +skip_if_not_installed("effectsize") + +test_that("t_test", { + data(efc) + set.seed(123) + efc$weight <- abs(rnorm(nrow(efc), 1, 0.3)) + expect_snapshot(t_test(efc, "e17age")) + expect_snapshot(t_test(efc, "e17age", "e16sex")) + expect_snapshot(t_test(efc, c("e17age", "c160age"))) + expect_snapshot(t_test(efc, c("e17age", "c160age"), paired = TRUE)) + + expect_snapshot(t_test(efc, "e17age", weights = "weight")) + expect_snapshot(t_test(efc, "e17age", "e16sex", weights = "weight")) + expect_snapshot(t_test(efc, c("e17age", "c160age"), weights = "weight")) + expect_snapshot(t_test(efc, c("e17age", "c160age"), weights = "weight", paired = TRUE)) +}) From 6f413910fe1d0bcb8cc25af2cca0abdf7a9bf4ab Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:11:43 +0200 Subject: [PATCH 74/82] docs --- R/chi_squared_test.R | 2 +- man/chi_squared_test.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index c1009982..1a299632 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -1,6 +1,6 @@ #' @title Chi-Squared test #' @name chi_squared_test -#' @description This function performs a \eqn{chi^2} test for contingency +#' @description This function performs a \eqn{\chi^2} test for contingency #' tables or tests for given probabilities. The returned effects sizes are #' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) #' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 3a7d2425..5bfab815 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -48,7 +48,7 @@ for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against given probabilities. } \description{ -This function performs a \eqn{chi^2} test for contingency +This function performs a \eqn{\chi^2} test for contingency tables or tests for given probabilities. The returned effects sizes are Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against From e97b988b2958d3ee54f1bc289b0730db38c9446a Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:14:41 +0200 Subject: [PATCH 75/82] fix --- R/mann_whitney_test.R | 2 +- _pkgdown.yml | 2 +- man/mann_whitney_test.Rd | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 5f177ebf..78ab36c6 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -1,7 +1,7 @@ #' @title Mann-Whitney test #' @name mann_whitney_test #' @description This function performs a Mann-Whitney test (or Wilcoxon rank -#' sum test for _unpaired_ samples. Unlike the underlying base R function +#' sum test for _unpaired_ samples). Unlike the underlying base R function #' `wilcox.test()`, this function allows for weighted tests and automatically #' calculates effect sizes. For _paired_ (dependent) samples, or for one-sample #' tests, please use the `wilcoxon_test()` function. 
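The wording fix above also marks the split between the two rank tests: mann_whitney_test() handles unpaired samples, while paired and one-sample cases go to wilcoxon_test(). A short sketch, using calls taken from the examples and tests in this series (package attached as in the earlier sketch):

# unpaired samples: Mann-Whitney / Wilcoxon rank-sum test
data(efc)
mann_whitney_test(efc, "e17age", by = "e16sex")

# paired samples or one sample: Wilcoxon signed-rank test
data(mtcars)
wilcoxon_test(mtcars, c("mpg", "hp"))   # paired comparison of two columns
wilcoxon_test(mtcars, "mpg")            # one-sample test
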
diff --git a/_pkgdown.yml b/_pkgdown.yml index 4a992ea1..fa456107 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -25,7 +25,7 @@ reference: - title: "Weighted Estimates and Dispersion" contents: - weight - - weighted_ttest + - weighted_se - title: "Summary Statistics and Tests" contents: diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 218abece..8bf194ad 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -45,7 +45,7 @@ as well as effect size r and group-rank-means. } \description{ This function performs a Mann-Whitney test (or Wilcoxon rank -sum test for \emph{unpaired} samples. Unlike the underlying base R function +sum test for \emph{unpaired} samples). Unlike the underlying base R function \code{wilcox.test()}, this function allows for weighted tests and automatically calculates effect sizes. For \emph{paired} (dependent) samples, or for one-sample tests, please use the \code{wilcoxon_test()} function. From f926dcad588963b8823bfcff9884885a784b88c7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:15:44 +0200 Subject: [PATCH 76/82] docs --- R/wtd_se.R | 2 +- man/weighted_se.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/wtd_se.R b/R/wtd_se.R index 3e1bf72d..62c48c3e 100644 --- a/R/wtd_se.R +++ b/R/wtd_se.R @@ -1,4 +1,4 @@ -#' @title Weighted statistics for tests and variables +#' @title Weighted statistics for variables #' @name weighted_se #' @description #' `weighted_se()` computes weighted standard errors of a variable or for diff --git a/man/weighted_se.Rd b/man/weighted_se.Rd index 71ed17bc..1aa07abd 100644 --- a/man/weighted_se.Rd +++ b/man/weighted_se.Rd @@ -6,7 +6,7 @@ \alias{weighted_correlation.default} \alias{weighted_correlation.formula} \alias{weighted_se} -\title{Weighted statistics for tests and variables} +\title{Weighted statistics for variables} \usage{ survey_median(x, design) From 84036e7f38debf7930905e60e0ef601f301c13e7 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 00:36:08 +0200 Subject: [PATCH 77/82] fix --- R/mann_whitney_test.R | 22 +++++++++++++--------- R/wilcoxon_test.R | 2 +- man/chi_squared_test.Rd | 16 ++++++++++++---- man/kruskal_wallis_test.Rd | 16 ++++++++++++---- man/mann_whitney_test.Rd | 21 +++++++++++++-------- man/t_test.Rd | 16 ++++++++++++---- man/wilcoxon_test.Rd | 18 +++++++++++++----- 7 files changed, 76 insertions(+), 35 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 78ab36c6..2a89e78d 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -13,10 +13,18 @@ #' the `t_test()` function can be used. #' #' @param data A data frame. -#' @param select One or more name of the continuous variable (as character -#' vector) to be used as samples for the test. If `select` only specified one -#' variable, a one-sample test is carried out (only applicable for `t_test()`). -#' Else, `by` must be provided to indicate the groups of comparison. +#' @param select Name(s) of the continuous variable(s) (as character vector) +#' to be used as samples for the test. `select` can be one of the following: +#' +#' - `select` specifies **one** variable and `by` is provided to indicate the +#' groups of comparison. In this case, the samples in `select` are split by +#' the groups in `by`. +#' - If `select` specifies **one** variable and `by = NULL`, a one-sample test +#' is carried out (only applicable for `t_test()` and `wilcoxon_test()`). 
+#' - If `select` specifies **two** variables and `by` is `NULL`, the two +#' samples are compared. In combination with `paired`, a paired test is +#' carried out, i.e. samples are considered as *dependent*. +#' #' @param by Name of the variable indicating the groups. Required if `select` #' specifies only one variable that contains all samples to be compared in the #' test. If `by` is not a factor, it will be coerced to a factor. For @@ -50,11 +58,7 @@ #' - medium effect >= 0.3 #' - large effect >= 0.5 #' -#' **r** is calcuated as: -#' -#' ``` -#' r = |Z| / sqrt(n1 + n2) -#' ``` +#' **r** is calcuated as \eqn{r = \frac{|Z|}{\sqrt{n1 + n2}}}. #' #' @examplesIf requireNamespace("coin") && requireNamespace("survey") #' data(efc) diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 7e75fd15..92c59a00 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -25,7 +25,7 @@ #' # one-sample test #' wilcoxon_test(mtcars, "mpg") #' # base R equivalent -#' wilcox.test(mtcars$mpg ~ 1) +#' suppressWarning(wilcox.test(mtcars$mpg ~ 1)) #' #' # paired test #' wilcoxon_test(mtcars, c("mpg", "hp")) diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 5bfab815..272110e2 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -17,10 +17,18 @@ chi_squared_test( \arguments{ \item{data}{A data frame.} -\item{select}{One or more name of the continuous variable (as character -vector) to be used as samples for the test. If \code{select} only specified one -variable, a one-sample test is carried out (only applicable for \code{t_test()}). -Else, \code{by} must be provided to indicate the groups of comparison.} +\item{select}{Name(s) of the continuous variable(s) (as character vector) +to be used as samples for the test. \code{select} can be one of the following: +\itemize{ +\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the +groups of comparison. In this case, the samples in \code{select} are split by +the groups in \code{by}. +\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test +is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). +\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two +samples are compared. In combination with \code{paired}, a paired test is +carried out, i.e. samples are considered as \emph{dependent}. +}} \item{by}{Name of the variable indicating the groups. Required if \code{select} specifies only one variable that contains all samples to be compared in the diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 5b5ba09e..7406d646 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -9,10 +9,18 @@ kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL) \arguments{ \item{data}{A data frame.} -\item{select}{One or more name of the continuous variable (as character -vector) to be used as samples for the test. If \code{select} only specified one -variable, a one-sample test is carried out (only applicable for \code{t_test()}). -Else, \code{by} must be provided to indicate the groups of comparison.} +\item{select}{Name(s) of the continuous variable(s) (as character vector) +to be used as samples for the test. \code{select} can be one of the following: +\itemize{ +\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the +groups of comparison. In this case, the samples in \code{select} are split by +the groups in \code{by}. 
+\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test +is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). +\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two +samples are compared. In combination with \code{paired}, a paired test is +carried out, i.e. samples are considered as \emph{dependent}. +}} \item{by}{Name of the variable indicating the groups. Required if \code{select} specifies only one variable that contains all samples to be compared in the diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 8bf194ad..7e926145 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -17,10 +17,18 @@ mann_whitney_test( \arguments{ \item{data}{A data frame.} -\item{select}{One or more name of the continuous variable (as character -vector) to be used as samples for the test. If \code{select} only specified one -variable, a one-sample test is carried out (only applicable for \code{t_test()}). -Else, \code{by} must be provided to indicate the groups of comparison.} +\item{select}{Name(s) of the continuous variable(s) (as character vector) +to be used as samples for the test. \code{select} can be one of the following: +\itemize{ +\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the +groups of comparison. In this case, the samples in \code{select} are split by +the groups in \code{by}. +\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test +is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). +\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two +samples are compared. In combination with \code{paired}, a paired test is +carried out, i.e. samples are considered as \emph{dependent}. +}} \item{by}{Name of the variable indicating the groups. Required if \code{select} specifies only one variable that contains all samples to be compared in the @@ -68,10 +76,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: \item large effect >= 0.5 } -\strong{r} is calcuated as: - -\if{html}{\out{
}}\preformatted{r = |Z| / sqrt(n1 + n2) -}\if{html}{\out{
}} +\strong{r} is calcuated as \eqn{r = \frac{|Z|}{\sqrt{n1 + n2}}}. } \examples{ \dontshow{if (requireNamespace("coin") && requireNamespace("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} diff --git a/man/t_test.Rd b/man/t_test.Rd index b48cee8f..d14e4f72 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -17,10 +17,18 @@ t_test( \arguments{ \item{data}{A data frame.} -\item{select}{One or more name of the continuous variable (as character -vector) to be used as samples for the test. If \code{select} only specified one -variable, a one-sample test is carried out (only applicable for \code{t_test()}). -Else, \code{by} must be provided to indicate the groups of comparison.} +\item{select}{Name(s) of the continuous variable(s) (as character vector) +to be used as samples for the test. \code{select} can be one of the following: +\itemize{ +\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the +groups of comparison. In this case, the samples in \code{select} are split by +the groups in \code{by}. +\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test +is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). +\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two +samples are compared. In combination with \code{paired}, a paired test is +carried out, i.e. samples are considered as \emph{dependent}. +}} \item{by}{Name of the variable indicating the groups. Required if \code{select} specifies only one variable that contains all samples to be compared in the diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index 19a3118d..2e2e35a2 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -17,10 +17,18 @@ wilcoxon_test( \arguments{ \item{data}{A data frame.} -\item{select}{One or more name of the continuous variable (as character -vector) to be used as samples for the test. If \code{select} only specified one -variable, a one-sample test is carried out (only applicable for \code{t_test()}). -Else, \code{by} must be provided to indicate the groups of comparison.} +\item{select}{Name(s) of the continuous variable(s) (as character vector) +to be used as samples for the test. \code{select} can be one of the following: +\itemize{ +\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the +groups of comparison. In this case, the samples in \code{select} are split by +the groups in \code{by}. +\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test +is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). +\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two +samples are compared. In combination with \code{paired}, a paired test is +carried out, i.e. samples are considered as \emph{dependent}. +}} \item{by}{Name of the variable indicating the groups. 
Required if \code{select} specifies only one variable that contains all samples to be compared in the @@ -64,7 +72,7 @@ data(mtcars) # one-sample test wilcoxon_test(mtcars, "mpg") # base R equivalent -wilcox.test(mtcars$mpg ~ 1) +suppressWarning(wilcox.test(mtcars$mpg ~ 1)) # paired test wilcoxon_test(mtcars, c("mpg", "hp")) From f3da4b4ba315535fa4ca90b2e377afd865c4259d Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 07:36:09 +0200 Subject: [PATCH 78/82] docs, fix --- R/mann_whitney_test.R | 15 ++++++++------- R/wilcoxon_test.R | 2 +- man/chi_squared_test.Rd | 14 ++++++++------ man/kruskal_wallis_test.Rd | 14 ++++++++------ man/mann_whitney_test.Rd | 14 ++++++++------ man/t_test.Rd | 14 ++++++++------ man/wilcoxon_test.Rd | 16 +++++++++------- 7 files changed, 50 insertions(+), 39 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 2a89e78d..b858918b 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -16,15 +16,16 @@ #' @param select Name(s) of the continuous variable(s) (as character vector) #' to be used as samples for the test. `select` can be one of the following: #' -#' - `select` specifies **one** variable and `by` is provided to indicate the -#' groups of comparison. In this case, the samples in `select` are split by -#' the groups in `by`. +#' - `select` can be used in combination with `by`, in which case `select` is +#' the name of the continous variable (and `by` indicates a grouping factor). +#' - `select` can also be a character vector of length two or more (more than +#' two names only apply to `kruskal_wallis_test()`), in which case the two +#' continuous variables are treated as samples to be compared. `by` must be +#' `NULL` in this case. +#' - If `select` select is of length **two** and `paired = TRUE`, the two samples +#' are considered as *dependent* and a paired test is carried out. #' - If `select` specifies **one** variable and `by = NULL`, a one-sample test #' is carried out (only applicable for `t_test()` and `wilcoxon_test()`). -#' - If `select` specifies **two** variables and `by` is `NULL`, the two -#' samples are compared. In combination with `paired`, a paired test is -#' carried out, i.e. samples are considered as *dependent*. -#' #' @param by Name of the variable indicating the groups. Required if `select` #' specifies only one variable that contains all samples to be compared in the #' test. If `by` is not a factor, it will be coerced to a factor. For diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index 92c59a00..da00f9b7 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -25,7 +25,7 @@ #' # one-sample test #' wilcoxon_test(mtcars, "mpg") #' # base R equivalent -#' suppressWarning(wilcox.test(mtcars$mpg ~ 1)) +#' suppressWarnings(wilcox.test(mtcars$mpg ~ 1)) #' #' # paired test #' wilcoxon_test(mtcars, c("mpg", "hp")) diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 272110e2..cc11aa83 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -20,14 +20,16 @@ chi_squared_test( \item{select}{Name(s) of the continuous variable(s) (as character vector) to be used as samples for the test. \code{select} can be one of the following: \itemize{ -\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the -groups of comparison. In this case, the samples in \code{select} are split by -the groups in \code{by}. 
+\item \code{select} can be used in combination with \code{by}, in which case \code{select} is +the name of the continous variable (and \code{by} indicates a grouping factor). +\item \code{select} can also be a character vector of length two or more (more than +two names only apply to \code{kruskal_wallis_test()}), in which case the two +continuous variables are treated as samples to be compared. \code{by} must be +\code{NULL} in this case. +\item If \code{select} select is of length \strong{two} and \code{paired = TRUE}, the two samples +are considered as \emph{dependent} and a paired test is carried out. \item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). -\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two -samples are compared. In combination with \code{paired}, a paired test is -carried out, i.e. samples are considered as \emph{dependent}. }} \item{by}{Name of the variable indicating the groups. Required if \code{select} diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 7406d646..2f7b6790 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -12,14 +12,16 @@ kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL) \item{select}{Name(s) of the continuous variable(s) (as character vector) to be used as samples for the test. \code{select} can be one of the following: \itemize{ -\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the -groups of comparison. In this case, the samples in \code{select} are split by -the groups in \code{by}. +\item \code{select} can be used in combination with \code{by}, in which case \code{select} is +the name of the continous variable (and \code{by} indicates a grouping factor). +\item \code{select} can also be a character vector of length two or more (more than +two names only apply to \code{kruskal_wallis_test()}), in which case the two +continuous variables are treated as samples to be compared. \code{by} must be +\code{NULL} in this case. +\item If \code{select} select is of length \strong{two} and \code{paired = TRUE}, the two samples +are considered as \emph{dependent} and a paired test is carried out. \item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). -\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two -samples are compared. In combination with \code{paired}, a paired test is -carried out, i.e. samples are considered as \emph{dependent}. }} \item{by}{Name of the variable indicating the groups. Required if \code{select} diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 7e926145..be90ae7d 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -20,14 +20,16 @@ mann_whitney_test( \item{select}{Name(s) of the continuous variable(s) (as character vector) to be used as samples for the test. \code{select} can be one of the following: \itemize{ -\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the -groups of comparison. In this case, the samples in \code{select} are split by -the groups in \code{by}. 
+\item \code{select} can be used in combination with \code{by}, in which case \code{select} is +the name of the continous variable (and \code{by} indicates a grouping factor). +\item \code{select} can also be a character vector of length two or more (more than +two names only apply to \code{kruskal_wallis_test()}), in which case the two +continuous variables are treated as samples to be compared. \code{by} must be +\code{NULL} in this case. +\item If \code{select} select is of length \strong{two} and \code{paired = TRUE}, the two samples +are considered as \emph{dependent} and a paired test is carried out. \item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). -\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two -samples are compared. In combination with \code{paired}, a paired test is -carried out, i.e. samples are considered as \emph{dependent}. }} \item{by}{Name of the variable indicating the groups. Required if \code{select} diff --git a/man/t_test.Rd b/man/t_test.Rd index d14e4f72..085896fe 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -20,14 +20,16 @@ t_test( \item{select}{Name(s) of the continuous variable(s) (as character vector) to be used as samples for the test. \code{select} can be one of the following: \itemize{ -\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the -groups of comparison. In this case, the samples in \code{select} are split by -the groups in \code{by}. +\item \code{select} can be used in combination with \code{by}, in which case \code{select} is +the name of the continous variable (and \code{by} indicates a grouping factor). +\item \code{select} can also be a character vector of length two or more (more than +two names only apply to \code{kruskal_wallis_test()}), in which case the two +continuous variables are treated as samples to be compared. \code{by} must be +\code{NULL} in this case. +\item If \code{select} select is of length \strong{two} and \code{paired = TRUE}, the two samples +are considered as \emph{dependent} and a paired test is carried out. \item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}). -\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two -samples are compared. In combination with \code{paired}, a paired test is -carried out, i.e. samples are considered as \emph{dependent}. }} \item{by}{Name of the variable indicating the groups. Required if \code{select} diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index 2e2e35a2..dacf41d7 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -20,14 +20,16 @@ wilcoxon_test( \item{select}{Name(s) of the continuous variable(s) (as character vector) to be used as samples for the test. \code{select} can be one of the following: \itemize{ -\item \code{select} specifies \strong{one} variable and \code{by} is provided to indicate the -groups of comparison. In this case, the samples in \code{select} are split by -the groups in \code{by}. +\item \code{select} can be used in combination with \code{by}, in which case \code{select} is +the name of the continous variable (and \code{by} indicates a grouping factor). 
+\item \code{select} can also be a character vector of length two or more (more than
+two names only apply to \code{kruskal_wallis_test()}), in which case the two
+continuous variables are treated as samples to be compared. \code{by} must be
+\code{NULL} in this case.
+\item If \code{select} is of length \strong{two} and \code{paired = TRUE}, the two samples
+are considered as \emph{dependent} and a paired test is carried out.
 \item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test
 is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}).
-\item If \code{select} specifies \strong{two} variables and \code{by} is \code{NULL}, the two
-samples are compared. In combination with \code{paired}, a paired test is
-carried out, i.e. samples are considered as \emph{dependent}.
 }}

 \item{by}{Name of the variable indicating the groups. Required if \code{select}
@@ -72,7 +74,7 @@ data(mtcars)
 # one-sample test
 wilcoxon_test(mtcars, "mpg")
 # base R equivalent
-suppressWarning(wilcox.test(mtcars$mpg ~ 1))
+suppressWarnings(wilcox.test(mtcars$mpg ~ 1))

 # paired test
 wilcoxon_test(mtcars, c("mpg", "hp"))

From f074a3e6a367772ca1022f6bb1204ff98fdc4405 Mon Sep 17 00:00:00 2001
From: Daniel
Date: Mon, 13 May 2024 07:40:17 +0200
Subject: [PATCH 79/82] add tests

---
 tests/testthat/test-t_test.R | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/testthat/test-t_test.R b/tests/testthat/test-t_test.R
index 03f38110..1b61ed34 100644
--- a/tests/testthat/test-t_test.R
+++ b/tests/testthat/test-t_test.R
@@ -14,4 +14,16 @@ test_that("t_test", {
   expect_snapshot(t_test(efc, "e17age", "e16sex", weights = "weight"))
   expect_snapshot(t_test(efc, c("e17age", "c160age"), weights = "weight"))
   expect_snapshot(t_test(efc, c("e17age", "c160age"), weights = "weight", paired = TRUE))
+
+  out1 <- t_test(efc, "e17age")
+  out2 <- t.test(efc$e17age ~ 1)
+  expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE)
+  expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE)
+  expect_equal(out1$effect_size, 9.774916, tolerance = 1e-4, ignore_attr = TRUE)
+
+  out1 <- t_test(efc, "e17age", "e16sex")
+  out2 <- t.test(efc$e17age ~ efc$e16sex)
+  expect_equal(out1$statistic, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE)
+  expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE)
+  expect_equal(out1$effect_size, -0.5641989, tolerance = 1e-4, ignore_attr = TRUE)
 })

From 2abee0e4a1dd6beac4ce1e20e43242d21c6fda0f Mon Sep 17 00:00:00 2001
From: Daniel
Date: Mon, 13 May 2024 07:57:23 +0200
Subject: [PATCH 80/82] fix

---
 R/wilcoxon_test.R                   | 8 ++++----
 man/wilcoxon_test.Rd                | 8 ++++----
 tests/testthat/test-wilcoxon_test.R | 4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R
index da00f9b7..e44a9049 100644
--- a/R/wilcoxon_test.R
+++ b/R/wilcoxon_test.R
@@ -24,13 +24,13 @@
 #' data(mtcars)
 #' # one-sample test
 #' wilcoxon_test(mtcars, "mpg")
-#' # base R equivalent
-#' suppressWarnings(wilcox.test(mtcars$mpg ~ 1))
+#' # base R equivalent, we set exact = FALSE to avoid a warning
+#' wilcox.test(mtcars$mpg ~ 1, exact = FALSE)
 #'
 #' # paired test
 #' wilcoxon_test(mtcars, c("mpg", "hp"))
-#' # base R equivalent
-#' wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE)
+#' # base R equivalent, we set exact = FALSE to avoid a warning
+#' wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE, exact = FALSE)
 #'
 #' # when `by` is specified, each group must be of same 
length #' data(iris) diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index dacf41d7..7c9cc925 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -73,13 +73,13 @@ normally distributed variables, the \code{t_test()} function can be used (with data(mtcars) # one-sample test wilcoxon_test(mtcars, "mpg") -# base R equivalent -suppressWarnings(wilcox.test(mtcars$mpg ~ 1)) +# base R equivalent, we set exact = FALSE to avoid a warning +wilcox.test(mtcars$mpg ~ 1, exact = FALSE) # paired test wilcoxon_test(mtcars, c("mpg", "hp")) -# base R equivalent -wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE) +# base R equivalent, we set exact = FALSE to avoid a warning +wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE, exact = FALSE) # when `by` is specified, each group must be of same length data(iris) diff --git a/tests/testthat/test-wilcoxon_test.R b/tests/testthat/test-wilcoxon_test.R index f3cdee67..2ccfe81a 100644 --- a/tests/testthat/test-wilcoxon_test.R +++ b/tests/testthat/test-wilcoxon_test.R @@ -5,13 +5,13 @@ skip_if_not_installed("coin") test_that("wilcoxon_test", { data(mtcars) out1 <- wilcoxon_test(mtcars, "mpg") - out2 <- suppressWarnings(wilcox.test(mtcars$mpg ~ 1)) + out2 <- wilcox.test(mtcars$mpg ~ 1, exact = FALSE) expect_equal(out1$v, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) expect_snapshot(print(out1)) out1 <- wilcoxon_test(mtcars, c("mpg", "hp")) - out2 <- suppressWarnings(wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE)) + out2 <- wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE, exact = FALSE) expect_equal(out1$v, out2$statistic, tolerance = 1e-4, ignore_attr = TRUE) expect_equal(out1$p, out2$p.value, tolerance = 1e-4, ignore_attr = TRUE) expect_snapshot(print(out1)) From 718011ccd558e8f55b65e6893f744739cfd89075 Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 08:29:43 +0200 Subject: [PATCH 81/82] docs --- R/chi_squared_test.R | 23 +++++++++++++++---- R/kruskal_wallis_test.R | 9 ++++++++ R/mann_whitney_test.R | 43 ++++++++++++++++++++++++++++++++++++ R/t_test.R | 9 ++++++++ R/wilcoxon_test.R | 9 ++++++++ man/chi_squared_test.Rd | 41 +++++++++++++++++++++++++++++++++- man/kruskal_wallis_test.Rd | 41 ++++++++++++++++++++++++++++++++++ man/mann_whitney_test.Rd | 45 ++++++++++++++++++++++++++++++++++++++ man/t_test.Rd | 41 ++++++++++++++++++++++++++++++++++ man/wilcoxon_test.Rd | 41 ++++++++++++++++++++++++++++++++++ 10 files changed, 297 insertions(+), 5 deletions(-) diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 1a299632..474ecadd 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -17,6 +17,8 @@ #' @param ... Additional arguments passed down to [`chisq.test()`]. #' @inheritParams mann_whitney_test #' +#' @inheritSection mann_whitney_test Which test to use +#' #' @inherit mann_whitney_test seealso #' #' @return A data frame with test results. The returned effects sizes are @@ -39,10 +41,17 @@ #' [`effectsize::interpret_phi()`], [`effectsize::interpret_cramers_v()`], #' and [`effectsize::interpret_fei()`]. #' -#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., -#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data -#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. -#' \doi{10.3390/math11091982} +#' @references +#' - Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +#' Lüdecke, D. (2023). 
Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. +#' \doi{10.3390/math11091982} +#' +#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +#' Dtsch Med Wochenschr 2007; 132: e24–e25 +#' +#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +#' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 #' #' @examplesIf requireNamespace("effectsize") #' data(efc) @@ -64,6 +73,12 @@ chi_squared_test <- function(data, weights = NULL, paired = FALSE, ...) { + # sanity check - if we only have one variable in "select" and "by" and + # "probabilities" are NULL, set probalities + if (is.null(probabilities) && !is.null(select) && is.null(by) && length(select) == 1) { + probabilities <- rep(1 / length(data[[select]]), length(data[[select]])) + } + if (is.null(probabilities)) { .calculate_chisq(data, select, by, weights, paired, ...) } else { diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index d60bcc81..24bc2cea 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -11,6 +11,15 @@ #' #' @return A data frame with test results. #' +#' @inheritSection mann_whitney_test Which test to use +#' +#' @references +#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +#' Dtsch Med Wochenschr 2007; 132: e24–e25 +#' +#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +#' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +#' #' @details The function simply is a wrapper around [`kruskal.test()`]. The #' weighted version of the Kruskal-Wallis test is based on the **survey** package, #' using [`survey::svyranktest()`]. diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index b858918b..b29b1e64 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -39,6 +39,37 @@ #' @param ... Additional arguments passed to `wilcox.test()` (for unweighted #' tests, i.e. when `weights = NULL`). #' +#' @section Which test to use: +#' The following table provides an overview of which test to use for different +#' types of data. The choice of test depends on the scale of the outcome +#' variable and the number of samples to compare. +#' +#' | Samples | Scale of Outcome | Significance Test | +#' |-----------------|------------------------|---------------------------------| +#' | 1 | binary / nominal | `chi_squared_test()` | +#' | 1 | continuous, not normal | `wilcoxon_test()` | +#' | 1 | continuous, normal | `t_test()` | +#' | 2, independent | binary / nominal | `chi_squared_test()` | +#' | 2, independent | continuous, not normal | `mann_whitney_test()` | +#' | 2, independent | continuous, normal | `t_test()` | +#' | 2, dependent | binary (only 2x2) | `chi_squared_test(paired=TRUE)` | +#' | 2, dependent | continuous, not normal | `wilcoxon_test()` | +#' | 2, dependent | continuous, normal | `t_test(paired=TRUE)` | +#' | >2, independent | continuous, not normal | `kruskal_wallis_test()` | +#' | >2, independent | continuous, normal | `datawizard::means_by_group()` | +#' | >2, dependent | continuous, not normal | _not yet implemented_ (1) | +#' | >2, dependent | continuous, normal | _not yet implemented_ (2) | +#' +#' (1) More than two dependent samples are considered as _repeated measurements_. +#' These samples are usually tested using a [`friedman.test()`], which +#' requires the samples in one variable, the groups to compare in another +#' variable, and a third variable indicating the repeated measurements +#' (subject IDs). 
+#' +#' (2) More than two independent samples are considered as _repeated measurements_. +#' These samples are usually tested using a ANOVA for repeated measurements. +#' A more sophisticated approach would be using a linear mixed model. +#' #' @seealso #' - [`mann_whitney_test()`] for unpaired (independent) samples. #' - [`t_test()`] for parametric t-tests. @@ -49,6 +80,18 @@ #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. #' +#' @references +#' - Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. +#' \doi{10.3390/math11091982} +#' +#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +#' Dtsch Med Wochenschr 2007; 132: e24–e25 +#' +#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +#' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +#' #' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`] #' (the latter to extract effect sizes). The weighted version of the test is #' based on [`survey::svyranktest()`]. diff --git a/R/t_test.R b/R/t_test.R index ac33ecbf..4f7e9bbf 100644 --- a/R/t_test.R +++ b/R/t_test.R @@ -10,11 +10,20 @@ #' samples. #' @inherit mann_whitney_test seealso #' +#' @inheritSection mann_whitney_test Which test to use +#' #' @details Interpretation of effect sizes are based on rules described in #' [`effectsize::interpret_cohens_d()`] and [`effectsize::interpret_hedges_g()`]. #' #' @return A data frame with test results. #' +#' @references +#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +#' Dtsch Med Wochenschr 2007; 132: e24–e25 +#' +#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +#' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +#' #' @examplesIf requireNamespace("effectsize") #' data(sleep) #' # one-sample t-test diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R index e44a9049..5ef4d925 100644 --- a/R/wilcoxon_test.R +++ b/R/wilcoxon_test.R @@ -17,9 +17,18 @@ #' @inheritParams mann_whitney_test #' @inherit mann_whitney_test seealso #' +#' @inheritSection mann_whitney_test Which test to use +#' #' @return A data frame with test results. The function returns p and Z-values #' as well as effect size r and group-rank-means. #' +#' @references +#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +#' Dtsch Med Wochenschr 2007; 132: e24–e25 +#' +#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +#' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +#' #' @examplesIf requireNamespace("coin") #' data(mtcars) #' # one-sample test diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index cc11aa83..a63b8b7f 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -80,6 +80,39 @@ Interpretation of effect sizes are based on rules described in \code{\link[effectsize:interpret_r]{effectsize::interpret_phi()}}, \code{\link[effectsize:interpret_r]{effectsize::interpret_cramers_v()}}, and \code{\link[effectsize:interpret_r]{effectsize::interpret_fei()}}. } +\section{Which test to use}{ + +The following table provides an overview of which test to use for different +types of data. 
The choice of test depends on the scale of the outcome +variable and the number of samples to compare.\tabular{lll}{ + Samples \tab Scale of Outcome \tab Significance Test \cr + 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr + 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 1 \tab continuous, normal \tab \code{t_test()} \cr + 2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr + 2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr + 2, independent \tab continuous, normal \tab \code{t_test()} \cr + 2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr + 2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr + >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr + >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr + >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr + >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr +} + + +(1) More than two dependent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which +requires the samples in one variable, the groups to compare in another +variable, and a third variable indicating the repeated measurements +(subject IDs). + +(2) More than two independent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a ANOVA for repeated measurements. +A more sophisticated approach would be using a linear mixed model. +} + \examples{ \dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) @@ -96,10 +129,16 @@ chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) \dontshow{\}) # examplesIf} } \references{ -Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +\itemize{ +\item Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. \doi{10.3390/math11091982} +\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +Dtsch Med Wochenschr 2007; 132: e24–e25 +\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +} } \seealso{ \itemize{ diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 2f7b6790..de197f59 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -46,6 +46,39 @@ The function simply is a wrapper around \code{\link[=kruskal.test]{kruskal.test( weighted version of the Kruskal-Wallis test is based on the \strong{survey} package, using \code{\link[survey:svyranktest]{survey::svyranktest()}}. } +\section{Which test to use}{ + +The following table provides an overview of which test to use for different +types of data. 
The choice of test depends on the scale of the outcome +variable and the number of samples to compare.\tabular{lll}{ + Samples \tab Scale of Outcome \tab Significance Test \cr + 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr + 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 1 \tab continuous, normal \tab \code{t_test()} \cr + 2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr + 2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr + 2, independent \tab continuous, normal \tab \code{t_test()} \cr + 2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr + 2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr + >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr + >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr + >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr + >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr +} + + +(1) More than two dependent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which +requires the samples in one variable, the groups to compare in another +variable, and a third variable indicating the repeated measurements +(subject IDs). + +(2) More than two independent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a ANOVA for repeated measurements. +A more sophisticated approach would be using a linear mixed model. +} + \examples{ data(efc) # Kruskal-Wallis test for elder's age by education @@ -70,6 +103,14 @@ kruskal_wallis_test(long_data, select = "scales", by = "groups") # base R equivalent kruskal.test(scales ~ groups, data = long_data) } +\references{ +\itemize{ +\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +Dtsch Med Wochenschr 2007; 132: e24–e25 +\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +} +} \seealso{ \itemize{ \item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index be90ae7d..5eaf58c8 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -80,6 +80,39 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: \strong{r} is calcuated as \eqn{r = \frac{|Z|}{\sqrt{n1 + n2}}}. } +\section{Which test to use}{ + +The following table provides an overview of which test to use for different +types of data. 
The choice of test depends on the scale of the outcome +variable and the number of samples to compare.\tabular{lll}{ + Samples \tab Scale of Outcome \tab Significance Test \cr + 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr + 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 1 \tab continuous, normal \tab \code{t_test()} \cr + 2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr + 2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr + 2, independent \tab continuous, normal \tab \code{t_test()} \cr + 2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr + 2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr + >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr + >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr + >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr + >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr +} + + +(1) More than two dependent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which +requires the samples in one variable, the groups to compare in another +variable, and a third variable indicating the repeated measurements +(subject IDs). + +(2) More than two independent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a ANOVA for repeated measurements. +A more sophisticated approach would be using a linear mixed model. +} + \examples{ \dontshow{if (requireNamespace("coin") && requireNamespace("survey")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(efc) @@ -105,6 +138,18 @@ mann_whitney_test(long_data, select = "scales", by = "groups") wilcox.test(scales ~ groups, long_data) \dontshow{\}) # examplesIf} } +\references{ +\itemize{ +\item Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. +\doi{10.3390/math11091982} +\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +Dtsch Med Wochenschr 2007; 132: e24–e25 +\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +} +} \seealso{ \itemize{ \item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. diff --git a/man/t_test.Rd b/man/t_test.Rd index 085896fe..ea89da9c 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -62,6 +62,39 @@ automatically calculates effect sizes. Interpretation of effect sizes are based on rules described in \code{\link[effectsize:interpret_cohens_d]{effectsize::interpret_cohens_d()}} and \code{\link[effectsize:interpret_cohens_d]{effectsize::interpret_hedges_g()}}. } +\section{Which test to use}{ + +The following table provides an overview of which test to use for different +types of data. 
The choice of test depends on the scale of the outcome +variable and the number of samples to compare.\tabular{lll}{ + Samples \tab Scale of Outcome \tab Significance Test \cr + 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr + 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 1 \tab continuous, normal \tab \code{t_test()} \cr + 2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr + 2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr + 2, independent \tab continuous, normal \tab \code{t_test()} \cr + 2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr + 2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr + >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr + >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr + >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr + >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr +} + + +(1) More than two dependent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which +requires the samples in one variable, the groups to compare in another +variable, and a third variable indicating the repeated measurements +(subject IDs). + +(2) More than two independent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a ANOVA for repeated measurements. +A more sophisticated approach would be using a linear mixed model. +} + \examples{ \dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(sleep) @@ -81,6 +114,14 @@ t_test(mtcars, c("mpg", "hp"), paired = TRUE) t.test(mtcars$mpg, mtcars$hp, data = mtcars, paired = TRUE) \dontshow{\}) # examplesIf} } +\references{ +\itemize{ +\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +Dtsch Med Wochenschr 2007; 132: e24–e25 +\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +} +} \seealso{ \itemize{ \item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index 7c9cc925..7b2ef6e4 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -68,6 +68,39 @@ variables are not normally distributed. For large samples, or approximately normally distributed variables, the \code{t_test()} function can be used (with \code{paired = TRUE}). } +\section{Which test to use}{ + +The following table provides an overview of which test to use for different +types of data. 
The choice of test depends on the scale of the outcome +variable and the number of samples to compare.\tabular{lll}{ + Samples \tab Scale of Outcome \tab Significance Test \cr + 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr + 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 1 \tab continuous, normal \tab \code{t_test()} \cr + 2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr + 2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr + 2, independent \tab continuous, normal \tab \code{t_test()} \cr + 2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr + 2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr + 2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr + >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr + >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr + >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr + >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr +} + + +(1) More than two dependent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which +requires the samples in one variable, the groups to compare in another +variable, and a third variable indicating the repeated measurements +(subject IDs). + +(2) More than two independent samples are considered as \emph{repeated measurements}. +These samples are usually tested using a ANOVA for repeated measurements. +A more sophisticated approach would be using a linear mixed model. +} + \examples{ \dontshow{if (requireNamespace("coin")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} data(mtcars) @@ -87,6 +120,14 @@ d <- iris[iris$Species != "setosa", ] wilcoxon_test(d, "Sepal.Width", by = "Species") \dontshow{\}) # examplesIf} } +\references{ +\itemize{ +\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests. +Dtsch Med Wochenschr 2007; 132: e24–e25 +\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer +Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 +} +} \seealso{ \itemize{ \item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples. From c7d93c93aee77698dded122894374efabdde3f6a Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 13 May 2024 08:34:23 +0200 Subject: [PATCH 82/82] docs --- R/mann_whitney_test.R | 19 ++++++++++--------- man/chi_squared_test.Rd | 19 ++++++++++--------- man/kruskal_wallis_test.Rd | 17 +++++++++-------- man/mann_whitney_test.Rd | 17 +++++++++-------- man/t_test.Rd | 17 +++++++++-------- man/wilcoxon_test.Rd | 17 +++++++++-------- 6 files changed, 56 insertions(+), 50 deletions(-) diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index b29b1e64..7874f5a6 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -44,7 +44,7 @@ #' types of data. The choice of test depends on the scale of the outcome #' variable and the number of samples to compare. 
#' -#' | Samples | Scale of Outcome | Significance Test | +#' | **Samples** | **Scale of Outcome** | **Significance Test** | #' |-----------------|------------------------|---------------------------------| #' | 1 | binary / nominal | `chi_squared_test()` | #' | 1 | continuous, not normal | `wilcoxon_test()` | @@ -61,14 +61,15 @@ #' | >2, dependent | continuous, normal | _not yet implemented_ (2) | #' #' (1) More than two dependent samples are considered as _repeated measurements_. -#' These samples are usually tested using a [`friedman.test()`], which -#' requires the samples in one variable, the groups to compare in another -#' variable, and a third variable indicating the repeated measurements -#' (subject IDs). +#' For ordinal or not-normally distributed outcomes, these samples are +#' usually tested using a [`friedman.test()`], which requires the samples +#' in one variable, the groups to compare in another variable, and a third +#' variable indicating the repeated measurements (subject IDs). #' -#' (2) More than two independent samples are considered as _repeated measurements_. -#' These samples are usually tested using a ANOVA for repeated measurements. -#' A more sophisticated approach would be using a linear mixed model. +#' (2) More than two dependent samples are considered as _repeated measurements_. +#' For normally distributed outcomes, these samples are usually tested using +#' a ANOVA for repeated measurements. A more sophisticated approach would +#' be using a linear mixed model. #' #' @seealso #' - [`mann_whitney_test()`] for unpaired (independent) samples. @@ -91,7 +92,7 @@ #' #' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer #' Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8 -#' +#' #' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`] #' (the latter to extract effect sizes). The weighted version of the test is #' based on [`survey::svyranktest()`]. diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index a63b8b7f..9b60c041 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -85,7 +85,7 @@ and \code{\link[effectsize:interpret_r]{effectsize::interpret_fei()}}. The following table provides an overview of which test to use for different types of data. The choice of test depends on the scale of the outcome variable and the number of samples to compare.\tabular{lll}{ - Samples \tab Scale of Outcome \tab Significance Test \cr + \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr 1 \tab continuous, normal \tab \code{t_test()} \cr @@ -103,14 +103,15 @@ variable and the number of samples to compare.\tabular{lll}{ (1) More than two dependent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which -requires the samples in one variable, the groups to compare in another -variable, and a third variable indicating the repeated measurements -(subject IDs). - -(2) More than two independent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a ANOVA for repeated measurements. -A more sophisticated approach would be using a linear mixed model. 
+For ordinal or not-normally distributed outcomes, these samples are +usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples +in one variable, the groups to compare in another variable, and a third +variable indicating the repeated measurements (subject IDs). + +(2) More than two dependent samples are considered as \emph{repeated measurements}. +For normally distributed outcomes, these samples are usually tested using +a ANOVA for repeated measurements. A more sophisticated approach would +be using a linear mixed model. } \examples{ diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index de197f59..8ca0bc9d 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -51,7 +51,7 @@ using \code{\link[survey:svyranktest]{survey::svyranktest()}}. The following table provides an overview of which test to use for different types of data. The choice of test depends on the scale of the outcome variable and the number of samples to compare.\tabular{lll}{ - Samples \tab Scale of Outcome \tab Significance Test \cr + \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr 1 \tab continuous, normal \tab \code{t_test()} \cr @@ -69,14 +69,15 @@ variable and the number of samples to compare.\tabular{lll}{ (1) More than two dependent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which -requires the samples in one variable, the groups to compare in another -variable, and a third variable indicating the repeated measurements -(subject IDs). +For ordinal or not-normally distributed outcomes, these samples are +usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples +in one variable, the groups to compare in another variable, and a third +variable indicating the repeated measurements (subject IDs). -(2) More than two independent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a ANOVA for repeated measurements. -A more sophisticated approach would be using a linear mixed model. +(2) More than two dependent samples are considered as \emph{repeated measurements}. +For normally distributed outcomes, these samples are usually tested using +a ANOVA for repeated measurements. A more sophisticated approach would +be using a linear mixed model. } \examples{ diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 5eaf58c8..2e3787be 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -85,7 +85,7 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb: The following table provides an overview of which test to use for different types of data. The choice of test depends on the scale of the outcome variable and the number of samples to compare.\tabular{lll}{ - Samples \tab Scale of Outcome \tab Significance Test \cr + \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr 1 \tab continuous, normal \tab \code{t_test()} \cr @@ -103,14 +103,15 @@ variable and the number of samples to compare.\tabular{lll}{ (1) More than two dependent samples are considered as \emph{repeated measurements}. 
-These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which -requires the samples in one variable, the groups to compare in another -variable, and a third variable indicating the repeated measurements -(subject IDs). +For ordinal or not-normally distributed outcomes, these samples are +usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples +in one variable, the groups to compare in another variable, and a third +variable indicating the repeated measurements (subject IDs). -(2) More than two independent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a ANOVA for repeated measurements. -A more sophisticated approach would be using a linear mixed model. +(2) More than two dependent samples are considered as \emph{repeated measurements}. +For normally distributed outcomes, these samples are usually tested using +a ANOVA for repeated measurements. A more sophisticated approach would +be using a linear mixed model. } \examples{ diff --git a/man/t_test.Rd b/man/t_test.Rd index ea89da9c..8a5e5194 100644 --- a/man/t_test.Rd +++ b/man/t_test.Rd @@ -67,7 +67,7 @@ Interpretation of effect sizes are based on rules described in The following table provides an overview of which test to use for different types of data. The choice of test depends on the scale of the outcome variable and the number of samples to compare.\tabular{lll}{ - Samples \tab Scale of Outcome \tab Significance Test \cr + \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr 1 \tab continuous, normal \tab \code{t_test()} \cr @@ -85,14 +85,15 @@ variable and the number of samples to compare.\tabular{lll}{ (1) More than two dependent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which -requires the samples in one variable, the groups to compare in another -variable, and a third variable indicating the repeated measurements -(subject IDs). +For ordinal or not-normally distributed outcomes, these samples are +usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples +in one variable, the groups to compare in another variable, and a third +variable indicating the repeated measurements (subject IDs). -(2) More than two independent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a ANOVA for repeated measurements. -A more sophisticated approach would be using a linear mixed model. +(2) More than two dependent samples are considered as \emph{repeated measurements}. +For normally distributed outcomes, these samples are usually tested using +a ANOVA for repeated measurements. A more sophisticated approach would +be using a linear mixed model. } \examples{ diff --git a/man/wilcoxon_test.Rd b/man/wilcoxon_test.Rd index 7b2ef6e4..ca195c6c 100644 --- a/man/wilcoxon_test.Rd +++ b/man/wilcoxon_test.Rd @@ -73,7 +73,7 @@ normally distributed variables, the \code{t_test()} function can be used (with The following table provides an overview of which test to use for different types of data. 
The choice of test depends on the scale of the outcome variable and the number of samples to compare.\tabular{lll}{ - Samples \tab Scale of Outcome \tab Significance Test \cr + \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr 1 \tab binary / nominal \tab \code{chi_squared_test()} \cr 1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr 1 \tab continuous, normal \tab \code{t_test()} \cr @@ -91,14 +91,15 @@ variable and the number of samples to compare.\tabular{lll}{ (1) More than two dependent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which -requires the samples in one variable, the groups to compare in another -variable, and a third variable indicating the repeated measurements -(subject IDs). +For ordinal or not-normally distributed outcomes, these samples are +usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples +in one variable, the groups to compare in another variable, and a third +variable indicating the repeated measurements (subject IDs). -(2) More than two independent samples are considered as \emph{repeated measurements}. -These samples are usually tested using a ANOVA for repeated measurements. -A more sophisticated approach would be using a linear mixed model. +(2) More than two dependent samples are considered as \emph{repeated measurements}. +For normally distributed outcomes, these samples are usually tested using +a ANOVA for repeated measurements. A more sophisticated approach would +be using a linear mixed model. } \examples{