From bf5663b0192fcf72569abf6493cc76d6f42fde43 Mon Sep 17 00:00:00 2001 From: Daniel Date: Thu, 9 May 2024 14:32:32 +0200 Subject: [PATCH] docs --- NAMESPACE | 3 --- R/chi_squared_test.R | 43 ++++++++++++++++++++++++++----- R/find_beta.R | 53 +++++++++++++++++--------------------- R/helpfunctions.R | 1 + R/kruskal_wallis_test.R | 9 +++++-- R/mann_whitney_test.R | 14 +++++++--- man/chi_squared_test.Rd | 28 ++++++++++++++++---- man/find_beta.Rd | 29 ++++++++++----------- man/kruskal_wallis_test.Rd | 10 +++++-- man/mann_whitney_test.Rd | 14 +++++++--- 10 files changed, 133 insertions(+), 71 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index e351d2ca..5a224af0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -167,7 +167,6 @@ importFrom(sjmisc,is_num_fac) importFrom(sjmisc,str_contains) importFrom(sjmisc,trim) importFrom(sjmisc,typical_value) -importFrom(stats,approx) importFrom(stats,as.formula) importFrom(stats,chisq.test) importFrom(stats,coef) @@ -183,12 +182,10 @@ importFrom(stats,model.matrix) importFrom(stats,na.omit) importFrom(stats,na.pass) importFrom(stats,nobs) -importFrom(stats,pbeta) importFrom(stats,pf) importFrom(stats,pnorm) importFrom(stats,predict.glm) importFrom(stats,pt) -importFrom(stats,qcauchy) importFrom(stats,qf) importFrom(stats,qnorm) importFrom(stats,resid) diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 960ba31c..3fd69326 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -1,9 +1,10 @@ #' @title Chi-Squared Test #' @name chi_squared_test -#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank -#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]). -#' -#' The function reports p and Z-values as well as effect size r and group-rank-means. +#' @description This function performs a \eqn{\chi^2} test for contingency +#' tables or tests for given probabilities.
The returned effect sizes are +#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) +#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against +#' given probabilities (see _Ben-Shachar et al. 2023_). #' #' @param probabilities A numeric vector of probabilities for each cell in the #' contingency table. The length of the vector must match the number of cells @@ -14,7 +15,22 @@ #' @param ... Additional arguments passed down to [`chisq.test()`]. #' @inheritParams mann_whitney_test #' -#' @return A data frame with test results. +#' @return A data frame with test results. The returned effect sizes are +#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) +#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against +#' given probabilities. +#' +#' @details The function is a wrapper around [`chisq.test()`] and +#' [`fisher.test()`] (for small expected values) for contingency tables, and +#' `chisq.test()` for given probabilities. When `probabilities` are provided, +#' these are rescaled to sum to 1 (i.e. `rescale.p = TRUE`). When `fisher.test()` +#' is called, simulated p-values are returned (i.e. `simulate.p.value = TRUE`, +#' see `?fisher.test`). +#' +#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. +#' \doi{10.3390/math11091982} #' #' @examples #' data(efc) @@ -105,6 +121,7 @@ chi_squared_test <- function(data, ) class(out) <- c("sj_htest_chi", "data.frame") attr(out, "weighted") <- !is.null(weights) + attr(out, "fisher") <- isTRUE(startsWith(htest$method, "Fisher")) attr(out, "caption") <- "Contingency Tables" out } @@ -189,15 +206,27 @@ print.sj_htest_chi <- function(x, ...)
{ weight_string <- "" } + fisher <- attributes(x)$fisher + # headline insight::print_color(sprintf( - "\n# Chi-Squared Test for %s%s\n\n", + "\n# Chi-Squared Test for %s%s\n", attributes(x)$caption, weight_string ), "blue") + # Fisher's exact test? + if (fisher) { + insight::print_color(" (using Fisher's exact test due to small expected values)\n", "blue") # nolint + } + + cat("\n") + # data info - insight::print_color(sprintf(" Data: %s (n = %i)\n", x$data, round(x$n_obs)), "cyan") + insight::print_color( + sprintf(" Data: %s (n = %i)\n", x$data, round(x$n_obs)), + "cyan" + ) # prepare and align strings eff_symbol <- .format_symbols(x$effect_size_name) diff --git a/R/find_beta.R b/R/find_beta.R index 7e45408b..1ae733cc 100644 --- a/R/find_beta.R +++ b/R/find_beta.R @@ -1,10 +1,10 @@ #' @title Determining distribution parameters #' @name find_beta #' -#' @description \code{find_beta()}, \code{find_normal()} and \code{find_cauchy()} find the +#' @description `find_beta()`, `find_normal()` and `find_cauchy()` find the #' shape, mean and standard deviation resp. the location and scale parameters #' to describe the beta, normal or cauchy distribution, based on two -#' percentiles. \code{find_beta2()} finds the shape parameters for a Beta +#' percentiles. `find_beta2()` finds the shape parameters for a Beta #' distribution, based on a probability value and its standard error #' or confidence intervals. #' @@ -14,14 +14,14 @@ #' @param p2 Probability of the second percentile. #' @param x Numeric, a probability value between 0 and 1. Typically indicates #' a prevalence rate of an outcome of interest; Or an integer value -#' with the number of observed events. In this case, specify \code{n} +#' with the number of observed events. In this case, specify `n` #' to indicate the toral number of observations. -#' @param se The standard error of \code{x}. Either \code{se} or \code{ci} must +#' @param se The standard error of `x`. Either `se` or `ci` must #' be specified. 
-#' @param ci The upper limit of the confidence interval of \code{x}. Either -#' \code{se} or \code{ci} must be specified. +#' @param ci The upper limit of the confidence interval of `x`. Either +#' `se` or `ci` must be specified. #' @param n Numeric, number of total observations. Needs to be specified, if -#' \code{x} is an integer (number of observed events), and no +#' `x` is an integer (number of observed events), and no #' probability. See 'Examples'. #' #' @return A list of length two, with the two distribution parameters than can @@ -29,21 +29,20 @@ #' the shape for the given input parameters. #' #' @details These functions can be used to find parameter for various distributions, -#' to define prior probabilities for Bayesian analyses. \code{x1}, -#' \code{p1}, \code{x2} and \code{p2} are parameters that describe two -#' quantiles. Given this knowledge, the distribution parameters are -#' returned. \cr \cr -#' Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence -#' rate or similar probability, and its standard deviation or confidence -#' interval. In this case. \code{x} should be a probability, -#' for example a prevalence rate of a certain event. \code{se} then -#' needs to be the standard error for this probability. Alternatively, -#' \code{ci} can be specified, which should indicate the upper limit -#' of the confidence interval od the probability (prevalence rate) \code{x}. -#' If the number of events out of a total number of trials is known -#' (e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number -#' of observed events, while \code{n} indicates the total amount of trials -#' (in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}). +#' to define prior probabilities for Bayesian analyses. `x1`, `p1`, `x2` and +#' `p2` are parameters that describe two quantiles. Given this knowledge, the +#' distribution parameters are returned. +#' +#' Use `find_beta2()`, if the known parameters are, e.g. 
a prevalence rate or +#' similar probability, and its standard deviation or confidence interval. In +#' this case, `x` should be a probability, for example a prevalence rate of a +#' certain event. `se` then needs to be the standard error for this probability. +#' Alternatively, `ci` can be specified, which should indicate the upper limit +#' of the confidence interval of the probability (prevalence rate) `x`. If the +#' number of events out of a total number of trials is known (e.g. 12 heads out +#' of 30 coin tosses), `x` can also be the number of observed events, while `n` +#' indicates the total amount of trials (in the above example, the function +#' call would be: `find_beta2(x = 12, n = 30)`). #' #' @references Cook JD. Determining distribution parameters from quantiles. 2010: Department of Biostatistics, Texas (\href{https://www.johndcook.com/quantiles_parameters.pdf}{PDF}) #' @@ -79,14 +78,12 @@ #' shapes <- find_beta2(x = 3, n = 20) #' curve(dbeta(x, shapes[[1]], shapes[[2]])) #' -#' @importFrom stats pbeta approx -#' @importFrom purrr map_dbl #' @export find_beta <- function(x1, p1, x2, p2) { logK <- seq(-5, 10, length = 200) K <- exp(logK) - m <- purrr::map_dbl(K, ~ betaprior(.x, x1, p1)) + m <- unlist(lapply(K, betaprior, x = x1, p = p1)) prob2 <- stats::pbeta(x2, K * m, K * (1 - m)) ind <- ((prob2 > 0) & (prob2 < 1)) @@ -127,13 +124,13 @@ betaprior <- function(K, x, p) { find_beta2 <- function(x, se, ci, n) { # check if all required arguments are given if (missing(se) && missing(ci) && missing(n)) { - stop("Either `se` or `ci`, or `n` must be specified.", call. = F) + insight::format_error("Either `se` or `ci`, or `n` must be specified.") } # for number of observations, compute variance of beta distribution if (!missing(n)) { if (!is.integer(x) && x < 1) - stop("If `n` is given, x` must be an integer value greater than 0.", call.
= F) + insight::format_error("If `n` is given, x` must be an integer value greater than 0.") # compute 2 SD from beta variance bvar <- 2 * sqrt((x * n) / ((x + n)^2 * (x + n + 1))) @@ -164,7 +161,6 @@ find_beta2 <- function(x, se, ci, n) { } -#' @importFrom stats qcauchy #' @rdname find_beta #' @export find_cauchy <- function(x1, p1, x2, p2) { @@ -177,7 +173,6 @@ find_cauchy <- function(x1, p1, x2, p2) { -#' @importFrom stats qnorm #' @rdname find_beta #' @export find_normal <- function(x1, p1, x2, p2) { diff --git a/R/helpfunctions.R b/R/helpfunctions.R index 856f955e..ce36f369 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -84,6 +84,7 @@ get_grouped_data <- function(x) { x <- gsub("Rho", "\u03C1", x, ignore.case = TRUE) x <- gsub("Mu", "\u03BC", x, ignore.case = TRUE) x <- gsub("Theta", "\u03B8", x, ignore.case = TRUE) + x <- gsub("Fei", "\u05E4\u200E", x, ignore.case = TRUE) } x } diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index 80b6b7b1..d004c738 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -1,12 +1,17 @@ #' @title Kruskal-Wallis-Test #' @name kruskal_wallis_test -#' @description This function performs a Kruskal-Wallis rank sum test, see -#' [`kruskal.test()`] and [`coin::kruskal_test()`]). +#' @description This function performs a Kruskal-Wallis rank sum test, to test +#' the null hypothesis that the population medians of all groups are +#' equal. The alternative is that they differ in at least one. #' #' @inheritParams mann_whitney_test #' #' @return A data frame with test results. #' +#' @details The function simply is a wrapper around [`kruskal.test()`]. The +#' weighted version of the Kruskal-Wallis test is based on the `survey` package, +#' using [`survey::svyranktest()`].
+#' #' @examples #' data(efc) #' # Kruskal-Wallis-Test for elder's age by education diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index 6dbbc257..2473c81c 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -1,9 +1,11 @@ #' @title Mann-Whitney-Test #' @name mann_whitney_test #' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank -#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]). +#' sum test for _unpaired_ samples). #' -#' The function reports p and Z-values as well as effect size r and group-rank-means. +#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +#' independent samples have identical continuous distributions. It can be used +#' when the two continuous variables are not normally distributed. #' #' @param data A data frame. #' @param select Name of the dependent variable (as string) to be used for the @@ -16,9 +18,13 @@ #' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"` #' (default). See [`coin::wilcox_test()`] for details. #' -#' @return A data frame with test results. +#' @return A data frame with test results. The function returns p and Z-values +#' as well as effect size r and group-rank-means. +#' +#' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`] +#' (the latter to extract effect sizes). The weighted version of the test is +#' based on [`survey::svyranktest()`]. #' -#' @details This function calls [`coin::wilcox_test()`] to extract effect sizes. #' Interpretation of the effect size **r**, as a rule-of-thumb: #' #' - small effect >= 0.1 diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index abb066fb..e3196d9a 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -35,13 +35,25 @@ must be \code{NULL}.
The probabilities must sum to 1.} \item{...}{Additional arguments passed down to \code{\link[=chisq.test]{chisq.test()}}.} } \value{ -A data frame with test results. +A data frame with test results. The returned effect sizes are +Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) +for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against +given probabilities. } \description{ -This function performs a Mann-Whitney-Test (or Wilcoxon rank -sum test for \emph{unpaired} samples, see \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}). - -The function reports p and Z-values as well as effect size r and group-rank-means. +This function performs a \eqn{\chi^2} test for contingency +tables or tests for given probabilities. The returned effect sizes are +Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) +for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against +given probabilities (see \emph{Ben-Shachar et al. 2023}). +} +\details{ +The function is a wrapper around \code{\link[=chisq.test]{chisq.test()}} and +\code{\link[=fisher.test]{fisher.test()}} (for small expected values) for contingency tables, and +\code{chisq.test()} for given probabilities. When \code{probabilities} are provided, +these are rescaled to sum to 1 (i.e. \code{rescale.p = TRUE}). When \code{fisher.test()} +is called, simulated p-values are returned (i.e. \code{simulate.p.value = TRUE}, +see \code{?fisher.test}). } \examples{ data(efc) @@ -53,3 +65,9 @@ chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight") # Chi-squared-test for given probabilities chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7)) } +\references{ +Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+\doi{10.3390/math11091982} +} diff --git a/man/find_beta.Rd b/man/find_beta.Rd index 16d28385..ccd5dcc1 100644 --- a/man/find_beta.Rd +++ b/man/find_beta.Rd @@ -54,21 +54,20 @@ or confidence intervals. } \details{ These functions can be used to find parameter for various distributions, -to define prior probabilities for Bayesian analyses. \code{x1}, -\code{p1}, \code{x2} and \code{p2} are parameters that describe two -quantiles. Given this knowledge, the distribution parameters are -returned. \cr \cr -Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence -rate or similar probability, and its standard deviation or confidence -interval. In this case. \code{x} should be a probability, -for example a prevalence rate of a certain event. \code{se} then -needs to be the standard error for this probability. Alternatively, -\code{ci} can be specified, which should indicate the upper limit -of the confidence interval od the probability (prevalence rate) \code{x}. -If the number of events out of a total number of trials is known -(e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number -of observed events, while \code{n} indicates the total amount of trials -(in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}). +to define prior probabilities for Bayesian analyses. \code{x1}, \code{p1}, \code{x2} and +\code{p2} are parameters that describe two quantiles. Given this knowledge, the +distribution parameters are returned. + +Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence rate or +similar probability, and its standard deviation or confidence interval. In +this case, \code{x} should be a probability, for example a prevalence rate of a +certain event. \code{se} then needs to be the standard error for this probability. +Alternatively, \code{ci} can be specified, which should indicate the upper limit +of the confidence interval of the probability (prevalence rate) \code{x}.
If the +number of events out of a total number of trials is known (e.g. 12 heads out +of 30 coin tosses), \code{x} can also be the number of observed events, while \code{n} +indicates the total amount of trials (in the above example, the function +call would be: \code{find_beta2(x = 12, n = 30)}). } \examples{ # example from blogpost: diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 366a544c..7be4e2e5 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -22,8 +22,14 @@ not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if A data frame with test results. } \description{ -This function performs a Kruskal-Wallis rank sum test, see -\code{\link[=kruskal.test]{kruskal.test()}} and \code{\link[coin:LocationTests]{coin::kruskal_test()}}). +This function performs a Kruskal-Wallis rank sum test, to test +the null hypothesis that the population medians of all groups are +equal. The alternative is that they differ in at least one. +} +\details{ +The function simply is a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The +weighted version of the Kruskal-Wallis test is based on the \code{survey} package, +using \code{\link[survey:svyranktest]{survey::svyranktest()}}. } \examples{ data(efc) diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 2256df8b..1401a040 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -29,16 +29,22 @@ should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{ (default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.} } \value{ -A data frame with test results. +A data frame with test results. The function returns p and Z-values +as well as effect size r and group-rank-means.
} \description{ This function performs a Mann-Whitney-Test (or Wilcoxon rank -sum test for \emph{unpaired} samples, see \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}). +sum test for \emph{unpaired} samples). -The function reports p and Z-values as well as effect size r and group-rank-means. +A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two +independent samples have identical continuous distributions. It can be used +when the two continuous variables are not normally distributed. } \details{ -This function calls \code{\link[coin:LocationTests]{coin::wilcox_test()}} to extract effect sizes. +This function is based on \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}} +(the latter to extract effect sizes). The weighted version of the test is +based on \code{\link[survey:svyranktest]{survey::svyranktest()}}. + Interpretation of the effect size \strong{r}, as a rule-of-thumb: \itemize{ \item small effect >= 0.1