docs

strengejacke · May 9, 2024 · bf5663b · bf5663b
1 parent 947b485
commit bf5663b
Show file tree

Hide file tree

Showing 10 changed files with 133 additions and 71 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -167,7 +167,6 @@ importFrom(sjmisc,is_num_fac)
 importFrom(sjmisc,str_contains)
 importFrom(sjmisc,trim)
 importFrom(sjmisc,typical_value)
-importFrom(stats,approx)
 importFrom(stats,as.formula)
 importFrom(stats,chisq.test)
 importFrom(stats,coef)
@@ -183,12 +182,10 @@ importFrom(stats,model.matrix)
 importFrom(stats,na.omit)
 importFrom(stats,na.pass)
 importFrom(stats,nobs)
-importFrom(stats,pbeta)
 importFrom(stats,pf)
 importFrom(stats,pnorm)
 importFrom(stats,predict.glm)
 importFrom(stats,pt)
-importFrom(stats,qcauchy)
 importFrom(stats,qf)
 importFrom(stats,qnorm)
 importFrom(stats,resid)

diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R
@@ -1,9 +1,10 @@
 #' @title Chi-Squared Test
 #' @name chi_squared_test
-#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
-#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]).
-#'
-#' The function reports p and Z-values as well as effect size r and group-rank-means.
+#' @description This function performs a \eqn{\chi^2} test for contingency
+#' tables or tests for given probabilities. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities (see _Ben-Shachar et al. 2023_).
 #'
 #' @param probabilities A numeric vector of probabilities for each cell in the
 #' contingency table. The length of the vector must match the number of cells
@@ -14,7 +15,22 @@
 #' @param ... Additional arguments passed down to [`chisq.test()`].
 #' @inheritParams mann_whitney_test
 #'
-#' @return A data frame with test results.
+#' @return A data frame with test results. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities.
+#'
+#' @details The function is a wrapper around [`chisq.test()`] and
+#' [`fisher.test()`] (for small expected values) for contingency tables, and
+#' `chisq.test()` for given probabilities. When `probabilities` are provided,
+#' these are rescaled to sum to 1 (i.e. `rescale.p = TRUE`). When `fisher.test()`
+#' is called, simulated p-values are returned (i.e. `simulate.p.value = TRUE`,
+#' see `?fisher.test`).
+#'
+#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+#' \doi{10.3390/math11091982}
 #'
 #' @examples
 #' data(efc)
@@ -105,6 +121,7 @@ chi_squared_test <- function(data,
   )
   class(out) <- c("sj_htest_chi", "data.frame")
   attr(out, "weighted") <- !is.null(weights)
+  attr(out, "fisher") <- isTRUE(startsWith(htest$method, "Fisher"))
   attr(out, "caption") <- "Contingency Tables"
   out
 }
@@ -189,15 +206,27 @@ print.sj_htest_chi <- function(x, ...) {
     weight_string <- ""
   }
 
+  fisher <- attributes(x)$fisher
+
   # headline
   insight::print_color(sprintf(
-    "\n# Chi-Squared Test for %s%s\n\n",
+    "\n# Chi-Squared Test for %s%s\n",
     attributes(x)$caption,
     weight_string
   ), "blue")
 
+  # Fisher's exact test?
+  if (fisher) {
+    insight::print_color("  (using Fisher's exact test due to small expected values)\n", "blue") # nolint
+  }
+
+  cat("\n")
+
   # data info
-  insight::print_color(sprintf("  Data: %s (n = %i)\n", x$data, round(x$n_obs)), "cyan")
+  insight::print_color(
+    sprintf("  Data: %s (n = %i)\n", x$data, round(x$n_obs)),
+    "cyan"
+  )
 
   # prepare and align strings
   eff_symbol <- .format_symbols(x$effect_size_name)

diff --git a/R/find_beta.R b/R/find_beta.R
@@ -1,10 +1,10 @@
 #' @title Determining distribution parameters
 #' @name find_beta
 #'
-#' @description \code{find_beta()}, \code{find_normal()} and \code{find_cauchy()} find the
+#' @description `find_beta()`, `find_normal()` and `find_cauchy()` find the
 #'              shape, mean and standard deviation resp. the location and scale parameters
 #'              to describe the beta, normal or cauchy distribution, based on two
-#'              percentiles. \code{find_beta2()} finds the shape parameters for a Beta
+#'              percentiles. `find_beta2()` finds the shape parameters for a Beta
 #'              distribution, based on a probability value and its standard error
 #'              or confidence intervals.
 #'
@@ -14,36 +14,35 @@
 #' @param p2 Probability of the second percentile.
 #' @param x Numeric, a probability value between 0 and 1. Typically indicates
 #'          a prevalence rate of an outcome of interest; Or an integer value
-#'          with the number of observed events. In this case, specify \code{n}
+#'          with the number of observed events. In this case, specify `n`
 #'          to indicate the toral number of observations.
-#' @param se The standard error of \code{x}. Either \code{se} or \code{ci} must
+#' @param se The standard error of `x`. Either `se` or `ci` must
 #'          be specified.
-#' @param ci The upper limit of the confidence interval of \code{x}. Either
-#'          \code{se} or \code{ci} must be specified.
+#' @param ci The upper limit of the confidence interval of `x`. Either
+#'          `se` or `ci` must be specified.
 #' @param n Numeric, number of total observations. Needs to be specified, if
-#'          \code{x} is an integer (number of observed events), and no
+#'          `x` is an integer (number of observed events), and no
 #'          probability. See 'Examples'.
 #'
 #' @return A list of length two, with the two distribution parameters than can
 #'         be used to define the distribution, which (best) describes
 #'         the shape for the given input parameters.
 #'
 #' @details These functions can be used to find parameter for various distributions,
-#'          to define prior probabilities for Bayesian analyses. \code{x1},
-#'          \code{p1}, \code{x2} and \code{p2} are parameters that describe two
-#'          quantiles. Given this knowledge, the distribution parameters are
-#'          returned. \cr \cr
-#'          Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence
-#'          rate or similar probability, and its standard deviation or confidence
-#'          interval. In this case. \code{x} should be a probability,
-#'          for example a prevalence rate of a certain event. \code{se} then
-#'          needs to be the standard error for this probability. Alternatively,
-#'          \code{ci} can be specified, which should indicate the upper limit
-#'          of the confidence interval od the probability (prevalence rate) \code{x}.
-#'          If the number of events out of a total number of trials is known
-#'          (e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number
-#'          of observed events, while \code{n} indicates the total amount of trials
-#'          (in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}).
+#' to define prior probabilities for Bayesian analyses. `x1`, `p1`, `x2` and
+#' `p2` are parameters that describe two quantiles. Given this knowledge, the
+#' distribution parameters are returned.
+#'
+#' Use `find_beta2()`, if the known parameters are, e.g. a prevalence rate or
+#' similar probability, and its standard deviation or confidence interval. In
+#' this case, `x` should be a probability, for example a prevalence rate of a
+#' certain event. `se` then needs to be the standard error for this probability.
+#' Alternatively, `ci` can be specified, which should indicate the upper limit
+#' of the confidence interval of the probability (prevalence rate) `x`. If the
+#' number of events out of a total number of trials is known (e.g. 12 heads out
+#' of 30 coin tosses), `x` can also be the number of observed events, while `n`
+#' indicates the total amount of trials (in the above example, the function
+#' call would be: `find_beta2(x = 12, n = 30)`).
 #'
 #' @references Cook JD. Determining distribution parameters from quantiles. 2010: Department of Biostatistics, Texas (\href{https://www.johndcook.com/quantiles_parameters.pdf}{PDF})
 #'
@@ -79,14 +78,12 @@
 #' shapes <- find_beta2(x = 3, n = 20)
 #' curve(dbeta(x, shapes[[1]], shapes[[2]]))
 #'
-#' @importFrom stats pbeta approx
-#' @importFrom purrr map_dbl
 #' @export
 find_beta <- function(x1, p1, x2, p2) {
   logK <- seq(-5, 10, length = 200)
   K <- exp(logK)
 
-  m <- purrr::map_dbl(K, ~ betaprior(.x, x1, p1))
+  m <- unlist(lapply(K, betaprior, x = x1, p = p1))
 
   prob2 <- stats::pbeta(x2, K * m, K * (1 - m))
   ind <- ((prob2 > 0) & (prob2 < 1))
@@ -127,13 +124,13 @@ betaprior <- function(K, x, p) {
 find_beta2 <- function(x, se, ci, n) {
   # check if all required arguments are given
   if (missing(se) && missing(ci) && missing(n)) {
-    stop("Either `se` or `ci`, or `n` must be specified.", call. = F)
+    insight::format_error("Either `se` or `ci`, or `n` must be specified.")
   }
 
   # for number of observations, compute variance of beta distribution
   if (!missing(n)) {
     if (!is.integer(x) && x < 1)
-      stop("If `n` is given, x` must be an integer value greater than 0.", call. = F)
+      insight::format_error("If `n` is given, x` must be an integer value greater than 0.")
 
     # compute 2 SD from beta variance
     bvar <- 2 * sqrt((x * n) / ((x + n)^2 * (x + n + 1)))
@@ -164,7 +161,6 @@ find_beta2 <- function(x, se, ci, n) {
 }
 
 
-#' @importFrom stats qcauchy
 #' @rdname find_beta
 #' @export
 find_cauchy <- function(x1, p1, x2, p2) {
@@ -177,7 +173,6 @@ find_cauchy <- function(x1, p1, x2, p2) {
 
 
 
-#' @importFrom stats qnorm
 #' @rdname find_beta
 #' @export
 find_normal <- function(x1, p1, x2, p2) {

diff --git a/R/helpfunctions.R b/R/helpfunctions.R
@@ -84,6 +84,7 @@ get_grouped_data <- function(x) {
     x <- gsub("Rho", "\u03C1", x, ignore.case = TRUE)
     x <- gsub("Mu", "\u03BC", x, ignore.case = TRUE)
     x <- gsub("Theta", "\u03B8", x, ignore.case = TRUE)
+    x <- gsub("Fei", "\u05E4\u200E", x, ignore.case = TRUE)
   }
   x
 }

diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R
@@ -1,12 +1,17 @@
 #' @title Kruskal-Wallis-Test
 #' @name kruskal_wallis_test
-#' @description This function performs a Kruskal-Wallis rank sum test, see
-#' [`kruskal.test()`] and [`coin::kruskal_test()`]).
+#' @description This function performs a Kruskal-Wallis rank sum test, to test
+#' the null hypothesis that the population medians of all groups are
+#' equal. The alternative is that they differ in at least one.
 #'
 #' @inheritParams mann_whitney_test
 #'
 #' @return A data frame with test results.
 #'
+#' @details The function is simply a wrapper around [`kruskal.test()`]. The
+#' weighted version of the Kruskal-Wallis test is based on the `survey` package,
+#' using [`survey::svyranktest()`].
+#'
 #' @examples
 #' data(efc)
 #' # Kruskal-Wallis-Test for elder's age by education

diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R
@@ -1,9 +1,11 @@
 #' @title Mann-Whitney-Test
 #' @name mann_whitney_test
 #' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
-#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]).
+#' sum test) for _unpaired_ samples.
 #'
-#' The function reports p and Z-values as well as effect size r and group-rank-means.
+#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
+#' independent samples have identical continuous distributions. It can be used
+#' when the two continuous variables are not normally distributed.
 #'
 #' @param data A data frame.
 #' @param select Name of the dependent variable (as string) to be used for the
@@ -16,9 +18,13 @@
 #' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"`
 #' (default). See [`coin::wilcox_test()`] for details.
 #'
-#' @return A data frame with test results.
+#' @return A data frame with test results. The function returns p and Z-values
+#' as well as effect size r and group-rank-means.
+#'
+#' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`]
+#' (the latter to extract effect sizes). The weighted version of the test is
+#' based on [`survey::svyranktest()`].
 #'
-#' @details This function calls [`coin::wilcox_test()`] to extract effect sizes.
 #' Interpretation of the effect size **r**, as a rule-of-thumb:
 #'
 #' - small effect >= 0.1

diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd
diff --git a/man/find_beta.Rd b/man/find_beta.Rd
diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd
diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd