From bf5663b0192fcf72569abf6493cc76d6f42fde43 Mon Sep 17 00:00:00 2001
From: Daniel <mail@danielluedecke.de>
Date: Thu, 9 May 2024 14:32:32 +0200
Subject: [PATCH] docs

---
 NAMESPACE                  |  3 ---
 R/chi_squared_test.R       | 43 ++++++++++++++++++++++++++-----
 R/find_beta.R              | 53 +++++++++++++++++---------------------
 R/helpfunctions.R          |  1 +
 R/kruskal_wallis_test.R    |  9 +++++--
 R/mann_whitney_test.R      | 14 +++++++---
 man/chi_squared_test.Rd    | 28 ++++++++++++++++----
 man/find_beta.Rd           | 29 ++++++++++-----------
 man/kruskal_wallis_test.Rd | 10 +++++--
 man/mann_whitney_test.Rd   | 14 +++++++---
 10 files changed, 133 insertions(+), 71 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index e351d2ca..5a224af0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -167,7 +167,6 @@ importFrom(sjmisc,is_num_fac)
 importFrom(sjmisc,str_contains)
 importFrom(sjmisc,trim)
 importFrom(sjmisc,typical_value)
-importFrom(stats,approx)
 importFrom(stats,as.formula)
 importFrom(stats,chisq.test)
 importFrom(stats,coef)
@@ -183,12 +182,10 @@ importFrom(stats,model.matrix)
 importFrom(stats,na.omit)
 importFrom(stats,na.pass)
 importFrom(stats,nobs)
-importFrom(stats,pbeta)
 importFrom(stats,pf)
 importFrom(stats,pnorm)
 importFrom(stats,predict.glm)
 importFrom(stats,pt)
-importFrom(stats,qcauchy)
 importFrom(stats,qf)
 importFrom(stats,qnorm)
 importFrom(stats,resid)
diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R
index 960ba31c..3fd69326 100644
--- a/R/chi_squared_test.R
+++ b/R/chi_squared_test.R
@@ -1,9 +1,10 @@
 #' @title Chi-Squared Test
 #' @name chi_squared_test
-#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
-#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]).
-#'
-#' The function reports p and Z-values as well as effect size r and group-rank-means.
+#' @description This function performs a \eqn{\chi^2}{chi^2} test for contingency
+#' tables or tests for given probabilities. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities (see _Ben-Shachar et al. 2023_).
 #'
 #' @param probabilities A numeric vector of probabilities for each cell in the
 #' contingency table. The length of the vector must match the number of cells
@@ -14,7 +15,22 @@
 #' @param ... Additional arguments passed down to [`chisq.test()`].
 #' @inheritParams mann_whitney_test
 #'
-#' @return A data frame with test results.
+#' @return A data frame with test results. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities.
+#'
+#' @details The function is a wrapper around [`chisq.test()`] and
+#' [`fisher.test()`] (for small expected values) for contingency tables, and
+#' `chisq.test()` for given probabilities. When `probabilities` are provided,
+#' these are rescaled to sum to 1 (i.e. `rescale.p = TRUE`). When `fisher.test()`
+#' is called, simulated p-values are returned (i.e. `simulate.p.value = TRUE`,
+#' see `?fisher.test`).
+#'
+#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+#' \doi{10.3390/math11091982}
 #'
 #' @examples
 #' data(efc)
@@ -105,6 +121,7 @@ chi_squared_test <- function(data,
   )
   class(out) <- c("sj_htest_chi", "data.frame")
   attr(out, "weighted") <- !is.null(weights)
+  attr(out, "fisher") <- isTRUE(startsWith(htest$method, "Fisher"))
   attr(out, "caption") <- "Contingency Tables"
   out
 }
@@ -189,15 +206,27 @@ print.sj_htest_chi <- function(x, ...) {
     weight_string <- ""
   }
 
+  fisher <- attributes(x)$fisher
+
   # headline
   insight::print_color(sprintf(
-    "\n# Chi-Squared Test for %s%s\n\n",
+    "\n# Chi-Squared Test for %s%s\n",
     attributes(x)$caption,
     weight_string
   ), "blue")
 
+  # Fisher's exact test?
+  if (fisher) {
+    insight::print_color("  (using Fisher's exact test due to small expected values)\n", "blue") # nolint
+  }
+
+  cat("\n")
+
   # data info
-  insight::print_color(sprintf("  Data: %s (n = %i)\n", x$data, round(x$n_obs)), "cyan")
+  insight::print_color(
+    sprintf("  Data: %s (n = %i)\n", x$data, round(x$n_obs)),
+    "cyan"
+  )
 
   # prepare and align strings
   eff_symbol <- .format_symbols(x$effect_size_name)
diff --git a/R/find_beta.R b/R/find_beta.R
index 7e45408b..1ae733cc 100644
--- a/R/find_beta.R
+++ b/R/find_beta.R
@@ -1,10 +1,10 @@
 #' @title Determining distribution parameters
 #' @name find_beta
 #'
-#' @description \code{find_beta()}, \code{find_normal()} and \code{find_cauchy()} find the
+#' @description `find_beta()`, `find_normal()` and `find_cauchy()` find the
 #'              shape, mean and standard deviation resp. the location and scale parameters
 #'              to describe the beta, normal or cauchy distribution, based on two
-#'              percentiles. \code{find_beta2()} finds the shape parameters for a Beta
+#'              percentiles. `find_beta2()` finds the shape parameters for a Beta
 #'              distribution, based on a probability value and its standard error
 #'              or confidence intervals.
 #'
@@ -14,14 +14,14 @@
 #' @param p2 Probability of the second percentile.
 #' @param x Numeric, a probability value between 0 and 1. Typically indicates
 #'          a prevalence rate of an outcome of interest; Or an integer value
-#'          with the number of observed events. In this case, specify \code{n}
+#'          with the number of observed events. In this case, specify `n`
 #'          to indicate the toral number of observations.
-#' @param se The standard error of \code{x}. Either \code{se} or \code{ci} must
+#' @param se The standard error of `x`. Either `se` or `ci` must
 #'          be specified.
-#' @param ci The upper limit of the confidence interval of \code{x}. Either
-#'          \code{se} or \code{ci} must be specified.
+#' @param ci The upper limit of the confidence interval of `x`. Either
+#'          `se` or `ci` must be specified.
 #' @param n Numeric, number of total observations. Needs to be specified, if
-#'          \code{x} is an integer (number of observed events), and no
+#'          `x` is an integer (number of observed events), and no
 #'          probability. See 'Examples'.
 #'
 #' @return A list of length two, with the two distribution parameters than can
@@ -29,21 +29,20 @@
 #'         the shape for the given input parameters.
 #'
 #' @details These functions can be used to find parameter for various distributions,
-#'          to define prior probabilities for Bayesian analyses. \code{x1},
-#'          \code{p1}, \code{x2} and \code{p2} are parameters that describe two
-#'          quantiles. Given this knowledge, the distribution parameters are
-#'          returned. \cr \cr
-#'          Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence
-#'          rate or similar probability, and its standard deviation or confidence
-#'          interval. In this case. \code{x} should be a probability,
-#'          for example a prevalence rate of a certain event. \code{se} then
-#'          needs to be the standard error for this probability. Alternatively,
-#'          \code{ci} can be specified, which should indicate the upper limit
-#'          of the confidence interval od the probability (prevalence rate) \code{x}.
-#'          If the number of events out of a total number of trials is known
-#'          (e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number
-#'          of observed events, while \code{n} indicates the total amount of trials
-#'          (in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}).
+#' to define prior probabilities for Bayesian analyses. `x1`, `p1`, `x2` and
+#' `p2` are parameters that describe two quantiles. Given this knowledge, the
+#' distribution parameters are returned.
+#'
+#' Use `find_beta2()`, if the known parameters are, e.g. a prevalence rate or
+#' similar probability, and its standard deviation or confidence interval. In
+#' this case, `x` should be a probability, for example a prevalence rate of a
+#' certain event. `se` then needs to be the standard error for this probability.
+#' Alternatively, `ci` can be specified, which should indicate the upper limit
+#' of the confidence interval of the probability (prevalence rate) `x`. If the
+#' number of events out of a total number of trials is known (e.g. 12 heads out
+#' of 30 coin tosses), `x` can also be the number of observed events, while `n`
+#' indicates the total amount of trials (in the above example, the function
+#' call would be: `find_beta2(x = 12, n = 30)`).
 #'
 #' @references Cook JD. Determining distribution parameters from quantiles. 2010: Department of Biostatistics, Texas (\href{https://www.johndcook.com/quantiles_parameters.pdf}{PDF})
 #'
@@ -79,14 +78,12 @@
 #' shapes <- find_beta2(x = 3, n = 20)
 #' curve(dbeta(x, shapes[[1]], shapes[[2]]))
 #'
-#' @importFrom stats pbeta approx
-#' @importFrom purrr map_dbl
 #' @export
 find_beta <- function(x1, p1, x2, p2) {
   logK <- seq(-5, 10, length = 200)
   K <- exp(logK)
 
-  m <- purrr::map_dbl(K, ~ betaprior(.x, x1, p1))
+  m <- unlist(lapply(K, betaprior, x = x1, p = p1))
 
   prob2 <- stats::pbeta(x2, K * m, K * (1 - m))
   ind <- ((prob2 > 0) & (prob2 < 1))
@@ -127,13 +124,13 @@ betaprior <- function(K, x, p) {
 find_beta2 <- function(x, se, ci, n) {
   # check if all required arguments are given
   if (missing(se) && missing(ci) && missing(n)) {
-    stop("Either `se` or `ci`, or `n` must be specified.", call. = F)
+    insight::format_error("Either `se` or `ci`, or `n` must be specified.")
   }
 
   # for number of observations, compute variance of beta distribution
   if (!missing(n)) {
     if (!is.integer(x) && x < 1)
-      stop("If `n` is given, x` must be an integer value greater than 0.", call. = F)
+      insight::format_error("If `n` is given, x` must be an integer value greater than 0.")
 
     # compute 2 SD from beta variance
     bvar <- 2 * sqrt((x * n) / ((x + n)^2 * (x + n + 1)))
@@ -164,7 +161,6 @@ find_beta2 <- function(x, se, ci, n) {
 }
 
 
-#' @importFrom stats qcauchy
 #' @rdname find_beta
 #' @export
 find_cauchy <- function(x1, p1, x2, p2) {
@@ -177,7 +173,6 @@ find_cauchy <- function(x1, p1, x2, p2) {
 
 
 
-#' @importFrom stats qnorm
 #' @rdname find_beta
 #' @export
 find_normal <- function(x1, p1, x2, p2) {
diff --git a/R/helpfunctions.R b/R/helpfunctions.R
index 856f955e..ce36f369 100644
--- a/R/helpfunctions.R
+++ b/R/helpfunctions.R
@@ -84,6 +84,7 @@ get_grouped_data <- function(x) {
     x <- gsub("Rho", "\u03C1", x, ignore.case = TRUE)
     x <- gsub("Mu", "\u03BC", x, ignore.case = TRUE)
     x <- gsub("Theta", "\u03B8", x, ignore.case = TRUE)
+    x <- gsub("Fei", "\u05E4\u200E", x, ignore.case = TRUE)
   }
   x
 }
diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R
index 80b6b7b1..d004c738 100644
--- a/R/kruskal_wallis_test.R
+++ b/R/kruskal_wallis_test.R
@@ -1,12 +1,17 @@
 #' @title Kruskal-Wallis-Test
 #' @name kruskal_wallis_test
-#' @description This function performs a Kruskal-Wallis rank sum test, see
-#' [`kruskal.test()`] and [`coin::kruskal_test()`]).
+#' @description This function performs a Kruskal-Wallis rank sum test, to test
+#' the null hypothesis that the population medians of all groups are
+#' equal. The alternative is that at least one group's median differs.
 #'
 #' @inheritParams mann_whitney_test
 #'
 #' @return A data frame with test results.
 #'
+#' @details The function is simply a wrapper around [`kruskal.test()`]. The
+#' weighted version of the Kruskal-Wallis test is based on the `survey` package,
+#' using [`survey::svyranktest()`].
+#'
 #' @examples
 #' data(efc)
 #' # Kruskal-Wallis-Test for elder's age by education
diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R
index 6dbbc257..2473c81c 100644
--- a/R/mann_whitney_test.R
+++ b/R/mann_whitney_test.R
@@ -1,9 +1,11 @@
 #' @title Mann-Whitney-Test
 #' @name mann_whitney_test
 #' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
-#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]).
+#' sum test for _unpaired_ samples).
 #'
-#' The function reports p and Z-values as well as effect size r and group-rank-means.
+#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
+#' independent samples have identical continuous distributions. It can be used
+#' when the two continuous variables are not normally distributed.
 #'
 #' @param data A data frame.
 #' @param select Name of the dependent variable (as string) to be used for the
@@ -16,9 +18,13 @@
 #' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"`
 #' (default). See [`coin::wilcox_test()`] for details.
 #'
-#' @return A data frame with test results.
+#' @return A data frame with test results. The function returns p and Z-values
+#' as well as effect size r and group-rank-means.
+#'
+#' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`]
+#' (the latter to extract effect sizes). The weighted version of the test is
+#' based on [`survey::svyranktest()`].
 #'
-#' @details This function calls [`coin::wilcox_test()`] to extract effect sizes.
 #' Interpretation of the effect size **r**, as a rule-of-thumb:
 #'
 #' - small effect >= 0.1
diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd
index abb066fb..e3196d9a 100644
--- a/man/chi_squared_test.Rd
+++ b/man/chi_squared_test.Rd
@@ -35,13 +35,25 @@ must be \code{NULL}. The probabilities must sum to 1.}
 \item{...}{Additional arguments passed down to \code{\link[=chisq.test]{chisq.test()}}.}
 }
 \value{
-A data frame with test results.
+A data frame with test results. The returned effect sizes are
+Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+given probabilities.
 }
 \description{
-This function performs a Mann-Whitney-Test (or Wilcoxon rank
-sum test for \emph{unpaired} samples, see \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}).
-
-The function reports p and Z-values as well as effect size r and group-rank-means.
+This function performs a \eqn{\chi^2}{chi^2} test for contingency
+tables or tests for given probabilities. The returned effect sizes are
+Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+given probabilities (see \emph{Ben-Shachar et al. 2023}).
+}
+\details{
+The function is a wrapper around \code{\link[=chisq.test]{chisq.test()}} and
+\code{\link[=fisher.test]{fisher.test()}} (for small expected values) for contingency tables, and
+\code{chisq.test()} for given probabilities. When \code{probabilities} are provided,
+these are rescaled to sum to 1 (i.e. \code{rescale.p = TRUE}). When \code{fisher.test()}
+is called, simulated p-values are returned (i.e. \code{simulate.p.value = TRUE},
+see \code{?fisher.test}).
 }
 \examples{
 data(efc)
@@ -53,3 +65,9 @@ chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight")
 # Chi-squared-test for given probabilities
 chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7))
 }
+\references{
+Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+\doi{10.3390/math11091982}
+}
diff --git a/man/find_beta.Rd b/man/find_beta.Rd
index 16d28385..ccd5dcc1 100644
--- a/man/find_beta.Rd
+++ b/man/find_beta.Rd
@@ -54,21 +54,20 @@ or confidence intervals.
 }
 \details{
 These functions can be used to find parameter for various distributions,
-to define prior probabilities for Bayesian analyses. \code{x1},
-\code{p1}, \code{x2} and \code{p2} are parameters that describe two
-quantiles. Given this knowledge, the distribution parameters are
-returned. \cr \cr
-Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence
-rate or similar probability, and its standard deviation or confidence
-interval. In this case. \code{x} should be a probability,
-for example a prevalence rate of a certain event. \code{se} then
-needs to be the standard error for this probability. Alternatively,
-\code{ci} can be specified, which should indicate the upper limit
-of the confidence interval od the probability (prevalence rate) \code{x}.
-If the number of events out of a total number of trials is known
-(e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number
-of observed events, while \code{n} indicates the total amount of trials
-(in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}).
+to define prior probabilities for Bayesian analyses. \code{x1}, \code{p1}, \code{x2} and
+\code{p2} are parameters that describe two quantiles. Given this knowledge, the
+distribution parameters are returned.
+
+Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence rate or
+similar probability, and its standard deviation or confidence interval. In
+this case, \code{x} should be a probability, for example a prevalence rate of a
+certain event. \code{se} then needs to be the standard error for this probability.
+Alternatively, \code{ci} can be specified, which should indicate the upper limit
+of the confidence interval of the probability (prevalence rate) \code{x}. If the
+number of events out of a total number of trials is known (e.g. 12 heads out
+of 30 coin tosses), \code{x} can also be the number of observed events, while \code{n}
+indicates the total amount of trials (in the above example, the function
+call would be: \code{find_beta2(x = 12, n = 30)}).
 }
 \examples{
 # example from blogpost:
diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd
index 366a544c..7be4e2e5 100644
--- a/man/kruskal_wallis_test.Rd
+++ b/man/kruskal_wallis_test.Rd
@@ -22,8 +22,14 @@ not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
 A data frame with test results.
 }
 \description{
-This function performs a Kruskal-Wallis rank sum test, see
-\code{\link[=kruskal.test]{kruskal.test()}} and \code{\link[coin:LocationTests]{coin::kruskal_test()}}).
+This function performs a Kruskal-Wallis rank sum test, to test
+the null hypothesis that the population medians of all groups are
+equal. The alternative is that at least one group's median differs.
+}
+\details{
+The function is simply a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The
+weighted version of the Kruskal-Wallis test is based on the \code{survey} package,
+using \code{\link[survey:svyranktest]{survey::svyranktest()}}.
 }
 \examples{
 data(efc)
diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd
index 2256df8b..1401a040 100644
--- a/man/mann_whitney_test.Rd
+++ b/man/mann_whitney_test.Rd
@@ -29,16 +29,22 @@ should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{
 (default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.}
 }
 \value{
-A data frame with test results.
+A data frame with test results. The function returns p and Z-values
+as well as effect size r and group-rank-means.
 }
 \description{
 This function performs a Mann-Whitney-Test (or Wilcoxon rank
-sum test for \emph{unpaired} samples, see \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}).
+sum test for \emph{unpaired} samples).
 
-The function reports p and Z-values as well as effect size r and group-rank-means.
+A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
+independent samples have identical continuous distributions. It can be used
+when the two continuous variables are not normally distributed.
 }
 \details{
-This function calls \code{\link[coin:LocationTests]{coin::wilcox_test()}} to extract effect sizes.
+This function is based on \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}
+(the latter to extract effect sizes). The weighted version of the test is
+based on \code{\link[survey:svyranktest]{survey::svyranktest()}}.
+
 Interpretation of the effect size \strong{r}, as a rule-of-thumb:
 \itemize{
 \item small effect >= 0.1