diff --git a/.Rbuildignore b/.Rbuildignore index 96c5763c..d124fd41 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -9,3 +9,10 @@ ^pkgdown$ ^.github$ \.code-workspace$ +^README\.rmd +^\.git$ +^WIP$ +\.lintr$ +^CRAN-SUBMISSION$ +^cran-comments\.md$ +^LICENSE\.md$ diff --git a/.github/workflows/R-check.yaml b/.github/workflows/R-check.yaml index 93fd9037..7c504b9b 100644 --- a/.github/workflows/R-check.yaml +++ b/.github/workflows/R-check.yaml @@ -32,8 +32,6 @@ jobs: ## This is because they are already run in `R-CMD-check-strict` workflow. ## ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - { os: macOS-latest, r: "release" } - - { os: windows-latest, r: "devel" } - { os: windows-latest, r: "release" } diff --git a/DESCRIPTION b/DESCRIPTION index e25170ae..358379d0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,33 +20,21 @@ Depends: Imports: bayestestR, datawizard, - dplyr, - effectsize, + effectsize (>= 0.8.8), insight, - lme4, - magrittr, - MASS, - modelr, parameters, performance, - purrr, - rlang, - sjlabelled, - sjmisc, - stats, - tidyr + stats Suggests: brms, - broom, car, coin, ggplot2, - graphics, + lme4, + MASS, pscl, pwr, - sjPlot, survey, - rstan, testthat URL: https://strengejacke.github.io/sjstats/ BugReports: https://github.com/strengejacke/sjstats/issues diff --git a/NAMESPACE b/NAMESPACE index 5a224af0..e410b8f3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,52 +12,31 @@ S3method(formula,svyglm.nb) S3method(formula,svyglm.zip) S3method(model.frame,svyglm.nb) S3method(model.frame,svyglm.zip) -S3method(model.matrix,gls) S3method(phi,formula) S3method(phi,ftable) S3method(phi,table) S3method(plot,sj_inequ_trend) S3method(predict,svyglm.nb) S3method(print,sj_anova_stat) -S3method(print,sj_check_assump) S3method(print,sj_chi2gof) -S3method(print,sj_grpmean) -S3method(print,sj_grpmeans) S3method(print,sj_htest_chi) S3method(print,sj_htest_kw) S3method(print,sj_htest_mwu) -S3method(print,sj_mwu) -S3method(print,sj_outliers) -S3method(print,sj_pval) +S3method(print,sj_htest_t) +S3method(print,sj_htest_wilcox) S3method(print,sj_resample) S3method(print,sj_ttest) S3method(print,sj_wcor) -S3method(print,sj_wmwu) S3method(print,sj_xtab_stat) -S3method(print,sj_xtab_stat2) S3method(print,svyglm.nb) S3method(print,svyglm.zip) -S3method(print,tidy_stan) S3method(residuals,svyglm.nb) -S3method(summary,sj_pval) S3method(terms,svyglm.nb) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) -S3method(weighted_mannwhitney,default) -S3method(weighted_mannwhitney,formula) -S3method(weighted_mean,data.frame) -S3method(weighted_mean,default) -S3method(weighted_median,data.frame) -S3method(weighted_median,default) -S3method(weighted_sd,data.frame) -S3method(weighted_sd,default) -S3method(weighted_sd,matrix) S3method(weighted_se,data.frame) S3method(weighted_se,default) S3method(weighted_se,matrix) -S3method(weighted_ttest,default) -S3method(weighted_ttest,formula) -export("%>%") export(anova_stats) export(auto_prior) export(boot_ci) @@ -110,94 +89,24 @@ export(smpsize_lmm) export(survey_median) export(svyglm.nb) export(svyglm.zip) +export(t_test) export(table_values) -export(typical_value) export(var_pop) export(weight) export(weight2) export(weighted_correlation) -export(weighted_mannwhitney) export(weighted_mean) export(weighted_median) -export(weighted_ranktest) export(weighted_sd) export(weighted_se) -export(weighted_ttest) -export(wtd_sd) +export(wilcoxon_test) export(xtab_statistics) -importFrom(MASS,glm.nb) importFrom(bayestestR,ci) 
importFrom(bayestestR,equivalence_test) -importFrom(dplyr,case_when) -importFrom(dplyr,filter) -importFrom(dplyr,group_vars) -importFrom(dplyr,mutate) -importFrom(dplyr,quos) -importFrom(dplyr,select) -importFrom(dplyr,select_if) -importFrom(dplyr,summarise) -importFrom(insight,export_table) -importFrom(insight,find_formula) -importFrom(insight,find_response) -importFrom(insight,format_p) -importFrom(insight,format_value) -importFrom(insight,get_data) -importFrom(insight,get_response) +importFrom(datawizard,weighted_mean) +importFrom(datawizard,weighted_median) +importFrom(datawizard,weighted_sd) importFrom(insight,link_inverse) -importFrom(insight,print_color) -importFrom(lme4,ngrps) -importFrom(magrittr,"%>%") -importFrom(modelr,crossv_kfold) importFrom(performance,mse) importFrom(performance,rmse) -importFrom(purrr,flatten_df) -importFrom(purrr,map) -importFrom(purrr,map2) -importFrom(purrr,map_dbl) -importFrom(purrr,map_df) -importFrom(purrr,map_lgl) -importFrom(purrr,walk) -importFrom(rlang,.data) -importFrom(rlang,enquo) -importFrom(rlang,quo_name) -importFrom(sjlabelled,as_numeric) -importFrom(sjmisc,is_empty) -importFrom(sjmisc,is_float) -importFrom(sjmisc,is_num_fac) -importFrom(sjmisc,str_contains) -importFrom(sjmisc,trim) -importFrom(sjmisc,typical_value) -importFrom(stats,as.formula) -importFrom(stats,chisq.test) -importFrom(stats,coef) -importFrom(stats,complete.cases) -importFrom(stats,dpois) importFrom(stats,family) -importFrom(stats,fitted) -importFrom(stats,formula) -importFrom(stats,kruskal.test) -importFrom(stats,lm) -importFrom(stats,model.frame) -importFrom(stats,model.matrix) -importFrom(stats,na.omit) -importFrom(stats,na.pass) -importFrom(stats,nobs) -importFrom(stats,pf) -importFrom(stats,pnorm) -importFrom(stats,predict.glm) -importFrom(stats,pt) -importFrom(stats,qf) -importFrom(stats,qnorm) -importFrom(stats,resid) -importFrom(stats,sd) -importFrom(stats,setNames) -importFrom(stats,terms) -importFrom(stats,update) -importFrom(stats,var) -importFrom(stats,vcov) -importFrom(stats,weighted.mean) -importFrom(stats,weights) -importFrom(stats,xtabs) -importFrom(tidyr,gather) -importFrom(tidyr,nest) -importFrom(tidyr,unnest) diff --git a/R/S3-methods.R b/R/S3-methods.R index 434b700a..568d22a6 100644 --- a/R/S3-methods.R +++ b/R/S3-methods.R @@ -1,24 +1,16 @@ -#' @importFrom stats model.matrix -#' @importFrom insight get_data -#' @export -model.matrix.gls <- function(object, ...) { - stats::model.matrix(object, data = insight::get_data(object)) -} - - -#' @importFrom stats coef vcov pnorm -#' @importFrom dplyr case_when #' @export print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) { se <- match.arg(se) sm <- tidy_svyglm.nb(x, digits, v_se = se)[-1, -2] - pan <- dplyr::case_when( - sm$p.value < 0.001 ~ "<0.001 ***", - sm$p.value < 0.01 ~ sprintf("%.*f ** ", digits, sm$p.value), - sm$p.value < 0.05 ~ sprintf("%.*f * ", digits, sm$p.value), - sm$p.value < 0.1 ~ sprintf("%.*f . ", digits, sm$p.value), - TRUE ~ sprintf("%.*f ", digits, sm$p.value) + pan <- ifelse(sm$p.value < 0.001, "<0.001 ***", + ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), # nolint + sprintf("%.*f ", digits, sm$p.value) + ) + ) + ) ) sm$p.value <- pan @@ -32,20 +24,19 @@ print.svyglm.nb <- function(x, se = c("robust", "model"), digits = 4, ...) 
{ } - -#' @importFrom stats coef vcov pnorm -#' @importFrom dplyr case_when #' @export print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { se <- match.arg(se) sm <- tidy_svyglm.zip(x, digits, v_se = se)[-1, ] - pan <- dplyr::case_when( - sm$p.value < 0.001 ~ "<0.001 ***", - sm$p.value < 0.01 ~ sprintf("%.*f ** ", digits, sm$p.value), - sm$p.value < 0.05 ~ sprintf("%.*f * ", digits, sm$p.value), - sm$p.value < 0.1 ~ sprintf("%.*f . ", digits, sm$p.value), - TRUE ~ sprintf("%.*f ", digits, sm$p.value) + pan <- ifelse(sm$p.value < 0.001, "<0.001 ***", + ifelse(sm$p.value < 0.01, sprintf("%.*f ** ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.05, sprintf("%.*f * ", digits, sm$p.value), # nolint + ifelse(sm$p.value < 0.1, sprintf("%.*f . ", digits, sm$p.value), # nolint + sprintf("%.*f ", digits, sm$p.value) + ) + ) + ) ) sm$p.value <- pan @@ -55,8 +46,6 @@ print.svyglm.zip <- function(x, se = c("robust", "model"), digits = 4, ...) { } - -#' @importFrom stats qnorm coef pnorm vcov tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -72,15 +61,13 @@ tidy_svyglm.nb <- function(x, digits = 4, v_se = c("robust", "model")) { estimate = round(est, digits), irr = round(exp(est), digits), std.error = round(se, digits), - conf.low = round(exp(est - stats::qnorm(.975) * se), digits), - conf.high = round(exp(est + stats::qnorm(.975) * se), digits), + conf.low = round(exp(est - stats::qnorm(0.975) * se), digits), + conf.high = round(exp(est + stats::qnorm(0.975) * se), digits), p.value = round(2 * stats::pnorm(abs(est / se), lower.tail = FALSE), digits) ) } - -#' @importFrom stats qnorm coef pnorm vcov tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { v_se <- match.arg(v_se) @@ -95,63 +82,52 @@ tidy_svyglm.zip <- function(x, digits = 4, v_se = c("robust", "model")) { term = substring(names(stats::coef(x)), 5), estimate = round(est, digits), std.error = round(se, digits), - conf.low = round(exp(est - stats::qnorm(.975) * se), digits), - conf.high = round(exp(est + stats::qnorm(.975) * se), digits), + conf.low = round(exp(est - stats::qnorm(0.975) * se), digits), + conf.high = round(exp(est + stats::qnorm(0.975) * se), digits), p.value = round(2 * stats::pnorm(abs(est / se), lower.tail = FALSE), digits) ) } - -#' @importFrom dplyr select #' @export model.frame.svyglm.nb <- function(formula, ...) { - pred <- attr(formula, "nb.terms", exact = T) - dplyr::select(formula$design$variables, string_one_of(pattern = pred, x = colnames(formula$design$variables))) + pred <- attr(formula, "nb.terms", exact = TRUE) + formula$design$variables[intersect(pred, colnames(formula$design$variables))] } - -#' @importFrom dplyr select #' @export model.frame.svyglm.zip <- function(formula, ...) { - pred <- attr(formula, "zip.terms", exact = T) - dplyr::select(formula$design$variables, string_one_of(pattern = pred, x = colnames(formula$design$variables))) + pred <- attr(formula, "zip.terms", exact = TRUE) + formula$design$variables[intersect(pred, colnames(formula$design$variables))] } - +#' @importFrom stats family #' @export family.svyglm.nb <- function(object, ...) { attr(object, "family", exact = TRUE) } - #' @export formula.svyglm.nb <- function(x, ...) { attr(x, "nb.formula", exact = TRUE) } - #' @export formula.svyglm.zip <- function(x, ...) 
{ attr(x, "zip.formula", exact = TRUE) } - -#' @importFrom MASS glm.nb -#' @importFrom stats coef setNames predict.glm #' @export predict.svyglm.nb <- function(object, newdata = NULL, type = c("link", "response", "terms"), se.fit = FALSE, dispersion = NULL, terms = NULL, - na.action = na.pass, ...) { - - if (!isNamespaceLoaded("survey")) - requireNamespace("survey", quietly = TRUE) + na.action = stats::na.pass, ...) { + insight::check_if_installed(c("survey", "MASS")) fnb <- MASS::glm.nb( attr(object, "nb.formula", exact = TRUE), @@ -178,9 +154,6 @@ predict.svyglm.nb <- function(object, newdata = NULL, } -#' @importFrom MASS glm.nb -#' @importFrom stats coef setNames predict.glm -#' @importFrom insight get_response #' @export residuals.svyglm.nb <- function(object, ...) { @@ -201,7 +174,6 @@ residuals.svyglm.nb <- function(object, ...) { } -#' @importFrom stats terms formula #' @export terms.svyglm.nb <- function(x, ...) { @@ -212,14 +184,11 @@ terms.svyglm.nb <- function(x, ...) { } -#' @importFrom purrr map flatten_df #' @export AIC.svyglm.nb <- function(object, ...) { ## FIXME this one just returns the AIC of the underlying glm.nb() model - list(object, ...) %>% - purrr::map(~ getaic(.x)) %>% - purrr::flatten_df() %>% - as.data.frame() + aics <- lapply(list(object, ...), getaic) + as.data.frame(do.call(rbind, aics)) } @@ -235,53 +204,18 @@ deviance.svyglm.nb <- function(object, ...) { } -#' @importFrom insight print_color -#' @export -print.tidy_stan <- function(x, ...) { - insight::print_color("\nSummary Statistics of Stan-Model\n\n", "blue") - digits <- attr(x, "digits") - - for (i in x) { - insight::print_color(paste0("# ", attr(i, "main_title")), "blue") - cat(" ") - insight::print_color(attr(i, "sub_title"), "red") - cat("\n\n") - rem <- which(colnames(i) %in% c("Parameter", "Group", "Response", "Function")) - i <- i[, -rem] - colnames(i)[1] <- "Parameter" - i$ESS <- as.character(i$ESS) - i$pd <- sprintf("%.1f%%", 100 * i$pd) - i[] <- lapply(i, function(.j) { - if (is.numeric(.j)) .j <- sprintf("%.*f", digits, .j) - .j - }) - print.data.frame(i, quote = FALSE, row.names = FALSE) - cat("\n\n") - } -} - - -#' @importFrom sjmisc trim -clean_term_name <- function(x) { - x <- sjmisc::trim(x) - format(x, width = max(nchar(x))) -} - - #' @export as.integer.sj_resample <- function(x, ...) { x$id } - #' @export as.data.frame.sj_resample <- function(x, ...) { x$data[x$id, , drop = FALSE] } - #' @export print.sj_resample <- function(x, ...) { n <- length(x$id) @@ -290,20 +224,20 @@ print.sj_resample <- function(x, ...) { else id10 <- x$id - cat("<", paste0("id's of resample [", prettyNum(nrow(x$data), big.mark = ","), " x ", - prettyNum(ncol(x$data), big.mark = ","), "]"), "> ", - paste(id10, collapse = ", "), "\n", sep = "") + cat("<", paste0( + "id's of resample [", prettyNum(nrow(x$data), big.mark = ","), " x ", + prettyNum(ncol(x$data), big.mark = ","), "]" + ), "> ", + toString(id10), "\n", + sep = "" + ) } - -#' @importFrom tidyr gather -#' @importFrom rlang .data #' @export plot.sj_inequ_trend <- function(x, ...) { - if (!requireNamespace("ggplot2", quietly = TRUE)) { - stop("Package `ggplot2` required for plotting inequalities trends.", call. = F) - } + .data <- NULL + insight::check_if_installed("ggplot2") # add time indicator x$data$zeit <- seq_len(nrow(x$data)) @@ -312,14 +246,11 @@ plot.sj_inequ_trend <- function(x, ...) 
{

  gather.cols1 <- colnames(x$data)[!colnames(x$data) %in% c("zeit", "lo", "hi")]
  gather.cols2 <- colnames(x$data)[!colnames(x$data) %in% c("zeit", "rr", "rd")]

-  key_col <- "grp"
-  value_col <- "y"
-
  # gather data to plot rr and rd
-  dat1 <- tidyr::gather(x$data, !! key_col, !! value_col, !! gather.cols1)
+  dat1 <- datawizard::data_to_long(x$data, select = gather.cols1, names_to = "grp", values_to = "y")

  # gather data for raw prevalences
-  dat2 <- tidyr::gather(x$data, !! key_col, !! value_col, !! gather.cols2)
+  dat2 <- datawizard::data_to_long(x$data, select = gather.cols2, names_to = "grp", values_to = "y")

  # Proper value names, for facet labels
  dat1$grp[dat1$grp == "rr"] <- "Rate Ratios"
@@ -327,7 +258,7 @@ plot.sj_inequ_trend <- function(x, ...) {

  # plot prevalences
  gp1 <- ggplot2::ggplot(dat2, ggplot2::aes_string(x = "zeit", y = "y", colour = "grp")) +
-    ggplot2::geom_smooth(method = "loess", se = F) +
+    ggplot2::geom_smooth(method = "loess", se = FALSE) +
    ggplot2::labs(title = "Prevalance Rates for Lower and Higher SES Groups",
                  y = "Prevalances", x = "Time", colour = "") +
    ggplot2::scale_color_manual(values = c("darkblue", "darkred"), labels = c("High SES", "Low SES"))
@@ -335,90 +266,17 @@ plot.sj_inequ_trend <- function(x, ...) {

  # plot rr and rd
  gp2 <- ggplot2::ggplot(dat1, ggplot2::aes_string(x = "zeit", y = "y", colour = "grp")) +
-    ggplot2::geom_smooth(method = "loess", se = F) +
+    ggplot2::geom_smooth(method = "loess", se = FALSE) +
    ggplot2::facet_wrap(~grp, ncol = 1, scales = "free") +
    ggplot2::labs(title = "Proportional Change in Rate Ratios and Rate Differences",
                  colour = NULL, y = NULL, x = "Time") +
-    ggplot2::guides(colour = FALSE)
+    ggplot2::guides(colour = "none")

  suppressMessages(graphics::plot(gp1))
  suppressMessages(graphics::plot(gp2))
}


-#' @importFrom stats kruskal.test na.omit
-#' @export
-print.sj_mwu <- function(x, ...) {
-  insight::print_color("\n# Mann-Whitney-U-Test\n\n", "blue")
-  # get data
-  .dat <- x$df
-  # print to console
-  for (i in seq_len(nrow(.dat))) {
-    # get value labels
-    l1 <- .dat[i, "grp1.label"]
-    l2 <- .dat[i, "grp2.label"]
-    # do we have value labels?
- if (!is.null(l1) && !is.na(l1) %% !is.null(l2) && !is.na(l2)) { - insight::print_color( - sprintf( - "Groups %i = %s (n = %i) | %i = %s (n = %i):\n", - .dat[i, "grp1"], - l1, - .dat[i, "grp1.n"], - .dat[i, "grp2"], - l2, - .dat[i, "grp2.n"] - ), "cyan" - ) - } else { - insight::print_color( - sprintf("Groups (%i|%i), n = %i/%i:\n", - .dat[i, "grp1"], .dat[i, "grp2"], - .dat[i, "grp1.n"], .dat[i, "grp2.n"]), - "cyan" - ) - } - - cat(sprintf( - " U = %.3f, W = %.3f, %s, Z = %.3f\n", - .dat[i, "u"], .dat[i, "w"], insight::format_p(.dat[i, "p"]), .dat[i, "z"] - )) - - string_es <- "effect-size r" - string_r <- sprintf("%.3f", .dat[i, "r"]) - string_group1 <- sprintf("rank-mean(%i)", .dat[i, "grp1"]) - string_group2 <- sprintf("rank-mean(%i)", .dat[i, "grp2"]) - string_rm1 <- sprintf("%.2f", .dat[i, "rank.mean.grp1"]) - string_rm2 <- sprintf("%.2f", .dat[i, "rank.mean.grp2"]) - - space1 <- max(nchar(c(string_es, string_group1, string_group2))) - space2 <- max(nchar(c(string_r, string_rm1, string_rm2))) - - cat( - sprintf(" %*s = %*s\n", space1, string_es, space2 + 1, string_r), - sprintf(" %*s = %*s\n", space1, string_group1, space2, string_rm1), - sprintf(" %*s = %*s\n\n", space1, string_group2, space2, string_rm2) - ) - } - - # if we have more than 2 groups, also perfom kruskal-wallis-test - if (length(unique(stats::na.omit(x$data$grp))) > 2) { - insight::print_color("# Kruskal-Wallis-Test\n\n", "blue") - kw <- stats::kruskal.test(x$data$dv, x$data$grp) - cat(sprintf("chi-squared = %.3f\n", kw$statistic)) - cat(sprintf("df = %i\n", kw$parameter)) - cat(paste(insight::format_p(kw$p.value, stars = TRUE), "\n")) - } -} - - -#' @export -print.sj_outliers <- function(x, ...) { - print(x$result, ...) -} - - -#' @importFrom insight format_p #' @export print.sj_xtab_stat <- function(x, ...) { # get length of method name, to align output @@ -442,105 +300,6 @@ print.sj_xtab_stat <- function(x, ...) { } - -#' @export -print.sj_xtab_stat2 <- function(x, ...) { - # get length of method name, to align output - l <- max(nchar(c(x$stat.name, "p-value", "Observations"))) - - # headline - insight::print_color(paste0("\n# ", x$method, "\n\n"), "blue") - - # print test statistic - cat(sprintf(" %*s: %.4f\n", l, x$stat.name, x$estimate)) - cat(sprintf(" %*s: %g\n", l, "df", x$df)) - cat(sprintf(" %*s: %s\n", l, "p-value", insight::format_p(x$p.value, stars = TRUE, name = NULL))) - cat(sprintf(" %*s: %g\n", l, "Observations", x$n_obs)) -} - - - -#' @export -print.sj_grpmean <- function(x, ...) { - cat("\n") - print_grpmean(x, digits = attributes(x)$digits, ...) -} - - -#' @importFrom insight export_table print_color format_value format_p -print_grpmean <- function(x, digits = NULL, ...) 
{ - # headline - insight::print_color(sprintf( - "# Grouped Means for %s by %s\n\n", - attr(x, "dv.label", exact = TRUE), - attr(x, "grp.label", exact = TRUE) - ), "blue") - - if (is.null(digits)) { - digits <- 2 - } - - x$mean <- insight::format_value(x$mean, digits = digits) - x$std.dev <- insight::format_value(x$std.dev, digits = digits) - x$std.error <- insight::format_value(x$std.error, digits = digits) - x$p.value <- insight::format_p(x$p.value, name = NULL) - - colnames(x) <- c("Category", "Mean", "N", "SD", "SE", "p") - cat(insight::export_table(x)) - - # statistics - cat(sprintf( - "\nAnova: R2=%.3f; adj.R2=%.3f; F=%.3f; p=%.3f\n", - attr(x, "r2", exact = TRUE), - attr(x, "adj.r2", exact = TRUE), - attr(x, "fstat", exact = TRUE), - attr(x, "p.value", exact = TRUE) - )) -} - - -#' @importFrom purrr walk -#' @export -print.sj_grpmeans <- function(x, ...) { - - cat("\n") - purrr::walk(x, function(dat) { - # get grouping title label - grp <- attr(dat, "group", exact = T) - - # print title for grouping - insight::print_color(sprintf("Grouped by:\n%s\n\n", grp), "cyan") - - # print grpmean-table - print_grpmean(dat, digits = attributes(x)$digits, ...) - - cat("\n\n") - }) -} - - -#' @export -print.sj_pval <- function(x, digits = 3, summary = FALSE, ...) { - - if (summary) { - df.kr <- attr(x, "df.kr", exact = TRUE) - t.kr <- attr(x, "t.kr", exact = TRUE) - - if (!is.null(df.kr)) x$df <- df.kr - if (!is.null(t.kr)) x$statistic <- t.kr - } - - x <- purrr::map_if(x, is.numeric, round, digits = digits) - print.data.frame(as.data.frame(x), ..., row.names = TRUE) -} - - -#' @export -summary.sj_pval <- function(object, digits = 3, summary = FALSE, ...) { - print(object, digits, summary = TRUE) -} - - #' @export print.sj_chi2gof <- function(x, ...) { insight::print_color("\n# Chi-squared Goodness-of-Fit Test\n\n", "blue") @@ -562,30 +321,6 @@ print.sj_chi2gof <- function(x, ...) { } -#' @export -print.sj_check_assump <- function(x, ...) { - insight::print_color("\n# Checking Model-Assumptions\n\n", "blue") - cat(sprintf(" Model: %s", attr(x, "formula", exact = TRUE))) - - insight::print_color("\n\n violated statistic\n", "red") - - v1 <- ifelse(x$heteroskedasticity < 0.05, "yes", "no") - v2 <- ifelse(x$multicollinearity > 4, "yes", "no") - v3 <- ifelse(x$non.normal.resid < 0.05, "yes", "no") - v4 <- ifelse(x$autocorrelation < 0.05, "yes", "no") - - s1 <- sprintf("p = %.3f", x$heteroskedasticity) - s2 <- sprintf("vif = %.3f", x$multicollinearity) - s3 <- sprintf("p = %.3f", x$non.normal.resid) - s4 <- sprintf("p = %.3f", x$autocorrelation) - - cat(sprintf(" Heteroskedasticity %8s %11s\n", v1, s1)) - cat(sprintf(" Non-normal residuals %8s %11s\n", v3, s3)) - cat(sprintf(" Autocorrelated residuals%8s %11s\n", v4, s4)) - cat(sprintf(" Multicollinearity %8s %11s\n", v2, s2)) -} - - #' @export print.sj_ttest <- function(x, ...) { insight::print_color(sprintf("\n%s (%s)\n", x$method, x$alternative), "blue") @@ -632,18 +367,6 @@ print.sj_ttest <- function(x, ...) { } -#' @export -print.sj_wmwu <- function(x, ...) { - insight::print_color(sprintf("\n# %s\n", x$method), "blue") - - group <- attr(x, "group.name", exact = TRUE) - xn <- attr(x, "x.name", exact = TRUE) - - insight::print_color(sprintf("\n comparison of %s by %s\n", xn, group), "cyan") - cat(sprintf(" Chisq=%.2f df=%i p-value=%.3f\n\n", x$statistic, as.integer(x$parameter), x$p.value)) -} - - #' @export print.sj_wcor <- function(x, ...) 
{ insight::print_color(sprintf("\nWeighted %s\n\n", x$method), "blue") @@ -659,7 +382,6 @@ print.sj_wcor <- function(x, ...) { } -#' @importFrom insight export_table #' @export print.sj_anova_stat <- function(x, digits = 3, ...) { x$p.value <- insight::format_p(x$p.value, name = NULL) diff --git a/R/anova_stats.R b/R/anova_stats.R index f7917687..d1e6e640 100644 --- a/R/anova_stats.R +++ b/R/anova_stats.R @@ -15,7 +15,7 @@ #' \cr \cr #' Tippey K, Longnecker MT (2016): An Ad Hoc Method for Computing Pseudo-Effect Size for Mixed Model. #' -#' @examples +#' @examplesIf requireNamespace("car") && requireNamespace("pwr") #' # load sample data #' data(efc) #' @@ -24,9 +24,7 @@ #' c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age, #' data = efc #' ) -#' \dontrun{ #' anova_stats(car::Anova(fit, type = 2)) -#' } #' @export anova_stats <- function(model, digits = 3) { insight::check_if_installed("pwr") @@ -47,39 +45,44 @@ anova_stats <- function(model, digits = 3) { cohens.f <- sqrt(partial.etasq / (1 - partial.etasq)) # bind as data frame - as <- dplyr::bind_rows( + anov_stat <- rbind( data.frame(etasq, partial.etasq, omegasq, partial.omegasq, epsilonsq, cohens.f), data.frame(etasq = NA, partial.etasq = NA, omegasq = NA, partial.omegasq = NA, epsilonsq = NA, cohens.f = NA) - ) %>% - sjmisc::add_columns(aov.sum) + ) + anov_stat <- cbind(anov_stat, data.frame(aov.sum)) # get nr of terms - nt <- nrow(as) - 1 + nt <- nrow(anov_stat) - 1 # finally, compute power - power <- tryCatch( - { - c( - pwr::pwr.f2.test(u = as$df[1:nt], v = as$df[nrow(as)], f2 = as$cohens.f[1:nt]^2)[["power"]], - NA - ) - }, + as_power <- tryCatch( + c( + pwr::pwr.f2.test( + u = anov_stat$df[1:nt], + v = anov_stat$df[nrow(anov_stat)], + f2 = anov_stat$cohens.f[1:nt]^2 + )[["power"]], + NA + ), error = function(x) { NA } ) - out <- sjmisc::add_variables(as, power = power) %>% - sjmisc::round_num(digits = digits) %>% - as.data.frame() + out <- cbind(anov_stat, data.frame(power = as_power)) + out[] <- lapply(out, function(i) { + if (is.numeric(i)) { + round(i, digits) + } else { + i + } + }) class(out) <- c("sj_anova_stat", class(out)) out } - -#' @importFrom rlang .data aov_stat <- function(model, type) { aov.sum <- aov_stat_summary(model) aov.res <- aov_stat_core(aov.sum, type) @@ -92,7 +95,7 @@ aov_stat <- function(model, type) { aov_stat_summary <- function(model) { - insight::check_if_installed("broom") + insight::check_if_installed("parameters") # check if we have a mixed model mm <- is_merMod(model) ori.model <- model @@ -103,12 +106,12 @@ aov_stat_summary <- function(model) { model <- stats::anova(model) # get summary table - aov.sum <- as.data.frame(broom::tidy(model)) + aov.sum <- insight::standardize_names(as.data.frame(parameters::model_parameters(model)), style = "broom") # for mixed models, add information on residuals if (mm) { res <- stats::residuals(ori.model) - aov.sum <- dplyr::bind_rows( + aov.sum <- rbind( aov.sum, data_frame( term = "Residuals", @@ -132,8 +135,14 @@ aov_stat_summary <- function(model) { colnames(aov.sum) <- c("term", "df", "sumsq", "meansq", "statistic", "p.value") # for car::Anova, the meansq-column might be missing, so add it manually - if (!obj_has_name(aov.sum, "meansq")) - aov.sum <- sjmisc::add_variables(aov.sum, meansq = aov.sum$sumsq / aov.sum$df, .after = "sumsq") + if (!obj_has_name(aov.sum, "meansq")) { + pos_sumsq <- which(colnames(aov.sum) == "sumsq") + aov.sum <- cbind( + aov.sum[1:pos_sumsq], + data.frame(meansq = aov.sum$sumsq / aov.sum$df), + aov.sum[(pos_sumsq + 
1):ncol(aov.sum)] + ) + } intercept <- .which_intercept(aov.sum$term) if (length(intercept) > 0) { @@ -165,36 +174,35 @@ aov_stat_core <- function(aov.sum, type) { N <- sum(aov.sum[["df"]]) + 1 - if (type == "omega") { + aovstat <- switch(type, # compute omega squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + omega = unlist(lapply(1:n_terms, function(x) { ss.term <- aov.sum[["sumsq"]][x] df.term <- aov.sum[["df"]][x] (ss.term - df.term * meansq.resid) / (ss.total + meansq.resid) - }) - } else if (type == "pomega") { + })), # compute partial omega squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + pomega = unlist(lapply(1:n_terms, function(x) { df.term <- aov.sum[["df"]][x] meansq.term <- aov.sum[["meansq"]][x] (df.term * (meansq.term - meansq.resid)) / (df.term * meansq.term + (N - df.term) * meansq.resid) - }) - } else if (type == "epsilon") { + })), # compute epsilon squared for each model term - aovstat <- purrr::map_dbl(1:n_terms, function(x) { + epsilon = unlist(lapply(1:n_terms, function(x) { ss.term <- aov.sum[["sumsq"]][x] df.term <- aov.sum[["df"]][x] (ss.term - df.term * meansq.resid) / ss.total - }) - } else if (type == "eta") { + })), # compute eta squared for each model term - aovstat <- - purrr::map_dbl(1:n_terms, ~ aov.sum[["sumsq"]][.x] / sum(aov.sum[["sumsq"]])) - } else if (type %in% c("cohens.f", "peta")) { + eta = unlist(lapply(1:n_terms, function(x) { + aov.sum[["sumsq"]][x] / sum(aov.sum[["sumsq"]]) + })), # compute partial eta squared for each model term - aovstat <- - purrr::map_dbl(1:n_terms, ~ aov.sum[["sumsq"]][.x] / (aov.sum[["sumsq"]][.x] + ss.resid)) - } + cohens.f = , + peta = unlist(lapply(1:n_terms, function(x) { + aov.sum[["sumsq"]][x] / (aov.sum[["sumsq"]][x] + ss.resid) + })) + ) # compute Cohen's F if (type == "cohens.f") aovstat <- sqrt(aovstat / (1 - aovstat)) diff --git a/R/auto_prior.R b/R/auto_prior.R index 7442c09d..1e036369 100644 --- a/R/auto_prior.R +++ b/R/auto_prior.R @@ -34,20 +34,16 @@ #' formula used in \code{brms::brm()} must be rewritten to something like #' \code{y ~ 0 + intercept ...}, see \code{\link[brms]{set_prior}}. 
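# A minimal sketch (assuming sjstats and brms are installed) of how the
# priors created by auto_prior() are consumed downstream: the returned
# brmsprior object is passed via the `prior` argument of brms::brm(). The
# brm() call stays commented out, as in the examples below, because it
# would compile and sample a Stan model.
library(sjstats)
data(efc)
p <- auto_prior(neg_c_7 ~ c161sex + c160age, efc, gaussian = TRUE)
# brms::brm(neg_c_7 ~ c161sex + c160age, data = efc, prior = p)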
#' -#' @examples -#' library(sjmisc) +#' @examplesIf requireNamespace("brms") #' data(efc) #' efc$c172code <- as.factor(efc$c172code) -#' efc$c161sex <- to_label(efc$c161sex) +#' efc$c161sex <- as.factor(efc$c161sex) #' #' mf <- formula(neg_c_7 ~ c161sex + c160age + c172code) -#' -#' if (requireNamespace("brms", quietly = TRUE)) -#' auto_prior(mf, efc, TRUE) +#' auto_prior(mf, efc, TRUE) #' #' ## compare to -#' # library(rstanarm) -#' # m <- stan_glm(mf, data = efc, chains = 2, iter = 200) +#' # m <- rstanarm::stan_glm(mf, data = efc, chains = 2, iter = 200) #' # ps <- prior_summary(m) #' # ps$prior_intercept$adjusted_scale #' # ps$prior$adjusted_scale @@ -59,22 +55,16 @@ #' # add informative priors #' mf <- formula(neg_c_7 ~ c161sex + c172code) #' -#' if (requireNamespace("brms", quietly = TRUE)) { -#' auto_prior(mf, efc, TRUE) + -#' brms::prior(normal(.1554, 40), class = "b", coef = "c160age") -#' } +#' auto_prior(mf, efc, TRUE) + +#' brms::prior(normal(.1554, 40), class = "b", coef = "c160age") #' #' # example with binary response #' efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) #' mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age) -#' -#' if (requireNamespace("brms", quietly = TRUE)) -#' auto_prior(mf, efc, FALSE) +#' auto_prior(mf, efc, FALSE) #' @export auto_prior <- function(formula, data, gaussian, locations = NULL) { - - if (!requireNamespace("brms", quietly = TRUE)) - stop("Package `brms` required.", call. = FALSE) + insight::check_if_installed("brms") scale.b <- 2.5 scale.y <- 10 @@ -82,20 +72,14 @@ auto_prior <- function(formula, data, gaussian, locations = NULL) { pred <- insight::find_predictors(formula, effects = "all", flatten = TRUE) y.name <- insight::find_response(formula, combine = TRUE) - cols <- c(y.name, pred) - - data <- data %>% - dplyr::select(!! cols) %>% - stats::na.omit() %>% - as.data.frame() - + data <- stats::na.omit(data[c(y.name, pred)]) y <- data[[y.name]] # check if response is binary - if (missing(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) gaussian <- FALSE + if (missing(gaussian) && insight::n_unique(y) == 2) gaussian <- FALSE - if (isTRUE(gaussian) && dplyr::n_distinct(y, na.rm = TRUE) == 2) - warning("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.", call. = F) + if (isTRUE(gaussian) && insight::n_unique(y) == 2) + insight::format_alert("Priors were calculated based on assumption that the response is Gaussian, however it seems to be binary.") # nolint if (gaussian) { @@ -135,7 +119,7 @@ auto_prior <- function(formula, data, gaussian, locations = NULL) { term.names <- c(term.names, i) } - for (i in 1:length(term.names)) { + for (i in seq_along(term.names)) { if (!is.null(locations) && length(locations) >= (i + 1)) location.b <- locations[i + 1] diff --git a/R/boot_ci.R b/R/boot_ci.R index a1ad3bdd..33b8b9f3 100644 --- a/R/boot_ci.R +++ b/R/boot_ci.R @@ -6,66 +6,55 @@ #' replicate estimates. #' #' @param data A data frame that containts the vector with bootstrapped -#' estimates, or directly the vector (see 'Examples'). +#' estimates, or directly the vector (see 'Examples'). #' @param ci.lvl Numeric, the level of the confidence intervals. -#' @param ... Optional, unquoted names of variables with bootstrapped estimates. -#' Required, if either \code{data} is a data frame (and no vector), -#' and only selected variables from \code{data} should be processed. 
-#' You may also use functions like \code{:} or tidyselect's
-#' \code{select_helpers()}.
+#' @param select Optional, unquoted names of variables (as character vector)
+#' with bootstrapped estimates. Required, if either `data` is a data frame
+#' (and no vector), and only selected variables from `data` should be processed.
 #' @param method Character vector, indicating if confidence intervals should be
-#' based on bootstrap standard error, multiplied by the value of the
-#' quantile function of the t-distribution (default), or on sample
-#' quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}.
-#' May be abbreviated.
-#'
-#' @return A data frame with either bootstrap estimate,
-#' standard error, the lower and upper confidence intervals or the
-#' p-value for all bootstrapped estimates.
-#'
-#' @details The methods require one or more vectors of bootstrap replicate estimates
-#' as input.
-#' \itemize{
-#' \item{
-#' \code{boot_est()} returns the bootstrapped estimate, simply by
-#' computing the mean value of all bootstrap estimates.
-#' }
-#' \item{
-#' \code{boot_se()} computes the nonparametric bootstrap standard
-#' error by calculating the standard deviation of the input vector.
-#' }
-#' \item{
-#' The mean value of the input vector and its standard error is used
-#' by \code{boot_ci()} to calculate the lower and upper confidence
-#' interval, assuming a t-distribution of bootstrap estimate replicates
-#' (for \code{method = "dist"}, the default, which is
-#' \code{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for
-#' \code{method = "quantile"}, 95\% sample quantiles are used to compute
-#' the confidence intervals (\code{quantile(x, probs = c(.025, .975))}).
-#' Use \code{ci.lvl} to change the level for the confidence interval.
-#' }
-#' \item{
-#' P-values from \code{boot_p()} are also based on t-statistics,
-#' assuming normal distribution.
-#' }
-#' }
+#' based on bootstrap standard error, multiplied by the value of the quantile
+#' function of the t-distribution (default), or on sample quantiles of the
+#' bootstrapped values. See 'Details' in `boot_ci()`. May be abbreviated.
+#'
+#' @return A data frame with either bootstrap estimate, standard error, the
+#' lower and upper confidence intervals or the p-value for all bootstrapped
+#' estimates.
+#'
+#' @details The methods require one or more vectors of bootstrap replicate
+#' estimates as input.
+#'
+#' - `boot_est()`: returns the bootstrapped estimate, simply by computing
+#'   the mean value of all bootstrap estimates.
+#' - `boot_se()`: computes the nonparametric bootstrap standard error by
+#'   calculating the standard deviation of the input vector.
+#' - The mean value of the input vector and its standard error is used by
+#'   `boot_ci()` to calculate the lower and upper confidence interval,
+#'   assuming a t-distribution of bootstrap estimate replicates (for
+#'   `method = "dist"`, the default, which is
+#'   `mean(x) +/- qt(0.975, df = length(x) - 1) * sd(x)`); for
+#'   `method = "quantile"`, 95\% sample quantiles are used to compute the
+#'   confidence intervals (`quantile(x, probs = c(0.025, 0.975))`). Use
+#'   `ci.lvl` to change the level for the confidence interval.
+#' - P-values from `boot_p()` are also based on t-statistics, assuming normal
+#'   distribution.
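# A worked sketch of the two interval methods described above, with a plain
# numeric vector standing in for a column of bootstrap replicates:
x <- rnorm(1000, mean = 2, sd = 0.5)
# method = "dist": mean +/- t-quantile times the bootstrap standard error
mean(x) + c(-1, 1) * qt(0.975, df = length(x) - 1) * sd(x)
# method = "quantile": empirical 2.5% and 97.5% sample quantiles
quantile(x, probs = c(0.025, 0.975))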
#'
#' @references Carpenter J, Bithell J. Bootstrap confdence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164
-#'
-#' @seealso \code{\link{bootstrap}} to generate nonparametric bootstrap samples.
+#' @seealso [`bootstrap()`] to generate nonparametric bootstrap samples.
 #'
 #' @examples
-#' library(dplyr)
-#' library(purrr)
 #' data(efc)
 #' bs <- bootstrap(efc, 100)
 #'
 #' # now run models for each bootstrapped sample
-#' bs$models <- map(bs$strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x))
+#' bs$models <- lapply(
+#'   bs$strap,
+#'   function(.x) lm(neg_c_7 ~ e42dep + c161sex, data = .x)
+#' )
 #'
 #' # extract coefficient "dependency" and "gender" from each model
-#' bs$dependency <- map_dbl(bs$models, ~coef(.x)[2])
-#' bs$gender <- map_dbl(bs$models, ~coef(.x)[3])
+#' bs$dependency <- vapply(bs$models, function(x) coef(x)[2], numeric(1))
+#' bs$gender <- vapply(bs$models, function(x) coef(x)[3], numeric(1))
 #'
 #' # get bootstrapped confidence intervals
 #' boot_ci(bs$dependency)
@@ -76,67 +65,26 @@
 #'
 #' # alternative function calls.
 #' boot_ci(bs$dependency)
-#' boot_ci(bs, dependency)
-#' boot_ci(bs, dependency, gender)
-#' boot_ci(bs, dependency, gender, method = "q")
+#' boot_ci(bs, "dependency")
+#' boot_ci(bs, c("dependency", "gender"))
+#' boot_ci(bs, c("dependency", "gender"), method = "q")
 #'
 #'
 #' # compare coefficients
 #' mean(bs$dependency)
 #' boot_est(bs$dependency)
 #' coef(fit)[2]
-#'
-#'
-#' # bootstrap() and boot_ci() work fine within pipe-chains
-#' efc %>%
-#'   bootstrap(100) %>%
-#'   mutate(
-#'     models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)),
-#'     dependency = map_dbl(models, ~coef(.x)[2])
-#'   ) %>%
-#'   boot_ci(dependency)
-#'
-#' # check p-value
-#' boot_p(bs$gender)
-#' summary(fit)$coefficients[3, ]
-#'
-#' \dontrun{
-#' # 'spread_coef()' from the 'sjmisc'-package makes it easy to generate
-#' # bootstrapped statistics like confidence intervals or p-values
-#' library(dplyr)
-#' library(sjmisc)
-#' efc %>%
-#'   # generate bootstrap replicates
-#'   bootstrap(100) %>%
-#'   # apply lm to all bootstrapped data sets
-#'   mutate(
-#'     models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x))
-#'   ) %>%
-#'   # spread model coefficient for all 100 models
-#'   spread_coef(models) %>%
-#'   # compute the CI for all bootstrapped model coefficients
-#'   boot_ci(e42dep, c161sex, c172code)
-#'
-#' # or...
-#' efc %>% -#' # generate bootstrap replicates -#' bootstrap(100) %>% -#' # apply lm to all bootstrapped data sets -#' mutate( -#' models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) -#' ) %>% -#' # spread model coefficient for all 100 models -#' spread_coef(models, append = FALSE) %>% -#' # compute the CI for all bootstrapped model coefficients -#' boot_ci()} -#' @importFrom rlang .data #' @export -boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = .95) { +boot_ci <- function(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95) { # match arguments method <- match.arg(method) # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute confidence intervals for all values transform_boot_result(lapply(.dat, function(x) { @@ -144,9 +92,9 @@ boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = .95) { # bootstrap values or quantiles if (method == "dist") { # get bootstrap standard error - bootse <- stats::qt((1 + ci.lvl) / 2, df = length(x) - 1) * stats::sd(x, na.rm = T) + bootse <- stats::qt((1 + ci.lvl) / 2, df = length(x) - 1) * stats::sd(x, na.rm = TRUE) # lower and upper confidence interval - ci <- mean(x, na.rm = T) + c(-bootse, bootse) + ci <- mean(x, na.rm = TRUE) + c(-bootse, bootse) } else { # CI based on quantiles of bootstrapped values ci <- stats::quantile(x, probs = c((1 - ci.lvl) / 2, (1 + ci.lvl) / 2)) @@ -159,16 +107,18 @@ boot_ci <- function(data, ..., method = c("dist", "quantile"), ci.lvl = .95) { #' @rdname boot_ci -#' @importFrom stats sd #' @export -boot_se <- function(data, ...) { +boot_se <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # get bootstrap standard error - se <- stats::sd(x, na.rm = T) + se <- stats::sd(x, na.rm = TRUE) names(se) <- "std.err" se })) @@ -176,16 +126,18 @@ boot_se <- function(data, ...) { #' @rdname boot_ci -#' @importFrom stats sd pt #' @export -boot_p <- function(data, ...) { +boot_p <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute confidence intervalls for all values transform_boot_result(lapply(.dat, function(x) { # compute t-statistic - t.stat <- mean(x, na.rm = T) / stats::sd(x, na.rm = T) + t.stat <- mean(x, na.rm = TRUE) / stats::sd(x, na.rm = TRUE) # compute p-value p <- 2 * stats::pt(abs(t.stat), df = length(x) - 1, lower.tail = FALSE) names(p) <- "p.value" @@ -196,34 +148,23 @@ boot_p <- function(data, ...) { #' @rdname boot_ci #' @export -boot_est <- function(data, ...) 
{ +boot_est <- function(data, select = NULL) { # evaluate arguments, generate data - .dat <- get_dot_data(data, dplyr::quos(...)) - + if (is.null(select)) { + .dat <- as.data.frame(data) + } else { + .dat <- data[select] + } # compute mean for all values (= bootstrapped estimate) transform_boot_result(lapply(.dat, function(x) { - estimate <- mean(x, na.rm = T) + estimate <- mean(x, na.rm = TRUE) names(estimate) <- "estimate" estimate })) } - transform_boot_result <- function(res) { # transform a bit, so we have each estimate in a row, and ci's as columns... - res %>% - as.data.frame() %>% - t() %>% - as.data.frame() %>% - rownames_as_column(var = "term") -} - - -#' @importFrom dplyr select -get_dot_data <- function(x, qs) { - if (sjmisc::is_empty(qs)) - as.data.frame(x) - else - suppressWarnings(dplyr::select(x, !!!qs)) + rownames_as_column(as.data.frame(t(as.data.frame(res))), var = "term") } diff --git a/R/bootstrap.R b/R/bootstrap.R index c3d3635f..444896a2 100644 --- a/R/bootstrap.R +++ b/R/bootstrap.R @@ -61,17 +61,11 @@ #' mean(as.data.frame(x)$c12hour, na.rm = TRUE) #' })) #' -#' # or as tidyverse-approach -#' if (require("dplyr") && require("purrr")) { -#' bs <- efc %>% -#' bootstrap(100) %>% -#' mutate( -#' c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) -#' ) +#' # bootstrapped standard error +#' boot_se(bs, "c12hour") #' -#' # bootstrapped standard error -#' boot_se(bs, c12hour) -#' } +#' # bootstrapped CI +#' boot_ci(bs, "c12hour") #' @export bootstrap <- function(data, n, size) { if (!missing(size) && !is.null(size)) { diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R index 426883a6..474ecadd 100644 --- a/R/chi_squared_test.R +++ b/R/chi_squared_test.R @@ -1,6 +1,6 @@ #' @title Chi-Squared test #' @name chi_squared_test -#' @description This function performs a \eqn{chi}^2 test for contingency +#' @description This function performs a \eqn{\chi^2} test for contingency #' tables or tests for given probabilities. The returned effects sizes are #' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) #' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against @@ -17,6 +17,10 @@ #' @param ... Additional arguments passed down to [`chisq.test()`]. #' @inheritParams mann_whitney_test #' +#' @inheritSection mann_whitney_test Which test to use +#' +#' @inherit mann_whitney_test seealso +#' #' @return A data frame with test results. The returned effects sizes are #' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi}) #' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against @@ -33,12 +37,23 @@ #' The weighted version of the chi-squared test is based on the a weighted #' table, using [`xtabs()`] as input for `chisq.test()`. #' -#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., -#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data -#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. -#' \doi{10.3390/math11091982} +#' Interpretation of effect sizes are based on rules described in +#' [`effectsize::interpret_phi()`], [`effectsize::interpret_cramers_v()`], +#' and [`effectsize::interpret_fei()`]. +#' +#' @references +#' - Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M., +#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data +#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. +#' \doi{10.3390/math11091982} #' -#' @examples +#' - Bender, R., Lange, S., Ziegler, A. 
Wichtige Signifikanztests.
+#'   Dtsch Med Wochenschr 2007; 132: e24–e25
+#'
+#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+#'   Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+#'
+#' @examplesIf requireNamespace("effectsize")
 #' data(efc)
 #' efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
 #'
@@ -58,6 +73,12 @@ chi_squared_test <- function(data, weights = NULL, paired = FALSE, ...) {
+  # sanity check - if "select" specifies only one variable, and "by" and
+  # "probabilities" are NULL, set default probabilities
+  if (is.null(probabilities) && !is.null(select) && is.null(by) && length(select) == 1) {
+    probabilities <- rep(1 / length(data[[select]]), length(data[[select]]))
+  }
+
   if (is.null(probabilities)) {
     .calculate_chisq(data, select, by, weights, paired, ...)
   } else {
@@ -260,8 +281,29 @@ print.sj_htest_chi <- function(x, ...) {
   eff_symbol <- .format_symbols(x$effect_size_name)
   stat_symbol <- .format_symbols(x$statistic_name)

+  # string for effectsizes
+  eff_string <- switch(x$effect_size_name,
+    Fei = sprintf(
+      "%s = %.3f (%s effect)",
+      eff_symbol,
+      x$effect_size,
+      effectsize::interpret_fei(x$effect_size)
+    ),
+    Phi = sprintf(
+      "%s = %.3f (%s effect)",
+      eff_symbol,
+      x$effect_size,
+      effectsize::interpret_phi(x$effect_size)
+    ),
+    sprintf(
+      "Cramer's V = %.3f (%s effect)",
+      x$effect_size,
+      effectsize::interpret_cramers_v(x$effect_size)
+    )
+  )
+
   cat(sprintf(
-    "\n  %s = %.4f, %s = %.4f, df = %i, %s\n\n",
-    stat_symbol, x$statistic, eff_symbol, x$effect_size, round(x$df), insight::format_p(x$p)
+    "\n  %s = %.3f, %s, df = %i, %s\n\n",
+    stat_symbol, x$statistic, eff_string, round(x$df), insight::format_p(x$p)
  ))
}
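# A short sketch of the interpretation helpers the new eff_string code above
# relies on (they are also referenced in the function's @details text); the
# input value is arbitrary, and the returned labels depend on effectsize's
# default interpretation rules:
library(effectsize)
interpret_phi(0.3)
interpret_cramers_v(0.3)
interpret_fei(0.3)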
diff --git a/R/confint_ncg.R b/R/confint_ncg.R index 6996d7da..abf8a305 100644 --- a/R/confint_ncg.R +++ b/R/confint_ncg.R @@ -3,7 +3,6 @@
 # Author: Ken Kelley
 # License: GPL-3

-#' @importFrom stats pf qf
 confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = NULL) {
   alpha.lower <- alpha.upper <- (1 - conf.level) / 2
   tol <- 1e-09
@@ -40,12 +39,12 @@ confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = N
       Diff.2 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = LL.Bounds[2]) - (1 - alpha.lower) > tol
       Diff.3 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = LL.Bounds[3]) - (1 - alpha.lower) > tol

-      if (isTRUE(Diff.1) & isTRUE(Diff.2) & !isTRUE(Diff.3)) {
+      if (isTRUE(Diff.1) && isTRUE(Diff.2) && !isTRUE(Diff.3)) {
        LL.Bounds <- c(LL.Bounds[2], (LL.Bounds[2] + LL.Bounds[3]) / 2, LL.Bounds[3])
       }

-      if (isTRUE(Diff.1) & !isTRUE(Diff.2) & !isTRUE(Diff.3)) {
+      if (isTRUE(Diff.1) && !isTRUE(Diff.2) && !isTRUE(Diff.3)) {
         LL.Bounds <- c(LL.Bounds[1], (LL.Bounds[1] + LL.Bounds[2]) / 2, LL.Bounds[2])
       }

@@ -89,14 +88,13 @@ confint_ncg <- function(F.value = NULL, conf.level = 0.95, df.1 = NULL, df.2 = N
      Diff.2 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[2]) - alpha.upper > tol
      Diff.3 <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[3]) - alpha.upper > tol

-      if (isTRUE(Diff.1) & isTRUE(Diff.2) & !isTRUE(Diff.3)) {
+      if (isTRUE(Diff.1) && isTRUE(Diff.2) && !isTRUE(Diff.3)) {
        UL.Bounds <- c(UL.Bounds[2], (UL.Bounds[2] + UL.Bounds[3]) / 2, UL.Bounds[3])
      }

-      if (isTRUE(Diff.1) & !isTRUE(Diff.2) & !isTRUE(Diff.3)) {
-        UL.Bounds <- c(UL.Bounds[1], (UL.Bounds[1] +
-          UL.Bounds[2])/2, UL.Bounds[2])
+      if (isTRUE(Diff.1) && !isTRUE(Diff.2) && !isTRUE(Diff.3)) {
+        UL.Bounds <- c(UL.Bounds[1], (UL.Bounds[1] + UL.Bounds[2]) / 2, UL.Bounds[2])
      }

      Diff <- stats::pf(q = F.value, df1 = df.1, df2 = df.2, ncp = UL.Bounds[2]) - alpha.upper
diff --git a/R/cramer.R b/R/cramer.R index a623da44..5c12251b 100644 --- a/R/cramer.R +++ b/R/cramer.R @@ -23,19 +23,19 @@ cramers_v.ftable <- function(tab, ...) {
 #' @rdname crosstable_statistics
 #' @export
 cramers_v.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...) {
-  terms <- all.vars(formula)
-  tab <- table(data[[terms[1]]], data[[terms[2]]])
+  fterms <- all.vars(formula)
+  tab <- table(data[[fterms[1]]], data[[fterms[2]]])
   method <- match.arg(method)

   if (is.null(ci.lvl) || is.na(ci.lvl)) {
     .cramers_v(tab)
   } else {
-    straps <- sjstats::bootstrap(data[terms], n)
+    straps <- sjstats::bootstrap(data[fterms], n)
     tables <- lapply(straps$strap, function(x) {
       dat <- as.data.frame(x)
       table(dat[[1]], dat[[2]])
     })
-    cramers <- sapply(tables, function(x) .cramers_v(x))
+    cramers <- sapply(tables, .cramers_v)
     ci <- boot_ci(cramers, ci.lvl = ci.lvl, method = method)

     data_frame(
diff --git a/R/cv.R b/R/cv.R index 38d6ece0..c68844e1 100644 --- a/R/cv.R +++ b/R/cv.R @@ -20,7 +20,6 @@
 #' fit <- lm(barthtot ~ c160age + c12hour, data = efc)
 #' cv(fit)
 #'
-#' @importFrom stats sd
 #' @export
 cv <- function(x, ...) {
   # return value
@@ -37,9 +36,8 @@
 }

-#' @importFrom performance rmse
-#' @importFrom insight get_response
 cv_helper <- function(x) {
+  insight::check_if_installed("performance")
   # check if we have a fitted linear model
   if (inherits(x, c("lm", "lmerMod", "lme", "merModLmerTest")) && !inherits(x, "glm")) {
     # get response
diff --git a/R/cv_error.R b/R/cv_error.R index c4880b12..557b8d15 100644 --- a/R/cv_error.R +++ b/R/cv_error.R @@ -31,41 +31,33 @@
 #'   neg_c_7 ~ barthtot + c12hour
 #' ))
 #'
-#' @importFrom modelr crossv_kfold
-#' @importFrom dplyr mutate summarise
-#' @importFrom purrr map map2 map_dbl map_df
-#' @importFrom tidyr unnest
-#' @importFrom rlang .data
-#' @importFrom insight find_response
-#' @importFrom performance rmse
 #' @export
 cv_error <- function(data, formula, k = 5) {
-  insight::check_if_installed("broom")
-
-  # compute cross validation data
-  cv_data <- data %>%
-    modelr::crossv_kfold(k = k) %>%
-    dplyr::mutate(
-      trained.models = purrr::map(.data$train, ~ stats::lm(formula, data = .x)),
-      predicted = purrr::map2(.data$trained.models, .data$test, ~ broom::augment(.x, newdata = .y)),
-      residuals = purrr::map(.data$predicted, ~.x[[insight::find_response(formula)]] - .x$.fitted),
-      rmse.train = purrr::map_dbl(.data$trained.models, ~ performance::rmse(.x))
-    )
+  insight::check_if_installed("datawizard")
+  # response
+  resp <- insight::find_response(formula)

-  # Training error
-  train.error <- dplyr::summarise(cv_data, train.error = mean(.data$rmse.train, na.rm = TRUE))
+  # compute cross validation data, i.e. k random 80/20 train-test splits
+  cv_data <- lapply(seq_len(k), function(i) {
+    datawizard::data_partition(data, proportion = 0.8)
+  })
+  # get train and test datasets
+  train_data <- lapply(cv_data, function(cvdat) cvdat[[1]])
+  test_data <- lapply(cv_data, function(cvdat) cvdat[[2]])
+  # fit models on datasets
+  trained_models <- lapply(train_data, function(x) stats::lm(formula, data = x))
+  test_models <- lapply(test_data, function(x) stats::lm(formula, data = x))

-  # Test error
-  test.error <- cv_data %>%
-    tidyr::unnest(.data$predicted, .data$residuals) %>%
-    dplyr::summarise(test.error = sqrt(mean(.data$residuals^2, na.rm = TRUE)))
+  # RMSE
+  train_error <- mean(vapply(trained_models, performance::rmse, numeric(1)), na.rm = TRUE)
+  test_error <- mean(vapply(test_models, performance::rmse,
numeric(1)), na.rm = TRUE) data_frame( model = deparse(formula), - train.error = round(train.error[[1]], 4), - test.error = round(test.error[[1]], 4) + train.error = round(train_error, 4), + test.error = round(test_error, 4) ) } @@ -74,5 +66,5 @@ cv_error <- function(data, formula, k = 5) { #' @rdname cv_error #' @export cv_compare <- function(data, formulas, k = 5) { - purrr::map_df(formulas, ~ cv_error(data, formula = .x, k = k)) + do.call(rbind, lapply(formulas, function(f) cv_error(data, formula = f, k = k))) } diff --git a/R/find_beta.R b/R/find_beta.R index 1ae733cc..66d1529a 100644 --- a/R/find_beta.R +++ b/R/find_beta.R @@ -137,24 +137,24 @@ find_beta2 <- function(x, se, ci, n) { # need to compute proportion x <- x / n - p2 <- .95 + p2 <- 0.95 x2 <- x + bvar } # for standard errors, we assume a 68% quantile if (!missing(se)) { - p2 <- .68 + p2 <- 0.68 x2 <- x + se } # for CI, we assume a 68% quantile if (!missing(ci)) { - p2 <- .95 + p2 <- 0.95 x2 <- ci } # the probability is assumed to be the median - p1 <- .5 + p1 <- 0.5 x1 <- x find_beta(x1, p1, x2, p2) @@ -182,4 +182,3 @@ find_normal <- function(x1, p1, x2, p2) { list(mean = mw, sd = stddev) } - diff --git a/R/gmd.R b/R/gmd.R index c7c3eaf4..dcfe9678 100644 --- a/R/gmd.R +++ b/R/gmd.R @@ -1,45 +1,42 @@ #' @title Gini's Mean Difference #' @name gmd -#' @description \code{gmd()} computes Gini's mean difference for a numeric vector +#' @description `gmd()` computes Gini's mean difference for a numeric vector #' or for all numeric vectors in a data frame. #' #' @param x A vector or data frame. -#' @param ... Optional, unquoted names of variables that should be selected for -#' further processing. Required, if \code{x} is a data frame (and no vector) -#' and only selected variables from \code{x} should be processed. You may also -#' use functions like \code{:} or tidyselect's \code{select_helpers()}. +#' @param select Optional, names of variables as character vector that should be +#' selected for further processing. Required, if `x` is a data frame (and no vector) +#' and only selected variables from `x` should be processed. #' #' @return For numeric vectors, Gini's mean difference. For non-numeric vectors -#' or vectors of length < 2, returns \code{NA}. +#' or vectors of length < 2, returns `NA`. #' #' @note Gini's mean difference is defined as the mean absolute difference between -#' any two distinct elements of a vector. Missing values from \code{x} are -#' silently removed. +#' any two distinct elements of a vector. Missing values from `x` are silently +#' removed. #' #' @references David HA. Gini's mean difference rediscovered. Biometrika 1968(55): 573-575 #' #' @examples #' data(efc) #' gmd(efc$e17age) -#' gmd(efc, e17age, c160age, c12hour) +#' gmd(efc, c("e17age", "c160age", "c12hour")) #' -#' @importFrom dplyr quos select -#' @importFrom purrr map_df -#' @importFrom sjmisc is_empty #' @export -gmd <- function(x, ...) { - # evaluate dots - qs <- dplyr::quos(...) 
- if (!sjmisc::is_empty(qs)) x <- suppressMessages(dplyr::select(x, !!!qs)) - - if (is.data.frame(x)) - purrr::map_df(x, gmd_helper) - else +gmd <- function(x, select = NULL) { + if (is.data.frame(x)) { + do.call(rbind, lapply(select, function(i) { + data.frame( + variable = i, + gmd = gmd_helper(x[[i]]) + ) + })) + } else { gmd_helper(x) + } } -#' @importFrom stats na.omit gmd_helper <- function(x) { if (!is.numeric(x)) return(NA) diff --git a/R/gof.R b/R/gof.R index 3699a235..9b6d614e 100644 --- a/R/gof.R +++ b/R/gof.R @@ -48,7 +48,6 @@ #' # equal to population #' chisq_gof(efc$e42dep, prop.table(table(efc$e42dep))) #' -#' @importFrom stats na.omit fitted resid formula as.formula lm pnorm chisq.test #' @export chisq_gof <- function(x, prob = NULL, weights = NULL) { if (inherits(x, "glm")) { diff --git a/R/helpfunctions.R b/R/helpfunctions.R index ce36f369..134ad328 100644 --- a/R/helpfunctions.R +++ b/R/helpfunctions.R @@ -12,60 +12,8 @@ is_merMod <- function(fit) { } -is_stan_model <- function(fit) { - inherits(fit, c("stanreg", "stanfit", "brmsfit")) -} - - -#' @importFrom sjmisc str_contains -get_glm_family <- function(fit) { - c.f <- class(fit) - - # do we have glm? if so, get link family. make exceptions - # for specific models that don't have family function - if (any(c.f %in% c("lme", "plm"))) { - fitfam <- "" - logit_link <- FALSE - } else { - fitfam <- stats::family(fit)$family - logit_link <- stats::family(fit)$link == "logit" - } - - # create logical for family - binom_fam <- fitfam %in% c("binomial", "quasibinomial") - poisson_fam <- fitfam %in% c("poisson", "quasipoisson") || - sjmisc::str_contains(fitfam, "negative binomial", ignore.case = TRUE) - - list(is_bin = binom_fam, is_pois = poisson_fam, is_logit = logit_link) -} - -# return names of objects passed as ellipses argument -dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) - - -#' @importFrom tidyr nest -#' @importFrom dplyr select filter group_vars -#' @importFrom stats complete.cases -#' @importFrom rlang .data -get_grouped_data <- function(x) { - # retain observations that are complete wrt grouping vars, then nest - grps <- x %>% - dplyr::group_modify(~ dplyr::filter(.x, stats::complete.cases(.y))) %>% - tidyr::nest() - - # arrange data - if (length(dplyr::group_vars(x)) == 1) - reihe <- order(grps[[1]]) - else - reihe <- order(grps[[1]], grps[[2]]) - grps <- grps[reihe, ] - - grps -} - - .compact_character <- function(x) { - x[!sapply(x, function(i) is.null(i) || nchar(i) == 0 || is.na(i) || any(i == "NULL", na.rm = TRUE))] + x[!sapply(x, function(i) is.null(i) || !nzchar(i, keepNA = TRUE) || is.na(i) || any(i == "NULL", na.rm = TRUE))] } @@ -106,3 +54,60 @@ get_grouped_data <- function(x) { ) l10n_info()[["UTF-8"]] && ((win_os && getRversion() >= "4.2") || (!win_os && getRversion() >= "4.0")) } + + +.is_pseudo_numeric <- function(x) { + (is.character(x) && !anyNA(suppressWarnings(as.numeric(stats::na.omit(x[nzchar(x, keepNA = TRUE)]))))) || (is.factor(x) && !anyNA(suppressWarnings(as.numeric(levels(x))))) # nolint +} + + +.misspelled_string <- function(source, searchterm, default_message = NULL) { + if (is.null(searchterm) || length(searchterm) < 1) { + return(default_message) + } + # used for many matches + more_found <- "" + # init default + msg <- "" + # remove matching strings + same <- intersect(source, searchterm) + searchterm <- setdiff(searchterm, same) + source <- setdiff(source, same) + # guess the misspelled string + possible_strings <- unlist(lapply(searchterm, function(s) { + 
source[.fuzzy_grep(source, s)] # nolint
+  }), use.names = FALSE)
+  if (length(possible_strings)) {
+    msg <- "Did you mean "
+    if (length(possible_strings) > 1) {
+      # make sure we don't print dozens of alternatives for larger data frames
+      if (length(possible_strings) > 5) {
+        more_found <- sprintf(
+          " We even found %i more possible matches, not shown here.",
+          length(possible_strings) - 5
+        )
+        possible_strings <- possible_strings[1:5]
+      }
+      msg <- paste0(msg, "one of ", toString(paste0("\"", possible_strings, "\"")))
+    } else {
+      msg <- paste0(msg, "\"", possible_strings, "\"")
+    }
+    msg <- paste0(msg, "?", more_found)
+  } else {
+    msg <- default_message
+  }
+  # no double white space
+  insight::trim_ws(msg)
+}
+
+
+.fuzzy_grep <- function(x, pattern, precision = NULL) {
+  if (is.null(precision)) {
+    precision <- round(nchar(pattern) / 3)
+  }
+  if (precision > nchar(pattern)) {
+    return(NULL)
+  }
+  # TRE approximate matching: "(pattern){~n}" matches with up to n errors
+  # (insertions, deletions or substitutions)
+  p <- sprintf("(%s){~%i}", pattern, precision)
+  grep(pattern = p, x = x, ignore.case = FALSE)
+}
diff --git a/R/inequ_trends.R b/R/inequ_trends.R
index e4415375..3b94a669 100644
--- a/R/inequ_trends.R
+++ b/R/inequ_trends.R
@@ -26,7 +26,7 @@
 #' in changes in rate differences and rate ratios. The function implements
 #' the algorithm proposed by \emph{Mackenbach et al. 2015}.
 #'
-#' @examples
+#' @examplesIf requireNamespace("ggplot2")
 #' # This example reproduces Fig. 1 of Mackenbach et al. 2015, p.5
 #'
 #' # 40 simulated time points, with an initial rate ratio of 2 and
@@ -47,27 +47,21 @@
 #' prev.data <- data.frame(lo, hi)
 #'
 #' # print values
-#' inequ_trend(prev.data, lo, hi)
+#' inequ_trend(prev.data, "lo", "hi")
 #'
 #' # plot trends - here we see that the relative inequalities
 #' # are increasing over time, while the absolute inequalities
 #' # are first increasing as well, but later are decreasing
 #' # (while rel. inequ. are still increasing)
-#' plot(inequ_trend(prev.data, lo, hi))
+#' plot(inequ_trend(prev.data, "lo", "hi"))
 #'
-#' @importFrom dplyr select
-#' @importFrom rlang quo_name enquo .data
 #' @export
 inequ_trend <- function(data, prev.low, prev.hi) {
   # prepare data for prevalence rates for low and hi status groups
   if (is.null(data) || missing(data)) {
     dat <- data.frame(prev.low, prev.hi)
   } else {
-    # get variable names
-    # create quosures
-    low <- rlang::quo_name(rlang::enquo(prev.low))
-    high <- rlang::quo_name(rlang::enquo(prev.hi))
-    dat <- dplyr::select(data, !! low, !! high)
+    dat <- data[c(prev.low, prev.hi)]
   }
 
   # ensure common column names
@@ -96,7 +90,7 @@ inequ_trend <- function(data, prev.low, prev.hi) {
   for (t in 2:nrow(dat)) {
     delta.low <- (dat$lo[t] - dat$lo[t - 1]) / dat$lo[t - 1]
     delta.hi <- (dat$hi[t] - dat$hi[t - 1]) / dat$hi[t - 1]
-    dat$rd[t] <- dat$rd[t - 1] + (dat$lo[t - 1 ] * delta.low - dat$hi[t - 1] * delta.hi)
+    dat$rd[t] <- dat$rd[t - 1] + (dat$lo[t - 1] * delta.low - dat$hi[t - 1] * delta.hi)
   }
 
   # return
diff --git a/R/is_prime.R b/R/is_prime.R
index 213fcbb5..f5b5ecef 100644
--- a/R/is_prime.R
+++ b/R/is_prime.R
@@ -2,23 +2,21 @@
 #' @name is_prime
 #'
 #' @description This function checks whether a number is, or the numbers in a
-#'    vector are prime numbers.
+#'   vector are, prime numbers.
 #'
 #' @param x An integer, or a vector of integers.
 #'
-#' @return \code{TRUE} for each prime number in \code{x}, \code{FALSE} otherwise.
+#' @return `TRUE` for each prime number in `x`, `FALSE` otherwise.
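+#'
+#' @details Primality is checked by trial division, as sketched in the code
+#'   below: an integer p is prime if p equals 2, or if no integer from 2 to
+#'   floor(sqrt(p)) divides it. For example, 89 is prime because none of
+#'   2, 3, ..., 9 divide it.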
 #'
 #' @examples
 #' is_prime(89)
 #' is_prime(15)
 #' is_prime(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
 #'
-#' @importFrom purrr map_lgl
-#' @importFrom sjmisc is_float
 #' @export
 is_prime <- function(x) {
-  if (sjmisc::is_float(x))
-    stop("`x` needs to be an integer value.", call. = F)
-
-  purrr::map_lgl(x, ~ .x == 2L || all(.x %% 2L:max(2, floor(sqrt(.x))) != 0))
+  if (is.numeric(x) && !all(x %% 1 == 0, na.rm = TRUE)) {
+    insight::format_error("`x` needs to be an integer value.")
+  }
+  # values below 2 (including 1) are not prime by definition
+  vapply(x, function(.x) .x >= 2L && (.x == 2L || all(.x %% 2L:max(2, floor(sqrt(.x))) != 0)), logical(1))
 }
diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R
index a66e3c15..24bc2cea 100644
--- a/R/kruskal_wallis_test.R
+++ b/R/kruskal_wallis_test.R
@@ -2,12 +2,24 @@
 #' @name kruskal_wallis_test
 #' @description This function performs a Kruskal-Wallis rank sum test, to test
 #' the null hypothesis that the population medians of all groups are
-#' equal. The alternative is that they differ in at least one.
+#' equal. The alternative is that at least one group differs. Unlike the
+#' underlying base R function `kruskal.test()`, this function allows for
+#' weighted tests.
 #'
 #' @inheritParams mann_whitney_test
+#' @inherit mann_whitney_test seealso
 #'
 #' @return A data frame with test results.
 #'
+#' @inheritSection mann_whitney_test Which test to use
+#'
+#' @references
+#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+#'   Dtsch Med Wochenschr 2007; 132: e24–e25
+#'
+#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+#'   Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+#'
 #' @details The function is simply a wrapper around [`kruskal.test()`]. The
 #' weighted version of the Kruskal-Wallis test is based on the **survey** package,
 #' using [`survey::svyranktest()`].
@@ -33,6 +45,8 @@
 #'   groups = rep(c("A", "B", "C"), each = 20)
 #' )
 #' kruskal_wallis_test(long_data, select = "scales", by = "groups")
+#' # base R equivalent
+#' kruskal.test(scales ~ groups, data = long_data)
 #' @export
 kruskal_wallis_test <- function(data,
                                 select = NULL,
                                 by = NULL,
                                 weights = NULL) {
@@ -41,13 +55,10 @@ kruskal_wallis_test <- function(data,
   insight::check_if_installed("datawizard")
 
   # sanity checks
-  .sanitize_htest_input(data, select, by, weights)
+  .sanitize_htest_input(data, select, by, weights, test = "kruskal_wallis_test")
 
   # does select indicate more than one variable?
   if (length(select) > 1) {
-    if (!is.null(by)) {
-      insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
-    }
     # we convert the data into long format, and create a grouping variable
     data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
     by <- select[2]
@@ -68,16 +79,16 @@
     insight::format_error("At least two groups are required, i.e.
data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint
   }
 
   if (is.null(weights)) {
-    .calculate_kw(dv, grp)
+    .calculate_kw(dv, grp, group_labels = c(select, by))
   } else {
-    .calculate_weighted_kw(dv, grp, data[[weights]])
+    .calculate_weighted_kw(dv, grp, data[[weights]], group_labels = c(select, by))
   }
 }
 
 
 # Kruskal-Wallis-Test --------------------------------------------
 
-.calculate_kw <- function(dv, grp, paired = FALSE) {
+.calculate_kw <- function(dv, grp, paired = FALSE, group_labels = NULL) {
   # prepare data
   wcdat <- data.frame(dv, grp)
   if (paired) {
@@ -95,7 +106,7 @@ kruskal_wallis_test <- function(data,
   )
 
   out <- data.frame(
-    data = wt$data.name,
+    data = paste(group_labels[1], "by", group_labels[2]),
     Chi2 = wt$statistic,
     df = wt$parameter,
     p = as.numeric(wt$p.value),
@@ -113,7 +124,7 @@
 
 # Weighted Kruskal-Wallis test (via survey) -----------------------------------
 
-.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE) {
+.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE, group_labels = NULL) {
   # check if pkg survey is available
   insight::check_if_installed("survey")
 
@@ -133,7 +144,7 @@
   }
 
   out <- data.frame(
-    data = paste(dv, "by", grp),
+    data = paste(group_labels[1], "by", group_labels[2]),
     Chi2 = result$statistic,
     df = result$parameter,
     p = as.numeric(result$p.value),
@@ -182,7 +193,7 @@ print.sj_htest_kw <- function(x, ...) {
   stat_symbol <- .format_symbols("Chi2")
   cat(sprintf(
-    "\n %s = %.3f, df = %i, %s\n\n",
+    "\n %s = %.2f, df = %i, %s\n\n",
     stat_symbol, x$Chi2, as.integer(round(x$df)), insight::format_p(x$p)
   ))
 }
diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R
index d16f1788..7874f5a6 100644
--- a/R/mann_whitney_test.R
+++ b/R/mann_whitney_test.R
@@ -1,29 +1,98 @@
-#' @title Mann-Whitney-Test
+#' @title Mann-Whitney test
 #' @name mann_whitney_test
-#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
-#'   sum test for _unpaired_ samples.
+#' @description This function performs a Mann-Whitney test (or Wilcoxon rank
+#'   sum test for _unpaired_ samples). Unlike the underlying base R function
+#'   `wilcox.test()`, this function allows for weighted tests and automatically
+#'   calculates effect sizes. For _paired_ (dependent) samples, or for one-sample
+#'   tests, please use the `wilcoxon_test()` function.
 #'
-#'   A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
-#'   independent samples have identical continuous distributions. It can be used
-#'   when the two continuous variables are not normally distributed.
+#'   A Mann-Whitney test is a non-parametric test for the null hypothesis that two
+#'   _independent_ samples have identical continuous distributions. It can be used
+#'   for ordinal scales or when the two continuous variables are not normally
+#'   distributed. For large samples, or approximately normally distributed variables,
+#'   the `t_test()` function can be used.
 #'
 #' @param data A data frame.
-#' @param select Name of the dependent variable (as string) to be used for the
-#'   test. `select` can also be a character vector, specifying the names of
-#'   multiple continuous variables. In this case, `by` is ignored and variables
-#'   specified in `select` are used to compute the test. This can be useful if
-#'   the data is in wide-format and no grouping variable is available.
-#' @param by Name of the grouping variable to be used for the test.
If `by` is
-#'   not a factor, it will be coerced to a factor. For `chi_squared_test()`, if
-#'   `probabilities` is provided, `by` must be `NULL`.
+#' @param select Name(s) of the continuous variable(s) (as character vector)
+#'   to be used as samples for the test. `select` can be one of the following:
+#'
+#'   - `select` can be used in combination with `by`, in which case `select` is
+#'     the name of the continuous variable (and `by` indicates a grouping factor).
+#'   - `select` can also be a character vector of length two or more (more than
+#'     two names only apply to `kruskal_wallis_test()`), in which case the two
+#'     continuous variables are treated as samples to be compared. `by` must be
+#'     `NULL` in this case.
+#'   - If `select` is of length **two** and `paired = TRUE`, the two samples
+#'     are considered as *dependent* and a paired test is carried out.
+#'   - If `select` specifies **one** variable and `by = NULL`, a one-sample test
+#'     is carried out (only applicable for `t_test()` and `wilcoxon_test()`).
+#' @param by Name of the variable indicating the groups. Required if `select`
+#'   specifies only one variable that contains all samples to be compared in the
+#'   test. If `by` is not a factor, it will be coerced to a factor. For
+#'   `chi_squared_test()`, if `probabilities` is provided, `by` must be `NULL`.
 #' @param weights Name of an (optional) weighting variable to be used for the test.
-#' @param distribution Indicates how the null distribution of the test statistic
-#'   should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"`
-#'   (default). See [`coin::wilcox_test()`] for details.
+#' @param alternative A character string specifying the alternative hypothesis,
+#'   must be one of `"two.sided"` (default), `"greater"` or `"less"`. See `?t.test`
+#'   and `?wilcox.test`.
+#' @param mu The hypothesized difference in means (for `t_test()`) or location
+#'   shift (for `mann_whitney_test()`). The default is 0.
+#' @param ... Additional arguments passed to `wilcox.test()` (for unweighted
+#'   tests, i.e. when `weights = NULL`).
+#'
+#' @section Which test to use:
+#' The following table provides an overview of which test to use for different
+#' types of data. The choice of test depends on the scale of the outcome
+#' variable and the number of samples to compare.
+#'
+#' | **Samples**     | **Scale of Outcome**   | **Significance Test**           |
+#' |-----------------|------------------------|---------------------------------|
+#' | 1               | binary / nominal       | `chi_squared_test()`            |
+#' | 1               | continuous, not normal | `wilcoxon_test()`               |
+#' | 1               | continuous, normal     | `t_test()`                      |
+#' | 2, independent  | binary / nominal       | `chi_squared_test()`            |
+#' | 2, independent  | continuous, not normal | `mann_whitney_test()`           |
+#' | 2, independent  | continuous, normal     | `t_test()`                      |
+#' | 2, dependent    | binary (only 2x2)      | `chi_squared_test(paired=TRUE)` |
+#' | 2, dependent    | continuous, not normal | `wilcoxon_test()`               |
+#' | 2, dependent    | continuous, normal     | `t_test(paired=TRUE)`           |
+#' | >2, independent | continuous, not normal | `kruskal_wallis_test()`         |
+#' | >2, independent | continuous, normal     | `datawizard::means_by_group()`  |
+#' | >2, dependent   | continuous, not normal | _not yet implemented_ (1)       |
+#' | >2, dependent   | continuous, normal     | _not yet implemented_ (2)       |
+#'
+#' (1) More than two dependent samples are considered as _repeated measurements_.
+#'   For ordinal or non-normally distributed outcomes, these samples are
+#'   usually tested using [`friedman.test()`], which requires the samples
+#'   in one variable, the groups to compare in another variable, and a third
+#'   variable indicating the repeated measurements (subject IDs) - see the
+#'   sketch below.
+#'
+#' (2) More than two dependent samples are considered as _repeated measurements_.
+#'   For normally distributed outcomes, these samples are usually tested using
+#'   an ANOVA for repeated measurements. A more sophisticated approach would
+#'   be using a linear mixed model.
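+#'
+#' As a minimal sketch of case (1) (illustration only; `outcome`, `treatment`
+#' and `subject` are hypothetical column names in a long-format data frame `d`):
+#'
+#' ```
+#' # Friedman test: outcome by treatment, blocked by subject (repeated measures)
+#' friedman.test(outcome ~ treatment | subject, data = d)
+#' ```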
+#'
+#' @seealso
+#' - [`mann_whitney_test()`] for unpaired (independent) samples.
+#' - [`t_test()`] for parametric t-tests.
+#' - [`kruskal_wallis_test()`] for non-parametric ANOVA (i.e. more than two samples).
+#' - [`wilcoxon_test()`] for Wilcoxon signed rank tests for paired (dependent) samples.
+#' - [`chi_squared_test()`] for chi-squared tests (two categorical variables).
 #'
 #' @return A data frame with test results. The function returns p and Z-values
 #'   as well as effect size r and group-rank-means.
 #'
+#' @references
+#' - Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+#'   Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+#'   That Use the Chi-Squared Statistic. Mathematics, 11, 1982.
+#'   \doi{10.3390/math11091982}
+#'
+#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+#'   Dtsch Med Wochenschr 2007; 132: e24–e25
+#'
+#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+#'   Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+#'
 #' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`]
 #'   (the latter to extract effect sizes). The weighted version of the test is
 #'   based on [`survey::svyranktest()`].
@@ -34,49 +103,51 @@
 #'   - medium effect >= 0.3
 #'   - large effect >= 0.5
 #'
-#' **r** is calcuated as:
-#'
-#' ```
-#' r = |Z| / sqrt(n1 + n2)
-#' ```
+#' **r** is calculated as \eqn{r = \frac{|Z|}{\sqrt{n1 + n2}}}.
 #'
-#' @examples
+#' @examplesIf requireNamespace("coin") && requireNamespace("survey")
 #' data(efc)
-#' # Mann-Whitney-U-Tests for elder's age by elder's sex.
+#' # Mann-Whitney-U tests for elder's age by elder's sex.
 #' mann_whitney_test(efc, "e17age", by = "e16sex")
+#' # base R equivalent
+#' wilcox.test(e17age ~ e16sex, data = efc)
 #'
 #' # when data is in wide-format, specify all relevant continuous
 #' # variables in `select` and omit `by`
 #' set.seed(123)
 #' wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20))
 #' mann_whitney_test(wide_data, select = c("scale1", "scale2"))
-#'
+#' # base R equivalent
+#' wilcox.test(wide_data$scale1, wide_data$scale2)
 #' # same as if we had data in long format, with grouping variable
 #' long_data <- data.frame(
 #'   scales = c(wide_data$scale1, wide_data$scale2),
-#'   groups = rep(c("A", "B"), each = 20)
+#'   groups = as.factor(rep(c("A", "B"), each = 20))
 #' )
 #' mann_whitney_test(long_data, select = "scales", by = "groups")
+#' # base R equivalent
+#' wilcox.test(scales ~ groups, long_data)
 #' @export
 mann_whitney_test <- function(data,
                               select = NULL,
                               by = NULL,
                               weights = NULL,
-                              distribution = "asymptotic") {
+                              mu = 0,
+                              alternative = "two.sided",
+                              ...)
{
   insight::check_if_installed("datawizard")
+  alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater"))
 
   # sanity checks
-  .sanitize_htest_input(data, select, by, weights)
+  .sanitize_htest_input(data, select, by, weights, test = "mann_whitney_test")
+
+  # alternative only if weights are NULL
+  if (!is.null(weights) && alternative != "two.sided") {
+    insight::format_error("Argument `alternative` must be `two.sided` if `weights` are specified.")
+  }
 
   # does select indicate more than one variable?
   if (length(select) > 1) {
-    # sanity check - may only specify two variable names
-    if (length(select) > 2) {
-      insight::format_error("You may only specify two variables for Mann-Whitney test.")
-    }
-    if (!is.null(by)) {
-      insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
-    }
     # we convert the data into long format, and create a grouping variable
     data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
     by <- select[2]
@@ -104,7 +175,7 @@ mann_whitney_test <- function(data,
   }
 
   if (is.null(weights)) {
-    .calculate_mwu(dv, grp, distribution, group_labels)
+    .calculate_mwu(dv, grp, alternative, mu, group_labels, ...)
   } else {
     .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels)
   }
 }
 
 
 # Mann-Whitney-Test for two groups --------------------------------------------
 
-.calculate_mwu <- function(dv, grp, distribution, group_labels) {
+.calculate_mwu <- function(dv, grp, alternative, mu, group_labels, ...) {
   insight::check_if_installed("coin")
   # prepare data
   wcdat <- data.frame(dv, grp)
   # perform Wilcoxon rank sum test (the coin version, to extract Z later)
-  wt <- coin::wilcox_test(dv ~ grp, data = wcdat, distribution = distribution)
+  wt <- coin::wilcox_test(dv ~ grp, data = wcdat)
 
   # for rank mean
   group_levels <- levels(grp)
@@ -126,9 +197,16 @@
   # compute statistics
   u <- as.numeric(coin::statistic(wt, type = "linear"))
   z <- as.numeric(coin::statistic(wt, type = "standardized"))
-  p <- coin::pvalue(wt)
   r <- abs(z / sqrt(length(dv)))
-  w <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat)$statistic)
+  htest <- suppressWarnings(stats::wilcox.test(
+    dv ~ grp,
+    data = wcdat,
+    alternative = alternative,
+    mu = mu,
+    ...
+ )) + w <- htest$statistic + p <- htest$p.value # group means dat_gr1 <- stats::na.omit(dv[grp == group_levels[1]]) @@ -149,7 +227,9 @@ mann_whitney_test <- function(data, w = w, z = z, r = r, - p = as.numeric(p) + p = as.numeric(p), + mu = mu, + alternative = alternative ) attr(out, "rank_means") <- stats::setNames( c(rank_mean_1, rank_mean_2), @@ -211,13 +291,14 @@ mann_whitney_test <- function(data, z <- result$statistic r <- abs(z / sqrt(sum(n_grp1, n_grp2))) - out <- data.frame( + out <- data_frame( group1 = group_levels[1], group2 = group_levels[2], estimate = result$estimate, z = z, r = r, - p = as.numeric(result$p.value) + p = as.numeric(result$p.value), + alternative = "two.sided" ) attr(out, "rank_means") <- stats::setNames( @@ -238,66 +319,25 @@ mann_whitney_test <- function(data, # helper ---------------------------------------------------------------------- -.misspelled_string <- function(source, searchterm, default_message = NULL) { - if (is.null(searchterm) || length(searchterm) < 1) { - return(default_message) - } - # used for many matches - more_found <- "" - # init default - msg <- "" - # remove matching strings - same <- intersect(source, searchterm) - searchterm <- setdiff(searchterm, same) - source <- setdiff(source, same) - # guess the misspelled string - possible_strings <- unlist(lapply(searchterm, function(s) { - source[.fuzzy_grep(source, s)] # nolint - }), use.names = FALSE) - if (length(possible_strings)) { - msg <- "Did you mean " - if (length(possible_strings) > 1) { - # make sure we don't print dozens of alternatives for larger data frames - if (length(possible_strings) > 5) { - more_found <- sprintf( - " We even found %i more possible matches, not shown here.", - length(possible_strings) - 5 - ) - possible_strings <- possible_strings[1:5] - } - msg <- paste0(msg, "one of ", toString(paste0("\"", possible_strings, "\""))) - } else { - msg <- paste0(msg, "\"", possible_strings, "\"") - } - msg <- paste0(msg, "?", more_found) - } else { - msg <- default_message +.sanitize_htest_input <- function(data, select, by, weights, test = NULL) { + # check if arguments are NULL + if (is.null(select)) { + insight::format_error("Argument `select` is missing.") } - # no double white space - insight::trim_ws(msg) -} - - -.fuzzy_grep <- function(x, pattern, precision = NULL) { - if (is.null(precision)) { - precision <- round(nchar(pattern) / 3) + # sanity check - may only specify two variable names + if (identical(test, "mann_whitney_test") && length(select) > 2) { + insight::format_error("You may only specify two variables for Mann-Whitney test.") } - if (precision > nchar(pattern)) { - return(NULL) + if (identical(test, "mann_whitney_test") && length(select) == 1 && is.null(by)) { + insight::format_error("Only one variable provided in `select`, but none in `by`. 
You need to specify a second continuous variable in `select`, or a grouping variable in `by` for Mann-Whitney test.") # nolint
+  }
-  p <- sprintf("(%s){~%i}", pattern, precision)
-  grep(pattern = p, x = x, ignore.case = FALSE)
-}
-
-.sanitize_htest_input <- function(data, select, by, weights) {
-  # check if arguments are NULL
-  if (is.null(select)) {
-    insight::format_error("Argument `select` is missing.")
+  # sanity check - may only specify two variable names
+  if (identical(test, "t_test") && length(select) > 2) {
+    insight::format_error("You may only specify two variables for Student's t test.")
   }
-  # `by` is only allowed to be NULL if `select` specifies more than one variable
-  if (is.null(by) && length(select) == 1) {
-    insight::format_error("Arguments `by` is missing.")
+  if ((!is.null(test) && test %in% c("t_test", "kruskal_wallis_test", "mann_whitney_test")) && length(select) > 1 && !is.null(by)) { # nolint
+    insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
   }
 
   # check if arguments have correct length or are of correct type
@@ -307,7 +347,7 @@ mann_whitney_test <- function(data,
   if (!is.null(by) && (length(by) != 1 || !is.character(by))) {
     insight::format_error("Argument `by` must be a character string with the name of a single variable.")
   }
-  if (!is.null(weights) && length(weights) != 1) {
+  if (!is.null(weights) && (length(weights) != 1 || !is.character(weights))) {
    insight::format_error("Argument `weights` must be a character string with the name of a single variable.")
   }
 
@@ -374,5 +414,21 @@ print.sj_htest_mwu <- function(x, ...) {
     ), "cyan"
   )
-  cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p)))
+  # alternative hypothesis
+  if (!is.null(x$alternative) && !is.null(x$mu)) {
+    alt_string <- switch(x$alternative,
+      two.sided = "not equal to",
+      less = "less than",
+      greater = "greater than"
+    )
+    alt_string <- paste("true location shift is", alt_string, x$mu)
+    insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan")
+  }
+
+  if (!is.null(x$w)) {
+    w_stat <- paste0("W = ", insight::format_value(x$w, protect_integers = TRUE), ", ")
+  } else {
+    w_stat <- ""
+  }
+  cat(sprintf("\n %sr = %.2f, Z = %.2f, %s\n\n", w_stat, x$r, x$z, insight::format_p(x$p)))
 }
diff --git a/R/phi.R b/R/phi.R
index 48a07f1e..eb57406c 100644
--- a/R/phi.R
+++ b/R/phi.R
@@ -19,19 +19,19 @@ phi.ftable <- function(tab, ...) {
 
 #' @export
 phi.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...)
{ - terms <- all.vars(formula) - tab <- table(data[[terms[1]]], data[[terms[2]]]) + formula_terms <- all.vars(formula) + tab <- table(data[[formula_terms[1]]], data[[formula_terms[2]]]) method <- match.arg(method) if (is.null(ci.lvl) || is.na(ci.lvl)) { - .cramer(tab) + .cramers_v(tab) } else { - straps <- sjstats::bootstrap(data[terms], n) + straps <- sjstats::bootstrap(data[formula_terms], n) tables <- lapply(straps$strap, function(x) { dat <- as.data.frame(x) table(dat[[1]], dat[[2]]) }) - phis <- sapply(tables, function(x) .phi(x)) + phis <- sapply(tables, .phi) ci <- boot_ci(phis, ci.lvl = ci.lvl, method = method) data_frame( @@ -44,6 +44,7 @@ phi.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist .phi <- function(tab) { + insight::check_if_installed("MASS") # convert to flat table if (!inherits(tab, "ftable")) tab <- stats::ftable(tab) diff --git a/R/prop.R b/R/prop.R index 167b3cdd..6c980541 100644 --- a/R/prop.R +++ b/R/prop.R @@ -35,7 +35,7 @@ #' returns a data frame with one column per group with grouping categories, #' followed by one column with proportions per condition. #' -#' @examples +#' @examplesIf getRversion() >= "4.2.0" && requireNamespace("datawizard", quietly = TRUE) #' data(efc) #' #' # proportion of value 1 in e42dep @@ -52,10 +52,10 @@ #' prop(efc, e42dep == 1, e42dep > 2, na.rm = FALSE) #' #' # for factors or character vectors, use quoted or unquoted values -#' library(sjmisc) +#' library(datawizard) #' # convert numeric to factor, using labels as factor levels -#' efc$e16sex <- to_label(efc$e16sex) -#' efc$n4pstu <- to_label(efc$n4pstu) +#' efc$e16sex <- to_factor(efc$e16sex) +#' efc$n4pstu <- to_factor(efc$n4pstu) #' #' # get proportion of female older persons #' prop(efc, e16sex == female) @@ -70,40 +70,16 @@ #' ) #' #' # also works with pipe-chains -#' library(dplyr) -#' efc %>% prop(e17age > 70) -#' efc %>% prop(e17age > 70, e16sex == 1) -#' -#' # and with group_by -#' efc %>% -#' group_by(e16sex) %>% -#' prop(e42dep > 2) -#' -#' efc %>% -#' select(e42dep, c161sex, c172code, e16sex) %>% -#' group_by(c161sex, c172code) %>% -#' prop(e42dep > 2, e16sex == 1) -#' -#' # same for "props()" -#' efc %>% -#' select(e42dep, c161sex, c172code, c12hour, n4pstu) %>% -#' group_by(c161sex, c172code) %>% -#' props( -#' e42dep > 2, -#' c12hour > 20 & c12hour < 40, -#' n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3' -#' ) +#' efc |> prop(e17age > 70) +#' efc |> prop(e17age > 70, e16sex == 1) #' @export prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { # check argument if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - - # get dots - dots <- match.call(expand.dots = FALSE)$`...` - - proportions(data, dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) + dots <- match.call(expand.dots = FALSE)[["..."]] + .proportions(data, dots = dots, weight.by = weights, na.rm, digits, multi_logical = FALSE) } @@ -114,16 +90,12 @@ props <- function(data, ..., na.rm = TRUE, digits = 4) { if (!is.data.frame(data)) { insight::format_error("`data` needs to be a data frame.") } - - # get dots - dots <- match.call(expand.dots = FALSE)$`...` - - proportions(data, dots, NULL, na.rm, digits, multi_logical = TRUE) + dots <- match.call(expand.dots = FALSE)[["..."]] + .proportions(data, dots = dots, NULL, na.rm, digits, multi_logical = TRUE) } -#' @importFrom purrr map_df -proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { +.proportions <- function(data, dots, weight.by, 
na.rm, digits, multi_logical) { # remember comparisons comparisons <- lapply(dots, function(x) { # to character, and remove spaces and quotes @@ -132,86 +104,35 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { x }) - # do we have a grouped data frame? if (inherits(data, "grouped_df")) { - - # remember order of values - reihenfolge <- NULL - - # get grouped data - grps <- get_grouped_data(data) - - # now get proportions for each subset - fr <- purrr::map_df( - seq_len(nrow(grps)), - function(i) { - # get data from grouped data frame - .d <- grps$data[[i]] - - # iterate dots (comparing conditions) - if (multi_logical) - result <- lapply(dots, get_multiple_proportion, .d, na.rm, digits) - else - result <- lapply(dots, get_proportion, .d, weight.by, na.rm, digits) - - as.data.frame(t(unlist(result))) - } - ) - - - # now we need the values from the groups of the grouped data frame - for (i in (ncol(grps) - 1):1) { - # get value label - var.name <- colnames(grps)[i] - val.labels <- suppressWarnings( - rep(sjlabelled::get_labels(data[[var.name]]), length.out = nrow(fr)) - ) - - # if we have no value labels, use values instead - if (is.null(val.labels)) { - val.labels <- - rep(unique(sort(data[[var.name]])), length.out = nrow(fr)) - } - - # add row order, based on values of grouping variables - reihenfolge <- rep(sort(unique(sjlabelled::as_numeric(data[[var.name]]))), length.out = nrow(fr)) %>% - as.data.frame() %>% - dplyr::bind_cols(reihenfolge) - - # bind values as column - fr <- dplyr::bind_cols(data.frame(val.labels, stringsAsFactors = FALSE), fr) - } - - # get column names. we need variable labels as column names - var.names <- colnames(grps)[seq_len(ncol(grps) - 1)] - var.labels <- sjlabelled::get_label(data[, var.names], def.value = var.names) - - # set variable labels and comparisons as colum names - colnames(fr) <- c(var.labels, comparisons) - - # order rows by values of grouping variables - fr <- fr[do.call(order, reihenfolge), ] - - fr - + grps <- attributes(data)$groups + result <- lapply(grps[[".rows"]], function(x) { + .process_prop(data[x, , drop = FALSE], comparisons, dots, multi_logical, na.rm, digits, weight.by) + }) } else { - # iterate dots (comparing conditions) - if (multi_logical) - result <- lapply(dots, get_multiple_proportion, data, na.rm, digits) - else - result <- lapply(dots, get_proportion, data, weight.by, na.rm, digits) + result <- .process_prop(data, comparisons, dots, multi_logical, na.rm, digits, weight.by) + } + result +} - # if we have more than one proportion, return a data frame. this allows us - # to save more information, the condition and the proportion value - if (length(comparisons) > 1) { - return(data_frame( - condition = as.character(unlist(comparisons)), - prop = unlist(result) - )) - } - unlist(result) +.process_prop <- function(data, comparisons, dots, multi_logical, na.rm, digits, weight.by) { + # iterate dots (comparing conditions) + if (multi_logical) + result <- lapply(dots, get_multiple_proportion, data, na.rm, digits) + else + result <- lapply(dots, get_proportion, data, weight.by, na.rm, digits) + + # if we have more than one proportion, return a data frame. 
this allows us + # to save more information, the condition and the proportion value + if (length(comparisons) > 1) { + return(data_frame( + condition = as.character(unlist(comparisons)), + prop = unlist(result) + )) } + + unlist(result) } @@ -241,19 +162,16 @@ get_proportion <- function(x, data, weight.by, na.rm, digits) { if (!is.null(weight.by)) f <- weight(f, weights = weight.by) # get proportions - if (x.parts[2] == "==") - dummy <- f == v - else if (x.parts[2] == "!=") - dummy <- f != v - else if (x.parts[2] == "<") - dummy <- f < v - else if (x.parts[2] == ">") - dummy <- f > v - else - dummy <- f == v + dummy <- switch(x.parts[2], + "==" = f == v, + "!=" = f != v, + "<" = f < v, + ">" = f > v, + f == v + ) # remove missings? - if (na.rm) dummy <- na.omit(dummy) + if (na.rm) dummy <- stats::na.omit(dummy) # get proportion round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) @@ -265,7 +183,7 @@ get_multiple_proportion <- function(x, data, na.rm, digits) { dummy <- with(data, eval(parse(text = deparse(x)))) # remove missings? - if (na.rm) dummy <- na.omit(dummy) + if (na.rm) dummy <- stats::na.omit(dummy) # get proportion round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) diff --git a/R/re-exports.R b/R/re-exports.R index f16d9fb6..1b5605ae 100644 --- a/R/re-exports.R +++ b/R/re-exports.R @@ -1,12 +1,3 @@ -#' @importFrom magrittr %>% -#' @export -magrittr::`%>%` - - -#' @importFrom sjmisc typical_value -#' @export -sjmisc::typical_value - #' @importFrom performance mse #' @export performance::mse @@ -26,3 +17,15 @@ bayestestR::equivalence_test #' @importFrom insight link_inverse #' @export insight::link_inverse + +#' @importFrom datawizard weighted_sd +#' @export +datawizard::weighted_sd + +#' @importFrom datawizard weighted_mean +#' @export +datawizard::weighted_mean + +#' @importFrom datawizard weighted_median +#' @export +datawizard::weighted_median diff --git a/R/samplesize_mixed.R b/R/samplesize_mixed.R index 66f170cf..42521118 100644 --- a/R/samplesize_mixed.R +++ b/R/samplesize_mixed.R @@ -36,7 +36,7 @@ #' additionally include repeated measures (three-level-designs) may work #' as well, however, the computed sample size may be less accurate. #' -#' @examples +#' @examplesIf requireNamespace("pwr") #' # Sample size for multilevel model with 30 cluster groups and a small to #' # medium effect size (Cohen's d) of 0.3. 27 subjects per cluster and #' # hence a total sample size of about 802 observations is needed. @@ -47,7 +47,13 @@ #' # hence a total sample size of about 107 observations is needed. #' samplesize_mixed(eff.size = .2, df.n = 5, k = 20, power = .9) #' @export -samplesize_mixed <- function(eff.size, df.n = NULL, power = .8, sig.level = .05, k, n, icc = 0.05) { +samplesize_mixed <- function(eff.size, + df.n = NULL, + power = 0.8, + sig.level = 0.05, + k, + n, + icc = 0.05) { if (!requireNamespace("pwr", quietly = TRUE)) { stop("Package `pwr` needed for this function to work. Please install it.", call. = FALSE) } diff --git a/R/se_ybar.R b/R/se_ybar.R index 0b1bc882..642dc856 100644 --- a/R/se_ybar.R +++ b/R/se_ybar.R @@ -12,14 +12,9 @@ #' #' @references Gelman A, Hill J. 2007. Data analysis using regression and multilevel/hierarchical models. 
Cambridge, New York: Cambridge University Press
 #'
-#' @examples
-#' if (require("lme4")) {
-#'   fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy)
-#'   se_ybar(fit)
-#' }
-#' @importFrom lme4 ngrps
-#' @importFrom stats nobs
-#' @importFrom purrr map_dbl
+#' @examplesIf require("lme4")
+#' fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy)
+#' se_ybar(fit)
 #' @export
 se_ybar <- function(fit) {
   # get model icc
@@ -32,7 +27,7 @@ se_ybar <- function(fit) {
   tot_var <- sum(tau.00, vars$var.residual)
 
   # get number of groups
-  m.cnt <- lme4::ngrps(fit)
+  m.cnt <- vapply(fit@flist, nlevels, integer(1))
 
   # compute number of observations per group (level-2-unit)
   obs <- round(stats::nobs(fit) / m.cnt)
@@ -41,7 +36,9 @@ se_ybar <- function(fit) {
   icc <- tau.00 / tot_var
 
   # compute standard error of sample mean
-  se <- purrr::map_dbl(seq_len(length(m.cnt)), ~ sqrt((tot_var / stats::nobs(fit)) * design_effect(n = obs[.x], icc = icc[.x])))
+  se <- unlist(lapply(seq_along(m.cnt), function(.x) {
+    sqrt((tot_var / stats::nobs(fit)) * design_effect(n = obs[.x], icc = icc[.x]))
+  }))
 
   # give names for se, so user sees, which random effect has what impact
   names(se) <- names(m.cnt)
diff --git a/R/select_helpers.R b/R/select_helpers.R
index de7de9b2..87d6104c 100644
--- a/R/select_helpers.R
+++ b/R/select_helpers.R
@@ -13,9 +13,8 @@ string_ends_with <- function(pattern, x) {
   grep(pattern, x, perl = TRUE)
 }
 
-#' @importFrom purrr map
 string_one_of <- function(pattern, x) {
-  m <- unlist(purrr::map(pattern, ~ grep(., x, fixed = TRUE, useBytes = TRUE)))
+  m <- unlist(lapply(pattern, grep, x = x, fixed = TRUE, useBytes = TRUE))
   x[m]
 }
 
@@ -32,5 +31,5 @@ obj_has_name <- function(x, name) {
 }
 
 obj_has_rownames <- function(x) {
-  !identical(as.character(1:nrow(x)), rownames(x))
+  !identical(as.character(seq_len(nrow(x))), rownames(x))
 }
diff --git a/R/svy_median.R b/R/svy_median.R
index 963aea2c..9a0d68f2 100644
--- a/R/svy_median.R
+++ b/R/svy_median.R
@@ -1,11 +1,8 @@
-#' @rdname weighted_sd
-#' @importFrom stats as.formula
+#' @rdname weighted_se
 #' @export
 survey_median <- function(x, design) {
   # check if pkg survey is available
-  if (!requireNamespace("survey", quietly = TRUE)) {
-    stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE)
-  }
+  insight::check_if_installed("survey")
 
   # deparse
   v <- stats::as.formula(paste("~", as.character(substitute(x))))
diff --git a/R/svyglmnb.R b/R/svyglmnb.R
index 309b8095..26c84d52 100644
--- a/R/svyglmnb.R
+++ b/R/svyglmnb.R
@@ -65,14 +65,9 @@ utils::globalVariables("scaled.weights")
 #' # print coefficients and standard errors
 #' fit
 #' }
-#' @importFrom MASS glm.nb
-#' @importFrom stats weights update model.frame coef as.formula family
 #' @export
 svyglm.nb <- function(formula, design, ...) {
-  # check if pkg survey is available
-  if (!requireNamespace("survey", quietly = TRUE)) {
-    stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE)
-  }
+  insight::check_if_installed(c("survey", "MASS"))
 
   # get design weights. we need to scale these weights for the glm.nb() function
   dw <- stats::weights(design)
diff --git a/R/svyglmzip.R b/R/svyglmzip.R
index f9b575b9..ba6dd5f8 100644
--- a/R/svyglmzip.R
+++ b/R/svyglmzip.R
@@ -45,19 +45,9 @@ utils::globalVariables("scaled.weights")
 #' # print coefficients and standard errors
 #' fit
 #' }
-#' @importFrom insight find_formula
-#' @importFrom stats weights update model.frame coef as.formula family
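+#'
+#' @details A note on the implementation (inferred from the function body
+#'   below): the design weights are rescaled to a mean of 1, i.e.
+#'   `scaled.weights = dw / mean(dw)`, so that the rescaled weights sum to the
+#'   number of observations before the zero-inflated model is fitted.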
 #' @export
 svyglm.zip <- function(formula, design, ...)
 {
-  # check if pkg survey is available
-  if (!requireNamespace("survey", quietly = TRUE)) {
-    stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE)
-  }
-
-  if (!requireNamespace("pscl", quietly = TRUE)) {
-    stop("Package `pscl` needed to for this function to work. Please install it.", call. = FALSE)
-  }
-
+  insight::check_if_installed(c("survey", "pscl"))
 
   # get design weights. we need to scale these weights for the zeroinfl() function
   dw <- stats::weights(design)
@@ -66,7 +56,7 @@ svyglm.zip <- function(formula, design, ...) {
   design <- stats::update(design, scaled.weights = dw / mean(dw, na.rm = TRUE))
 
   # fit ZIP model, with scaled design weights
-  mod <- pscl::zeroinfl(formula, data = stats::model.frame(design), weights = scaled.weights, ...)
+  mod <- suppressWarnings(pscl::zeroinfl(formula, data = stats::model.frame(design), weights = scaled.weights, ...))
   ff <- insight::find_formula(mod)
 
   # fit survey model, using maximum likelihood estimation
@@ -94,7 +84,6 @@ svyglm.zip <- function(formula, design, ...) {
 }
 
-#' @importFrom stats dpois
 # log-likelihood function used in "svymle()"
 sjstats_loglik_zip <- function(y, eta, logitp) {
   mu <- exp(eta)
diff --git a/R/t_test.R b/R/t_test.R
new file mode 100644
index 00000000..4f7e9bbf
--- /dev/null
+++ b/R/t_test.R
@@ -0,0 +1,425 @@
+#' @title Student's t test
+#' @name t_test
+#' @description This function performs a Student's t test for two independent
+#'   samples, for paired samples, or for one sample. Unlike the underlying
+#'   base R function `t.test()`, this function allows for weighted tests and
+#'   automatically calculates effect sizes.
+#'
+#' @inheritParams mann_whitney_test
+#' @param paired Logical, whether to compute a paired t-test for dependent
+#'   samples.
+#' @inherit mann_whitney_test seealso
+#'
+#' @inheritSection mann_whitney_test Which test to use
+#'
+#' @details Interpretation of effect sizes is based on rules described in
+#'   [`effectsize::interpret_cohens_d()`] and [`effectsize::interpret_hedges_g()`].
+#'
+#' @return A data frame with test results.
+#'
+#' @references
+#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+#'   Dtsch Med Wochenschr 2007; 132: e24–e25
+#'
+#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+#'   Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+#'
+#' @examplesIf requireNamespace("effectsize")
+#' data(sleep)
+#' # one-sample t-test
+#' t_test(sleep, "extra")
+#' # base R equivalent
+#' t.test(extra ~ 1, data = sleep)
+#'
+#' # two-sample t-test, by group
+#' t_test(mtcars, "mpg", by = "am")
+#' # base R equivalent
+#' t.test(mpg ~ am, data = mtcars)
+#'
+#' # paired t-test
+#' t_test(mtcars, c("mpg", "hp"), paired = TRUE)
+#' # base R equivalent
+#' t.test(mtcars$mpg, mtcars$hp, paired = TRUE)
+#' @export
+t_test <- function(data,
+                   select = NULL,
+                   by = NULL,
+                   weights = NULL,
+                   paired = FALSE,
+                   mu = 0,
+                   alternative = "two.sided") {
+  insight::check_if_installed(c("datawizard", "effectsize"))
+  alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater"))
+
+  # sanity checks
+  .sanitize_htest_input(data, select, by, weights, test = "t_test")
+  data_name <- NULL
+
+  # filter and remove NA
+  data <- stats::na.omit(data[c(select, by, weights)])
+
+  # does select indicate more than one variable? We then reshape the data
+  # to have one continuous scale and one grouping variable
+  if (length(select) > 1) {
+    # paired?
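+    # (illustration) a paired t-test is equivalent to a one-sample t-test on
+    # the differences: t.test(mtcars$mpg, mtcars$hp, paired = TRUE) matches
+    # t.test(mtcars$mpg - mtcars$hp, mu = 0). That is why we can simply
+    # subtract the two columns and clear `by` below.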
+    if (paired) {
+      # subtract the two variables for paired t-test, and "set" by to NULL
+      data[[select[1]]] <- data[[select[1]]] - data[[select[2]]]
+      data_name <- paste(select[1], "and", select[2])
+      select <- select[1]
+      by <- NULL
+    } else {
+      # we convert the data into long format, and create a grouping variable
+      data <- datawizard::data_to_long(
+        data[c(select, weights)],
+        select = select,
+        names_to = "group",
+        values_to = "scale"
+      )
+      by <- select[2]
+      select <- select[1]
+      # after converting to long, we have the "grouping" variable first in the data
+      colnames(data) <- c(weights, by, select)
+    }
+  }
+
+  # get data
+  dv <- data[[select]]
+
+  # for two-sample t-test...
+  if (!is.null(by)) {
+    grp <- data[[by]]
+    # coerce to factor
+    grp <- datawizard::to_factor(grp)
+    # only two groups allowed
+    if (insight::n_unique(grp) > 2) {
+      insight::format_error("Only two groups are allowed for Student's t test.") # nolint
+    }
+    # value labels
+    group_labels <- names(attr(data[[by]], "labels", exact = TRUE))
+    if (is.null(group_labels)) {
+      group_labels <- levels(droplevels(grp))
+    }
+    data_name <- paste(select, "by", by)
+  } else {
+    # one-sample t-test...
+    grp <- NULL
+    group_labels <- select
+    if (is.null(data_name)) {
+      data_name <- select
+    }
+  }
+
+  if (is.null(weights)) {
+    .calculate_ttest(dv, grp, mu, paired, alternative, group_labels, data_name)
+  } else {
+    .calculate_weighted_ttest(dv, grp, mu, paired, alternative, data[[weights]], group_labels, data_name)
+  }
+}
+
+
+# t-test for two groups (or one sample) ----------------------------------------
+
+.calculate_ttest <- function(dv, grp, mu, paired, alternative, group_labels, data_name) {
+  insight::check_if_installed("effectsize")
+  # prepare data
+  if (is.null(grp)) {
+    tdat <- data.frame(dv)
+    t_formula <- stats::as.formula("dv ~ 1")
+  } else {
+    tdat <- data.frame(dv, grp)
+    t_formula <- stats::as.formula("dv ~ grp")
+  }
+  # perform t-test
+  htest <- stats::t.test(
+    t_formula,
+    data = tdat,
+    alternative = alternative,
+    mu = mu
+  )
+  test_statistic <- htest$statistic
+  # Cohen's d for larger samples, Hedges' g (bias-corrected) for small samples
+  if (nrow(tdat) > 20) {
+    effect_size <- stats::setNames(
+      effectsize::cohens_d(
+        t_formula,
+        data = tdat,
+        alternative = alternative,
+        mu = mu
+      )$Cohens_d,
+      "Cohens_d"
+    )
+  } else {
+    effect_size <- stats::setNames(
+      effectsize::hedges_g(
+        t_formula,
+        data = tdat,
+        alternative = alternative,
+        mu = mu
+      )$Hedges_g,
+      "Hedges_g"
+    )
+  }
+
+  # return result
+  out <- data.frame(
+    data = data_name,
+    statistic_name = "t",
+    statistic = test_statistic,
+    effect_size_name = names(effect_size),
+    effect_size = as.numeric(effect_size),
+    p = as.numeric(htest$p.value),
+    df = as.numeric(htest$parameter),
+    method = ifelse(paired, "Paired t-test", htest$method),
+    alternative = alternative,
+    mu = mu,
+    stringsAsFactors = FALSE
+  )
+  class(out) <- c("sj_htest_t", "data.frame")
+  attr(out, "group_labels") <- group_labels
+  attr(out, "means") <- as.numeric(htest$estimate)
+  attr(out, "paired") <- isTRUE(paired)
+  attr(out, "one_sample") <- is.null(grp)
+  attr(out, "weighted") <- FALSE
+  if (!is.null(grp)) {
+    attr(out, "n_groups") <- stats::setNames(
+      c(as.numeric(table(grp))),
+      c("N Group 1", "N Group 2")
+    )
+  }
+  out
+}
+
+
+# Weighted t-test for two groups (or one sample) --------------------------------
+
+.calculate_weighted_ttest <- function(dv, grp, mu, paired, alternative, weights, group_labels, data_name) {
+  insight::check_if_installed(c("datawizard", "effectsize"))
+  if (is.null(grp)) {
+    dat <- stats::na.omit(data.frame(dv, weights))
+    colnames(dat) <- c("y", "w")
+    x_values <- dat$y
+    x_weights <- dat$w
+    y_values <- NULL
+    # group N's
+    n_groups <- stats::setNames(round(sum(x_weights)), "N Group 1")
+  } else {
+    dat <- stats::na.omit(data.frame(dv, grp, weights))
+    colnames(dat) <- c("y", "g", "w")
+    # unique groups
+    groups <- unique(dat$g)
+    # values for sample 1
+    x_values <- dat$y[dat$g == groups[1]]
+    x_weights <- dat$w[dat$g == groups[1]]
+    # values for sample 2
+    y_values <- dat$y[dat$g == groups[2]]
+    y_weights <- dat$w[dat$g == groups[2]]
+    # group N's
+    n_groups <- stats::setNames(
+      c(round(sum(x_weights)), round(sum(y_weights))),
+      c("N Group 1", "N Group 2")
+    )
+  }
+
+  mu_x <- stats::weighted.mean(x_values, x_weights, na.rm = TRUE)
+  var_x <- datawizard::weighted_sd(x_values, x_weights)^2
+  se_x <- sqrt(var_x / length(x_values))
+
+  if (paired || is.null(y_values)) {
+    # paired or one-sample: test the (differences') mean against `mu`
+    se <- se_x
+    dof <- length(x_values) - 1
+    test_statistic <- (mu_x - mu) / se
+    estimate <- mu_x
+    method <- if (paired) "Paired t-test" else "One Sample t-test"
+  } else {
+    # unpaired t-test
+    mu_y <- stats::weighted.mean(y_values, y_weights)
+    var_y <- datawizard::weighted_sd(y_values, y_weights)^2
+    se_y <- sqrt(var_y / length(y_values))
+    se <- sqrt(se_x^2 + se_y^2)
+    # Welch-Satterthwaite approximation for the degrees of freedom
+    dof <- se^4 / (se_x^4 / (length(x_values) - 1) + se_y^4 / (length(y_values) - 1))
+    test_statistic <- (mu_x - mu_y - mu) / se
+    estimate <- c(mu_x, mu_y)
+    method <- "Two-Sample t-test"
+  }
+
+  # p-values
+  if (alternative == "less") {
+    pval <- stats::pt(test_statistic, dof)
+  } else if (alternative == "greater") {
+    pval <- stats::pt(test_statistic, dof, lower.tail = FALSE)
+  } else {
+    pval <- 2 * stats::pt(-abs(test_statistic), dof)
+  }
+
+  # effect size
+  dat$y <- dat$y * dat$w
+  if (is.null(y_values)) {
+    t_formula <- stats::as.formula("y ~ 1")
+  } else {
+    t_formula <- stats::as.formula("y ~ g")
+  }
+
+  if (nrow(dat) > 20) {
+    effect_size <- stats::setNames(
+      effectsize::cohens_d(
+        t_formula,
+        data = dat,
+        alternative = alternative,
+        mu = mu,
+        paired = FALSE
+      )$Cohens_d,
+      "Cohens_d"
+    )
+  } else {
+    effect_size <- stats::setNames(
+      effectsize::hedges_g(
+        t_formula,
+        data = dat,
+        alternative = alternative,
+        mu = mu,
+        paired = FALSE
+      )$Hedges_g,
+      "Hedges_g"
+    )
+  }
+
+  # return result
+  out <- data.frame(
+    data = data_name,
+    statistic_name = "t",
+    statistic = test_statistic,
+    effect_size_name = names(effect_size),
+    effect_size = as.numeric(effect_size),
+    p = pval,
+    df = dof,
+    method = method,
+    alternative = alternative,
+    mu = mu,
+    stringsAsFactors = FALSE
+  )
+  class(out) <- c("sj_htest_t", "data.frame")
+  attr(out, "means") <- estimate
+  attr(out, "n_groups") <- n_groups
+  attr(out, "group_labels") <- group_labels
+  attr(out, "paired") <- isTRUE(paired)
+  attr(out, "one_sample") <- is.null(y_values) && !isTRUE(paired)
+  attr(out, "weighted") <- TRUE
+  out
+}
+
+
+# methods ---------------------------------------------------------------------
+
+#' @export
+print.sj_htest_t <- function(x, ...)
{ + insight::check_if_installed("effectsize") + + # fetch attributes + group_labels <- attributes(x)$group_labels + means <- attributes(x)$means + n_groups <- attributes(x)$n_groups + weighted <- attributes(x)$weighted + paired <- isTRUE(attributes(x)$paired) + one_sample <- isTRUE(attributes(x)$one_sample) + + if (weighted) { + weight_string <- " (weighted)" + } else { + weight_string <- "" + } + + # same width + group_labels <- format(group_labels) + + # header + insight::print_color(sprintf("# %s%s\n\n", x$method, weight_string), "blue") + + # print for paired t-test + if (paired) { + # data + insight::print_color(sprintf( + " Data: %s (mean difference = %s)\n", + x$data, + insight::format_value(means[1], protect_integers = TRUE) + ), "cyan") + } else { + # data + insight::print_color(sprintf(" Data: %s\n", x$data), "cyan") + + # group-1-info + if (is.null(n_groups)) { + insight::print_color( + sprintf( + " Group 1: %s (mean = %s)\n", + group_labels[1], insight::format_value(means[1], protect_integers = TRUE) + ), "cyan" + ) + } else { + insight::print_color( + sprintf( + " Group 1: %s (n = %i, mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(means[1], protect_integers = TRUE) + ), "cyan" + ) + } + + # group-2-info + if (length(group_labels) > 1) { + if (is.null(n_groups)) { + insight::print_color( + sprintf( + " Group 2: %s (mean = %s)\n", + group_labels[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } else { + insight::print_color( + sprintf( + " Group 2: %s (n = %i, mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(means[2], protect_integers = TRUE) + ), "cyan" + ) + } + } + } + + # alternative hypothesis + alt_string <- switch(x$alternative, + two.sided = "not equal to", + less = "less than", + greater = "greater than" + ) + if (one_sample) { + alt_string <- paste("true mean is", alt_string, x$mu) + } else if (paired) { + alt_string <- paste("true mean difference is", alt_string, x$mu) + } else { + alt_string <- paste("true difference in means is", alt_string, x$mu) + } + insight::print_color(sprintf(" Alternative hypothesis: %s\n", alt_string), "cyan") + + # string for effectsizes + if (x$effect_size_name == "Cohens_d") { + eff_string <- sprintf( + "Cohen's d = %.2f (%s effect)", + x$effect_size, + effectsize::interpret_cohens_d(x$effect_size) + ) + } else { + eff_string <- sprintf( + "Hedges' g = %.2f (%s effect)", + x$effect_size, + effectsize::interpret_hedges_g(x$effect_size) + ) + } + + cat(sprintf( + "\n t = %.2f, %s, df = %s, %s\n\n", + x$statistic, + eff_string, + insight::format_value(x$df, digits = 1, protect_integers = TRUE), + insight::format_p(x$p) + )) +} diff --git a/R/var_pop.R b/R/var_pop.R index bfe5367d..db53d6b8 100644 --- a/R/var_pop.R +++ b/R/var_pop.R @@ -22,21 +22,18 @@ #' sd(efc$c12hour, na.rm = TRUE) #' # population sd #' sd_pop(efc$c12hour) -#' -#' @importFrom stats na.omit var -#' @importFrom sjmisc is_num_fac -#' @importFrom sjlabelled as_numeric #' @export var_pop <- function(x) { + insight::check_if_installed("datawizard") # check for categorical if (is.factor(x)) { # only allow numeric factors - if (!sjmisc::is_num_fac(x)) { - warning("`x` must be numeric vector or a factor with numeric levels.", call. 
= F)
-      return(NA)
+
+    if (!.is_pseudo_numeric(x)) {
+      insight::format_error("`x` must be a numeric vector or a factor with numeric levels.")
     }
     # convert factor to numeric
-    x <- sjlabelled::as_numeric(x)
+    x <- datawizard::to_numeric(x, dummy_factors = FALSE)
   }
 
   # remove NA
@@ -49,7 +46,6 @@
 
 
 #' @rdname var_pop
-#' @importFrom stats na.omit var
 #' @export
 sd_pop <- function(x) {
   # get population variance
diff --git a/R/weight.R b/R/weight.R
index c36a3c55..11423688 100644
--- a/R/weight.R
+++ b/R/weight.R
@@ -38,8 +38,6 @@
 #' table(x)
 #' table(weight(x, w))
 #'
-#' @importFrom stats na.pass xtabs
-#' @importFrom sjlabelled as_numeric
 #' @export
 weight <- function(x, weights, digits = 0) {
   # remember if x is numeric
@@ -76,7 +74,9 @@ weight <- function(x, weights, digits = 0) {
 
   # if we have NA values, weighted var is coerced to character.
   # coerce back to numeric then here
-  if (!is.numeric(weightedvar) && x.is.num) weightedvar <- sjlabelled::as_numeric(weightedvar)
+  if (!is.numeric(weightedvar) && x.is.num) {
+    weightedvar <- datawizard::to_numeric(weightedvar, dummy_factors = FALSE)
+  }
 
   # return result
   weightedvar
diff --git a/R/wilcoxon_test.R b/R/wilcoxon_test.R
new file mode 100644
index 00000000..5ef4d925
--- /dev/null
+++ b/R/wilcoxon_test.R
@@ -0,0 +1,257 @@
+#' @title Wilcoxon signed rank test
+#' @name wilcoxon_test
+#' @description This function performs Wilcoxon signed rank tests for one sample
+#'   or for two _paired_ (dependent) samples. For _unpaired_ (independent)
+#'   samples, please use the `mann_whitney_test()` function.
+#'
+#'   A Wilcoxon signed rank test is a non-parametric test for the null hypothesis
+#'   that, for paired samples, the differences between the two samples are
+#'   symmetrically distributed around `mu` (or, for one sample, that the values
+#'   are symmetrically distributed around `mu`). For independent (unpaired)
+#'   samples, use `mann_whitney_test()`.
+#'
+#'   `wilcoxon_test()` can be used for ordinal scales or when the continuous
+#'   variables are not normally distributed. For large samples, or approximately
+#'   normally distributed variables, the `t_test()` function can be used (with
+#'   `paired = TRUE`).
+#'
+#' @inheritParams mann_whitney_test
+#' @inherit mann_whitney_test seealso
+#'
+#' @inheritSection mann_whitney_test Which test to use
+#'
+#' @return A data frame with test results. The function returns p and Z-values
+#'   as well as effect size r and group-rank-means.
+#'
+#' @references
+#' - Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+#'   Dtsch Med Wochenschr 2007; 132: e24–e25
+#'
+#' - du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+#'   Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
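+#'
+#' @details For paired samples, the effect size **r** is computed as
+#'   \eqn{r = \frac{|Z|}{\sqrt{n}}}, mirroring `mann_whitney_test()`, with the
+#'   usual rules of thumb: small effect >= 0.1, medium >= 0.3, large >= 0.5.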
+#'
+#' @examplesIf requireNamespace("coin")
+#' data(mtcars)
+#' # one-sample test
+#' wilcoxon_test(mtcars, "mpg")
+#' # base R equivalent, we set exact = FALSE to avoid a warning
+#' wilcox.test(mtcars$mpg ~ 1, exact = FALSE)
+#'
+#' # paired test
+#' wilcoxon_test(mtcars, c("mpg", "hp"))
+#' # base R equivalent, we set exact = FALSE to avoid a warning
+#' wilcox.test(mtcars$mpg, mtcars$hp, paired = TRUE, exact = FALSE)
+#'
+#' # when `by` is specified, each group must be of same length
+#' data(iris)
+#' d <- iris[iris$Species != "setosa", ]
+#' wilcoxon_test(d, "Sepal.Width", by = "Species")
+#' @export
+wilcoxon_test <- function(data,
+                          select = NULL,
+                          by = NULL,
+                          weights = NULL,
+                          mu = 0,
+                          alternative = "two.sided",
+                          ...)
+{
+  insight::check_if_installed("datawizard")
+  alternative <- match.arg(alternative, choices = c("two.sided", "less", "greater"))
+
+  # sanity checks
+  .sanitize_htest_input(data, select, by, weights, test = "wilcoxon_test")
+
+  # alternative only if weights are NULL
+  if (!is.null(weights) && alternative != "two.sided") {
+    insight::format_error("Argument `alternative` must be `two.sided` if `weights` are specified.")
+  }
+
+  # for paired two-sample, do groups all have same length?
+  if (!is.null(by)) {
+    group_len <- as.numeric(table(as.vector(data[[by]])))
+    if (!all(group_len == group_len[1])) {
+      insight::format_error("For paired two-sample Wilcoxon test, all groups specified in `by` must have the same length.") # nolint
+    }
+    # convert to wide format
+    out <- split(data[select], as.character(data[[by]]))
+    data <- stats::setNames(do.call(cbind, out), names(out))
+    select <- colnames(data)
+  }
+
+  # value labels
+  group_labels <- select
+
+  x <- data[[select[1]]]
+  if (length(select) > 1) {
+    y <- data[[select[2]]]
+  } else {
+    y <- NULL
+  }
+
+  if (is.null(weights)) {
+    .calculate_wilcox(x, y, alternative, mu, group_labels, ...)
+  } else {
+    .calculate_weighted_wilcox(x, y, data[[weights]], group_labels)
+  }
+}
+
+
+# Wilcoxon signed rank test (one-sample or paired) ------------------------------
+
+.calculate_wilcox <- function(x, y, alternative, mu, group_labels, ...) {
+  insight::check_if_installed("coin")
+  # for paired Wilcoxon test, we have effect sizes
+  if (!is.null(y)) {
+    # prepare data
+    wcdat <- data.frame(x, y)
+    # perform signed rank test via coin, to extract Z and effect size r
+    wt <- coin::wilcoxsign_test(x ~ y, data = wcdat)
+    # compute statistics
+    u <- as.numeric(coin::statistic(wt, type = "linear"))
+    z <- as.numeric(coin::statistic(wt, type = "standardized"))
+    r <- abs(z / sqrt(nrow(wcdat)))
+  } else {
+    wt <- u <- z <- r <- NULL
+  }
+
+  # prepare data
+  if (is.null(y)) {
+    dv <- x
+  } else {
+    dv <- x - y
+  }
+  htest <- suppressWarnings(stats::wilcox.test(
+    dv ~ 1,
+    alternative = alternative,
+    mu = mu,
+    ...
+  ))
+  v <- htest$statistic
+  p <- htest$p.value
+
+  out <- data.frame(
+    group1 = group_labels[1],
+    v = v,
+    p = as.numeric(p),
+    mu = mu,
+    alternative = alternative
+  )
+  # two groups?
+  if (length(group_labels) > 1) {
+    out$group2 <- group_labels[2]
+  }
+  # add effect sizes, when we have them
+  if (!is.null(wt)) {
+    out$u <- u
+    out$z <- z
+    out$r <- r
+  }
+  attr(out, "group_labels") <- group_labels
+  attr(out, "method") <- "wilcoxon"
+  attr(out, "weighted") <- FALSE
+  attr(out, "one_sample") <- length(group_labels) == 1
+  class(out) <- c("sj_htest_wilcox", "data.frame")
+
+  out
+}
+
+
+# Weighted Wilcoxon signed rank test --------------------------------------------
+
+.calculate_weighted_wilcox <- function(x, y, weights, group_labels) {
+  # check if pkg survey is available
+  insight::check_if_installed("survey")
+
+  # prepare data
+  if (is.null(y)) {
+    dv <- x
+  } else {
+    dv <- x - y
+  }
+
+  dat <- stats::na.omit(data.frame(dv, weights))
+  colnames(dat) <- c("y", "w")
+
+  design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
+  result <- survey::svyranktest(formula = y ~ 1, design, test = "wilcoxon")
+
+  # statistics and effect sizes
+  z <- result$statistic
+  r <- abs(z / sqrt(nrow(dat)))
+
+  out <- data_frame(
+    group1 = group_labels[1],
+    estimate = result$estimate,
+    z = z,
+    r = r,
+    p = as.numeric(result$p.value),
+    mu = 0,
+    alternative = "two.sided"
+  )
+  # two groups?
+  if (length(group_labels) > 1) {
+    out$group2 <- group_labels[2]
+  }
+
+  attr(out, "group_labels") <- group_labels
+  attr(out, "weighted") <- TRUE
+  attr(out, "one_sample") <- length(group_labels) == 1
+  attr(out, "method") <- "wilcoxon"
+  class(out) <- c("sj_htest_wilcox", "data.frame")
+
+  out
+}
+
+
+# methods ---------------------------------------------------------------------
+
+#' @export
+print.sj_htest_wilcox <- function(x, ...) {
+  # fetch attributes
+  group_labels <- attributes(x)$group_labels
+  weighted <- attributes(x)$weighted
+  one_sample <- attributes(x)$one_sample
+
+  if (weighted) {
+    weight_string <- " (weighted)"
+  } else {
+    weight_string <- ""
+  }
+
+  if (one_sample) {
+    onesample_string <- "One Sample"
+  } else {
+    onesample_string <- "Paired"
+  }
+
+  # same width
+  group_labels <- format(group_labels)
+
+  # header
+  insight::print_color(sprintf(
+    "# %s Wilcoxon signed rank test%s\n\n",
+    onesample_string,
+    weight_string
+  ), "blue")
+
+  # alternative hypothesis
+  if (!is.null(x$alternative) && !is.null(x$mu)) {
+    alt_string <- switch(x$alternative,
+      two.sided = "not equal to",
+      less = "less than",
+      greater = "greater than"
+    )
+    alt_string <- paste("true location shift is", alt_string, x$mu)
+    insight::print_color(sprintf("  Alternative hypothesis: %s\n", alt_string), "cyan")
+  }
+
+  if (!is.null(x[["v"]])) {
+    # cast to integer - sprintf()'s "%i" is not valid for doubles in R
+    v_stat <- sprintf("V = %i, ", as.integer(round(x$v)))
+  } else {
+    v_stat <- ""
+  }
+
+  if (!is.null(x[["r"]])) {
+    cat(sprintf("\n  %sr = %.2f, Z = %.2f, %s\n\n", v_stat, x$r, x$z, insight::format_p(x$p)))
+  } else {
+    cat(sprintf("\n  %s%s\n\n", v_stat, insight::format_p(x$p)))
+  }
+}
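In `.calculate_wilcox()`, the effect size is derived from the standardized test statistic as r = |Z| / sqrt(n). A minimal standalone sketch of that computation, reusing the same coin calls as in the new file above (assumes the coin package is installed; mtcars serves purely as example data):

library(coin)

# paired samples, as in the wilcoxon_test() examples above
wcdat <- data.frame(x = mtcars$mpg, y = mtcars$hp)
wt <- coin::wilcoxsign_test(x ~ y, data = wcdat)
z <- as.numeric(coin::statistic(wt, type = "standardized"))
r <- abs(z / sqrt(nrow(wcdat))) # effect size: r = |Z| / sqrt(n)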
diff --git a/R/wtd_cor.R b/R/wtd_cor.R
index bcb7e24f..19a1d7fb 100644
--- a/R/wtd_cor.R
+++ b/R/wtd_cor.R
@@ -1,15 +1,15 @@
-#' @rdname weighted_sd
+#' @rdname weighted_se
 #' @export
 weighted_correlation <- function(data, ...) {
   UseMethod("weighted_correlation")
 }


-#' @rdname weighted_sd
+#' @rdname weighted_se
 #' @export
-weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...) {
-  if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
-    stop("'ci.lvl' must be a single number between 0 and 1")
+weighted_correlation.default <- function(data, x, y, weights, ci.lvl = 0.95, ...) {
+  if (!missing(ci.lvl) && (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
+    insight::format_error("'ci.lvl' must be a single number between 0 and 1.")

   x.name <- deparse(substitute(x))
   y.name <- deparse(substitute(y))
@@ -24,8 +24,8 @@ weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...)
   vars <- c(x.name, y.name, w.name)

   # get data
-  dat <- suppressMessages(dplyr::select(data, !! vars))
-  dat <- na.omit(dat)
+  dat <- suppressMessages(data[vars])
+  dat <- stats::na.omit(dat)

   xv <- dat[[x.name]]
   yv <- dat[[y.name]]
@@ -35,12 +35,12 @@ weighted_correlation.default <- function(data, x, y, weights, ci.lvl = .95, ...)
 }


-#' @rdname weighted_sd
+#' @rdname weighted_se
 #' @export
-weighted_correlation.formula <- function(formula, data, ci.lvl = .95, ...) {
+weighted_correlation.formula <- function(formula, data, ci.lvl = 0.95, ...) {
-  if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
-    stop("'ci.lvl' must be a single number between 0 and 1")
+  if (!missing(ci.lvl) && (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
+    insight::format_error("'ci.lvl' must be a single number between 0 and 1.")

   vars <- all.vars(formula)

@@ -50,8 +50,8 @@ weighted_correlation.formula <- function(formula, data, ci.lvl = .95, ...) {
   }

   # get data
-  dat <- suppressMessages(dplyr::select(data, !! vars))
-  dat <- na.omit(dat)
+  dat <- suppressMessages(data[vars])
+  dat <- stats::na.omit(dat)

   xv <- dat[[vars[1]]]
   yv <- dat[[vars[2]]]
diff --git a/R/wtd_mean.R b/R/wtd_mean.R
deleted file mode 100644
index 2b5f369b..00000000
--- a/R/wtd_mean.R
+++ /dev/null
@@ -1,22 +0,0 @@
-#' @rdname weighted_sd
-#' @export
-weighted_mean <- function(x, weights = NULL) {
-  UseMethod("weighted_mean")
-}
-
-#' @importFrom stats weighted.mean
-#' @export
-weighted_mean.default <- function(x, weights = NULL) {
-  if (is.null(weights)) weights <- rep(1, length(x))
-  stats::weighted.mean(x, w = weights, na.rm = TRUE)
-}
-
-#' @importFrom stats weighted.mean
-#' @importFrom purrr map_dbl
-#' @importFrom dplyr select_if
-#' @export
-weighted_mean.data.frame <- function(x, weights = NULL) {
-  if (is.null(weights)) weights <- rep(1, length(x))
-  dplyr::select_if(x, is.numeric) %>%
-    purrr::map_dbl(~ weighted.mean(.x, w = weights))
-}
diff --git a/R/wtd_median.R b/R/wtd_median.R
deleted file mode 100644
index 0e19d98a..00000000
--- a/R/wtd_median.R
+++ /dev/null
@@ -1,40 +0,0 @@
-#' @rdname weighted_sd
-#' @export
-weighted_median <- function(x, weights = NULL) {
-  UseMethod("weighted_median")
-}
-
-#' @export
-weighted_median.default <- function(x, weights = NULL) {
-  weighted_md_helper(x, w = weights, p = 0.5)
-}
-
-#' @export
-weighted_median.data.frame <- function(x, weights = NULL) {
-  dplyr::select_if(x, is.numeric) %>%
-    purrr::map_dbl(~ weighted_md_helper(.x, w = weights, p = 0.5))
-}
-
-weighted_md_helper <- function(x, w, p = .5) {
-  if (is.null(w)) w <- rep(1, length(x))
-
-  x[is.na(w)] <- NA
-  w[is.na(x)] <- NA
-
-  w <- na.omit(w)
-  x <- na.omit(x)
-
-  order <- order(x)
-  x <- x[order]
-  w <- w[order]
-
-  rw <- cumsum(w) / sum(w)
-  md.values <- min(which(rw >= p))
-
-  if (rw[md.values] == p)
-    q <- mean(x[md.values:(md.values + 1)])
-  else
-    q <- x[md.values]
-
-  q
-}
diff --git a/R/wtd_mwu.R b/R/wtd_mwu.R
deleted file mode 100644
index d0e9fa3d..00000000
--- a/R/wtd_mwu.R
+++ /dev/null
@@ -1,74 +0,0 @@
-#' @rdname weighted_sd
-#' @export
-weighted_mannwhitney <- function(data, ...) {
-  UseMethod("weighted_mannwhitney")
-}
-
-
-#' @importFrom dplyr select
-#' @rdname weighted_sd
-#' @export
-weighted_mannwhitney.default <- function(data, x, grp, weights, ...) {
-  x.name <- deparse(substitute(x))
-  g.name <- deparse(substitute(grp))
-  w.name <- deparse(substitute(weights))
-
-  # create string with variable names
-  vars <- c(x.name, g.name, w.name)
-
-  # get data
-  dat <- suppressMessages(dplyr::select(data, !! vars))
-  dat <- na.omit(dat)
-
-  weighted_mannwhitney_helper(dat)
-}
-
-
-#' @importFrom dplyr select
-#' @rdname weighted_sd
-#' @export
-weighted_mannwhitney.formula <- function(formula, data, ...) {
-  vars <- all.vars(formula)
-
-  # get data
-  dat <- suppressMessages(dplyr::select(data, !! vars))
-  dat <- na.omit(dat)
-
-  weighted_mannwhitney_helper(dat)
-}
-
-weighted_mannwhitney_helper <- function(dat, vars) {
-  # check if pkg survey is available
-  if (!requireNamespace("survey", quietly = TRUE)) {
-    stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE)
-  }
-
-  x.name <- colnames(dat)[1]
-  group.name <- colnames(dat)[2]
-
-  colnames(dat) <- c("x", "g", "w")
-
-  if (dplyr::n_distinct(dat$g, na.rm = TRUE) > 2) {
-    m <- "Weighted Kruskal-Wallis test"
-    method <- "KruskalWallis"
-  } else {
-    m <- "Weighted Mann-Whitney-U test"
-    method <- "wilcoxon"
-  }
-
-  design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
-  mw <- survey::svyranktest(formula = x ~ g, design, test = method)
-
-  attr(mw, "x.name") <- x.name
-  attr(mw, "group.name") <- group.name
-  class(mw) <- c("sj_wmwu", "list")
-
-  mw$method <- m
-
-  mw
-}
-
-
-#' @rdname weighted_sd
-#' @export
-weighted_ranktest <- weighted_mannwhitney.formula
diff --git a/R/wtd_sd.R b/R/wtd_sd.R
deleted file mode 100644
index 95af674d..00000000
--- a/R/wtd_sd.R
+++ /dev/null
@@ -1,115 +0,0 @@
-#' @title Weighted statistics for tests and variables
-#' @name weighted_sd
-#' @description \strong{Weighted statistics for variables}
-#' \cr \cr
-#' \code{weighted_sd()}, \code{weighted_se()}, \code{weighted_mean()} and \code{weighted_median()}
-#' compute weighted standard deviation, standard error, mean or median for a
-#' variable or for all variables of a data frame. \code{survey_median()} computes the
-#' median for a variable in a survey-design (see \code{\link[survey]{svydesign}}).
-#' \code{weighted_correlation()} computes a weighted correlation for a two-sided alternative
-#' hypothesis.
-#' \cr \cr
-#' \strong{Weighted tests}
-#' \cr \cr
-#' \code{weighted_ttest()} computes a weighted t-test, while \code{weighted_mannwhitney()}
-#' computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test
-#' (for more than two groups). `weighted_ranktest()` is an alias for `weighted_mannwhitney()`.
-#' \code{weighted_chisqtest()} computes a weighted Chi-squared test for contingency tables.
-#'
-#' @param x (Numeric) vector or a data frame. For \code{survey_median()}, \code{weighted_ttest()},
-#' \code{weighted_mannwhitney()} and \code{weighted_chisqtest()} the bare (unquoted) variable
-#' name, or a character vector with the variable name.
-#' @param weights Bare (unquoted) variable name, or a character vector with
-#' the variable name of the numeric vector of weights. If \code{weights = NULL},
-#' unweighted statistic is reported.
-#' @param data A data frame.
-#' @param formula A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a
-#' numeric variable giving the data values and \code{rhs1} a factor with two
-#' levels giving the corresponding groups and \code{rhs2} a variable with weights.
-#' @param y Optional, bare (unquoted) variable name, or a character vector with
-#' the variable name.
-#' @param grp Bare (unquoted) name of the cross-classifying variable, where
-#' \code{x} is grouped into the categories represented by \code{grp},
-#' or a character vector with the variable name.
-#' @param mu A number indicating the true value of the mean (or difference in
-#' means if you are performing a two sample test).
-#' @param ci.lvl Confidence level of the interval.
-#' @param alternative A character string specifying the alternative hypothesis,
-#' must be one of \code{"two.sided"} (default), \code{"greater"} or
-#' \code{"less"}. You can specify just the initial letter.
-#' @param paired Logical, whether to compute a paired t-test.
-#' @param ... For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used.
-#' For \code{weighted_chisqtest()}, further arguments passed down to
-#' \code{\link[stats]{chisq.test}}.
-#'
-#' @inheritParams svyglm.nb
-#'
-#' @return The weighted (test) statistic.
-#'
-#' @note \code{weighted_chisq()} is a convenient wrapper for \code{\link{crosstable_statistics}}.
-#' For a weighted one-way Anova, use \code{means_by_group()} with
-#' \code{weights}-argument.
-#' \cr \cr
-#' \code{weighted_ttest()} assumes unequal variance between the two groups.
-#'
-#' @examples
-#' # weighted sd and se ----
-#' weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100))
-#'
-#' data(efc)
-#' weighted_sd(efc[, 1:3], runif(n = nrow(efc)))
-#' weighted_se(efc[, 1:3], runif(n = nrow(efc)))
-#'
-#' # survey_median ----
-#' # median for variables from weighted survey designs
-#' if (require("survey")) {
-#'   data(nhanes_sample)
-#'
-#'   des <- svydesign(
-#'     id = ~SDMVPSU,
-#'     strat = ~SDMVSTRA,
-#'     weights = ~WTINT2YR,
-#'     nest = TRUE,
-#'     data = nhanes_sample
-#'   )
-#'
-#'   survey_median(total, des)
-#'   survey_median("total", des)
-#' }
-#'
-#' # weighted t-test ----
-#' efc$weight <- abs(rnorm(nrow(efc), 1, .3))
-#' weighted_ttest(efc, e17age, weights = weight)
-#' weighted_ttest(efc, e17age, c160age, weights = weight)
-#' weighted_ttest(e17age ~ e16sex + weight, efc)
-#' @export
-weighted_sd <- function(x, weights = NULL) {
-  UseMethod("weighted_sd")
-}
-
-
-#' @rdname weighted_sd
-#' @export
-wtd_sd <- weighted_sd
-
-
-#' @export
-weighted_sd.data.frame <- function(x, weights = NULL) {
-  sd_result <- purrr::map_dbl(x, ~ sqrt(weighted_variance(.x, weights)))
-  names(sd_result) <- colnames(x)
-
-  sd_result
-}
-
-#' @export
-weighted_sd.matrix <- function(x, weights = NULL) {
-  sd_result <- purrr::map_dbl(x, ~ sqrt(weighted_variance(.x, weights)))
-  names(sd_result) <- colnames(x)
-
-  sd_result
-}
-
-#' @export
-weighted_sd.default <- function(x, weights = NULL) {
-  sqrt(weighted_variance(x, weights))
-}
diff --git a/R/wtd_se.R b/R/wtd_se.R
index 7fdefcbc..62c48c3e 100644
--- a/R/wtd_se.R
+++ b/R/wtd_se.R
@@ -1,4 +1,47 @@
-#' @rdname weighted_sd
+#' @title Weighted statistics for variables
+#' @name weighted_se
+#' @description
+#' `weighted_se()` computes weighted standard errors of a variable or for
+#' all variables of a data frame. `survey_median()` computes the median for
+#' a variable in a survey-design (see [`survey::svydesign()`]).
+#' `weighted_correlation()` computes a weighted correlation for a two-sided
+#' alternative hypothesis.
+#'
+#' @param x (Numeric) vector or a data frame. For `survey_median()`, the bare
+#' (unquoted) variable name, or a character vector with the variable name.
+#' @param weights Bare (unquoted) variable name, or a character vector with
+#' the variable name of the numeric vector of weights. If `weights = NULL`,
+#' the unweighted statistic is reported.
+#' @param data A data frame.
+#' @param formula A formula of the form `lhs ~ rhs1 + rhs2` where `lhs` is a
+#' numeric variable giving the data values and `rhs1` a factor with two
+#' levels giving the corresponding groups and `rhs2` a variable with weights.
+#' @param y Optional, bare (unquoted) variable name, or a character vector with
+#' the variable name.
+#' @param ci.lvl Confidence level of the interval.
+#' @param ... Currently not used.
+#'
+#' @inheritParams svyglm.nb
+#'
+#' @return The weighted (test) statistic.
+#'
+#' @examplesIf requireNamespace("survey")
+#' data(efc)
+#' weighted_se(efc$c12hour, abs(runif(n = nrow(efc))))
+#'
+#' # survey_median ----
+#' # median for variables from weighted survey designs
+#' data(nhanes_sample)
+#'
+#' des <- survey::svydesign(
+#'   id = ~SDMVPSU,
+#'   strat = ~SDMVSTRA,
+#'   weights = ~WTINT2YR,
+#'   nest = TRUE,
+#'   data = nhanes_sample
+#' )
+#' survey_median(total, des)
+#' survey_median("total", des)
 #' @export
 weighted_se <- function(x, weights = NULL) {
   UseMethod("weighted_se")
@@ -7,7 +50,7 @@ weighted_se <- function(x, weights = NULL) {

 #' @export
 weighted_se.data.frame <- function(x, weights = NULL) {
-  se_result <- purrr::map_dbl(x, ~ weighted_se_helper(.x, weights = weights))
+  se_result <- vapply(x, weighted_se_helper, numeric(1), weights = weights)
   names(se_result) <- colnames(x)

   se_result
@@ -15,7 +58,7 @@ weighted_se.data.frame <- function(x, weights = NULL) {

 #' @export
 weighted_se.matrix <- function(x, weights = NULL) {
-  se_result <- purrr::map_dbl(x, ~ weighted_se_helper(.x, weights = weights))
+  se_result <- vapply(x, weighted_se_helper, numeric(1), weights = weights)
   names(se_result) <- colnames(x)

   se_result
diff --git a/R/wtd_ttest.R b/R/wtd_ttest.R
deleted file mode 100644
index ed00245b..00000000
--- a/R/wtd_ttest.R
+++ /dev/null
@@ -1,182 +0,0 @@
-#' @rdname weighted_sd
-#' @export
-weighted_ttest <- function(data, ...) {
-  UseMethod("weighted_ttest")
-}
-
-#' @rdname weighted_sd
-#' @export
-weighted_ttest.default <- function(data, x, y = NULL, weights, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) {
-
-  if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
-    stop("'ci.lvl' must be a single number between 0 and 1")
-
-  alternative <- match.arg(alternative)
-
-  x.name <- deparse(substitute(x))
-  y.name <- deparse(substitute(y))
-  w.name <- deparse(substitute(weights))
-
-  if (y.name == "NULL") y.name <- NULL
-
-  if (w.name == "NULL") {
-    w.name <- "weights"
-    data$weights <- 1
-  }
-
-  # create string with variable names
-  vars <- c(x.name, y.name, w.name)
-
-  # get data
-  dat <- suppressMessages(dplyr::select(data, !! vars))
-  dat <- na.omit(dat)
-
-  if (sjmisc::is_empty(dat) || nrow(dat) == 1) {
-    warning("Too less data to compute t-test.")
-    return(NULL)
-  }
-
-  xv <- dat[[x.name]]
-  wx <- wy <- dat[[w.name]]
-
-  if (!is.null(y.name))
-    yv <- dat[[y.name]]
-  else
-    yv <- NULL
-
-  nx <- ny <- nrow(dat)
-
-  weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, x.name, y.name, NULL)
-}
-
-
-#' @rdname weighted_sd
-#' @export
-weighted_ttest.formula <- function(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...) {
-
-  if (!missing(ci.lvl) & (length(ci.lvl) != 1 || !is.finite(ci.lvl) || ci.lvl < 0 || ci.lvl > 1))
-    stop("'ci.lvl' must be a single number between 0 and 1")
-
-  alternative <- match.arg(alternative)
-
-  vars <- all.vars(formula)
-
-  g <- data[[vars[2]]]
-
-  if (is.factor(g))
-    grps <- levels(g)
-  else
-    grps <- na.omit(sort(unique(g)))
-
-  if (length(grps) > 2)
-    stop("Grouping factor has more than two levels.")
-
-  if (length(vars) < 3) {
-    vars <- c(vars, "weights")
-    data$weights <- 1
-  }
-
-  x <- data[[vars[1]]]
-  y <- data[[vars[2]]]
-  w <- data[[vars[3]]]
-
-  xv <- x[y == grps[1]]
-  yv <- x[y == grps[2]]
-  wx <- w[y == grps[1]]
-  wy <- w[y == grps[2]]
-
-  mxv <- is.na(xv)
-  xv <- xv[!mxv]
-  wx <- wx[!mxv]
-
-  myv <- is.na(yv)
-  yv <- yv[!myv]
-  wy <- wy[!myv]
-
-  nx <- length(xv)
-  ny <- length(yv)
-
-  labs <- sjlabelled::get_labels(
-    data[[vars[2]]],
-    attr.only = FALSE,
-    values = "p",
-    drop.na = TRUE,
-    drop.unused = TRUE
-  )
-
-  weighted_ttest_helper(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, vars[1], vars[2], labs)
-}
-
-
-weighted_ttest_helper <- function(xv, yv, wx, wy, nx, ny, mu, paired, alternative, ci.lvl, x.name, y.name, group.name) {
-  if (paired) {
-    xv <- xv - yv
-    yv <- NULL
-  }
-
-  mu.x.w <- stats::weighted.mean(xv, wx)
-  var.x.w <- weighted_sd(xv, wx)^2
-  se.x <- sqrt(var.x.w / nx)
-
-  if (!is.null(yv)) {
-    mu.y.w <- stats::weighted.mean(yv, wy)
-    var.y.w <- weighted_sd(yv, wy)^2
-    se.y <- sqrt(var.y.w / ny)
-
-    se <- sqrt(se.x^2 + se.y^2)
-    df <- se^4 / (se.x^4 / (nx - 1) + se.y^4 / (ny - 1))
-    tstat <- (mu.x.w - mu.y.w - mu) / se
-
-    estimate <- c(mu.x.w, mu.y.w)
-    names(estimate) <- c("mean of x", "mean of y")
-    method <- "Two-Sample t-test"
-  } else {
-    se <- se.x
-    df <- nx - 1
-    tstat <- (mu.x.w - mu) / se
-
-    estimate <- stats::setNames(mu.x.w, if (paired) "mean of the differences" else "mean of x")
-    method <- if (paired) "Paired t-test" else "One Sample t-test"
-  }
-
-
-
-  if (alternative == "less") {
-    pval <- stats::pt(tstat, df)
-    cint <- c(-Inf, tstat + stats::qt(ci.lvl, df))
-  } else if (alternative == "greater") {
-    pval <- stats::pt(tstat, df, lower.tail = FALSE)
-    cint <- c(tstat - stats::qt(ci.lvl, df), Inf)
-  } else {
-    pval <- 2 * stats::pt(-abs(tstat), df)
-    alpha <- 1 - ci.lvl
-    cint <- stats::qt(1 - alpha / 2, df)
-    cint <- tstat + c(-cint, cint)
-  }
-
-  cint <- mu + cint * se
-
-  names(tstat) <- "t"
-  names(df) <- "df"
-  names(mu) <- if (paired || !is.null(yv)) "difference in means" else "mean"
-
-
-  tt <- structure(
-    class = "sj_ttest",
-    list(
-      estimate = estimate,
-      statistic = tstat,
-      df = df,
-      p.value = pval,
-      ci = cint,
-      alternative = alternative,
-      method = method
-    )
-  )
-
-  attr(tt, "x.name") <- x.name
-  attr(tt, "y.name") <- y.name
-  attr(tt, "group.name") <- group.name
-
-  tt
-}
diff --git a/R/wtd_variance.R b/R/wtd_variance.R
index 12f77f28..0bde43a2 100644
--- a/R/wtd_variance.R
+++ b/R/wtd_variance.R
@@ -4,8 +4,8 @@ weighted_variance <- function(x, w) {
   x[is.na(w)] <- NA
   w[is.na(x)] <- NA

-  w <- na.omit(w)
-  x <- na.omit(x)
+  w <- stats::na.omit(w)
+  x <- stats::na.omit(x)

   xbar <- sum(w * x) / sum(w)
   sum(w * ((x - xbar)^2)) / (sum(w) - 1)
diff --git a/R/xtab_statistics.R b/R/xtab_statistics.R
index 21824194..19fc5abf 100644
--- a/R/xtab_statistics.R
+++ b/R/xtab_statistics.R
@@ -113,7 +113,20 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
   stat.html <- NULL

   # check if data is a table
-  if (!is.table(data)) {
+  if (is.table(data)) {
+    # 'data' is a table - copy to table object
+    tab <- data
+    # check if statistics are possible to compute
+    if (statistics %in% c("spearman", "kendall", "pearson")) {
+      stop(
+        sprintf(
+          "Need arguments `data`, `x1` and `x2` to compute %s-statistics.",
+          statistics
+        ),
+        call. = FALSE
+      )
+    }
+  } else {
     # evaluate unquoted names
     x1 <- deparse(substitute(x1))
     x2 <- deparse(substitute(x2))
@@ -124,7 +137,7 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
     x2 <- gsub("\"", "", x2, fixed = TRUE)
     weights <- gsub("\"", "", weights, fixed = TRUE)

-    if (sjmisc::is_empty(weights) || weights == "NULL")
+    if (insight::is_empty_object(weights) || weights == "NULL")
       weights <- NULL
     else
       weights <- data[[weights]]
@@ -145,19 +158,6 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
     } else {
       tab <- table(data)
     }
-  } else {
-    # 'data' is a table - copy to table object
-    tab <- data
-    # check if statistics are possible to compute
-    if (statistics %in% c("spearman", "kendall", "pearson")) {
-      stop(
-        sprintf(
-          "Need arguments `data`, `x1` and `x2` to compute %s-statistics.",
-          statistics
-        ),
-        call. = FALSE
-      )
-    }
   }

   # get expected values
@@ -217,21 +217,21 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
   }

   # compute method string
-  method <- dplyr::case_when(
-    statistics == "kendall" ~ "Kendall's tau",
-    statistics == "spearman" ~ "Spearman's rho",
-    statistics == "pearson" ~ "Pearson's r",
-    statistics == "cramer" ~ "Cramer's V",
-    statistics == "phi" ~ "Phi"
+  method <- ifelse(statistics == "kendall", "Kendall's tau",
+    ifelse(statistics == "spearman", "Spearman's rho", # nolint
+      ifelse(statistics == "pearson", "Pearson's r", # nolint
+        ifelse(statistics == "cramer", "Cramer's V", "Phi") # nolint
+      )
+    )
   )

   # compute method string
-  method.html <- dplyr::case_when(
-    statistics == "kendall" ~ "Kendall's τ",
-    statistics == "spearman" ~ "Spearman's ρ",
-    statistics == "pearson" ~ "Pearson's r",
-    statistics == "cramer" ~ "Cramer's V",
-    statistics == "phi" ~ "φ"
+  method.html <- ifelse(statistics == "kendall", "Kendall's τ",
+    ifelse(statistics == "spearman", "Spearman's ρ", # nolint
+      ifelse(statistics == "pearson", "Pearson's r", # nolint
+        ifelse(statistics == "cramer", "Cramer's V", "φ") # nolint
+      )
+    )
   )

   # return result
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2fb1d7b8..fa456107 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -25,7 +25,7 @@ reference:
   - title: "Weighted Estimates and Dispersion"
     contents:
       - weight
-      - weighted_ttest
+      - weighted_se

   - title: "Summary Statistics and Tests"
     contents:
@@ -33,6 +33,8 @@ reference:
       - chi_squared_test
       - kruskal_wallis_test
       - mann_whitney_test
+      - t_test
+      - wilcoxon_test
       - var_pop

   - title: "Tools for Regression Models"
diff --git a/man/anova_stats.Rd b/man/anova_stats.Rd
index 77f7e82f..a64263e9 100644
--- a/man/anova_stats.Rd
+++ b/man/anova_stats.Rd
@@ -22,6 +22,7 @@ epsilon-squared statistic or Cohen's F for all terms in an anovas.
and power for each term.
}
\examples{
+\dontshow{if (requireNamespace("car") && requireNamespace("pwr")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# load sample data
data(efc)

# fit linear model
fit <- aov(
  c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age,
  data = efc
)
-\dontrun{
 anova_stats(car::Anova(fit, type = 2))
-}
+\dontshow{\}) # examplesIf}
}
\references{
Levine TR, Hullett CR (2002): Eta Squared, Partial Eta Squared, and Misreporting of Effect Size in Communication Research.
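The nested `ifelse()` chains above replace `dplyr::case_when()` as part of dropping the dplyr dependency, keeping the same fall-through semantics. If `statistics` is a single string at this point (e.g. after `match.arg()`), base R's `switch()` would be an equivalent, flatter alternative; a sketch under that assumption (the helper name `.method_string` is illustrative, not part of the package):

# map a statistics keyword to its display name; the last, unnamed
# element acts as the default, like the innermost ifelse() branch
.method_string <- function(statistics) {
  switch(statistics,
    kendall = "Kendall's tau",
    spearman = "Spearman's rho",
    pearson = "Pearson's r",
    cramer = "Cramer's V",
    "Phi"
  )
}
.method_string("spearman") # returns "Spearman's rho"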
diff --git a/man/auto_prior.Rd b/man/auto_prior.Rd
index f7701564..64047399 100644
--- a/man/auto_prior.Rd
+++ b/man/auto_prior.Rd
@@ -48,19 +48,16 @@ formula used in \code{brms::brm()} must be rewritten to something like
\code{y ~ 0 + intercept ...}, see \code{\link[brms]{set_prior}}.
}
\examples{
-library(sjmisc)
+\dontshow{if (requireNamespace("brms")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data(efc)
efc$c172code <- as.factor(efc$c172code)
-efc$c161sex <- to_label(efc$c161sex)
+efc$c161sex <- as.factor(efc$c161sex)

mf <- formula(neg_c_7 ~ c161sex + c160age + c172code)
-
-if (requireNamespace("brms", quietly = TRUE))
-  auto_prior(mf, efc, TRUE)
+auto_prior(mf, efc, TRUE)

## compare to
-# library(rstanarm)
-# m <- stan_glm(mf, data = efc, chains = 2, iter = 200)
+# m <- rstanarm::stan_glm(mf, data = efc, chains = 2, iter = 200)
# ps <- prior_summary(m)
# ps$prior_intercept$adjusted_scale
# ps$prior$adjusted_scale
@@ -72,15 +69,12 @@ if (requireNamespace("brms", quietly = TRUE))

# add informative priors
mf <- formula(neg_c_7 ~ c161sex + c172code)
-if (requireNamespace("brms", quietly = TRUE)) {
-  auto_prior(mf, efc, TRUE) +
-    brms::prior(normal(.1554, 40), class = "b", coef = "c160age")
-}
+auto_prior(mf, efc, TRUE) +
+  brms::prior(normal(.1554, 40), class = "b", coef = "c160age")

# example with binary response
efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1)
mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age)
-
-if (requireNamespace("brms", quietly = TRUE))
-  auto_prior(mf, efc, FALSE)
+auto_prior(mf, efc, FALSE)
+\dontshow{\}) # examplesIf}
}
diff --git a/man/boot_ci.Rd b/man/boot_ci.Rd
index c437f137..893080e8 100644
--- a/man/boot_ci.Rd
+++ b/man/boot_ci.Rd
@@ -7,36 +7,33 @@
\alias{boot_est}
\title{Standard error and confidence intervals for bootstrapped estimates}
\usage{
-boot_ci(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95)
+boot_ci(data, select = NULL, method = c("dist", "quantile"), ci.lvl = 0.95)

-boot_se(data, ...)
+boot_se(data, select = NULL)

-boot_p(data, ...)
+boot_p(data, select = NULL)

-boot_est(data, ...)
+boot_est(data, select = NULL)
}
\arguments{
\item{data}{A data frame that contains the vector with bootstrapped
estimates, or directly the vector (see 'Examples').}

-\item{...}{Optional, unquoted names of variables with bootstrapped estimates.
-Required, if either \code{data} is a data frame (and no vector),
-and only selected variables from \code{data} should be processed.
-You may also use functions like \code{:} or tidyselect's
-\code{select_helpers()}.}
+\item{select}{Optional, names of variables (as character vector) with
+bootstrapped estimates. Required if \code{data} is a data frame (and not a
+vector) and only selected variables from \code{data} should be processed.}

\item{method}{Character vector, indicating if confidence intervals should be
-based on bootstrap standard error, multiplied by the value of the
-quantile function of the t-distribution (default), or on sample
-quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}.
-May be abbreviated.}
+based on bootstrap standard error, multiplied by the value of the quantile
+function of the t-distribution (default), or on sample quantiles of the
+bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.}

\item{ci.lvl}{Numeric, the level of the confidence intervals.}
}
\value{
-A data frame with either bootstrap estimate,
-standard error, the lower and upper confidence intervals or the
-p-value for all bootstrapped estimates.
+A data frame with either bootstrap estimate, standard error, the
+lower and upper confidence intervals or the p-value for all bootstrapped
+estimates.
}
\description{
Compute nonparametric bootstrap estimate, standard error,
@@ -44,45 +41,38 @@ confidence intervals and p-value for a vector of bootstrap replicate
estimates.
}
\details{
-The methods require one or more vectors of bootstrap replicate estimates
-as input.
+The methods require one or more vectors of bootstrap replicate
+estimates as input.
\itemize{
-\item{
-\code{boot_est()} returns the bootstrapped estimate, simply by
-computing the mean value of all bootstrap estimates.
-}
-\item{
-\code{boot_se()} computes the nonparametric bootstrap standard
-error by calculating the standard deviation of the input vector.
-}
-\item{
-The mean value of the input vector and its standard error is used
-by \code{boot_ci()} to calculate the lower and upper confidence
-interval, assuming a t-distribution of bootstrap estimate replicates
-(for \code{method = "dist"}, the default, which is
-\code{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for
-\code{method = "quantile"}, 95\\% sample quantiles are used to compute
-the confidence intervals (\code{quantile(x, probs = c(.025, .975))}).
-Use \code{ci.lvl} to change the level for the confidence interval.
-}
-\item{
-P-values from \code{boot_p()} are also based on t-statistics,
-assuming normal distribution.
-}
+\item \code{boot_est()}: returns the bootstrapped estimate, simply by computing
+the mean value of all bootstrap estimates.
+\item \code{boot_se()}: computes the nonparametric bootstrap standard error by
+calculating the standard deviation of the input vector.
+\item The mean value of the input vector and its standard error is used by
+\code{boot_ci()} to calculate the lower and upper confidence interval,
+assuming a t-distribution of bootstrap estimate replicates (for
+\code{method = "dist"}, the default, which is
+\verb{mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)}); for
+\code{method = "quantile"}, 95\\% sample quantiles are used to compute the
+confidence intervals (\code{quantile(x, probs = c(0.025, 0.975))}). Use
+\code{ci.lvl} to change the level for the confidence interval.
+\item P-values from \code{boot_p()} are also based on t-statistics, assuming normal
+distribution.
}
}
\examples{
-library(dplyr)
-library(purrr)
data(efc)
bs <- bootstrap(efc, 100)

# now run models for each bootstrapped sample
-bs$models <- map(bs$strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x))
+bs$models <- lapply(
+  bs$strap,
+  function(.x) lm(neg_c_7 ~ e42dep + c161sex, data = .x)
+)

# extract coefficient "dependency" and "gender" from each model
-bs$dependency <- map_dbl(bs$models, ~coef(.x)[2])
-bs$gender <- map_dbl(bs$models, ~coef(.x)[3])
+bs$dependency <- vapply(bs$models, function(x) coef(x)[2], numeric(1))
+bs$gender <- vapply(bs$models, function(x) coef(x)[3], numeric(1))

# get bootstrapped confidence intervals
boot_ci(bs$dependency)
@@ -93,63 +83,19 @@ confint(fit)[2, ]

# alternative function calls.
boot_ci(bs$dependency)
-boot_ci(bs, dependency)
-boot_ci(bs, dependency, gender)
-boot_ci(bs, dependency, gender, method = "q")
+boot_ci(bs, "dependency")
+boot_ci(bs, c("dependency", "gender"))
+boot_ci(bs, c("dependency", "gender"), method = "q")

# compare coefficients
mean(bs$dependency)
boot_est(bs$dependency)
coef(fit)[2]
-
-
-# bootstrap() and boot_ci() work fine within pipe-chains
-efc \%>\%
-  bootstrap(100) \%>\%
-  mutate(
-    models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)),
-    dependency = map_dbl(models, ~coef(.x)[2])
-  ) \%>\%
-  boot_ci(dependency)
-
-# check p-value
-boot_p(bs$gender)
-summary(fit)$coefficients[3, ]
-
-\dontrun{
-# 'spread_coef()' from the 'sjmisc'-package makes it easy to generate
-# bootstrapped statistics like confidence intervals or p-values
-library(dplyr)
-library(sjmisc)
-efc \%>\%
-  # generate bootstrap replicates
-  bootstrap(100) \%>\%
-  # apply lm to all bootstrapped data sets
-  mutate(
-    models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x))
-  ) \%>\%
-  # spread model coefficient for all 100 models
-  spread_coef(models) \%>\%
-  # compute the CI for all bootstrapped model coefficients
-  boot_ci(e42dep, c161sex, c172code)
-
-# or...
-efc \%>\%
-  # generate bootstrap replicates
-  bootstrap(100) \%>\%
-  # apply lm to all bootstrapped data sets
-  mutate(
-    models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x))
-  ) \%>\%
-  # spread model coefficient for all 100 models
-  spread_coef(models, append = FALSE) \%>\%
-  # compute the CI for all bootstrapped model coefficients
-  boot_ci()}
}
\references{
Carpenter J, Bithell J. Bootstrap confidence intervals: when, which,
what? A practical guide for medical statisticians.
Statist. Med. 2000; 19:1141-1164
}
\seealso{
-\code{\link{bootstrap}} to generate nonparametric bootstrap samples.
+\code{\link[=bootstrap]{bootstrap()}} to generate nonparametric bootstrap samples.
}
diff --git a/man/bootstrap.Rd b/man/bootstrap.Rd
index ac2e2c63..e40b7845 100644
--- a/man/bootstrap.Rd
+++ b/man/bootstrap.Rd
@@ -69,17 +69,11 @@ bs$c12hour <- unlist(lapply(bs$strap, function(x) {
  mean(as.data.frame(x)$c12hour, na.rm = TRUE)
}))

-# or as tidyverse-approach
-if (require("dplyr") && require("purrr")) {
-  bs <- efc \%>\%
-    bootstrap(100) \%>\%
-    mutate(
-      c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE))
-    )
+# bootstrapped standard error
+boot_se(bs, "c12hour")

-  # bootstrapped standard error
-  boot_se(bs, c12hour)
-}
+# bootstrapped CI
+boot_ci(bs, "c12hour")
}
\seealso{
\code{\link{boot_ci}} to calculate confidence intervals from
diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd
index f6dd5d77..9b60c041 100644
--- a/man/chi_squared_test.Rd
+++ b/man/chi_squared_test.Rd
@@ -17,15 +17,25 @@ chi_squared_test(
\arguments{
\item{data}{A data frame.}

-\item{select}{Name of the dependent variable (as string) to be used for the
-test. \code{select} can also be a character vector, specifying the names of
-multiple continuous variables. In this case, \code{by} is ignored and variables
-specified in \code{select} are used to compute the test. This can be useful if
-the data is in wide-format and no grouping variable is available.}
+\item{select}{Name(s) of the continuous variable(s) (as character vector)
+to be used as samples for the test. \code{select} can be one of the following:
+\itemize{
+\item \code{select} can be used in combination with \code{by}, in which case \code{select} is
+the name of the continuous variable (and \code{by} indicates a grouping factor).
+\item \code{select} can also be a character vector of length two or more (more than
+two names only apply to \code{kruskal_wallis_test()}), in which case the two
+continuous variables are treated as samples to be compared. \code{by} must be
+\code{NULL} in this case.
+\item If \code{select} is of length \strong{two} and \code{paired = TRUE}, the two samples
+are considered as \emph{dependent} and a paired test is carried out.
+\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test
+is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}).
+}}

-\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
-not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
-\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+\item{by}{Name of the variable indicating the groups. Required if \code{select}
+specifies only one variable that contains all samples to be compared in the
+test. If \code{by} is not a factor, it will be coerced to a factor. For
+\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.}

\item{probabilities}{A numeric vector of probabilities for each cell in the
contingency table. The length of the vector must match the number of cells
@@ -48,7 +58,7 @@ for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
given probabilities.
}
\description{
-This function performs a \eqn{chi}^2 test for contingency
+This function performs a \eqn{\chi^2} test for contingency
tables or tests for given probabilities. The returned effect sizes are
Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
@@ -65,8 +75,47 @@ a McNemar test (see \code{\link[=mcnemar.test]{mcnemar.test()}}) is conducted.

The weighted version of the chi-squared test is based on a weighted
table, using \code{\link[=xtabs]{xtabs()}} as input for \code{chisq.test()}.
+
+Interpretation of effect sizes is based on rules described in
+\code{\link[effectsize:interpret_r]{effectsize::interpret_phi()}}, \code{\link[effectsize:interpret_r]{effectsize::interpret_cramers_v()}},
+and \code{\link[effectsize:interpret_r]{effectsize::interpret_fei()}}.
+}
+\section{Which test to use}{
+
+The following table provides an overview of which test to use for different
+types of data. The choice of test depends on the scale of the outcome
+variable and the number of samples to compare.\tabular{lll}{
+   \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr
+   1 \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   1 \tab continuous, normal \tab \code{t_test()} \cr
+   2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr
+   2, independent \tab continuous, normal \tab \code{t_test()} \cr
+   2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr
+   2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr
+   >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr
+   >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr
+   >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr
+   >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr
+}
+
+
+(1) More than two dependent samples are considered as \emph{repeated measurements}.
+For ordinal or not-normally distributed outcomes, these samples are
+usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples
+in one variable, the groups to compare in another variable, and a third
+variable indicating the repeated measurements (subject IDs).
+
+(2) More than two dependent samples are considered as \emph{repeated measurements}.
+For normally distributed outcomes, these samples are usually tested using
+an ANOVA for repeated measurements. A more sophisticated approach would
+be to use a linear mixed model.
+}
+
\examples{
+\dontshow{if (requireNamespace("effectsize")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
data(efc)
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
@@ -78,10 +127,26 @@ chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight")

# Chi-squared test for given probabilities
chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7))
+\dontshow{\}) # examplesIf}
}
\references{
-Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+\itemize{
+\item Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
That Use the Chi‑Squared Statistic. Mathematics, 11, 1982. \doi{10.3390/math11091982}
+\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+Dtsch Med Wochenschr 2007; 132: e24–e25
+\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+}
+}
+\seealso{
+\itemize{
+\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples.
+\item \code{\link[=t_test]{t_test()}} for parametric t-tests.
+\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples).
+\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon signed-rank tests for paired (dependent) samples.
+\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables).
+}
}
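The decision table above maps directly onto function calls in this package. A short sketch using the `efc` example data (variable choices are illustrative: `c161sex` and `e16sex` are binary, `c12hour` is right-skewed, `e42dep` has more than two categories):

data(efc)

# two independent samples, binary/nominal outcome: chi-squared test
chi_squared_test(efc, "c161sex", by = "e16sex")

# two independent samples, continuous but skewed outcome: Mann-Whitney test
mann_whitney_test(efc, "c12hour", by = "e16sex")

# more than two independent samples, skewed outcome: Kruskal-Wallis test
kruskal_wallis_test(efc, "c12hour", by = "e42dep")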
diff --git a/man/crosstable_statistics.Rd b/man/crosstable_statistics.Rd
index cbea412f..7ae56212 100644
--- a/man/crosstable_statistics.Rd
+++ b/man/crosstable_statistics.Rd
@@ -65,10 +65,9 @@ frame including lower and upper confidence intervals.}
\item{n}{Number of bootstraps to be generated.}

\item{method}{Character vector, indicating if confidence intervals should be
-based on bootstrap standard error, multiplied by the value of the
-quantile function of the t-distribution (default), or on sample
-quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}.
-May be abbreviated.}
+based on bootstrap standard error, multiplied by the value of the quantile
+function of the t-distribution (default), or on sample quantiles of the
+bootstrapped values. See 'Details' in \code{boot_ci()}. May be abbreviated.}

\item{x1}{Name of first variable that should be used to compute the
contingency table. If \code{data} is a table object, this argument will be
diff --git a/man/gmd.Rd b/man/gmd.Rd
index 03823df5..7c549f8a 100644
--- a/man/gmd.Rd
+++ b/man/gmd.Rd
@@ -4,15 +4,14 @@
\alias{gmd}
\title{Gini's Mean Difference}
\usage{
-gmd(x, ...)
+gmd(x, select = NULL)
}
\arguments{
\item{x}{A vector or data frame.}

-\item{...}{Optional, unquoted names of variables that should be selected for
-further processing. Required, if \code{x} is a data frame (and no vector)
-and only selected variables from \code{x} should be processed. You may also
-use functions like \code{:} or tidyselect's \code{select_helpers()}.}
+\item{select}{Optional, names of variables (as character vector) that should
+be selected for further processing. Required if \code{x} is a data frame (and
+not a vector) and only selected variables from \code{x} should be processed.}
}
\value{
For numeric vectors, Gini's mean difference. For non-numeric vectors
@@ -24,13 +23,13 @@ or for all numeric vectors in a data frame.
}
\note{
Gini's mean difference is defined as the mean absolute difference between
-any two distinct elements of a vector. Missing values from \code{x} are
-silently removed.
+any two distinct elements of a vector. Missing values from \code{x} are silently
+removed.
}
\examples{
data(efc)
gmd(efc$e17age)
-gmd(efc, e17age, c160age, c12hour)
+gmd(efc, c("e17age", "c160age", "c12hour"))
}
\references{
diff --git a/man/inequ_trend.Rd b/man/inequ_trend.Rd
index 5a2bbc9f..25a188ed 100644
--- a/man/inequ_trend.Rd
+++ b/man/inequ_trend.Rd
@@ -36,6 +36,7 @@ in changes in rate differences and rate ratios. The function
implements the algorithm proposed by \emph{Mackenbach et al. 2015}.
}
\examples{
+\dontshow{if (requireNamespace("ggplot2")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}

# This example reproduces Fig. 1 of Mackenbach et al. 2015, p.5

# 40 simulated time points, with an initial rate ratio of 2 and
@@ -56,14 +57,14 @@ for (i in 2:n) hi[i] <- hi[i - 1] * .97
prev.data <- data.frame(lo, hi)

# print values
-inequ_trend(prev.data, lo, hi)
+inequ_trend(prev.data, "lo", "hi")

# plot trends - here we see that the relative inequalities
# are increasing over time, while the absolute inequalities
# are first increasing as well, but later are decreasing
# (while rel. inequ. are still increasing)
-plot(inequ_trend(prev.data, lo, hi))
-
+plot(inequ_trend(prev.data, "lo", "hi"))
+\dontshow{\}) # examplesIf}
}
\references{
Mackenbach JP, Martikainen P, Menvielle G, de Gelder R. 2015.
The Arithmetic of Reducing Relative and Absolute Inequalities in Health:
A Theoretical Analysis Illustrated with European Mortality Data.
Journal of Epidemiology and Community Health 70(7): 730-36.
\doi{10.1136/jech-2015-207018}
diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd
index d3033c42..8ca0bc9d 100644
--- a/man/kruskal_wallis_test.Rd
+++ b/man/kruskal_wallis_test.Rd
@@ -9,15 +9,25 @@ kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL)
\arguments{
\item{data}{A data frame.}

-\item{select}{Name of the dependent variable (as string) to be used for the
-test. \code{select} can also be a character vector, specifying the names of
-multiple continuous variables. In this case, \code{by} is ignored and variables
-specified in \code{select} are used to compute the test. This can be useful if
-the data is in wide-format and no grouping variable is available.}
+\item{select}{Name(s) of the continuous variable(s) (as character vector)
+to be used as samples for the test. \code{select} can be one of the following:
+\itemize{
+\item \code{select} can be used in combination with \code{by}, in which case \code{select} is
+the name of the continuous variable (and \code{by} indicates a grouping factor).
+\item \code{select} can also be a character vector of length two or more (more than
+two names only apply to \code{kruskal_wallis_test()}), in which case the two
+continuous variables are treated as samples to be compared. \code{by} must be
+\code{NULL} in this case.
+\item If \code{select} is of length \strong{two} and \code{paired = TRUE}, the two samples
+are considered as \emph{dependent} and a paired test is carried out.
+\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test
+is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}).
+}}

-\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
-not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
-\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+\item{by}{Name of the variable indicating the groups. Required if \code{select}
+specifies only one variable that contains all samples to be compared in the
+test. If \code{by} is not a factor, it will be coerced to a factor. For
+\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.}

\item{weights}{Name of an (optional) weighting variable to be used for the test.}
}
@@ -27,13 +37,49 @@ A data frame with test results.
\description{
This function performs a Kruskal-Wallis rank sum test, to test the null
hypothesis that the population medians of all of the groups are
-equal. The alternative is that they differ in at least one.
+equal. The alternative is that they differ in at least one. Unlike the
+underlying base R function \code{kruskal.test()}, this function allows for
+weighted tests.
}
\details{
The function is simply a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The weighted
version of the Kruskal-Wallis test is based on the \strong{survey} package, using
\code{\link[survey:svyranktest]{survey::svyranktest()}}.
}
+\section{Which test to use}{
+
+The following table provides an overview of which test to use for different
+types of data. The choice of test depends on the scale of the outcome
+variable and the number of samples to compare.\tabular{lll}{
+   \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr
+   1 \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   1 \tab continuous, normal \tab \code{t_test()} \cr
+   2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr
+   2, independent \tab continuous, normal \tab \code{t_test()} \cr
+   2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr
+   2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr
+   >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr
+   >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr
+   >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr
+   >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr
+}
+
+
+(1) More than two dependent samples are considered as \emph{repeated measurements}.
+For ordinal or not-normally distributed outcomes, these samples are
+usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples
+in one variable, the groups to compare in another variable, and a third
+variable indicating the repeated measurements (subject IDs).
+
+(2) More than two dependent samples are considered as \emph{repeated measurements}.
+For normally distributed outcomes, these samples are usually tested using
+an ANOVA for repeated measurements. A more sophisticated approach would
+be to use a linear mixed model.
+}
+
\examples{
data(efc)
# Kruskal-Wallis test for elder's age by education
@@ -55,4 +101,23 @@ long_data <- data.frame(
  groups = rep(c("A", "B", "C"), each = 20)
)
kruskal_wallis_test(long_data, select = "scales", by = "groups")
+# base R equivalent
+kruskal.test(scales ~ groups, data = long_data)
+}
+\references{
+\itemize{
+\item Bender, R., Lange, S., Ziegler, A. Wichtige Signifikanztests.
+Dtsch Med Wochenschr 2007; 132: e24–e25
+\item du Prel, J.B., Röhrig, B., Hommel, G., Blettner, M. Auswahl statistischer
+Testverfahren. Dtsch Arztebl Int 2010; 107(19): 343–8
+}
+}
+\seealso{
+\itemize{
+\item \code{\link[=mann_whitney_test]{mann_whitney_test()}} for unpaired (independent) samples.
+\item \code{\link[=t_test]{t_test()}} for parametric t-tests.
+\item \code{\link[=kruskal_wallis_test]{kruskal_wallis_test()}} for non-parametric ANOVA (i.e. more than two samples).
+\item \code{\link[=wilcoxon_test]{wilcoxon_test()}} for Wilcoxon signed-rank tests for paired (dependent) samples.
+\item \code{\link[=chi_squared_test]{chi_squared_test()}} for chi-squared tests (two categorical variables).
+}
}
diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd
index ab376af7..2e3787be 100644
--- a/man/mann_whitney_test.Rd
+++ b/man/mann_whitney_test.Rd
@@ -2,46 +2,69 @@
% Please edit documentation in R/mann_whitney_test.R
\name{mann_whitney_test}
\alias{mann_whitney_test}
-\title{Mann-Whitney-Test}
+\title{Mann-Whitney test}
\usage{
mann_whitney_test(
  data,
  select = NULL,
  by = NULL,
  weights = NULL,
-  distribution = "asymptotic"
+  mu = 0,
+  alternative = "two.sided",
+  ...
)
}
\arguments{
\item{data}{A data frame.}

-\item{select}{Name of the dependent variable (as string) to be used for the
-test. \code{select} can also be a character vector, specifying the names of
-multiple continuous variables. In this case, \code{by} is ignored and variables
-specified in \code{select} are used to compute the test. This can be useful if
-the data is in wide-format and no grouping variable is available.}
+\item{select}{Name(s) of the continuous variable(s) (as character vector)
+to be used as samples for the test. \code{select} can be one of the following:
+\itemize{
+\item \code{select} can be used in combination with \code{by}, in which case \code{select} is
+the name of the continuous variable (and \code{by} indicates a grouping factor).
+\item \code{select} can also be a character vector of length two or more (more than
+two names only apply to \code{kruskal_wallis_test()}), in which case the two
+continuous variables are treated as samples to be compared. \code{by} must be
+\code{NULL} in this case.
+\item If \code{select} is of length \strong{two} and \code{paired = TRUE}, the two samples
+are considered as \emph{dependent} and a paired test is carried out.
+\item If \code{select} specifies \strong{one} variable and \code{by = NULL}, a one-sample test
+is carried out (only applicable for \code{t_test()} and \code{wilcoxon_test()}).
+}}

-\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
-not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
-\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+\item{by}{Name of the variable indicating the groups. Required if \code{select}
+specifies only one variable that contains all samples to be compared in the
+test. If \code{by} is not a factor, it will be coerced to a factor. For
+\code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.}

\item{weights}{Name of an (optional) weighting variable to be used for the test.}

-\item{distribution}{Indicates how the null distribution of the test statistic
-should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{"asymptotic"}
-(default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.}
+\item{mu}{The hypothesized difference in means (for \code{t_test()}) or location
+shift (for \code{mann_whitney_test()}). The default is 0.}
+
+\item{alternative}{A character string specifying the alternative hypothesis,
+must be one of \code{"two.sided"} (default), \code{"greater"} or \code{"less"}. See \code{?t.test}
+and \code{?wilcox.test}.}
+
+\item{...}{Additional arguments passed to \code{wilcox.test()} (for unweighted
+tests, i.e. when \code{weights = NULL}).}
}
\value{
A data frame with test results. The function returns p and Z-values as
well as effect size r and group-rank-means.
}
\description{
-This function performs a Mann-Whitney-Test (or Wilcoxon rank
-sum test for \emph{unpaired} samples.
+This function performs a Mann-Whitney test (or Wilcoxon rank
+sum test for \emph{unpaired} samples). Unlike the underlying base R function
+\code{wilcox.test()}, this function allows for weighted tests and automatically
+calculates effect sizes. For \emph{paired} (dependent) samples, or for one-sample
+tests, please use the \code{wilcoxon_test()} function.

-A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
-independent samples have identical continuous distributions. It can be used
-when the two continuous variables are not normally distributed.
+A Mann-Whitney test is a non-parametric test for the null hypothesis that two
+\emph{independent} samples have identical continuous distributions. It can be used
+for ordinal scales or when the two continuous variables are not normally
+distributed. For large samples, or approximately normally distributed variables,
+the \code{t_test()} function can be used.
}
\details{
This function is based on \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}
@@ -55,26 +78,85 @@ Interpretation of the effect size \strong{r}, as a rule-of-thumb:
\item large effect >= 0.5
}

-\strong{r} is calcuated as:
+\strong{r} is calculated as \eqn{r = \frac{|Z|}{\sqrt{n1 + n2}}}.
+}
+\section{Which test to use}{
+
+The following table provides an overview of which test to use for different
+types of data. The choice of test depends on the scale of the outcome
+variable and the number of samples to compare.\tabular{lll}{
+   \strong{Samples} \tab \strong{Scale of Outcome} \tab \strong{Significance Test} \cr
+   1 \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   1 \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   1 \tab continuous, normal \tab \code{t_test()} \cr
+   2, independent \tab binary / nominal \tab \code{chi_squared_test()} \cr
+   2, independent \tab continuous, not normal \tab \code{mann_whitney_test()} \cr
+   2, independent \tab continuous, normal \tab \code{t_test()} \cr
+   2, dependent \tab binary (only 2x2) \tab \code{chi_squared_test(paired=TRUE)} \cr
+   2, dependent \tab continuous, not normal \tab \code{wilcoxon_test()} \cr
+   2, dependent \tab continuous, normal \tab \code{t_test(paired=TRUE)} \cr
+   >2, independent \tab continuous, not normal \tab \code{kruskal_wallis_test()} \cr
+   >2, independent \tab continuous, normal \tab \code{datawizard::means_by_group()} \cr
+   >2, dependent \tab continuous, not normal \tab \emph{not yet implemented} (1) \cr
+   >2, dependent \tab continuous, normal \tab \emph{not yet implemented} (2) \cr
+}
+
+
+(1) More than two dependent samples are considered as \emph{repeated measurements}.
+For ordinal or not-normally distributed outcomes, these samples are
+usually tested using a \code{\link[=friedman.test]{friedman.test()}}, which requires the samples
+in one variable, the groups to compare in another variable, and a third
+variable indicating the repeated measurements (subject IDs).
-\if{html}{\out{