Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed May 9, 2024
1 parent f47612d commit 7b17a7c
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 48 deletions.
17 changes: 17 additions & 0 deletions .lintr
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
linters: linters_with_defaults(
absolute_path_linter = NULL,
commented_code_linter = NULL,
cyclocomp_linter = cyclocomp_linter(25),
extraction_operator_linter = NULL,
implicit_integer_linter = NULL,
line_length_linter(120),
namespace_linter = NULL,
nonportable_path_linter = NULL,
object_name_linter = NULL,
object_length_linter(50),
object_usage_linter = NULL,
todo_comment_linter = NULL,
undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)),
undesirable_operator_linter = NULL,
defaults = linters_with_tags(tags = NULL)
)
58 changes: 40 additions & 18 deletions R/chi_squared_test.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @title Chi-Squared Test
#' @title Chi-Squared test
#' @name chi_squared_test
#' @description This function performs a \eqn{chi}^2 test for contingency
#' tables or tests for given probabilities. The returned effects sizes are
Expand All @@ -12,6 +12,8 @@
#' in `select`. If `probabilities` is provided, a chi-squared test for given
#' probabilities is conducted. Furthermore, if `probabilities` is given, `by`
#' must be `NULL`. The probabilities must sum to 1.
#' @param paired Logical, if `TRUE`, a McNemar test is conducted for 2x2 tables.
#' Note that `paired` only works for 2x2 tables.
#' @param ... Additional arguments passed down to [`chisq.test()`].
#' @inheritParams mann_whitney_test
#'
Expand All @@ -25,7 +27,8 @@
#' `chisq.test()` for given probabilities. When `probabilities` are provided,
#' these are rescaled to sum to 1 (i.e. `rescale.p = TRUE`). When `fisher.test()`
#' is called, simulated p-values are returned (i.e. `simulate.p.value = TRUE`,
#' see `?fisher.test`).
#' see `?fisher.test`). If `paired = TRUE` and a 2x2 table is provided,
#' a McNemar test (see [`mcnemar.test()`]) is conducted.
#'
#' The weighted version of the chi-squared test is based on the a weighted
#' table, using [`xtabs()`] as input for `chisq.test()`.
Expand All @@ -38,30 +41,35 @@
#' @examples
#' data(efc)
#' efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
#' # Chi-squared-test
#' # Chi-squared test
#' chi_squared_test(efc, "c161sex", by = "e16sex")
#' # weighted Chi-squared-test
#' # weighted Chi-squared test
#' chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight")
#' # Chi-squared-test for given probabilities
#' # Chi-squared test for given probabilities
#' chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7))
#' @export
chi_squared_test <- function(data,
select = NULL,
by = NULL,
probabilities = NULL,
weights = NULL,
paired = FALSE,
...) {
if (is.null(probabilities)) {
.calculate_chisq(data, select, by, weights, ...)
.calculate_chisq(data, select, by, weights, paired, ...)
} else {
# sanity check - `paired = TRUE` is not available for given probabilities
if (paired) {
insight::format_error("When `probabilities` are provided, `paired = TRUE` is not available.") # nolint
}
.calculate_chisq_gof(data, select, probabilities, weights, ...)
}
}


# Mann-Whitney-Test for two groups --------------------------------------------

.calculate_chisq <- function(data, select, by, weights, verbose = TRUE, ...) {
.calculate_chisq <- function(data, select, by, weights, paired = FALSE, ...) {
insight::check_if_installed("datawizard")
# sanity checks
.sanitize_htest_input(data, select, by, weights)
Expand All @@ -70,6 +78,11 @@ chi_squared_test <- function(data,
grp1 <- data[[select]]
grp2 <- data[[by]]

# if paired = TRUE, we only allow a 2x2 table
if (paired && (length(stats::na.omit(unique(grp1))) != 2 || length(stats::na.omit(unique(grp2))) != 2)) {
insight::format_error("When `paired = TRUE`, only 2x2 tables are allowed (i.e. both variables must have exactly two levels).") # nolint
}

# create data frame for table
x <- data.frame(
grp1 = datawizard::to_factor(grp1),
Expand All @@ -93,13 +106,18 @@ chi_squared_test <- function(data,
# expected values, to identify whether Fisher's test is needed
expected_values <- as.table(round(as.array(margin.table(tab, 1)) %*% t(as.array(margin.table(tab, 2))) / margin.table(tab))) # nolint

# chi-squared test
htest <- suppressWarnings(stats::chisq.test(tab, ...))
test_statistic <- htest$statistic

# need fisher?
if (min(expected_values) < 5 || (min(expected_values) < 10 && htest$parameter == 1)) {
htest <- stats::fisher.test(tab, simulate.p.value = TRUE, ...)
# paired? mc-nemar test
if (paired) {
htest <- suppressWarnings(stats::mcnemar.test(tab, ...))
test_statistic <- htest$statistic
} else {
# chi-squared test
htest <- suppressWarnings(stats::chisq.test(tab, ...))
test_statistic <- htest$statistic
# need fisher?
if (min(expected_values) < 5 || (min(expected_values) < 10 && htest$parameter == 1)) {
htest <- stats::fisher.test(tab, simulate.p.value = TRUE, ...)
}
}
p_value <- htest$p.value

Expand All @@ -125,7 +143,8 @@ chi_squared_test <- function(data,
class(out) <- c("sj_htest_chi", "data.frame")
attr(out, "weighted") <- !is.null(weights)
attr(out, "fisher") <- isTRUE(startsWith(htest$method, "Fisher"))
attr(out, "caption") <- "Contingency Tables"
attr(out, "mcnemar") <- isTRUE(paired)
attr(out, "caption") <- "contingency tables"
out
}

Expand Down Expand Up @@ -192,7 +211,7 @@ chi_squared_test <- function(data,
stringsAsFactors = FALSE
)
class(out) <- c("sj_htest_chi", "data.frame")
attr(out, "caption") <- "given Probabilities"
attr(out, "caption") <- "given probabilities"
attr(out, "weighted") <- !is.null(weights)
out
}
Expand All @@ -210,17 +229,20 @@ print.sj_htest_chi <- function(x, ...) {
}

fisher <- attributes(x)$fisher
mcnemar <- attributes(x)$mcnemar

# headline
insight::print_color(sprintf(
"\n# Chi-Squared Test for %s%s\n",
"\n# Chi-squared test for %s%s\n",
attributes(x)$caption,
weight_string
), "blue")

# Fisher's exact test?
if (fisher) {
if (isTRUE(fisher)) {
insight::print_color(" (using Fisher's exact test due to small expected values)\n", "blue") # nolint
} else if (isTRUE(mcnemar)) {
insight::print_color(" (using McNemar's test for paired data)\n", "blue") # nolint
}

cat("\n")
Expand Down
64 changes: 45 additions & 19 deletions R/kruskal_wallis_test.R
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
#' @title Kruskal-Wallis-Test
#' @title Kruskal-Wallis test
#' @name kruskal_wallis_test
#' @description This function performs a Kruskal-Wallis rank sum test, to test
#' the null hypothesis that the population median of all of the groups are
#' equal. The alternative is that they differ in at least one.
#' equal. The alternative is that they differ in at least one. If `paired = TRUE`,
#' a paired Friedman test is conducted.
#'
#' @inheritParams mann_whitney_test
#' @param paired Logical, if `TRUE`, a paired Friedman test is conducted (see
#' [`friedman.test()`]).
#'
#' @return A data frame with test results.
#'
#' @details The function simply is a wrapper around [`kruskal.test()`]. The
#' weighted version of the Kruskal-Wallis test is based on the `survey` package,
#' using [`survey::svyranktest()`].
#' using [`survey::svyranktest()`]. When `paired = TRUE`, a paired Friedman test
#' is conducted (see [`friedman.test()`]).
#'
#' @examples
#' data(efc)
#' # Kruskal-Wallis-Test for elder's age by education
#' # Kruskal-Wallis test for elder's age by education
#' kruskal_wallis_test(efc, "e17age", by = "c172code")
#' @export
kruskal_wallis_test <- function(data,
select = NULL,
by = NULL,
weights = NULL) {
weights = NULL,
paired = FALSE) {
insight::check_if_installed("datawizard")

# sanity checks
Expand All @@ -38,22 +43,31 @@ kruskal_wallis_test <- function(data,
insight::format_error("At least two groups are required, i.e. data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint
}
if (is.null(weights)) {
.calculate_kw(dv, grp)
.calculate_kw(dv, grp, paired)
} else {
.calculate_weighted_kw(dv, grp, data[[weights]])
.calculate_weighted_kw(dv, grp, data[[weights]], paired = TRUE)
}
}


# Kruskal-Wallis-Test --------------------------------------------

.calculate_kw <- function(dv, grp) {
.calculate_kw <- function(dv, grp, paired = FALSE) {
# prepare data
wcdat <- data.frame(dv, grp)
# perfom wilcox test
wt <- stats::kruskal.test(dv ~ grp, data = wcdat)
if (paired) {
# perfom friedman test for paired data
wt <- stats::friedman.test(table(wcdat))
} else {
# perfom kruskal wallis test
wt <- stats::kruskal.test(dv ~ grp, data = wcdat)
}
# number of groups
n_groups <- vapply(unique(grp), function(g) sum(grp == g, na.rm = TRUE), numeric(1))
n_groups <- vapply(
stats::na.omit(unique(grp)),
function(g) sum(grp == g, na.rm = TRUE),
numeric(1)
)

out <- data.frame(
data = wt$data.name,
Expand All @@ -64,7 +78,7 @@ kruskal_wallis_test <- function(data,
)

attr(out, "n_groups") <- n_groups
attr(out, "method") <- "kruskal"
attr(out, "method") <- ifelse(paired, "friedman", "kruskal")
attr(out, "weighted") <- FALSE
class(out) <- c("sj_htest_kw", "data.frame")

Expand All @@ -74,21 +88,28 @@ kruskal_wallis_test <- function(data,

# Weighted Mann-Whitney-Test for two groups ----------------------------------

.calculate_weighted_kw <- function(dv, grp, weights) {
.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE) {
# check if pkg survey is available
insight::check_if_installed("survey")

dat <- stats::na.omit(data.frame(dv, grp, weights))
colnames(dat) <- c("x", "g", "w")

design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
result <- survey::svyranktest(formula = x ~ g, design, test = "KruskalWallis")

# number of groups
n_groups <- vapply(unique(grp), function(g) {
n_groups <- vapply(stats::na.omit(unique(grp)), function(g) {
sum(dat$w[dat$grp == g], na.rm = TRUE)
}, numeric(1))

if (paired) {
tab <- as.table(round(stats::xtabs(x[[3]] ~ x[[1]] + x[[2]])))
class(tab) <- "table"
# perfom friedman test for paired data
result <- stats::friedman.test(tab)
} else {
design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
result <- survey::svyranktest(formula = x ~ g, design, test = "KruskalWallis")
}

out <- data.frame(
data = paste(dv, "by", grp),
Chi2 = result$statistic,
Expand All @@ -98,7 +119,7 @@ kruskal_wallis_test <- function(data,
)

attr(out, "n_groups") <- n_groups
attr(out, "method") <- "kruskal"
attr(out, "method") <- ifelse(paired, "friedman", "kruskal")
attr(out, "weighted") <- TRUE
class(out) <- c("sj_htest_kw", "data.frame")

Expand All @@ -114,6 +135,7 @@ print.sj_htest_kw <- function(x, ...) {
# fetch attributes
n_groups <- attributes(x)$n_groups
weighted <- attributes(x)$weighted
method <- attributes(x)$method

if (weighted) {
weight_string <- " (weighted)"
Expand All @@ -122,7 +144,11 @@ print.sj_htest_kw <- function(x, ...) {
}

# header
insight::print_color(sprintf("# Kruskal-Wallis-Test%s\n\n", weight_string), "blue")
if (identical(method, "kruskal")) {
insight::print_color(sprintf("# Kruskal-Wallis test%s\n\n", weight_string), "blue")
} else {
insight::print_color(sprintf("# Friedman test%s\n\n", weight_string), "blue")
}

# data info
insight::print_color(
Expand Down
2 changes: 1 addition & 1 deletion R/mann_whitney_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ print.sj_htest_mwu <- function(x, ...) {
group_labels <- format(group_labels)

# header
insight::print_color(sprintf("# Mann-Whitney-Test%s\n\n", weight_string), "blue")
insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue")

# group-1-info
insight::print_color(
Expand Down
15 changes: 10 additions & 5 deletions man/chi_squared_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7b17a7c

Please sign in to comment.