Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
strengejacke committed May 9, 2024
1 parent 7b17a7c commit df5a5ee
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 40 deletions.
33 changes: 19 additions & 14 deletions R/kruskal_wallis_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,15 @@
#' @name kruskal_wallis_test
#' @description This function performs a Kruskal-Wallis rank sum test, to test
#' the null hypothesis that the population medians of all of the groups are
#' equal. The alternative is that they differ in at least one. If `paired = TRUE`,
#' a paired Friedman test is conducted.
#' equal. The alternative is that they differ in at least one.
#'
#' @inheritParams mann_whitney_test
#' @param paired Logical, if `TRUE`, a paired Friedman test is conducted (see
#' [`friedman.test()`]).
#'
#' @return A data frame with test results.
#'
#' @details The function simply is a wrapper around [`kruskal.test()`]. The
#' weighted version of the Kruskal-Wallis test is based on the `survey` package,
#' using [`survey::svyranktest()`]. When `paired = TRUE`, a paired Friedman test
#' is conducted (see [`friedman.test()`]).
#' using [`survey::svyranktest()`].
#'
#' @examples
#' data(efc)
Expand All @@ -24,13 +20,25 @@
kruskal_wallis_test <- function(data,
select = NULL,
by = NULL,
weights = NULL,
paired = FALSE) {
weights = NULL) {
insight::check_if_installed("datawizard")

# sanity checks
.sanitize_htest_input(data, select, by, weights)

# does select indicate more than one variable?
if (length(select) > 1) {
if (!is.null(by)) {
insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
}
# we convert the data into long format, and create a grouping variable
data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
by <- select[2]
select <- select[1]
# after converting to long, we have the "grouping" variable first in the data
colnames(data) <- c(by, select)
}

# get data
dv <- data[[select]]
grp <- data[[by]]
Expand All @@ -43,9 +51,9 @@ kruskal_wallis_test <- function(data,
insight::format_error("At least two groups are required, i.e. data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint
}
if (is.null(weights)) {
.calculate_kw(dv, grp, paired)
.calculate_kw(dv, grp)
} else {
.calculate_weighted_kw(dv, grp, data[[weights]], paired = TRUE)
.calculate_weighted_kw(dv, grp, data[[weights]])
}
}

Expand Down Expand Up @@ -101,10 +109,7 @@ kruskal_wallis_test <- function(data,
}, numeric(1))

if (paired) {
tab <- as.table(round(stats::xtabs(x[[3]] ~ x[[1]] + x[[2]])))
class(tab) <- "table"
# perfom friedman test for paired data
result <- stats::friedman.test(tab)
## TODO: `paired` is not working yet; this branch should call `friedman.test()`
} else {
design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
result <- survey::svyranktest(formula = x ~ g, design, test = "KruskalWallis")
Expand Down
39 changes: 30 additions & 9 deletions R/mann_whitney_test.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@
#'
#' @param data A data frame.
#' @param select Name of the dependent variable (as string) to be used for the
#' test.
#' test. `select` can also be a character vector, specifying the names of
#' multiple continuous variables. In this case, `by` must be `NULL`, and the
#' variables specified in `select` are used to compute the test. This can be
#' useful if the data is in wide-format and no grouping variable is available.
#' @param by Name of the grouping variable to be used for the test. If `by` is
#' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if
#' `probabilities` is provided, `by` must be `NULL`.
Expand Down Expand Up @@ -52,6 +55,23 @@ mann_whitney_test <- function(data,
# sanity checks
.sanitize_htest_input(data, select, by, weights)

# does select indicate more than one variable?
if (length(select) > 1) {
# sanity check - may only specify two variable names
if (length(select) > 2) {
insight::format_error("You may only specify two variables for Mann-Whitney test.")
}
if (!is.null(by)) {
insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
}
# we convert the data into long format, and create a grouping variable
data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
by <- select[2]
select <- select[1]
# after converting to long, we have the "grouping" variable first in the data
colnames(data) <- c(by, select)
}

# get data
dv <- data[[select]]
grp <- data[[by]]
Expand All @@ -61,7 +81,7 @@ mann_whitney_test <- function(data,

# only two groups allowed
if (insight::n_unique(grp) > 2) {
insight::format_error("Only two groups are allowed for Mann-Whitney-Test. Please use `kruskal_wallis_test()` for more than two groups.") # nolint
insight::format_error("Only two groups are allowed for Mann-Whitney test. Please use `kruskal_wallis_test()` for more than two groups.") # nolint
}

# value labels
Expand Down Expand Up @@ -267,21 +287,22 @@ mann_whitney_test <- function(data,
}

# check if arguments have correct length (length of 1)
if (length(select) != 1 || !is.character(select)) {
insight::format_error("Argument `select` must be the name of a single variable.")
if (!is.character(select)) {
insight::format_error("Argument `select` must be a character string with the name(s) of the variable(s).")
}
if (length(by) != 1 || !is.character(by)) {
insight::format_error("Argument `by` must be the name of a single variable.")
insight::format_error("Argument `by` must be a character string with the name of a single variable.")
}
if (!is.null(weights) && length(weights) != 1) {
insight::format_error("Argument `weights` must be the name of a single variable.")
insight::format_error("Argument `weights` must be a character string with the name of a single variable.")
}

# check if "select" is in data
if (!select %in% colnames(data)) {
if (!all(select %in% colnames(data))) {
not_found <- setdiff(select, colnames(data))[1]
insight::format_error(
sprintf("Variable '%s' not found in data frame.", select),
.misspelled_string(colnames(data), select, "Maybe misspelled?")
sprintf("Variable '%s' not found in data frame.", not_found),
.misspelled_string(colnames(data), not_found, "Maybe misspelled?")
)
}
# check if "by" is in data
Expand Down
5 changes: 4 additions & 1 deletion man/chi_squared_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 7 additions & 15 deletions man/kruskal_wallis_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/mann_whitney_test.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit df5a5ee

Please sign in to comment.