diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R index dd0eebc..aca3e98 100644 --- a/R/kruskal_wallis_test.R +++ b/R/kruskal_wallis_test.R @@ -2,19 +2,15 @@ #' @name kruskal_wallis_test #' @description This function performs a Kruskal-Wallis rank sum test, to test #' the null hypothesis that the population median of all of the groups are -#' equal. The alternative is that they differ in at least one. If `paired = TRUE`, -#' a paired Friedman test is conducted. +#' equal. The alternative is that they differ in at least one. #' #' @inheritParams mann_whitney_test -#' @param paired Logical, if `TRUE`, a paired Friedman test is conducted (see -#' [`friedman.test()`]). #' #' @return A data frame with test results. #' #' @details The function simply is a wrapper around [`kruskal.test()`]. The #' weighted version of the Kruskal-Wallis test is based on the `survey` package, -#' using [`survey::svyranktest()`]. When `paired = TRUE`, a paired Friedman test -#' is conducted (see [`friedman.test()`]). +#' using [`survey::svyranktest()`]. #' #' @examples #' data(efc) @@ -24,13 +20,25 @@ kruskal_wallis_test <- function(data, select = NULL, by = NULL, - weights = NULL, - paired = FALSE) { + weights = NULL) { insight::check_if_installed("datawizard") # sanity checks .sanitize_htest_input(data, select, by, weights) + # does select indicate more than one variable? 
+ if (length(select) > 1) { + if (!is.null(by)) { + insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") + } + # we convert the data into long format, and create a grouping variable + data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") + by <- select[2] + select <- select[1] + # after converting to long, we have the "grouping" variable first in the data + colnames(data) <- c(by, select) + } + # get data dv <- data[[select]] grp <- data[[by]] @@ -43,9 +51,9 @@ kruskal_wallis_test <- function(data, insight::format_error("At least two groups are required, i.e. data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint } if (is.null(weights)) { - .calculate_kw(dv, grp, paired) + .calculate_kw(dv, grp) } else { - .calculate_weighted_kw(dv, grp, data[[weights]], paired = TRUE) + .calculate_weighted_kw(dv, grp, data[[weights]]) } } @@ -101,10 +109,7 @@ kruskal_wallis_test <- function(data, }, numeric(1)) if (paired) { - tab <- as.table(round(stats::xtabs(x[[3]] ~ x[[1]] + x[[2]]))) - class(tab) <- "table" - # perfom friedman test for paired data - result <- stats::friedman.test(tab) + ## TODO: paired not working. should call `friedman.test()` } else { design <- survey::svydesign(ids = ~0, data = dat, weights = ~w) result <- survey::svyranktest(formula = x ~ g, design, test = "KruskalWallis") diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R index d900bf2..1caf823 100644 --- a/R/mann_whitney_test.R +++ b/R/mann_whitney_test.R @@ -9,7 +9,10 @@ #' #' @param data A data frame. #' @param select Name of the dependent variable (as string) to be used for the -#' test. +#' test. `select` can also be a character vector, specifying the names of +#' multiple continuous variables. In this case, `by` is ignored and variables +#' specified in `select` are used to compute the test. This can be useful if +#' the data is in wide-format and no grouping variable is available. 
#' @param by Name of the grouping variable to be used for the test. If `by` is #' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if #' `probabilities` is provided, `by` must be `NULL`. @@ -52,6 +55,23 @@ mann_whitney_test <- function(data, # sanity checks .sanitize_htest_input(data, select, by, weights) + # does select indicate more than one variable? + if (length(select) > 1) { + # sanity check - may only specify two variable names + if (length(select) > 2) { + insight::format_error("You may only specify two variables for Mann-Whitney test.") + } + if (!is.null(by)) { + insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.") + } + # we convert the data into long format, and create a grouping variable + data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale") + by <- select[2] + select <- select[1] + # after converting to long, we have the "grouping" variable first in the data + colnames(data) <- c(by, select) + } + # get data dv <- data[[select]] grp <- data[[by]] @@ -61,7 +81,7 @@ mann_whitney_test <- function(data, # only two groups allowed if (insight::n_unique(grp) > 2) { - insight::format_error("Only two groups are allowed for Mann-Whitney-Test. Please use `kruskal_wallis_test()` for more than two groups.") # nolint + insight::format_error("Only two groups are allowed for Mann-Whitney test. 
Please use `kruskal_wallis_test()` for more than two groups.") # nolint } # value labels @@ -267,21 +287,22 @@ mann_whitney_test <- function(data, } # check if arguments have correct length (length of 1) - if (length(select) != 1 || !is.character(select)) { - insight::format_error("Argument `select` must be the name of a single variable.") + if (!is.character(select)) { + insight::format_error("Argument `select` must be a character string with the name(s) of the variable(s).") } if (length(by) != 1 || !is.character(by)) { - insight::format_error("Argument `by` must be the name of a single variable.") + insight::format_error("Argument `by` must be a character string with the name of a single variable.") } if (!is.null(weights) && length(weights) != 1) { - insight::format_error("Argument `weights` must be the name of a single variable.") + insight::format_error("Argument `weights` must be a character string with the name of a single variable.") } # check if "select" is in data - if (!select %in% colnames(data)) { + if (!all(select %in% colnames(data))) { + not_found <- setdiff(select, colnames(data))[1] insight::format_error( - sprintf("Variable '%s' not found in data frame.", select), - .misspelled_string(colnames(data), select, "Maybe misspelled?") + sprintf("Variable '%s' not found in data frame.", not_found), + .misspelled_string(colnames(data), not_found, "Maybe misspelled?") ) } # check if "by" is in data diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd index 9bcfd73..71a0b15 100644 --- a/man/chi_squared_test.Rd +++ b/man/chi_squared_test.Rd @@ -18,7 +18,10 @@ chi_squared_test( \item{data}{A data frame.} \item{select}{Name of the dependent variable (as string) to be used for the -test.} +test. \code{select} can also be a character vector, specifying the names of +multiple continuous variables. In this case, \code{by} is ignored and variables +specified in \code{select} are used to compute the test. 
This can be useful if +the data is in wide-format and no grouping variable is available.} \item{by}{Name of the grouping variable to be used for the test. If \code{by} is not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd index 0b30814..6b8bf3f 100644 --- a/man/kruskal_wallis_test.Rd +++ b/man/kruskal_wallis_test.Rd @@ -4,28 +4,22 @@ \alias{kruskal_wallis_test} \title{Kruskal-Wallis test} \usage{ -kruskal_wallis_test( - data, - select = NULL, - by = NULL, - weights = NULL, - paired = FALSE -) +kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL) } \arguments{ \item{data}{A data frame.} \item{select}{Name of the dependent variable (as string) to be used for the -test.} +test. \code{select} can also be a character vector, specifying the names of +multiple continuous variables. In this case, \code{by} is ignored and variables +specified in \code{select} are used to compute the test. This can be useful if +the data is in wide-format and no grouping variable is available.} \item{by}{Name of the grouping variable to be used for the test. If \code{by} is not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if \code{probabilities} is provided, \code{by} must be \code{NULL}.} \item{weights}{Name of an (optional) weighting variable to be used for the test.} - -\item{paired}{Logical, if \code{TRUE}, a paired Friedman test is conducted (see -\code{\link[=friedman.test]{friedman.test()}}).} } \value{ A data frame with test results. @@ -33,14 +27,12 @@ A data frame with test results. \description{ This function performs a Kruskal-Wallis rank sum test, to test the null hypothesis that the population median of all of the groups are -equal. The alternative is that they differ in at least one. If \code{paired = TRUE}, -a paired Friedman test is conducted. +equal. The alternative is that they differ in at least one. 
} \details{ The function simply is a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The weighted version of the Kruskal-Wallis test is based on the \code{survey} package, -using \code{\link[survey:svyranktest]{survey::svyranktest()}}. When \code{paired = TRUE}, a paired Friedman test -is conducted (see \code{\link[=friedman.test]{friedman.test()}}). +using \code{\link[survey:svyranktest]{survey::svyranktest()}}. } \examples{ data(efc) diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd index 1401a04..9ab30e6 100644 --- a/man/mann_whitney_test.Rd +++ b/man/mann_whitney_test.Rd @@ -16,7 +16,10 @@ mann_whitney_test( \item{data}{A data frame.} \item{select}{Name of the dependent variable (as string) to be used for the -test.} +test. \code{select} can also be a character vector, specifying the names of +multiple continuous variables. In this case, \code{by} is ignored and variables +specified in \code{select} are used to compute the test. This can be useful if +the data is in wide-format and no grouping variable is available.} \item{by}{Name of the grouping variable to be used for the test. If \code{by} is not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if