diff --git a/.lintr b/.lintr new file mode 100644 index 00000000..b0e8abef --- /dev/null +++ b/.lintr @@ -0,0 +1,17 @@ +linters: linters_with_defaults( + absolute_path_linter = NULL, + commented_code_linter = NULL, + cyclocomp_linter = cyclocomp_linter(25), + extraction_operator_linter = NULL, + implicit_integer_linter = NULL, + line_length_linter(120), + namespace_linter = NULL, + nonportable_path_linter = NULL, + object_name_linter = NULL, + object_length_linter(50), + object_usage_linter = NULL, + todo_comment_linter = NULL, + undesirable_function_linter(c("mapply" = NA, "sapply" = NA, "setwd" = NA)), + undesirable_operator_linter = NULL, + defaults = linters_with_tags(tags = NULL) + ) diff --git a/DESCRIPTION b/DESCRIPTION index e1c10be4..e25170ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,11 +19,9 @@ Depends: utils Imports: bayestestR, - broom, datawizard, dplyr, effectsize, - emmeans, insight, lme4, magrittr, @@ -39,6 +37,7 @@ Imports: tidyr Suggests: brms, + broom, car, coin, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index 2872f936..5a224af0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,9 +3,9 @@ S3method(AIC,svyglm.nb) S3method(as.data.frame,sj_resample) S3method(as.integer,sj_resample) -S3method(cramer,formula) -S3method(cramer,ftable) -S3method(cramer,table) +S3method(cramers_v,formula) +S3method(cramers_v,ftable) +S3method(cramers_v,table) S3method(deviance,svyglm.nb) S3method(family,svyglm.nb) S3method(formula,svyglm.nb) @@ -13,8 +13,6 @@ S3method(formula,svyglm.zip) S3method(model.frame,svyglm.nb) S3method(model.frame,svyglm.zip) S3method(model.matrix,gls) -S3method(mwu,default) -S3method(mwu,formula) S3method(phi,formula) S3method(phi,ftable) S3method(phi,table) @@ -25,6 +23,9 @@ S3method(print,sj_check_assump) S3method(print,sj_chi2gof) S3method(print,sj_grpmean) S3method(print,sj_grpmeans) +S3method(print,sj_htest_chi) +S3method(print,sj_htest_kw) +S3method(print,sj_htest_mwu) S3method(print,sj_mwu) S3method(print,sj_outliers) S3method(print,sj_pval) @@ -40,8 +41,6 @@ S3method(print,tidy_stan) S3method(residuals,svyglm.nb) S3method(summary,sj_pval) S3method(terms,svyglm.nb) -S3method(weighted_chisqtest,default) -S3method(weighted_chisqtest,formula) S3method(weighted_correlation,default) S3method(weighted_correlation,formula) S3method(weighted_mannwhitney,default) @@ -66,10 +65,12 @@ export(boot_est) export(boot_p) export(boot_se) export(bootstrap) +export(chi_squared_test) export(chisq_gof) export(ci) export(cohens_f) export(cramer) +export(cramers_v) export(crosstable_statistics) export(cv) export(cv_compare) @@ -83,16 +84,15 @@ export(find_beta2) export(find_cauchy) export(find_normal) export(gmd) -export(grpmean) export(icc) export(inequ_trend) export(is_prime) +export(kruskal_wallis_test) export(link_inverse) -export(mannwhitney) +export(mann_whitney_test) export(mean_n) export(means_by_group) export(mse) -export(mwu) export(omega_sq) export(p_value) export(phi) @@ -115,7 +115,6 @@ export(typical_value) export(var_pop) export(weight) export(weight2) -export(weighted_chisqtest) export(weighted_correlation) export(weighted_mannwhitney) export(weighted_mean) @@ -129,19 +128,14 @@ export(xtab_statistics) importFrom(MASS,glm.nb) importFrom(bayestestR,ci) importFrom(bayestestR,equivalence_test) -importFrom(broom,augment) importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,group_vars) importFrom(dplyr,mutate) -importFrom(dplyr,n_distinct) -importFrom(dplyr,pull) importFrom(dplyr,quos) importFrom(dplyr,select) importFrom(dplyr,select_if) 
importFrom(dplyr,summarise)
-importFrom(emmeans,contrast)
-importFrom(emmeans,emmeans)
importFrom(insight,export_table)
importFrom(insight,find_formula)
importFrom(insight,find_response)
@@ -159,7 +153,6 @@
importFrom(performance,rmse)
importFrom(purrr,flatten_df)
importFrom(purrr,map)
importFrom(purrr,map2)
-importFrom(purrr,map_chr)
importFrom(purrr,map_dbl)
importFrom(purrr,map_df)
importFrom(purrr,map_lgl)
@@ -168,19 +161,12 @@
importFrom(rlang,.data)
importFrom(rlang,enquo)
importFrom(rlang,quo_name)
importFrom(sjlabelled,as_numeric)
-importFrom(sjlabelled,drop_labels)
-importFrom(sjlabelled,get_label)
-importFrom(sjlabelled,get_labels)
-importFrom(sjmisc,add_variables)
importFrom(sjmisc,is_empty)
importFrom(sjmisc,is_float)
importFrom(sjmisc,is_num_fac)
-importFrom(sjmisc,recode_to)
importFrom(sjmisc,str_contains)
-importFrom(sjmisc,to_value)
importFrom(sjmisc,trim)
importFrom(sjmisc,typical_value)
-importFrom(stats,approx)
importFrom(stats,as.formula)
importFrom(stats,chisq.test)
importFrom(stats,coef)
@@ -196,12 +182,10 @@
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,na.pass)
importFrom(stats,nobs)
-importFrom(stats,pbeta)
importFrom(stats,pf)
importFrom(stats,pnorm)
importFrom(stats,predict.glm)
importFrom(stats,pt)
-importFrom(stats,qcauchy)
importFrom(stats,qf)
importFrom(stats,qnorm)
importFrom(stats,resid)
@@ -213,7 +197,6 @@
importFrom(stats,var)
importFrom(stats,vcov)
importFrom(stats,weighted.mean)
importFrom(stats,weights)
-importFrom(stats,wilcox.test)
importFrom(stats,xtabs)
importFrom(tidyr,gather)
importFrom(tidyr,nest)
diff --git a/R/Deprecated.R b/R/Deprecated.R
index 862b8da7..94a27e22 100644
--- a/R/Deprecated.R
+++ b/R/Deprecated.R
@@ -83,3 +83,19 @@ se <- function(x, ...) {
  .Defunct("parameters::standard_error()")
  parameters::standard_error(x)
}
+
+
+#' @rdname r2
+#' @export
+means_by_group <- function(x, ...) {
+  .Defunct("datawizard::means_by_group()")
+  datawizard::means_by_group(x, ...)
+}
+
+
+#' @rdname r2
+#' @export
+mean_n <- function(x, ...) {
+  .Defunct("datawizard::row_means()")
+  datawizard::row_means(x, ...)
+}
diff --git a/R/anova_stats.R b/R/anova_stats.R
index ef7c1218..f7917687 100644
--- a/R/anova_stats.R
+++ b/R/anova_stats.R
@@ -29,9 +29,7 @@
#' }
#' @export
anova_stats <- function(model, digits = 3) {
-  if (!requireNamespace("pwr", quietly = TRUE)) {
-    stop("Package `pwr` needed for this function to work. Please install it.", call. = FALSE)
-  }
+  insight::check_if_installed("pwr")

  # .Deprecated("effectsize::effectsize()", package = "effectsize")
@@ -94,6 +92,7 @@ aov_stat <- function(model, type) {
aov_stat_summary <- function(model) {
+  insight::check_if_installed("broom")
  # check if we have a mixed model
  mm <- is_merMod(model)
  ori.model <- model
diff --git a/R/chi_squared_test.R b/R/chi_squared_test.R
new file mode 100644
index 00000000..426883a6
--- /dev/null
+++ b/R/chi_squared_test.R
@@ -0,0 +1,267 @@
+#' @title Chi-Squared test
+#' @name chi_squared_test
+#' @description This function performs a \eqn{\chi^2} test for contingency
+#' tables or tests for given probabilities. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities (see _Ben-Shachar et al. 2023_).
+#'
+#' @param probabilities A numeric vector of probabilities for each cell in the
+#' contingency table. The length of the vector must match the number of cells
+#' in the table, i.e.
the number of unique levels of the variable specified
+#' in `select`. If `probabilities` is provided, a chi-squared test for given
+#' probabilities is conducted. Furthermore, if `probabilities` is given, `by`
+#' must be `NULL`. The probabilities must sum to 1.
+#' @param paired Logical, if `TRUE`, a McNemar test is conducted for 2x2 tables.
+#' Note that `paired` only works for 2x2 tables.
+#' @param ... Additional arguments passed down to [`chisq.test()`].
+#' @inheritParams mann_whitney_test
+#'
+#' @return A data frame with test results. The returned effect sizes are
+#' Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+#' for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+#' given probabilities.
+#'
+#' @details The function is a wrapper around [`chisq.test()`] and
+#' [`fisher.test()`] (for small expected values) for contingency tables, and
+#' `chisq.test()` for given probabilities. When `probabilities` are provided,
+#' these are rescaled to sum to 1 (i.e. `rescale.p = TRUE`). When `fisher.test()`
+#' is called, simulated p-values are returned (i.e. `simulate.p.value = TRUE`,
+#' see `?fisher.test`). If `paired = TRUE` and a 2x2 table is provided,
+#' a McNemar test (see [`mcnemar.test()`]) is conducted.
+#'
+#' The weighted version of the chi-squared test is based on a weighted
+#' table, using [`xtabs()`] as input for `chisq.test()`.
+#'
+#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+#' Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+#' That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+#' \doi{10.3390/math11091982}
+#'
+#' @examples
+#' data(efc)
+#' efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
+#'
+#' # Chi-squared test
+#' chi_squared_test(efc, "c161sex", by = "e16sex")
+#'
+#' # weighted Chi-squared test
+#' chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight")
+#'
+#' # Chi-squared test for given probabilities
+#' chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7))
+#' @export
+chi_squared_test <- function(data,
+                             select = NULL,
+                             by = NULL,
+                             probabilities = NULL,
+                             weights = NULL,
+                             paired = FALSE,
+                             ...) {
+  if (is.null(probabilities)) {
+    .calculate_chisq(data, select, by, weights, paired, ...)
+  } else {
+    # sanity check - `paired = TRUE` is not available for given probabilities
+    if (paired) {
+      insight::format_error("When `probabilities` are provided, `paired = TRUE` is not available.") # nolint
+    }
+    .calculate_chisq_gof(data, select, probabilities, weights, ...)
+  }
+}
+
+
+# Chi-squared test for contingency tables --------------------------------------
+
+.calculate_chisq <- function(data, select, by, weights, paired = FALSE, ...) {
+  insight::check_if_installed("datawizard")
+  # sanity checks
+  .sanitize_htest_input(data, select, by, weights)
+
+  # get data
+  grp1 <- data[[select]]
+  grp2 <- data[[by]]
+
+  # if paired = TRUE, we only allow a 2x2 table
+  if (paired && (length(stats::na.omit(unique(grp1))) != 2 || length(stats::na.omit(unique(grp2))) != 2)) {
+    insight::format_error("When `paired = TRUE`, only 2x2 tables are allowed (i.e.
both variables must have exactly two levels).") # nolint + } + + # create data frame for table + x <- data.frame( + grp1 = datawizard::to_factor(grp1), + grp2 = datawizard::to_factor(grp2) + ) + # add weights + if (!is.null(weights)) { + x$weights <- data[[weights]] + } + # remove missings + x <- stats::na.omit(x) + + # contingency table + if (is.null(weights)) { + tab <- table(x) + } else { + tab <- as.table(round(stats::xtabs(x[[3]] ~ x[[1]] + x[[2]]))) + class(tab) <- "table" + } + + # expected values, to identify whether Fisher's test is needed + expected_values <- as.table(round(as.array(margin.table(tab, 1)) %*% t(as.array(margin.table(tab, 2))) / margin.table(tab))) # nolint + + # paired? mc-nemar test + if (paired) { + htest <- suppressWarnings(stats::mcnemar.test(tab, ...)) + test_statistic <- htest$statistic + } else { + # chi-squared test + htest <- suppressWarnings(stats::chisq.test(tab, ...)) + test_statistic <- htest$statistic + # need fisher? + if (min(expected_values) < 5 || (min(expected_values) < 10 && htest$parameter == 1)) { + htest <- stats::fisher.test(tab, simulate.p.value = TRUE, ...) + } + } + p_value <- htest$p.value + + # effect size + if (nrow(tab) > 2 || ncol(tab) > 2) { + effect_size <- stats::setNames(cramer(tab), "Cramer's V") + } else { + effect_size <- stats::setNames(phi(tab), "Phi") + } + + # return result + out <- data.frame( + data = paste(select, "by", by), + statistic_name = "Chi-squared", + statistic = test_statistic, + effect_size_name = names(effect_size), + effect_size = as.numeric(effect_size), + p = p_value, + df = (nrow(tab) - 1) * (ncol(tab) - 1), + n_obs = sum(tab, na.rm = TRUE), + stringsAsFactors = FALSE + ) + class(out) <- c("sj_htest_chi", "data.frame") + attr(out, "weighted") <- !is.null(weights) + attr(out, "fisher") <- isTRUE(startsWith(htest$method, "Fisher")) + attr(out, "mcnemar") <- isTRUE(paired) + attr(out, "caption") <- "contingency tables" + out +} + + +.calculate_chisq_gof <- function(data, select, probabilities, weights, ...) { + insight::check_if_installed("effectsize") + + # get data + x <- data.frame(grp = data[[select]]) + # add weights + if (!is.null(weights)) { + x$weights <- data[[weights]] + } + # remove missings + x <- stats::na.omit(x) + + # contingency table + if (is.null(weights)) { + tab <- table(x) + } else { + tab <- as.table(round(stats::xtabs(x[[2]] ~ x[[1]]))) + class(tab) <- "table" + } + + # table dimensions + n_rows <- nlevels(droplevels(as.factor(x$grp))) + + # sanity check + if (length(probabilities) != n_rows) { + insight::format_error("Length of probabilities must match number of cells in table (i.e. 
number of levels of input factor).") # nolint + } + if (!isTRUE(all.equal(sum(probabilities), 1))) { + insight::format_error("Probabilities must sum to 1.") + } + + # chi-squared test + htest <- suppressWarnings(stats::chisq.test(tab, p = probabilities, rescale.p = TRUE, ...)) + test_statistic <- htest$statistic + p_value <- htest$p.value + + effect_size <- effectsize::chisq_to_fei( + test_statistic, + n = sum(tab), + nrow = n_rows, + ncol = 1, + p = probabilities, + alternative = "two.sided" + )$Fei + + # return result + out <- data.frame( + data = paste( + select, + "against probabilities", + datawizard::text_concatenate(sprintf("%i%%", round(100 * probabilities))) + ), + statistic_name = "Chi-squared", + statistic = test_statistic, + effect_size_name = "Fei", + effect_size = as.numeric(effect_size), + p = p_value, + df = n_rows - 1, + n_obs = sum(tab, na.rm = TRUE), + stringsAsFactors = FALSE + ) + class(out) <- c("sj_htest_chi", "data.frame") + attr(out, "caption") <- "given probabilities" + attr(out, "weighted") <- !is.null(weights) + out +} + + +# methods --------------------------------------------------------------------- + +#' @export +print.sj_htest_chi <- function(x, ...) { + weighted <- attributes(x)$weighted + if (weighted) { + weight_string <- " (weighted)" + } else { + weight_string <- "" + } + + fisher <- attributes(x)$fisher + mcnemar <- attributes(x)$mcnemar + + # headline + insight::print_color(sprintf( + "\n# Chi-squared test for %s%s\n", + attributes(x)$caption, + weight_string + ), "blue") + + # Fisher's exact test? + if (isTRUE(fisher)) { + insight::print_color(" (using Fisher's exact test due to small expected values)\n", "blue") # nolint + } else if (isTRUE(mcnemar)) { + insight::print_color(" (using McNemar's test for paired data)\n", "blue") # nolint + } + + cat("\n") + + # data info + insight::print_color( + sprintf(" Data: %s (n = %i)\n", x$data, round(x$n_obs)), + "cyan" + ) + + # prepare and align strings + eff_symbol <- .format_symbols(x$effect_size_name) + stat_symbol <- .format_symbols(x$statistic_name) + + cat(sprintf( + "\n %s = %.4f, %s = %.4f, df = %i, %s\n\n", + stat_symbol, x$statistic, eff_symbol, x$effect_size, round(x$df), insight::format_p(x$p) + )) +} diff --git a/R/cramer.R b/R/cramer.R index 66798e72..a623da44 100644 --- a/R/cramer.R +++ b/R/cramer.R @@ -1,53 +1,52 @@ #' @rdname crosstable_statistics #' @export -cramer <- function(tab, ...) { - UseMethod("cramer") +cramers_v <- function(tab, ...) { + UseMethod("cramers_v") } +#' @rdname crosstable_statistics +#' @export +cramer <- cramers_v #' @export -cramer.table <- function(tab, ...) { - .cramer(tab) +cramers_v.table <- function(tab, ...) { + .cramers_v(tab) } #' @export -cramer.ftable <- function(tab, ...) { - .cramer(tab) +cramers_v.ftable <- function(tab, ...) { + .cramers_v(tab) } #' @rdname crosstable_statistics #' @export -cramer.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...) { +cramers_v.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist", "quantile"), ...) 
{ terms <- all.vars(formula) tab <- table(data[[terms[1]]], data[[terms[2]]]) method <- match.arg(method) if (is.null(ci.lvl) || is.na(ci.lvl)) { - .cramer(tab) + .cramers_v(tab) } else { - ci <- data[, terms] %>% - sjstats::bootstrap(n) %>% - dplyr::mutate( - tables = lapply(.data$strap, function(x) { - dat <- as.data.frame(x) - table(dat[[1]], dat[[2]]) - }), - cramers = sapply(.data$tables, function(x) .cramer(x)) - ) %>% - dplyr::pull("cramers") %>% - boot_ci(ci.lvl = ci.lvl, method = method) + straps <- sjstats::bootstrap(data[terms], n) + tables <- lapply(straps$strap, function(x) { + dat <- as.data.frame(x) + table(dat[[1]], dat[[2]]) + }) + cramers <- sapply(tables, function(x) .cramers_v(x)) + ci <- boot_ci(cramers, ci.lvl = ci.lvl, method = method) data_frame( - cramer = .cramer(tab), + cramer = .cramers_v(tab), conf.low = ci$conf.low, conf.high = ci$conf.high ) } } -.cramer <- function(tab) { +.cramers_v <- function(tab) { # convert to flat table if (!inherits(tab, "ftable")) tab <- stats::ftable(tab) sqrt(phi(tab)^2 / min(dim(tab) - 1)) diff --git a/R/cv_error.R b/R/cv_error.R index 9ab6f875..c4880b12 100644 --- a/R/cv_error.R +++ b/R/cv_error.R @@ -34,13 +34,13 @@ #' @importFrom modelr crossv_kfold #' @importFrom dplyr mutate summarise #' @importFrom purrr map map2 map_dbl map_df -#' @importFrom broom augment #' @importFrom tidyr unnest #' @importFrom rlang .data #' @importFrom insight find_response #' @importFrom performance rmse #' @export cv_error <- function(data, formula, k = 5) { + insight::check_if_installed("broom") # compute cross validation data cv_data <- data %>% diff --git a/R/find_beta.R b/R/find_beta.R index 7e45408b..1ae733cc 100644 --- a/R/find_beta.R +++ b/R/find_beta.R @@ -1,10 +1,10 @@ #' @title Determining distribution parameters #' @name find_beta #' -#' @description \code{find_beta()}, \code{find_normal()} and \code{find_cauchy()} find the +#' @description `find_beta()`, `find_normal()` and `find_cauchy()` find the #' shape, mean and standard deviation resp. the location and scale parameters #' to describe the beta, normal or cauchy distribution, based on two -#' percentiles. \code{find_beta2()} finds the shape parameters for a Beta +#' percentiles. `find_beta2()` finds the shape parameters for a Beta #' distribution, based on a probability value and its standard error #' or confidence intervals. #' @@ -14,14 +14,14 @@ #' @param p2 Probability of the second percentile. #' @param x Numeric, a probability value between 0 and 1. Typically indicates #' a prevalence rate of an outcome of interest; Or an integer value -#' with the number of observed events. In this case, specify \code{n} +#' with the number of observed events. In this case, specify `n` #' to indicate the toral number of observations. -#' @param se The standard error of \code{x}. Either \code{se} or \code{ci} must +#' @param se The standard error of `x`. Either `se` or `ci` must #' be specified. -#' @param ci The upper limit of the confidence interval of \code{x}. Either -#' \code{se} or \code{ci} must be specified. +#' @param ci The upper limit of the confidence interval of `x`. Either +#' `se` or `ci` must be specified. #' @param n Numeric, number of total observations. Needs to be specified, if -#' \code{x} is an integer (number of observed events), and no +#' `x` is an integer (number of observed events), and no #' probability. See 'Examples'. #' #' @return A list of length two, with the two distribution parameters than can @@ -29,21 +29,20 @@ #' the shape for the given input parameters. 
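+#'
+#' Because the parameters are found by matching two quantiles, a quick sanity
+#' check is to plug the returned shape parameters back into `pbeta()`; a
+#' minimal sketch, with made-up percentile values:
+#'
+#' ```
+#' shapes <- find_beta(x1 = 0.4, p1 = 0.5, x2 = 0.6, p2 = 0.9)
+#' pbeta(0.4, shapes[[1]], shapes[[2]]) # should be close to 0.5
+#' pbeta(0.6, shapes[[1]], shapes[[2]]) # should be close to 0.9
+#' ```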
#'
#' @details These functions can be used to find parameter for various distributions,
-#' to define prior probabilities for Bayesian analyses. \code{x1},
-#' \code{p1}, \code{x2} and \code{p2} are parameters that describe two
-#' quantiles. Given this knowledge, the distribution parameters are
-#' returned. \cr \cr
-#' Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence
-#' rate or similar probability, and its standard deviation or confidence
-#' interval. In this case. \code{x} should be a probability,
-#' for example a prevalence rate of a certain event. \code{se} then
-#' needs to be the standard error for this probability. Alternatively,
-#' \code{ci} can be specified, which should indicate the upper limit
-#' of the confidence interval od the probability (prevalence rate) \code{x}.
-#' If the number of events out of a total number of trials is known
-#' (e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number
-#' of observed events, while \code{n} indicates the total amount of trials
-#' (in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}).
+#' to define prior probabilities for Bayesian analyses. `x1`, `p1`, `x2` and
+#' `p2` are parameters that describe two quantiles. Given this knowledge, the
+#' distribution parameters are returned.
+#'
+#' Use `find_beta2()`, if the known parameters are, e.g. a prevalence rate or
+#' similar probability, and its standard deviation or confidence interval. In
+#' this case, `x` should be a probability, for example a prevalence rate of a
+#' certain event. `se` then needs to be the standard error for this probability.
+#' Alternatively, `ci` can be specified, which should indicate the upper limit
+#' of the confidence interval of the probability (prevalence rate) `x`. If the
+#' number of events out of a total number of trials is known (e.g. 12 heads out
+#' of 30 coin tosses), `x` can also be the number of observed events, while `n`
+#' indicates the total amount of trials (in the above example, the function
+#' call would be: `find_beta2(x = 12, n = 30)`).
#'
#' @references Cook JD. Determining distribution parameters from quantiles. 2010: Department of Biostatistics, Texas (\href{https://www.johndcook.com/quantiles_parameters.pdf}{PDF})
#'
#' @examples
#'
@@ -79,14 +78,12 @@
#' shapes <- find_beta2(x = 3, n = 20)
#' curve(dbeta(x, shapes[[1]], shapes[[2]]))
#'
-#' @importFrom stats pbeta approx
-#' @importFrom purrr map_dbl
#' @export
find_beta <- function(x1, p1, x2, p2) {
  logK <- seq(-5, 10, length = 200)
  K <- exp(logK)

-  m <- purrr::map_dbl(K, ~ betaprior(.x, x1, p1))
+  m <- unlist(lapply(K, betaprior, x = x1, p = p1))

  prob2 <- stats::pbeta(x2, K * m, K * (1 - m))
  ind <- ((prob2 > 0) & (prob2 < 1))
@@ -127,13 +124,13 @@ betaprior <- function(K, x, p) {
find_beta2 <- function(x, se, ci, n) {
  # check if all required arguments are given
  if (missing(se) && missing(ci) && missing(n)) {
-    stop("Either `se` or `ci`, or `n` must be specified.", call. = F)
+    insight::format_error("Either `se` or `ci`, or `n` must be specified.")
  }

  # for number of observations, compute variance of beta distribution
  if (!missing(n)) {
    if (!is.integer(x) && x < 1)
-      stop("If `n` is given, x` must be an integer value greater than 0.", call.
= F) + insight::format_error("If `n` is given, x` must be an integer value greater than 0.") # compute 2 SD from beta variance bvar <- 2 * sqrt((x * n) / ((x + n)^2 * (x + n + 1))) @@ -164,7 +161,6 @@ find_beta2 <- function(x, se, ci, n) { } -#' @importFrom stats qcauchy #' @rdname find_beta #' @export find_cauchy <- function(x1, p1, x2, p2) { @@ -177,7 +173,6 @@ find_cauchy <- function(x1, p1, x2, p2) { -#' @importFrom stats qnorm #' @rdname find_beta #' @export find_normal <- function(x1, p1, x2, p2) { diff --git a/R/grpmean.R b/R/grpmean.R deleted file mode 100644 index ab2eb2d1..00000000 --- a/R/grpmean.R +++ /dev/null @@ -1,335 +0,0 @@ -#' @title Summary of mean values by group -#' @name means_by_group -#' -#' @description Computes mean, sd and se for each sub-group (indicated by \code{grp}) -#' of \code{dv}. -#' -#' @param x A (grouped) data frame. -#' @param dv Name of the dependent variable, for which the mean value, grouped -#' by \code{grp}, is computed. -#' @param grp Factor with the cross-classifying variable, where \code{dv} is -#' grouped into the categories represented by \code{grp}. Numeric vectors -#' are coerced to factors. -#' @param weights Name of variable in \code{x} that indicated the vector of -#' weights that will be applied to weight all observations. Default is -#' \code{NULL}, so no weights are used. -#' @param digits Numeric, amount of digits after decimal point when rounding -#' estimates and values. -#' @param file Destination file, if the output should be saved as file. -#' Only used when \code{out} is not \code{"txt"}. -#' @param encoding Character vector, indicating the charset encoding used -#' for variable and value labels. Default is \code{"UTF-8"}. Only used -#' when \code{out} is not \code{"txt"}. -#' @param out Character vector, indicating whether the results should be printed -#' to console (\code{out = "txt"}) or as HTML-table in the viewer-pane -#' (\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the -#' results should be plotted (\code{out = "plot"}, only applies to certain -#' functions). May be abbreviated. -#' -#' @return For non-grouped data frames, \code{means_by_group()} returns a data frame with -#' following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev}, -#' \code{std.error} and \code{p.value}. For grouped data frames, returns -#' a list of such data frames. -#' -#' @details This function performs a One-Way-Anova with \code{dv} as dependent -#' and \code{grp} as independent variable, by calling -#' \code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}} -#' is called to get p-values for each sub-group. P-values indicate whether -#' each group-mean is significantly different from the total mean. 
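For reference, the p-values in the deleted `means_by_group()` came from **emmeans**' effect-style contrasts (each group mean tested against the grand mean), which is what allows `emmeans` to be dropped from `Imports` above. A minimal sketch of that pattern, using `iris` as stand-in data:

```r
# test each group mean against the grand mean via "eff" contrasts
fit <- lm(Sepal.Width ~ Species, data = iris)
em <- emmeans::emmeans(fit, specs = "Species")
summary(emmeans::contrast(em, method = "eff"))
```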
-#' -#' @examples -#' data(efc) -#' means_by_group(efc, c12hour, e42dep) -#' -#' data(iris) -#' means_by_group(iris, Sepal.Width, Species) -#' -#' # also works for grouped data frames -#' if (require("dplyr")) { -#' efc %>% -#' group_by(c172code) %>% -#' means_by_group(c12hour, e42dep) -#' } -#' -#' # weighting -#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) -#' means_by_group(efc, c12hour, e42dep, weights = weight) -#' @importFrom sjlabelled get_label drop_labels get_labels -#' @importFrom stats lm na.omit sd weighted.mean -#' @importFrom purrr map_chr map_df -#' @importFrom sjmisc to_value is_empty -#' @importFrom rlang enquo .data quo_name -#' @export -means_by_group <- function(x, - dv, - grp, - weights = NULL, - digits = 2, - out = c("txt", "viewer", "browser"), - encoding = "UTF-8", - file = NULL) { - - out <- match.arg(out) - - if (out != "txt" && !requireNamespace("sjPlot", quietly = TRUE)) { - message("Package `sjPlot` needs to be loaded to print HTML tables.") - out <- "txt" - } - - # create quosures - grp.name <- rlang::quo_name(rlang::enquo(grp)) - dv.name <- rlang::quo_name(rlang::enquo(dv)) - - # weights need extra checking, might be NULL - if (!missing(weights)) { - .weights <- try(rlang::quo_name(rlang::enquo(weights)), silent = TRUE) - if (inherits(.weights, "try-error")) .weights <- NULL - - w.string <- try(eval(weights), silent = TRUE) - if (!inherits(w.string, "try-error") && !is.null(w.string) && is.character(w.string)) .weights <- w.string - - if (sjmisc::is_empty(.weights) || .weights == "NULL") .weights <- NULL - } else - .weights <- NULL - - - # create string with variable names - vars <- c(grp.name, dv.name, .weights) - - # get data - x <- suppressMessages(dplyr::select(x, !! vars)) - - # set value and row labels - varGrpLabel <- sjlabelled::get_label(x[[grp.name]], def.value = grp.name) - varCountLabel <- sjlabelled::get_label(x[[dv.name]], def.value = dv.name) - - # first, drop unused labels - x[[grp.name]] <- sjlabelled::drop_labels(x[[grp.name]], drop.na = TRUE) - - # now get valid value labels - value.labels <- sjlabelled::get_labels( - x[[grp.name]], attr.only = F, values = "n", non.labelled = TRUE - ) - - # return values - dataframes <- list() - - # do we have a grouped data frame? 
- if (inherits(x, "grouped_df")) { - # get grouped data - grps <- get_grouped_data(x) - - # now plot everything - for (i in seq_len(nrow(grps))) { - # copy back labels to grouped data frame - tmp <- sjlabelled::copy_labels(grps$data[[i]], x) - - # get grouped means table - dummy <- means_by_group_helper( - x = tmp, - dv = dv.name, - grp = grp.name, - weight.by = .weights, - value.labels = value.labels, - varCountLabel = varCountLabel, - varGrpLabel = varGrpLabel - ) - - attr(dummy, "group") <- get_grouped_title(x, grps, i, sep = "\n") - - # save data frame for return value - dataframes[[length(dataframes) + 1]] <- dummy - } - - # add class-attr for print-method() - if (out == "txt") - class(dataframes) <- c("sj_grpmeans", "list") - else - class(dataframes) <- c("sjt_grpmeans", "list") - - } else { - dataframes <- means_by_group_helper( - x = x, - dv = dv.name, - grp = grp.name, - weight.by = .weights, - value.labels = value.labels, - varCountLabel = varCountLabel, - varGrpLabel = varGrpLabel - ) - - # add class-attr for print-method() - if (out == "txt") - class(dataframes) <- c("sj_grpmean", class(dataframes)) - else - class(dataframes) <- c("sjt_grpmean", class(dataframes)) - } - - # save how to print output - attr(dataframes, "print") <- out - attr(dataframes, "encoding") <- encoding - attr(dataframes, "file") <- file - attr(dataframes, "digits") <- digits - - dataframes -} - - -#' @importFrom stats pf lm weighted.mean na.omit sd -#' @importFrom sjmisc to_value add_variables -#' @importFrom emmeans emmeans contrast -#' @importFrom dplyr pull select n_distinct -#' @importFrom purrr map_chr -#' @importFrom rlang .data -means_by_group_helper <- function(x, dv, grp, weight.by, value.labels, varCountLabel, varGrpLabel) { - # copy vectors from data frame - dv <- x[[dv]] - grp <- x[[grp]] - - if (!is.null(weight.by)) - weight.by <- x[[weight.by]] - else - weight.by <- 1 - - # convert values to numeric - dv <- sjmisc::to_value(dv) - - # create data frame, for emmeans - mydf <- stats::na.omit(data.frame( - dv = dv, - grp = as.factor(grp), - weight.by = weight.by - )) - - # compute anova statistics for mean table - fit <- stats::lm(dv ~ grp, weights = weight.by, data = mydf) - - # p-values of contrast-means - means.p <- fit %>% - emmeans::emmeans(specs = "grp") %>% - emmeans::contrast(method = "eff") %>% - summary() %>% - dplyr::pull("p.value") - - ## TODO - # efc %>% - # group_by(c172code, c161sex) %>% - # means_by_group(c12hour, e42dep) - - - # check if value labels length matches group count - if (dplyr::n_distinct(mydf$grp) != length(value.labels)) { - # get unique factor levels and check if these are numeric. 
- # if so, we match the values from value labels and the remaining - # factor levels, so we get the correct value labels for printing - nl <- unique(mydf$grp) - if (sjmisc::is_num_fac(nl)) - value.labels <- value.labels[names(value.labels) %in% levels(nl)] - else - value.labels <- nl - } - - - # create summary - dat <- mydf %>% - dplyr::group_by(.data$grp) %>% - summarise( - mean = stats::weighted.mean(.data$dv, w = .data$weight.by, na.rm = TRUE), - N = round(sum(.data$weight.by)), - std.dev = weighted_sd(.data$dv, .data$weight.by), - std.error = weighted_se(.data$dv, .data$weight.by) - ) %>% - mutate(p.value = means.p) %>% - dplyr::select(-.data$grp) - - # finally, add total-row - dat <- dplyr::bind_rows( - dat, - data_frame( - mean = stats::weighted.mean(mydf$dv, w = mydf$weight.by, na.rm = TRUE), - N = nrow(mydf), - std.dev = weighted_sd(mydf$dv, mydf$weight.by), - std.error = weighted_se(mydf$dv, mydf$weight.by), - p.value = NA - ) - ) - - - # add row labels - dat <- sjmisc::add_variables( - dat, - term = c(unname(value.labels), "Total"), - .after = -1 - ) - - - # get anova statistics for mean table - sum.fit <- summary(fit) - - # r-squared values - r2 <- sum.fit$r.squared - r2.adj <- sum.fit$adj.r.squared - - # F-statistics - fstat <- sum.fit$fstatistic - pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = F) - - - # copy as attributes - attr(dat, "r2") <- r2 - attr(dat, "adj.r2") <- r2.adj - attr(dat, "fstat") <- fstat[1] - attr(dat, "p.value") <- pval - attr(dat, "dv.label") <- varCountLabel - attr(dat, "grp.label") <- varGrpLabel - - dat -} - - -get_grouped_title <- function(x, grps, i, sep = "\n") { - # create title for first grouping level - tp <- get_title_part(x, grps, 1, i) - title <- sprintf("%s: %s", tp[1], tp[2]) - - # do we have another groupng variable? 
-  if (length(dplyr::group_vars(x)) > 1) {
-    tp <- get_title_part(x, grps, 2, i)
-    title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2])
-  }
-
-  # return title
-  title
-}
-
-
-get_title_part <- function(x, grps, level, i) {
-  # prepare title for group
-  var.name <- colnames(grps)[level]
-
-  # get values from value labels
-  vals <- sjlabelled::get_values(x[[var.name]])
-  # if we have no value labels, get values directly
-  if (is.null(vals)) {
-    vals <- unique(x[[var.name]])
-    lab.pos <- i
-  } else {
-    # find position of value labels for current group
-    lab.pos <- which(vals == grps[[var.name]][i])
-  }
-
-  # get variable and value labels
-  t1 <- sjlabelled::get_label(x[[var.name]], def.value = var.name)
-  t2 <- sjlabelled::get_labels(x[[var.name]])[lab.pos]
-
-  # if we have no value label, use value instead
-  if (is.null(t2)) t2 <- vals[lab.pos]
-
-  # generate title
-  c(t1, t2)
-}
-
-
-#' @rdname means_by_group
-#' @export
-grpmean <- means_by_group
diff --git a/R/helpfunctions.R b/R/helpfunctions.R
index 0785b281..ce36f369 100644
--- a/R/helpfunctions.R
+++ b/R/helpfunctions.R
@@ -34,7 +34,7 @@ get_glm_family <- function(fit) {
  # create logical for family
  binom_fam <- fitfam %in% c("binomial", "quasibinomial")
  poisson_fam <- fitfam %in% c("poisson", "quasipoisson") ||
-    sjmisc::str_contains(fitfam, "negative binomial", ignore.case = T)
+    sjmisc::str_contains(fitfam, "negative binomial", ignore.case = TRUE)

  list(is_bin = binom_fam, is_pois = poisson_fam, is_logit = logit_link)
}
@@ -64,8 +64,45 @@ get_grouped_data <- function(x) {
}

-
.compact_character <- function(x) {
  x[!sapply(x, function(i) is.null(i) || nchar(i) == 0 || is.na(i) || any(i == "NULL", na.rm = TRUE))]
}
+
+.format_symbols <- function(x) {
+  if (.unicode_symbols()) {
+    x <- gsub("Delta", "\u0394", x, ignore.case = TRUE)
+    x <- gsub("Phi", "\u03D5", x, ignore.case = TRUE)
+    x <- gsub("Eta", "\u03B7", x, ignore.case = TRUE)
+    x <- gsub("Epsilon", "\u03b5", x, ignore.case = TRUE)
+    x <- gsub("Omega", "\u03c9", x, ignore.case = TRUE)
+    x <- gsub("R2", "R\u00b2", x, ignore.case = TRUE)
+    x <- gsub("Chi2", "\u03C7\u00b2", x, ignore.case = TRUE)
+    x <- gsub("Chi-squared", "\u03C7\u00b2", x, ignore.case = TRUE)
+    x <- gsub("Chi", "\u03C7", x, ignore.case = TRUE)
+    x <- gsub("Sigma", "\u03C3", x, ignore.case = TRUE)
+    x <- gsub("Rho", "\u03C1", x, ignore.case = TRUE)
+    x <- gsub("Mu", "\u03BC", x, ignore.case = TRUE)
+    x <- gsub("Theta", "\u03B8", x, ignore.case = TRUE)
+    x <- gsub("Fei", "\u05E4\u200E", x, ignore.case = TRUE)
+  }
+  x
+}
+
+
+.unicode_symbols <- function() {
+  win_os <- tryCatch(
+    {
+      si <- Sys.info()
+      if (is.null(si["sysname"])) {
+        FALSE
+      } else {
+        si["sysname"] == "Windows" || startsWith(R.version$os, "mingw")
+      }
+    },
+    error = function(e) {
+      TRUE
+    }
+  )
+  l10n_info()[["UTF-8"]] && ((win_os && getRversion() >= "4.2") || (!win_os && getRversion() >= "4.0"))
+}
diff --git a/R/kruskal_wallis_test.R b/R/kruskal_wallis_test.R
new file mode 100644
index 00000000..a66e3c15
--- /dev/null
+++ b/R/kruskal_wallis_test.R
@@ -0,0 +1,188 @@
+#' @title Kruskal-Wallis test
+#' @name kruskal_wallis_test
+#' @description This function performs a Kruskal-Wallis rank sum test, to test
+#' the null hypothesis that the population medians of all of the groups are
+#' equal. The alternative is that they differ in at least one.
+#'
+#' @inheritParams mann_whitney_test
+#'
+#' @return A data frame with test results.
+#'
+#' @details The function is simply a wrapper around [`kruskal.test()`].
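+#'
+#' For the unweighted case, this reduces to a single base R call; a minimal
+#' sketch (variable names taken from the examples below):
+#'
+#' ```
+#' data(efc)
+#' kruskal.test(e17age ~ c172code, data = efc)
+#' ```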
+#' The weighted version of the Kruskal-Wallis test is based on the **survey** package,
+#' using [`survey::svyranktest()`].
+#'
+#' @examples
+#' data(efc)
+#' # Kruskal-Wallis test for elder's age by education
+#' kruskal_wallis_test(efc, "e17age", by = "c172code")
+#'
+#' # when data is in wide-format, specify all relevant continuous
+#' # variables in `select` and omit `by`
+#' set.seed(123)
+#' wide_data <- data.frame(
+#'   scale1 = runif(20),
+#'   scale2 = runif(20),
+#'   scale3 = runif(20)
+#' )
+#' kruskal_wallis_test(wide_data, select = c("scale1", "scale2", "scale3"))
+#'
+#' # same as if we had data in long format, with grouping variable
+#' long_data <- data.frame(
+#'   scales = c(wide_data$scale1, wide_data$scale2, wide_data$scale3),
+#'   groups = rep(c("A", "B", "C"), each = 20)
+#' )
+#' kruskal_wallis_test(long_data, select = "scales", by = "groups")
+#' @export
+kruskal_wallis_test <- function(data,
+                                select = NULL,
+                                by = NULL,
+                                weights = NULL) {
+  insight::check_if_installed("datawizard")
+
+  # sanity checks
+  .sanitize_htest_input(data, select, by, weights)
+
+  # does select indicate more than one variable?
+  if (length(select) > 1) {
+    if (!is.null(by)) {
+      insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
+    }
+    # we convert the data into long format, and create a grouping variable
+    data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
+    by <- select[2]
+    select <- select[1]
+    # after converting to long, we have the "grouping" variable first in the data
+    colnames(data) <- c(by, select)
+  }
+
+  # get data
+  dv <- data[[select]]
+  grp <- data[[by]]
+
+  # coerce to factor
+  grp <- datawizard::to_factor(grp)
+
+  # at least two groups required
+  if (insight::n_unique(grp) < 2) {
+    insight::format_error("At least two groups are required, i.e. data must have at least two unique levels in `by` for `kruskal_wallis_test()`.") # nolint
+  }
+  if (is.null(weights)) {
+    .calculate_kw(dv, grp)
+  } else {
+    .calculate_weighted_kw(dv, grp, data[[weights]])
+  }
+}
+
+
+# Kruskal-Wallis-Test --------------------------------------------
+
+.calculate_kw <- function(dv, grp, paired = FALSE) {
+  # prepare data
+  wcdat <- data.frame(dv, grp)
+  if (paired) {
+    # perform Friedman test for paired data
+    wt <- stats::friedman.test(table(wcdat))
+  } else {
+    # perform Kruskal-Wallis test
+    wt <- stats::kruskal.test(dv ~ grp, data = wcdat)
+  }
+  # number of groups
+  n_groups <- vapply(
+    stats::na.omit(unique(grp)),
+    function(g) sum(grp == g, na.rm = TRUE),
+    numeric(1)
+  )
+
+  out <- data.frame(
+    data = wt$data.name,
+    Chi2 = wt$statistic,
+    df = wt$parameter,
+    p = as.numeric(wt$p.value),
+    stringsAsFactors = FALSE
+  )
+
+  attr(out, "n_groups") <- n_groups
+  attr(out, "method") <- ifelse(paired, "friedman", "kruskal")
+  attr(out, "weighted") <- FALSE
+  class(out) <- c("sj_htest_kw", "data.frame")
+
+  out
+}
+
+
+# Weighted Kruskal-Wallis-Test ------------------------------------------------
+
+.calculate_weighted_kw <- function(dv, grp, weights, paired = FALSE) {
+  # check if pkg survey is available
+  insight::check_if_installed("survey")
+
+  dat <- stats::na.omit(data.frame(dv, grp, weights))
+  colnames(dat) <- c("x", "g", "w")
+
+  # number of groups
+  n_groups <- vapply(stats::na.omit(unique(grp)), function(g) {
+    sum(dat$w[dat$g == g], na.rm = TRUE)
+  }, numeric(1))
+
+  if (paired) {
+    ## TODO: paired not working;
should call `friedman.test()`
+  } else {
+    design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
+    result <- survey::svyranktest(formula = x ~ g, design, test = "KruskalWallis")
+  }
+
+  out <- data.frame(
+    data = "dv by grp", # mirror the data.name of the unweighted test
+    Chi2 = result$statistic,
+    df = result$parameter,
+    p = as.numeric(result$p.value),
+    stringsAsFactors = FALSE
+  )
+
+  attr(out, "n_groups") <- n_groups
+  attr(out, "method") <- ifelse(paired, "friedman", "kruskal")
+  attr(out, "weighted") <- TRUE
+  class(out) <- c("sj_htest_kw", "data.frame")
+
+  out
+}
+
+
+# methods ---------------------------------------------------------------------
+
+#' @export
+print.sj_htest_kw <- function(x, ...) {
+  insight::check_if_installed("datawizard")
+  # fetch attributes
+  n_groups <- attributes(x)$n_groups
+  weighted <- attributes(x)$weighted
+  method <- attributes(x)$method
+
+  if (weighted) {
+    weight_string <- " (weighted)"
+  } else {
+    weight_string <- ""
+  }
+
+  # header
+  if (identical(method, "kruskal")) {
+    insight::print_color(sprintf("# Kruskal-Wallis test%s\n\n", weight_string), "blue")
+  } else {
+    insight::print_color(sprintf("# Friedman test%s\n\n", weight_string), "blue")
+  }
+
+  # data info
+  insight::print_color(
+    sprintf(
+      "  Data: %s (%i groups, n = %s)\n",
+      x$data, length(n_groups), datawizard::text_concatenate(n_groups)
+    ), "cyan"
+  )
+
+  stat_symbol <- .format_symbols("Chi2")
+  cat(sprintf(
+    "\n  %s = %.3f, df = %i, %s\n\n",
+    stat_symbol, x$Chi2, round(x$df), insight::format_p(x$p)
+  ))
+}
diff --git a/R/mann_whitney_test.R b/R/mann_whitney_test.R
new file mode 100644
index 00000000..d16f1788
--- /dev/null
+++ b/R/mann_whitney_test.R
@@ -0,0 +1,378 @@
+#' @title Mann-Whitney-Test
+#' @name mann_whitney_test
+#' @description This function performs a Mann-Whitney-Test (or Wilcoxon rank
+#' sum test) for _unpaired_ samples.
+#'
+#' A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
+#' independent samples have identical continuous distributions. It can be used
+#' when the two continuous variables are not normally distributed.
+#'
+#' @param data A data frame.
+#' @param select Name of the dependent variable (as string) to be used for the
+#' test. `select` can also be a character vector, specifying the names of
+#' multiple continuous variables. In this case, `by` is ignored and variables
+#' specified in `select` are used to compute the test. This can be useful if
+#' the data is in wide-format and no grouping variable is available.
+#' @param by Name of the grouping variable to be used for the test. If `by` is
+#' not a factor, it will be coerced to a factor. For `chi_squared_test()`, if
+#' `probabilities` is provided, `by` must be `NULL`.
+#' @param weights Name of an (optional) weighting variable to be used for the test.
+#' @param distribution Indicates how the null distribution of the test statistic
+#' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"`
+#' (default). See [`coin::wilcox_test()`] for details.
+#'
+#' @return A data frame with test results. The function returns p and Z-values
+#' as well as effect size r and group-rank-means.
+#'
+#' @details This function is based on [`wilcox.test()`] and [`coin::wilcox_test()`]
+#' (the latter to extract effect sizes). The weighted version of the test is
+#' based on [`survey::svyranktest()`].
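+#'
+#' The effect size **r** is derived from the standardized test statistic. A
+#' minimal sketch of this computation with **coin**, using `mtcars` as
+#' stand-in data:
+#'
+#' ```
+#' dat <- transform(mtcars, am = factor(am))
+#' wt <- coin::wilcox_test(mpg ~ am, data = dat, distribution = "asymptotic")
+#' z <- as.numeric(coin::statistic(wt, type = "standardized"))
+#' abs(z) / sqrt(nrow(dat)) # r = |Z| / sqrt(n1 + n2)
+#' ```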
+#'
+#' Interpretation of the effect size **r**, as a rule-of-thumb:
+#'
+#' - small effect >= 0.1
+#' - medium effect >= 0.3
+#' - large effect >= 0.5
+#'
+#' **r** is calculated as:
+#'
+#' ```
+#' r = |Z| / sqrt(n1 + n2)
+#' ```
+#'
+#' @examples
+#' data(efc)
+#' # Mann-Whitney-U-Tests for elder's age by elder's sex.
+#' mann_whitney_test(efc, "e17age", by = "e16sex")
+#'
+#' # when data is in wide-format, specify all relevant continuous
+#' # variables in `select` and omit `by`
+#' set.seed(123)
+#' wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20))
+#' mann_whitney_test(wide_data, select = c("scale1", "scale2"))
+#'
+#' # same as if we had data in long format, with grouping variable
+#' long_data <- data.frame(
+#'   scales = c(wide_data$scale1, wide_data$scale2),
+#'   groups = rep(c("A", "B"), each = 20)
+#' )
+#' mann_whitney_test(long_data, select = "scales", by = "groups")
+#' @export
+mann_whitney_test <- function(data,
+                              select = NULL,
+                              by = NULL,
+                              weights = NULL,
+                              distribution = "asymptotic") {
+  insight::check_if_installed("datawizard")
+
+  # sanity checks
+  .sanitize_htest_input(data, select, by, weights)
+
+  # does select indicate more than one variable?
+  if (length(select) > 1) {
+    # sanity check - may only specify two variable names
+    if (length(select) > 2) {
+      insight::format_error("You may only specify two variables for Mann-Whitney test.")
+    }
+    if (!is.null(by)) {
+      insight::format_error("If `select` specifies more than one variable, `by` must be `NULL`.")
+    }
+    # we convert the data into long format, and create a grouping variable
+    data <- datawizard::data_to_long(data[select], names_to = "group", values_to = "scale")
+    by <- select[2]
+    select <- select[1]
+    # after converting to long, we have the "grouping" variable first in the data
+    colnames(data) <- c(by, select)
+  }
+
+  # get data
+  dv <- data[[select]]
+  grp <- data[[by]]
+
+  # coerce to factor
+  grp <- datawizard::to_factor(grp)
+
+  # only two groups allowed
+  if (insight::n_unique(grp) > 2) {
+    insight::format_error("Only two groups are allowed for Mann-Whitney test.
Please use `kruskal_wallis_test()` for more than two groups.") # nolint
+  }
+
+  # value labels
+  group_labels <- names(attr(data[[by]], "labels", exact = TRUE))
+  if (is.null(group_labels)) {
+    group_labels <- levels(droplevels(grp))
+  }
+
+  if (is.null(weights)) {
+    .calculate_mwu(dv, grp, distribution, group_labels)
+  } else {
+    .calculate_weighted_mwu(dv, grp, data[[weights]], group_labels)
+  }
+}
+
+
+# Mann-Whitney-Test for two groups --------------------------------------------
+
+.calculate_mwu <- function(dv, grp, distribution, group_labels) {
+  insight::check_if_installed("coin")
+  # prepare data
+  wcdat <- data.frame(dv, grp)
+  # perform Wilcoxon test
+  wt <- coin::wilcox_test(dv ~ grp, data = wcdat, distribution = distribution)
+
+  # for rank mean
+  group_levels <- levels(grp)
+
+  # compute statistics
+  u <- as.numeric(coin::statistic(wt, type = "linear"))
+  z <- as.numeric(coin::statistic(wt, type = "standardized"))
+  p <- coin::pvalue(wt)
+  r <- abs(z / sqrt(length(dv)))
+  w <- suppressWarnings(stats::wilcox.test(dv ~ grp, data = wcdat)$statistic)
+
+  # group means
+  dat_gr1 <- stats::na.omit(dv[grp == group_levels[1]])
+  dat_gr2 <- stats::na.omit(dv[grp == group_levels[2]])
+
+  rank_mean_1 <- mean(rank(dat_gr1))
+  rank_mean_2 <- mean(rank(dat_gr2))
+
+  # compute n for each group
+  n_grp1 <- length(dat_gr1)
+  n_grp2 <- length(dat_gr2)
+
+  out <- data.frame(
+    group1 = group_levels[1],
+    group2 = group_levels[2],
+    estimate = rank_mean_1 - rank_mean_2,
+    u = u,
+    w = w,
+    z = z,
+    r = r,
+    p = as.numeric(p)
+  )
+  attr(out, "rank_means") <- stats::setNames(
+    c(rank_mean_1, rank_mean_2),
+    c("Mean Group 1", "Mean Group 2")
+  )
+  attr(out, "n_groups") <- stats::setNames(
+    c(n_grp1, n_grp2),
+    c("N Group 1", "N Group 2")
+  )
+  attr(out, "group_labels") <- group_labels
+  attr(out, "method") <- "wilcoxon"
+  attr(out, "weighted") <- FALSE
+  class(out) <- c("sj_htest_mwu", "data.frame")
+
+  out
+}
+
+
+# Weighted Mann-Whitney-Test for two groups ----------------------------------
+
+.calculate_weighted_mwu <- function(dv, grp, weights, group_labels) {
+  # check if pkg survey is available
+  insight::check_if_installed("survey")
+
+  dat <- stats::na.omit(data.frame(dv, grp, weights))
+  colnames(dat) <- c("x", "g", "w")
+
+  design <- survey::svydesign(ids = ~0, data = dat, weights = ~w)
+  result <- survey::svyranktest(formula = x ~ g, design, test = "wilcoxon")
+
+  # for rank mean
+  group_levels <- levels(droplevels(grp))
+  # subgroups
+  dat_gr1 <- dat[dat$g == group_levels[1], ]
+  dat_gr2 <- dat[dat$g == group_levels[2], ]
+  dat_gr1$rank_x <- rank(dat_gr1$x)
+  dat_gr2$rank_x <- rank(dat_gr2$x)
+
+  # rank means
+  design_mean1 <- survey::svydesign(
+    ids = ~0,
+    data = dat_gr1,
+    weights = ~w
+  )
+  rank_mean_1 <- survey::svymean(~rank_x, design_mean1)
+
+  design_mean2 <- survey::svydesign(
+    ids = ~0,
+    data = dat_gr2,
+    weights = ~w
+  )
+  rank_mean_2 <- survey::svymean(~rank_x, design_mean2)
+
+  # group Ns
+  n_grp1 <- round(sum(dat_gr1$w))
+  n_grp2 <- round(sum(dat_gr2$w))
+
+  # statistics and effect sizes
+  z <- result$statistic
+  r <- abs(z / sqrt(sum(n_grp1, n_grp2)))
+
+  out <- data.frame(
+    group1 = group_levels[1],
+    group2 = group_levels[2],
+    estimate = result$estimate,
+    z = z,
+    r = r,
+    p = as.numeric(result$p.value)
+  )
+
+  attr(out, "rank_means") <- stats::setNames(
+    c(rank_mean_1, rank_mean_2),
+    c("Mean Group 1", "Mean Group 2")
+  )
+  attr(out, "n_groups") <- stats::setNames(
+    c(n_grp1, n_grp2),
+    c("N Group 1", "N Group 2")
+  )
+  attr(out, "group_labels") <- group_labels
+  attr(out, "weighted") <- TRUE
+  class(out) <- c("sj_htest_mwu", "data.frame")
+
+  out
+}
+
+
+# helper ----------------------------------------------------------------------
+
+.misspelled_string <- function(source, searchterm, default_message = NULL) {
+  if (is.null(searchterm) || length(searchterm) < 1) {
+    return(default_message)
+  }
+  # used for many matches
+  more_found <- ""
+  # init default
+  msg <- ""
+  # remove matching strings
+  same <- intersect(source, searchterm)
+  searchterm <- setdiff(searchterm, same)
+  source <- setdiff(source, same)
+  # guess the misspelled string
+  possible_strings <- unlist(lapply(searchterm, function(s) {
+    source[.fuzzy_grep(source, s)] # nolint
+  }), use.names = FALSE)
+  if (length(possible_strings)) {
+    msg <- "Did you mean "
+    if (length(possible_strings) > 1) {
+      # make sure we don't print dozens of alternatives for larger data frames
+      if (length(possible_strings) > 5) {
+        more_found <- sprintf(
+          " We even found %i more possible matches, not shown here.",
+          length(possible_strings) - 5
+        )
+        possible_strings <- possible_strings[1:5]
+      }
+      msg <- paste0(msg, "one of ", toString(paste0("\"", possible_strings, "\"")))
+    } else {
+      msg <- paste0(msg, "\"", possible_strings, "\"")
+    }
+    msg <- paste0(msg, "?", more_found)
+  } else {
+    msg <- default_message
+  }
+  # no double white space
+  insight::trim_ws(msg)
+}
+
+
+.fuzzy_grep <- function(x, pattern, precision = NULL) {
+  if (is.null(precision)) {
+    precision <- round(nchar(pattern) / 3)
+  }
+  if (precision > nchar(pattern)) {
+    return(NULL)
+  }
+  p <- sprintf("(%s){~%i}", pattern, precision)
+  grep(pattern = p, x = x, ignore.case = FALSE)
+}
+
+
+.sanitize_htest_input <- function(data, select, by, weights) {
+  # check if arguments are NULL
+  if (is.null(select)) {
+    insight::format_error("Argument `select` is missing.")
+  }
+  # `by` is only allowed to be NULL if `select` specifies more than one variable
+  if (is.null(by) && length(select) == 1) {
+    insight::format_error("Argument `by` is missing.")
+  }
+
+  # check if arguments have correct length or are of correct type
+  if (!is.character(select)) {
+    insight::format_error("Argument `select` must be a character string with the name(s) of the variable(s).")
+  }
+  if (!is.null(by) && (length(by) != 1 || !is.character(by))) {
+    insight::format_error("Argument `by` must be a character string with the name of a single variable.")
+  }
+  if (!is.null(weights) && length(weights) != 1) {
+    insight::format_error("Argument `weights` must be a character string with the name of a single variable.")
+  }
+
+  # check if "select" is in data
+  if (!all(select %in% colnames(data))) {
+    not_found <- setdiff(select, colnames(data))[1]
+    insight::format_error(
+      sprintf("Variable '%s' not found in data frame.", not_found),
+      .misspelled_string(colnames(data), not_found, "Maybe misspelled?")
+    )
+  }
+  # check if "by" is in data
+  if (!is.null(by) && !by %in% colnames(data)) {
+    insight::format_error(
+      sprintf("Variable '%s' not found in data frame.", by),
+      .misspelled_string(colnames(data), by, "Maybe misspelled?")
+    )
+  }
+  # check if "weights" is in data
+  if (!is.null(weights) && !weights %in% colnames(data)) {
+    insight::format_error(
+      sprintf("Weighting variable '%s' not found in data frame.", weights),
+      .misspelled_string(colnames(data), weights, "Maybe misspelled?")
+    )
+  }
+}
+
+
+# methods ---------------------------------------------------------------------
+
+#' @export
+print.sj_htest_mwu <- function(x, ...)
{ + # fetch attributes + group_labels <- attributes(x)$group_labels + rank_means <- attributes(x)$rank_means + n_groups <- attributes(x)$n_groups + weighted <- attributes(x)$weighted + + if (weighted) { + weight_string <- " (weighted)" + } else { + weight_string <- "" + } + + # same width + group_labels <- format(group_labels) + + # header + insight::print_color(sprintf("# Mann-Whitney test%s\n\n", weight_string), "blue") + + # group-1-info + insight::print_color( + sprintf( + " Group 1: %s (n = %i, rank mean = %s)\n", + group_labels[1], n_groups[1], insight::format_value(rank_means[1], protect_integers = TRUE) + ), "cyan" + ) + + # group-2-info + insight::print_color( + sprintf( + " Group 2: %s (n = %i, rank mean = %s)\n", + group_labels[2], n_groups[2], insight::format_value(rank_means[2], protect_integers = TRUE) + ), "cyan" + ) + + cat(sprintf("\n r = %.3f, Z = %.3f, %s\n\n", x$r, x$z, insight::format_p(x$p))) +} diff --git a/R/mean_n.R b/R/mean_n.R deleted file mode 100644 index 5d6a2e0d..00000000 --- a/R/mean_n.R +++ /dev/null @@ -1,78 +0,0 @@ -#' @title Row means with min amount of valid values -#' @name mean_n -#' @description This function is similar to the SPSS \code{MEAN.n} function and computes -#' row means from a \code{data.frame} or \code{matrix} if at least \code{n} -#' values of a row are valid (and not \code{NA}). -#' -#' @param dat A data frame with at least two columns, where row means are applied. -#' @param n May either be -#' \itemize{ -#' \item a numeric value that indicates the amount of valid values per row to calculate the row mean; -#' \item or a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean (see 'Details'). -#' } -#' If a row's sum of valid values is less than \code{n}, \code{NA} will be returned as row mean value. -#' @param digits Numeric value indicating the number of decimal places to be used for rounding mean -#' value. Negative values are allowed (see 'Details'). -#' -#' @return A vector with row mean values of \code{df} for those rows with at least \code{n} -#' valid values. Else, \code{NA} is returned. -#' -#' @details Rounding to a negative number of \code{digits} means rounding to a power of -#' ten, so for example mean_n(df, 3, digits = -2) rounds to the -#' nearest hundred. \cr \cr -#' For \code{n}, must be a numeric value from \code{0} to \code{ncol(dat)}. If -#' a \emph{row} in \code{dat} has at least \code{n} non-missing values, the -#' row mean is returned. If \code{n} is a non-integer value from 0 to 1, -#' \code{n} is considered to indicate the proportion of necessary non-missing -#' values per row. E.g., if \code{n = .75}, a row must have at least \code{ncol(dat) * n} -#' non-missing values for the row mean to be calculated. See 'Examples'. 
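The defunct stub added in `R/Deprecated.R` above points to `datawizard::row_means()` as the replacement for the deleted `mean_n()`; a minimal sketch, assuming a **datawizard** version that provides the `min_valid` argument:

```r
dat <- data.frame(
  c1 = c(1, 2, NA, 4),
  c2 = c(NA, 2, NA, 5),
  c3 = c(NA, 4, NA, NA),
  c4 = c(2, 3, 7, 8)
)
# row means only where at least 3 values per row are valid
datawizard::row_means(dat, min_valid = 3)
```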
-#' -#' @references \href{https://r4stats.com/2014/09/03/adding-the-spss-mean-n-function-to-r/}{r4stats.com} -#' -#' @examples -#' dat <- data.frame(c1 = c(1,2,NA,4), -#' c2 = c(NA,2,NA,5), -#' c3 = c(NA,4,NA,NA), -#' c4 = c(2,3,7,8)) -#' -#' # needs at least 4 non-missing values per row -#' mean_n(dat, 4) # 1 valid return value -#' -#' # needs at least 3 non-missing values per row -#' mean_n(dat, 3) # 2 valid return values -#' -#' # needs at least 2 non-missing values per row -#' mean_n(dat, 2) -#' -#' # needs at least 1 non-missing value per row -#' mean_n(dat, 1) # all means are shown -#' -#' # needs at least 50% of non-missing values per row -#' mean_n(dat, .5) # 3 valid return values -#' -#' # needs at least 75% of non-missing values per row -#' mean_n(dat, .75) # 2 valid return values -#' -#' @export -mean_n <- function(dat, n, digits = 2) { - # is 'n' indicating a proportion? - digs <- n %% 1 - if (digs != 0) n <- round(ncol(dat) * digs) - - # coerce matrix to data frame - if (is.matrix(dat)) dat <- as.data.frame(dat) - - # check if we have a data framme with at least two columns - if (!is.data.frame(dat) || ncol(dat) < 2) { - warning("`dat` must be a data frame with at least two columns.", call. = TRUE) - return(NA) - } - - # n may not be larger as df's amount of columns - if (ncol(dat) < n) { - warning("`n` must be smaller or equal to number of columns in data frame.", call. = TRUE) - return(NA) - } - - round(apply(dat, 1, function(x) ifelse(sum(!is.na(x)) >= n, mean(x, na.rm = TRUE), NA)), digits) -} diff --git a/R/mwu.R b/R/mwu.R deleted file mode 100644 index 6967bf7a..00000000 --- a/R/mwu.R +++ /dev/null @@ -1,234 +0,0 @@ -#' @title Mann-Whitney-U-Test -#' @name mwu -#' @description This function performs a Mann-Whitney-U-Test (or Wilcoxon rank -#' sum test for _unpaired_ samples, see [`wilcox.test()`] and [`coin::wilcox_test()`]) -#' comparing `x` by each group indicated by `grp`. If `grp` has more than two -#' categories, a comparison between each combination of two groups is performed. -#' -#' The function reports U, p and Z-values as well as effect size r and group-rank-means. -#' -#' @param x Bare (unquoted) variable name, or a character vector with the variable name. -#' @param distribution Indicates how the null distribution of the test statistic -#' should be computed. May be one of `"exact"`, `"approximate"` or `"asymptotic"` -#' (default). See [`coin::wilcox_test()`] for details. -#' -#' @inheritParams weighted_sd -#' @inheritParams means_by_group -#' -#' @return (Invisibly) returns a data frame with U, p and Z-values for each group-comparison -#' as well as effect-size r; additionally, group-labels and groups' n's are -#' also included. -#' -#' @note This function calls the \code{\link[coin]{wilcox_test}} with formula. If \code{grp} -#' has more than two groups, additionally a Kruskal-Wallis-Test (see \code{\link{kruskal.test}}) -#' is performed. \cr \cr -#' Interpretation of effect sizes, as a rule-of-thumb: -#' \itemize{ -#' \item small effect >= 0.1 -#' \item medium effect >= 0.3 -#' \item large effect >= 0.5 -#' } -#' -#' @examples -#' data(efc) -#' # Mann-Whitney-U-Tests for elder's age by elder's sex. -#' mwu(efc, e17age, e16sex) -#' -#' # using formula interface -#' mwu(e17age ~ e16sex, efc) -#' -#' # Mann-Whitney-Tests for elder's age by each level elder's dependency. 
-#' mwu(efc, e17age, e42dep) -#' -#' @importFrom stats na.omit wilcox.test kruskal.test -#' @importFrom sjmisc recode_to is_empty -#' @importFrom sjlabelled get_labels as_numeric -#' @importFrom rlang quo_name enquo -#' @export -mwu <- function(data, ...) { - UseMethod("mwu") -} - -#' @rdname mwu -#' @export -mwu.default <- function(data, - x, - grp, - distribution = "asymptotic", - out = c("txt", "viewer", "browser"), - encoding = "UTF-8", - file = NULL, - ...) { - - out <- match.arg(out) - - if (out != "txt" && !requireNamespace("sjPlot", quietly = TRUE)) { - message("Package `sjPlot` needs to be loaded to print HTML tables.") - out <- "txt" - } - - if (!requireNamespace("coin", quietly = TRUE)) { - stop("Package `coin` needs to be installed to compute the Mann-Whitney-U test.", call. = FALSE) - } - - - # create quosures - grp.name <- rlang::quo_name(rlang::enquo(grp)) - dv.name <- rlang::quo_name(rlang::enquo(x)) - - # create string with variable names - vars <- c(grp.name, dv.name) - - # get data - data <- suppressMessages(dplyr::select(data, !! vars)) - - grp <- data[[grp.name]] - dv <- data[[dv.name]] - - # coerce factor and character to numeric - if (is.factor(grp) || is.character(grp)) grp <- sjlabelled::as_numeric(grp) - - # group "counter" (index) should start with 1, not 0 - if (min(grp, na.rm = TRUE) < 1) grp <- sjmisc::recode_to(grp, lowest = 1, append = FALSE) - - # retrieve unique group values. need to iterate all values - grp_values <- sort(unique(stats::na.omit(grp))) - - # length of value range - cnt <- length(grp_values) - labels <- sjlabelled::get_labels( - grp, attr.only = FALSE, values = NULL, non.labelled = TRUE - ) - - df <- data.frame() - for (i in seq_len(cnt)) { - for (j in i:cnt) { - if (i != j) { - # retrieve cases (rows) of subgroups - xsub <- dv[which(grp == grp_values[i] | grp == grp_values[j])] - ysub <- grp[which(grp == grp_values[i] | grp == grp_values[j])] - - # this is for unpaired wilcox.test() - xsub_2 <- stats::na.omit(dv[which(grp == grp_values[i])]) - ysub_2 <- stats::na.omit(dv[which(grp == grp_values[j])]) - - # only use rows with non-missings - ysub <- ysub[which(!is.na(xsub))] - - # remove missings - xsub <- as.numeric(stats::na.omit(xsub)) - ysub.n <- stats::na.omit(ysub) - - # grouping variable is a factor - ysub <- as.factor(ysub.n) - - wcdat <- data.frame( - x = xsub, - y = ysub - ) - - # perfom wilcox test - wt <- coin::wilcox_test(x ~ y, data = wcdat, distribution = distribution) - - # compute statistics - u <- as.numeric(coin::statistic(wt, type = "linear")) - z <- as.numeric(coin::statistic(wt, type = "standardized")) - p <- coin::pvalue(wt) - r <- abs(z / sqrt(length(ysub))) - w <- stats::wilcox.test(xsub_2, ysub_2, paired = FALSE)$statistic - rkm.i <- mean(rank(xsub)[which(ysub.n == grp_values[i])], na.rm = TRUE) - rkm.j <- mean(rank(xsub)[which(ysub.n == grp_values[j])], na.rm = TRUE) - - # compute n for each group - n_grp1 <- length(xsub[which(ysub.n == grp_values[i])]) - n_grp2 <- length(xsub[which(ysub.n == grp_values[j])]) - - # generate result data frame - df <- - rbind( - df, - cbind( - grp1 = grp_values[i], - grp1.label = labels[i], - grp1.n = n_grp1, - grp2 = grp_values[j], - grp2.label = labels[j], - grp2.n = n_grp2, - u = u, - w = w, - p = p, - z = z, - r = r, - rank.mean.grp1 = rkm.i, - rank.mean.grp2 = rkm.j - ) - ) - } - } - } - - # convert variables - df[["grp1"]] <- as.numeric(as.character(df[["grp1"]])) - df[["grp2"]] <- as.numeric(as.character(df[["grp2"]])) - df[["grp1.n"]] <- 
as.numeric(as.character(df[["grp1.n"]]))
-  df[["grp2.n"]] <- as.numeric(as.character(df[["grp2.n"]]))
-  df[["grp1.label"]] <- as.character(df[["grp1.label"]])
-  df[["grp2.label"]] <- as.character(df[["grp2.label"]])
-  df[["u"]] <- as.numeric(as.character(df[["u"]]))
-  df[["w"]] <- as.numeric(as.character(df[["w"]]))
-  df[["p"]] <- as.numeric(as.character(df[["p"]]))
-  df[["z"]] <- as.numeric(as.character(df[["z"]]))
-  df[["r"]] <- as.numeric(as.character(df[["r"]]))
-  df[["rank.mean.grp1"]] <- as.numeric(as.character(df[["rank.mean.grp1"]]))
-  df[["rank.mean.grp2"]] <- as.numeric(as.character(df[["rank.mean.grp2"]]))
-
-  # prepare a data frame that can be used for 'sjt.df'.
-  tab.df <-
-    data_frame(
-      Groups = sprintf("%s<br>%s", df$grp1.label, df$grp2.label),
-      N = sprintf("%s<br>%s", df$grp1.n, df$grp2.n),
-      'Mean Rank' = sprintf("%.2f<br>%.2f", df$rank.mean.grp1, df$rank.mean.grp2),
-      'Mann-Whitney-U' = as.character(df$u),
-      'Wilcoxon-W' = as.character(df$w),
-      Z = sprintf("%.3f", df$z),
-      'Effect Size' = sprintf("%.3f", df$r),
-      p = sprintf("%.3f", df$p)
-    )
-
-  # replace 0.001 with <0.001
-  tab.df$p[which(tab.df$p == "0.001")] <- "<0.001"
-
-  ret.df <- list(df = df, tab.df = tab.df, data = data.frame(dv, grp))
-
-  # save how to print output
-  attr(ret.df, "print") <- out
-  attr(ret.df, "encoding") <- encoding
-  attr(ret.df, "file") <- file
-
-  if (out %in% c("viewer", "browser"))
-    class(ret.df) <- c("mwu", "sjt_mwu")
-  else
-    class(ret.df) <- c("mwu", "sj_mwu")
-
-  ret.df
-}
-
-
-#' @importFrom dplyr select
-#' @rdname mwu
-#' @export
-mwu.formula <- function(formula,
-                        data,
-                        distribution = "asymptotic",
-                        out = c("txt", "viewer", "browser"),
-                        encoding = "UTF-8",
-                        file = NULL,
-                        ...) {
-  vars <- all.vars(formula)
-  mwu(data, x = !! vars[1], grp = !! vars[2], distribution = distribution, out = out, encoding = encoding, file = file, ...)
-}
-
-
-#' @rdname mwu
-#' @export
-mannwhitney <- mwu
diff --git a/R/phi.R b/R/phi.R
index f173ce2e..48a07f1e 100644
--- a/R/phi.R
+++ b/R/phi.R
@@ -26,17 +26,13 @@ phi.formula <- function(formula, data, ci.lvl = NULL, n = 1000, method = c("dist
   if (is.null(ci.lvl) || is.na(ci.lvl)) {
     .cramer(tab)
   } else {
-    ci <- data[, terms] %>%
-      sjstats::bootstrap(n) %>%
-      dplyr::mutate(
-        tables = lapply(.data$strap, function(x) {
-          dat <- as.data.frame(x)
-          table(dat[[1]], dat[[2]])
-        }),
-        phis = sapply(.data$tables, function(x) .cramer(x))
-      ) %>%
-      dplyr::pull("phis") %>%
-      boot_ci(ci.lvl = ci.lvl, method = method)
+    straps <- sjstats::bootstrap(data[terms], n)
+    tables <- lapply(straps$strap, function(x) {
+      dat <- as.data.frame(x)
+      table(dat[[1]], dat[[2]])
+    })
+    phis <- sapply(tables, function(x) .phi(x))
+    ci <- boot_ci(phis, ci.lvl = ci.lvl, method = method)

     data_frame(
       phi = .phi(tab),
diff --git a/R/prop.R b/R/prop.R
index e22e994e..167b3cdd 100644
--- a/R/prop.R
+++ b/R/prop.R
@@ -1,11 +1,11 @@
 #' @title Proportions of values in a vector
 #' @name prop
 #'
-#' @description \code{prop()} calculates the proportion of a value or category
-#'   in a variable. \code{props()} does the same, but allows for
+#' @description `prop()` calculates the proportion of a value or category
+#'   in a variable. `props()` does the same, but allows for
 #'   multiple logical conditions in one statement. It is similar
-#'   to \code{mean()} with logical predicates, however, both
-#'   \code{prop()} and \code{props()} work with grouped data frames.
+#'   to `mean()` with logical predicates, however, both
+#'   `prop()` and `props()` work with grouped data frames.
 #'
 #' @param data A data frame. May also be a grouped data frame (see 'Examples').
 #' @param ... One or more value pairs of comparisons (logical predicates). Put
 #'   'Examples'.
 #' @param weights Vector of weights that will be applied to weight all observations.
 #'   Must be a vector of same length as the input vector. Default is
-#'   \code{NULL}, so no weights are used.
+#'   `NULL`, so no weights are used.
 #' @param na.rm Logical, whether to remove NA values from the vector when the
-#'   proportion is calculated. \code{na.rm = FALSE} gives you the raw
-#'   percentage of a value in a vector, \code{na.rm = TRUE} the valid
+#'   proportion is calculated. `na.rm = FALSE` gives you the raw
+#'   percentage of a value in a vector, `na.rm = TRUE` the valid
 #'   percentage.
 #' @param digits Amount of digits for returned values.
#' -#' @details \code{prop()} only allows one logical statement per comparison, -#' while \code{props()} allows multiple logical statements per comparison. -#' However, \code{prop()} supports weighting of variables before calculating -#' proportions, and comparisons may also be quoted. Hence, \code{prop()} +#' @details `prop()` only allows one logical statement per comparison, +#' while `props()` allows multiple logical statements per comparison. +#' However, `prop()` supports weighting of variables before calculating +#' proportions, and comparisons may also be quoted. Hence, `prop()` #' also processes comparisons, which are passed as character vector #' (see 'Examples'). #' @@ -96,7 +96,9 @@ #' @export prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { # check argument - if (!is.data.frame(data)) stop("`data` needs to be a data frame.", call. = F) + if (!is.data.frame(data)) { + insight::format_error("`data` needs to be a data frame.") + } # get dots dots <- match.call(expand.dots = FALSE)$`...` @@ -109,7 +111,9 @@ prop <- function(data, ..., weights = NULL, na.rm = TRUE, digits = 4) { #' @export props <- function(data, ..., na.rm = TRUE, digits = 4) { # check argument - if (!is.data.frame(data)) stop("`data` needs to be a data frame.", call. = F) + if (!is.data.frame(data)) { + insight::format_error("`data` needs to be a data frame.") + } # get dots dots <- match.call(expand.dots = FALSE)$`...` @@ -123,7 +127,7 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { # remember comparisons comparisons <- lapply(dots, function(x) { # to character, and remove spaces and quotes - x <- gsub(" ", "", deparse(x), fixed = T) + x <- gsub(" ", "", deparse(x), fixed = TRUE) x <- gsub("\"", "", x, fixed = TRUE) x }) @@ -188,7 +192,7 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { # order rows by values of grouping variables fr <- fr[do.call(order, reihenfolge), ] - return(fr) + fr } else { # iterate dots (comparing conditions) @@ -206,14 +210,14 @@ proportions <- function(data, dots, weight.by, na.rm, digits, multi_logical) { )) } - return(unlist(result)) + unlist(result) } } get_proportion <- function(x, data, weight.by, na.rm, digits) { # to character, and remove spaces and quotes - x <- gsub(" ", "", deparse(x), fixed = T) + x <- gsub(" ", "", deparse(x), fixed = TRUE) x <- gsub("\"", "", x, fixed = TRUE) # split expression at ==, < or > @@ -252,7 +256,7 @@ get_proportion <- function(x, data, weight.by, na.rm, digits) { if (na.rm) dummy <- na.omit(dummy) # get proportion - round(sum(dummy, na.rm = T) / length(dummy), digits = digits) + round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) } @@ -264,5 +268,5 @@ get_multiple_proportion <- function(x, data, na.rm, digits) { if (na.rm) dummy <- na.omit(dummy) # get proportion - round(sum(dummy, na.rm = T) / length(dummy), digits = digits) + round(sum(dummy, na.rm = TRUE) / length(dummy), digits = digits) } diff --git a/R/wtd_chisqtest.R b/R/wtd_chisqtest.R deleted file mode 100644 index 99f9a289..00000000 --- a/R/wtd_chisqtest.R +++ /dev/null @@ -1,91 +0,0 @@ -#' @rdname weighted_sd -#' @export -weighted_chisqtest <- function(data, ...) { - UseMethod("weighted_chisqtest") -} - -#' @importFrom dplyr select -#' @importFrom stats na.omit chisq.test as.formula -#' @rdname weighted_sd -#' @export -weighted_chisqtest.default <- function(data, x, y, weights, ...) 
{ - x.name <- deparse(substitute(x)) - y.name <- deparse(substitute(y)) - w.name <- deparse(substitute(weights)) - - if (w.name == "NULL") { - w.name <- "weights" - data$weights <- 1 - } - - # create string with variable names - vars <- .compact_character(c(x.name, y.name, w.name)) - - # get data - dat <- suppressMessages(dplyr::select(data, !! vars)) - dat <- stats::na.omit(dat) - - colnames(dat)[ncol(dat)] <- ".weights" - - # check if we have chisq-test for given probabilities - dot_args <- list(...) - if ("p" %in% names(dot_args)) { - .weighted_chisq_for_prob(dat, x.name, prob = dot_args[["p"]]) - } else { - crosstable_statistics(data = dat, statistics = "auto", weights = ".weights", ...) - } -} - - -#' @importFrom stats xtabs -#' @rdname weighted_sd -#' @export -weighted_chisqtest.formula <- function(formula, data, ...) { - vars <- all.vars(formula) - dot_args <- list(...) - - if (length(vars) < 3 && !"p" %in% names(dot_args)) { - vars <- c(vars, ".weights") - data$.weights <- 1 - } - - if ("p" %in% names(dot_args)) { - dat <- data[vars] - colnames(dat)[ncol(dat)] <- ".weights" - .weighted_chisq_for_prob(dat, names(dat)[1], prob = dot_args[["p"]]) - } else { - tab <- as.table(round(stats::xtabs(data[[vars[3]]] ~ data[[vars[1]]] + data[[vars[2]]]))) - class(tab) <- "table" - crosstable_statistics(data = tab, statistics = "auto", weights = NULL, ...) - } -} - - - -.weighted_chisq_for_prob <- function(dat, x.name, prob) { - if (!requireNamespace("survey", quietly = TRUE)) { - stop("Package `survey` needed to for this function to work. Please install it.", call. = FALSE) - } - - if (abs(sum(prob) - 1) > sqrt(.Machine$double.eps)) { - prob <- prob / sum(prob) - } - - dat$sj_subject_id <- 1:nrow(dat) - dat$sj_weights <- dat$.weights - design <- survey::svydesign(id = ~sj_subject_id, weights = ~sj_weights, data = dat) - stable <- survey::svytable(stats::as.formula(paste0("~", x.name)), design) - out <- stats::chisq.test(stable, p = prob) - - structure(class = "sj_xtab_stat2", list( - estimate = out$statistic, - p.value = out$p.value, - stat.name = "Chi-squared", - stat.html = "χ2", - df = out$parameter, - n_obs = nrow(dat), - method = "Weighted chi-squared test for given probabilities", - method.html = "Weighted χ2 test for given probabilities", - method.short = "Chi-squared" - )) -} diff --git a/R/wtd_sd.R b/R/wtd_sd.R index b7db04d8..95af674d 100644 --- a/R/wtd_sd.R +++ b/R/wtd_sd.R @@ -82,16 +82,6 @@ #' weighted_ttest(efc, e17age, weights = weight) #' weighted_ttest(efc, e17age, c160age, weights = weight) #' weighted_ttest(e17age ~ e16sex + weight, efc) -#' -#' # weighted Mann-Whitney-U-test ---- -#' weighted_mannwhitney(c12hour ~ c161sex + weight, efc) -#' -#' # weighted Chi-squared-test ---- -#' weighted_chisqtest(efc, c161sex, e16sex, weights = weight, correct = FALSE) -#' weighted_chisqtest(c172code ~ c161sex + weight, efc) -#' -#' # weighted Chi-squared-test for given probabilities ---- -#' weighted_chisqtest(c172code ~ weight, efc, p = c(.33, .33, .34)) #' @export weighted_sd <- function(x, weights = NULL) { UseMethod("weighted_sd") diff --git a/R/xtab_statistics.R b/R/xtab_statistics.R index 4fb592e0..21824194 100644 --- a/R/xtab_statistics.R +++ b/R/xtab_statistics.R @@ -6,68 +6,70 @@ #' Supported measures are Cramer's V, Phi, Spearman's rho, #' Kendall's tau and Pearson's r. #' -#' @param data A data frame or a table object. If a table object, \code{x1} and -#' \code{x2} will be ignored. 
For Kendall's \emph{tau}, Spearman's \emph{rho}
-#'   or Pearson's product moment correlation coefficient, \code{data} needs
-#'   to be a data frame. If \code{x1} and \code{x2} are not specified,
-#'   the first two columns of the data frames are used as variables
-#'   to compute the crosstab.
-#' @param formula A formula of the form \code{lhs ~ rhs} where \code{lhs} is a
-#'   numeric variable giving the data values and \code{rhs} a factor giving the
-#'   corresponding groups.
-#' @param tab A \code{\link{table}} or \code{\link[stats]{ftable}}. Tables of class
-#'   \code{\link[stats]{xtabs}} and other will be coerced to \code{ftable}
-#'   objects.
+#' @param data A data frame or a table object. If a table object, `x1` and
+#'   `x2` will be ignored. For Kendall's _tau_, Spearman's _rho_ or Pearson's
+#'   product moment correlation coefficient, `data` needs to be a data frame.
+#'   If `x1` and `x2` are not specified, the first two columns of the data
+#'   frames are used as variables to compute the crosstab.
+#' @param formula A formula of the form `lhs ~ rhs` where `lhs` is a
+#'   numeric variable giving the data values and `rhs` a factor giving the
+#'   corresponding groups.
+#' @param tab A [`table()`] or [`ftable()`]. Tables of class [`xtabs()`] and
+#'   others will be coerced to `ftable` objects.
 #' @param x1 Name of first variable that should be used to compute the
-#'   contingency table. If \code{data} is a table object, this argument
-#'   will be irgnored.
+#'   contingency table. If `data` is a table object, this argument will be
+#'   ignored.
 #' @param x2 Name of second variable that should be used to compute the
-#'   contingency table. If \code{data} is a table object, this argument
-#'   will be irgnored.
+#'   contingency table. If `data` is a table object, this argument will be
+#'   ignored.
 #' @param statistics Name of measure of association that should be computed. May
-#'   be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"},
-#'   \code{"kendall"}, \code{"pearson"} or \code{"fisher"}. See 'Details'.
-#' @param ci.lvl Scalar between 0 and 1. If not \code{NULL}, returns a data
-#'   frame including lower and upper confidence intervals.
+#'   be one of `"auto"`, `"cramer"`, `"phi"`, `"spearman"`, `"kendall"`,
+#'   `"pearson"` or `"fisher"`. See 'Details'.
+#' @param ci.lvl Scalar between 0 and 1. If not `NULL`, returns a data
+#'   frame including lower and upper confidence intervals.
+#' @param weights Name of variable in `x` that indicates the vector of weights
+#'   that will be applied to weight all observations. Default is `NULL`, so no
+#'   weights are used.
 #' @param ... Other arguments, passed down to the statistic functions
-#'   \code{\link[stats]{chisq.test}}, \code{\link[stats]{fisher.test}} or
-#'   \code{\link[stats]{cor.test}}.
+#'   [`chisq.test()`], [`fisher.test()`] or [`cor.test()`].
 #'
-#' @inheritParams means_by_group
 #' @inheritParams bootstrap
 #' @inheritParams boot_ci
 #'
-#' @return For \code{phi()}, the table's Phi value. For \code{cramer()}, the
-#'   table's Cramer's V.
-#'   \cr \cr
-#'   For \code{crosstable_statistics()}, a list with following components:
-#'   \describe{
-#'     \item{\code{estimate}}{the value of the estimated measure of association.}
-#'     \item{\code{p.value}}{the p-value for the test.}
-#'     \item{\code{statistic}}{the value of the test statistic.}
-#'     \item{\code{stat.name}}{the name of the test statistic.}
-#'     \item{\code{stat.html}}{if applicable, the name of the test statistic, in HTML-format.}
-#'     \item{\code{df}}{the degrees of freedom for the contingency table.}
-#'     \item{\code{method}}{character string indicating the name of the measure of association.}
-#'     \item{\code{method.html}}{if applicable, the name of the measure of association, in HTML-format.}
-#'     \item{\code{method.short}}{the short form of association measure, equals the \code{statistics}-argument.}
-#'     \item{\code{fisher}}{logical, if Fisher's exact test was used to calculate the p-value.}
-#'   }
+#' @return For [`phi()`], the table's Phi value. For [`cramers_v()`], the
+#'   table's Cramer's V.
+#'
+#'   For `crosstable_statistics()`, a list with the following components:
+#'
+#'   - `estimate`: the value of the estimated measure of association.
+#'   - `p.value`: the p-value for the test.
+#'   - `statistic`: the value of the test statistic.
+#'   - `stat.name`: the name of the test statistic.
+#'   - `stat.html`: if applicable, the name of the test statistic, in HTML-format.
+#'   - `df`: the degrees of freedom for the contingency table.
+#'   - `method`: character string indicating the name of the measure of association.
+#'   - `method.html`: if applicable, the name of the measure of association, in HTML-format.
+#'   - `method.short`: the short form of association measure, equals the `statistics`-argument.
+#'   - `fisher`: logical, if Fisher's exact test was used to calculate the p-value.
 #'
 #' @details The p-value for Cramer's V and the Phi coefficient are based
-#'   on \code{chisq.test()}. If any expected value of a table cell is
-#'   smaller than 5, or smaller than 10 and the df is 1, then \code{fisher.test()}
-#'   is used to compute the p-value, unless \code{statistics = "fisher"}; in
-#'   this case, the use of \code{fisher.test()} is forced to compute the
-#'   p-value. The test statistic is calculated with \code{cramer()} resp.
-#'   \code{phi()}.
-#'   \cr \cr
-#'   Both test statistic and p-value for Spearman's rho, Kendall's tau
-#'   and Pearson's r are calculated with \code{cor.test()}.
-#'   \cr \cr
-#'   When \code{statistics = "auto"}, only Cramer's V or Phi are calculated,
-#'   based on the dimension of the table (i.e. if the table has more than
-#'   two rows or columns, Cramer's V is calculated, else Phi).
+#'   on `chisq.test()`. If any expected value of a table cell is smaller than 5,
+#'   or smaller than 10 and the df is 1, then `fisher.test()` is used to compute
+#'   the p-value, unless `statistics = "fisher"`; in this case, the use of
+#'   `fisher.test()` is forced to compute the p-value. The test statistic is
+#'   calculated with `cramers_v()` resp. `phi()`.
+#'
+#'   Both test statistic and p-value for Spearman's rho, Kendall's tau and
+#'   Pearson's r are calculated with `cor.test()`.
+#'
+#'   When `statistics = "auto"`, only Cramer's V or Phi are calculated, based on
+#'   the dimension of the table (i.e. if the table has more than two rows or
+#'   columns, Cramer's V is calculated, else Phi).
+#'
+#' @references Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+#'   Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+#'   That Use the Chi-Squared Statistic. Mathematics, 11, 1982.
+#'   \doi{10.3390/math11091982}
 #'
 #' @examples
 #' # Phi coefficient for 2x2 tables
@@ -118,9 +120,9 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
   weights <- deparse(substitute(weights))
   # if names were quoted, remove quotes
-  x1 <- gsub("\"", "", x1, fixed = T)
-  x2 <- gsub("\"", "", x2, fixed = T)
-  weights <- gsub("\"", "", weights, fixed = T)
+  x1 <- gsub("\"", "", x1, fixed = TRUE)
+  x2 <- gsub("\"", "", x2, fixed = TRUE)
+  weights <- gsub("\"", "", weights, fixed = TRUE)
   if (sjmisc::is_empty(weights) || weights == "NULL") weights <- NULL
@@ -140,8 +142,9 @@ crosstable_statistics <- function(data, x1 = NULL, x2 = NULL, statistics = c("au
     if (!is.null(weights)) {
       tab <- as.table(round(stats::xtabs(data[[3]] ~ data[[1]] + data[[2]])))
       class(tab) <- "table"
-    } else
+    } else {
       tab <- table(data)
+    }
   } else {
     # 'data' is a table - copy to table object
     tab <- data
@@ -152,7 +155,7 @@
       "Need arguments `data`, `x1` and `x2` to compute %s-statistics.",
       statistics
     ),
-    call. = F
+    call. = FALSE
   )
 }
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 4596c636..2fb1d7b8 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -22,17 +22,17 @@ reference:
   - crosstable_statistics
   - table_values

-- title: "Weighted Statistics"
+- title: "Weighted Estimates and Dispersion"
   contents:
   - weight
   - weighted_ttest

-- title: "Other (Summary) Statistics"
+- title: "Summary Statistics and Tests"
   contents:
   - gmd
-  - mannwhitney
-  - mean_n
-  - means_by_group
+  - chi_squared_test
+  - kruskal_wallis_test
+  - mann_whitney_test
   - var_pop

 - title: "Tools for Regression Models"
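The reorganized reference entries above point to the renamed test functions. A short, hedged sketch of how the new API is presumably called: the argument names (select, by, weights) are assumptions, as the full signatures are not part of this diff, while the variable names come from the removed examples.

library(sjstats)
data(efc)

# replaces mwu(efc, e17age, e16sex) and mannwhitney()
mann_whitney_test(efc, select = "e17age", by = "e16sex")

# replaces the Kruskal-Wallis fallback for more than two groups
kruskal_wallis_test(efc, select = "e17age", by = "e42dep")

# replaces weighted_chisqtest(efc, c161sex, e16sex, weights = weight)
chi_squared_test(efc, select = "c161sex", by = "e16sex", weights = "weight")

The Mann-Whitney print method shown at the top of this excerpt then reports group sizes, rank means, the effect size r, the Z statistic and the p-value.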
-Page not found (404) • sjstats
-Content not found. Please use links in the navbar.
diff --git a/docs/CODE_OF_CONDUCT.html b/docs/CODE_OF_CONDUCT.html
deleted file mode 100644
index 26db543b..00000000
--- a/docs/CODE_OF_CONDUCT.html
+++ /dev/null
@@ -1,164 +0,0 @@
-Contributor Code of Conduct • sjstats
-As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
-
-We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
-
-Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
-
-Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
-
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
-
-This Code of Conduct is adapted from the Contributor Covenant (http://contributor-covenant.org), version 1.0.0, available at http://contributor-covenant.org/version/1/0/0/
diff --git a/docs/CONTRIBUTING.html b/docs/CONTRIBUTING.html
deleted file mode 100644
index 8c579ed9..00000000
--- a/docs/CONTRIBUTING.html
+++ /dev/null
@@ -1,185 +0,0 @@
-Contributing to sjmisc • sjstats
-This outlines how to propose a change to sjmisc.
-
-Fixing typos
-
-Small typos or grammatical errors in documentation may be edited directly using the GitHub web interface, so long as the changes are made in the source file. If you want to fix typos in the documentation, please edit the related .R file in the R/ folder. Do not edit an .Rd file in man/.
-
-Filing an issue
-
-The easiest way to propose a change or new feature is to file an issue. If you've found a bug, you may also create an associated issue. If possible, try to illustrate your proposal or the bug with a minimal reproducible example.
-
-Pull request
-
-  • Please create a Git branch for each pull request (PR).
-  • Your contributed code should roughly follow the tidyverse style guide. Exceptions from this guide: if separated, use underscores for function names, but dots for argument names (see, as an example, set_na()).
-  • sjmisc uses roxygen2, with Markdown syntax, for documentation.
-  • sjmisc uses testthat. Adding tests to the PR makes it easier for me to merge your PR into the code base.
-  • If your PR is a user-visible change, you may add a bullet to the top of NEWS.md describing the changes made. You may optionally add your GitHub username, and links to relevant issue(s)/PR(s).
-
-Code of Conduct
-
-Please note that this project is released with a Contributor Code of Conduct. By participating in this project you agree to abide by its terms.
diff --git a/docs/apple-touch-icon-120x120.png b/docs/apple-touch-icon-120x120.png
deleted file mode 100644
index 61543c1c..00000000
Binary files a/docs/apple-touch-icon-120x120.png and /dev/null differ
diff --git a/docs/apple-touch-icon-60x60.png b/docs/apple-touch-icon-60x60.png
deleted file mode 100644
index bbfc9336..00000000
Binary files a/docs/apple-touch-icon-60x60.png and /dev/null differ
diff --git a/docs/apple-touch-icon-76x76.png b/docs/apple-touch-icon-76x76.png
deleted file mode 100644
index 44a90335..00000000
Binary files a/docs/apple-touch-icon-76x76.png and /dev/null differ
diff --git a/docs/apple-touch-icon.png b/docs/apple-touch-icon.png
deleted file mode 100644
index 61543c1c..00000000
Binary files a/docs/apple-touch-icon.png and /dev/null differ
diff --git a/docs/articles/anova-statistics.html b/docs/articles/anova-statistics.html
deleted file mode 100644
index 41980f94..00000000
--- a/docs/articles/anova-statistics.html
+++ /dev/null
@@ -1,237 +0,0 @@
-Statistics for Anova Tables • sjstats
-Effect Size Statistics for Anova Tables
-
-This vignette demonstrates those functions of the sjstats-package that deal with Anova tables. These functions report different effect size measures, which are useful beyond significance tests (p-values), because they estimate the magnitude of effects, independent from sample size. sjstats provides the following functions: eta_sq(), omega_sq(), epsilon_sq() and anova_stats().
-
-Before we start, we fit a simple model:
-
library(sjstats)
-# load sample data
-data(efc)
-
-# fit linear model
-fit <- aov(
-  c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age,
-  data = efc
-)
-
-All functions accept objects of class aov or anova, so you can also use model fits from the car package, which allows fitting ANOVA models with different types of sums of squares. Other objects, like lm, will be coerced to anova internally.

-
-The following functions return the effect size statistic as a named numeric vector, using the model's term names.

-
-

-Eta-Squared

-

The eta-squared is the proportion of the total variability in the dependent variable that is accounted for by the variation in the independent variable. It is the ratio of the sum of squares for each group level to the total sum of squares. It can be interpreted as percentage of variance accounted for by a variable.

-

For variables with 1 degree of freedom (in the numerator), the square root of eta-squared is equal to the correlation coefficient r. For variables with more than 1 degree of freedom, eta-squared equals R2. This makes eta-squared easily interpretable. Furthermore, these effect sizes can easily be converted into effect size measures that can be, for instance, further processed in meta-analyses.

-

Eta-squared can be computed simply with:

-
eta_sq(fit, ci.lvl = .95)
-#>                  term etasq conf.low conf.high
-#> 1   as.factor(e42dep) 0.266    0.217     0.312
-#> 2 as.factor(c172code) 0.005    0.000     0.018
-#> 3             c160age 0.048    0.024     0.080
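As a quick check of the two statements above, eta-squared can be reproduced by hand from the model's anova table (a minimal sketch, reusing the fit object from above):

a <- anova(fit)
ss <- a[["Sum Sq"]]
ss[1] / sum(ss)   # eta-squared for as.factor(e42dep), ~ 0.266
sqrt(0.048)       # for the single-df term c160age, sqrt(eta-squared) is r, ~ 0.219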
-
-
-

-Partial Eta-Squared

-

The partial eta-squared value is the ratio of the sum of squares for each group level to the sum of squares for each group level plus the residual sum of squares. It is more difficult to interpret, because its value strongly depends on the variability of the residuals. Partial eta-squared values should be reported with caution, and Levine and Hullett (2002) recommend reporting eta- or omega-squared rather than partial eta-squared.

-

Use the partial-argument to compute partial eta-squared values:

-
eta_sq(fit, partial = TRUE, ci.lvl = .95)
-#>                  term partial.etasq conf.low conf.high
-#> 1   as.factor(e42dep)         0.281    0.232     0.327
-#> 2 as.factor(c172code)         0.008    0.000     0.023
-#> 3             c160age         0.066    0.038     0.101
-
-
-

-Omega-Squared

-

While eta-squared estimates tend to be biased in certain situations, e.g. when the sample size is small or the independent variables have many group levels, omega-squared estimates are corrected for this bias.

-

Omega-squared can be simply computed with:

-
omega_sq(fit, ci.lvl = .95)
-#>                  term omegasq conf.low conf.high
-#> 1   as.factor(e42dep)   0.263    0.214     0.310
-#> 2 as.factor(c172code)   0.004   -0.002     0.016
-#> 3             c160age   0.048    0.023     0.078
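The bias correction can be verified by hand with the textbook omega-squared formula, (SS_effect - df_effect * MS_error) / (SS_total + MS_error), using the sums of squares and the residual mean square that anova_stats() reports further below (a sketch, not package code):

(577756.33 - 3 * 1770.307) /
  (577756.33 + 11722.05 + 105169.60 + 1476436.34 + 1770.307)  # ~ 0.263 for as.factor(e42dep)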
-
-
-

-Partial Omega-Squared

-
-omega_sq() also has a partial-argument to compute partial omega-squared values. Computing the partial omega-squared statistics is based on bootstrapping. In this case, use n to define the number of samples (1000 by default).

-
omega_sq(fit, partial = TRUE, ci.lvl = .95)
-#>                  term partial.omegasq conf.low conf.high
-#> 1   as.factor(e42dep)           0.278    0.229     0.325
-#> 2 as.factor(c172code)           0.005   -0.002     0.020
-#> 3             c160age           0.065    0.036     0.100
-
-
-
-

-Epsilon Squared

-
-Epsilon-squared is a less common measure of effect size. It is sometimes considered an "adjusted r-squared" value. You can compute this effect size using epsilon_sq().

-
epsilon_sq(fit, ci.lvl = .95)
-#>                  term epsilonsq conf.low conf.high
-#> 1   as.factor(e42dep)     0.264    0.214     0.310
-#> 2 as.factor(c172code)     0.004   -0.002     0.016
-#> 3             c160age     0.048    0.023     0.079
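Epsilon-squared uses the same bias-corrected numerator, but the plain total sum of squares in the denominator, which is why it is read as an adjusted r-squared; a hand-check against the values reported below:

(577756.33 - 3 * 1770.307) /
  (577756.33 + 11722.05 + 105169.60 + 1476436.34)  # ~ 0.264 for as.factor(e42dep)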
-
-
-

-Complete Statistical Table Output

-

The anova_stats() function takes a model input and computes a comprehensive summary, including the above effect size measures, returned as tidy data frame:

-
anova_stats(fit)
-#>                  term  df      sumsq     meansq statistic p.value etasq partial.etasq omegasq partial.omegasq epsilonsq cohens.f power
-#> 1   as.factor(e42dep)   3  577756.33 192585.444   108.786   0.000 0.266         0.281   0.263           0.278     0.264    0.626  1.00
-#> 2 as.factor(c172code)   2   11722.05   5861.024     3.311   0.037 0.005         0.008   0.004           0.005     0.004    0.089  0.63
-#> 3             c160age   1  105169.60 105169.595    59.408   0.000 0.048         0.066   0.048           0.065     0.048    0.267  1.00
-#> 4           Residuals 834 1476436.34   1770.307        NA      NA    NA            NA      NA              NA        NA       NA    NA
-
-Like the other functions, the input may also be an object of class anova, so you can also use model fits from the car package, which allows fitting ANOVA models with different types of sums of squares:

-
anova_stats(car::Anova(fit, type = 3))
-#>                  term       sumsq     meansq  df statistic p.value etasq partial.etasq omegasq partial.omegasq epsilonsq cohens.f power
-#> 1   as.factor(e42dep)  426461.571 142153.857   3    80.299   0.000 0.212         0.224   0.209           0.221     0.209    0.537 1.000
-#> 2 as.factor(c172code)    7352.049   3676.025   2     2.076   0.126 0.004         0.005   0.002           0.003     0.002    0.071 0.429
-#> 3             c160age  105169.595 105169.595   1    59.408   0.000 0.052         0.066   0.051           0.065     0.051    0.267 1.000
-#> 4           Residuals 1476436.343   1770.307 834        NA      NA    NA            NA      NA              NA        NA       NA    NA
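The effectsize package provides the same measures; a hedged sketch of the equivalent calls (these functions exist in effectsize, but the exact output layout may differ across versions):

library(effectsize)
eta_squared(fit, partial = FALSE)
omega_squared(fit, partial = FALSE)
epsilon_squared(fit)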
-
-
-

-References

-

Levine TR, Hullet CR. Eta Squared, Partial Eta Squared, and Misreporting of Effect Size in Communication Research. Human Communication Research 28(4); 2002: 612-625

diff --git a/docs/articles/anova-statistics_files/header-attrs-2.1/header-attrs.js b/docs/articles/anova-statistics_files/header-attrs-2.1/header-attrs.js
deleted file mode 100644
index dd57d92e..00000000
--- a/docs/articles/anova-statistics_files/header-attrs-2.1/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
-// be compatible with the behavior of Pandoc < 2.8).
-document.addEventListener('DOMContentLoaded', function(e) {
-  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
-  var i, h, a;
-  for (i = 0; i < hs.length; i++) {
-    h = hs[i];
-    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
-    a = h.attributes;
-    while (a.length > 0) h.removeAttribute(a[0].name);
-  }
-});
diff --git a/docs/articles/bayesian-statistics.html b/docs/articles/bayesian-statistics.html
deleted file mode 100644
index d0e15be6..00000000
--- a/docs/articles/bayesian-statistics.html
+++ /dev/null
@@ -1,235 +0,0 @@
-Summary of Mediation Analysis using Bayesian Regression Models • sjstats
-
-This vignette demonstrates the mediation()-function in sjstats. Before we start, we fit some models, including a mediation-object from the mediation-package, which we use for comparison with brms.
-
library(sjstats)
-library(mediation)
-library(brms)
-
-# load sample data
-data(jobs)
-set.seed(123)
-
-# linear models, for mediation analysis
-b1 <- lm(job_seek ~ treat + econ_hard + sex + age, data = jobs)
-b2 <- lm(depress2 ~ treat + job_seek + econ_hard + sex + age, data = jobs)
-
-# mediation analysis, for comparison with brms
-m1 <- mediate(b1, b2, sims = 1000, treat = "treat", mediator = "job_seek")
-
# Fit Bayesian mediation model
-f1 <- bf(job_seek ~ treat + econ_hard + sex + age)
-f2 <- bf(depress2 ~ treat + job_seek + econ_hard + sex + age)
-
-m2 <- brm(f1 + f2 + set_rescor(FALSE), data = jobs, cores = 4)
-
-mediation() is a summary function, especially for mediation analysis, i.e. for multivariate response models with causal mediation effects.
-
-In the model m2, treat is the treatment effect, job_seek is the mediator effect, f1 describes the mediator model and f2 describes the outcome model.
-
-mediation() returns a data frame with information on the direct effect (median value of posterior samples from treatment of the outcome model), mediator effect (median value of posterior samples from mediator of the outcome model), indirect effect (median value of the multiplication of the posterior samples from mediator of the outcome model and the posterior samples from treatment of the mediation model) and the total effect (median value of sums of posterior samples used for the direct and indirect effect). The proportion mediated is the indirect effect divided by the total effect.
-
-The simplest call just needs the model-object.
-
mediation(m2)
-#> 
-#> # Causal Mediation Analysis for Stan Model
-#> 
-#>   Treatment: treat
-#>    Mediator: job_seek
-#>    Response: depress2
-#> 
-#>                  Estimate    HDI (90%)
-#>   Direct effect:    -0.04 [-0.11 0.03]
-#> Indirect effect:    -0.02 [-0.04 0.00]
-#>    Total effect:    -0.05 [-0.13 0.02]
-#> 
-#> Proportion mediated: 28.14% [-79.57% 135.86%]
-

Typically, mediation() finds the treatment and mediator variables automatically. If this does not work, use the treatment and mediator arguments to specify the related variable names. For all values, the 90% HDIs are calculated by default. Use prob to calculate a different interval.

-

Here is a comparison with the mediation package. Note that the summary()-output of the mediation package shows the indirect effect first, followed by the direct effect.

-
summary(m1)
-#> 
-#> Causal Mediation Analysis 
-#> 
-#> Quasi-Bayesian Confidence Intervals
-#> 
-#>                Estimate 95% CI Lower 95% CI Upper p-value
-#> ACME            -0.0157      -0.0387         0.01    0.19
-#> ADE             -0.0438      -0.1315         0.04    0.35
-#> Total Effect    -0.0595      -0.1530         0.02    0.21
-#> Prop. Mediated   0.2137      -2.0277         2.70    0.32
-#> 
-#> Sample Size Used: 899 
-#> 
-#> 
-#> Simulations: 1000
-
-mediation(m2, prob = .95)
-#> 
-#> # Causal Mediation Analysis for Stan Model
-#> 
-#>   Treatment: treat
-#>    Mediator: job_seek
-#>    Response: depress2
-#> 
-#>                  Estimate    HDI (95%)
-#>   Direct effect:    -0.04 [-0.12 0.04]
-#> Indirect effect:    -0.02 [-0.04 0.01]
-#>    Total effect:    -0.05 [-0.15 0.03]
-#> 
-#> Proportion mediated: 28.14% [-178.65% 234.94%]
-
-If you want to calculate mean instead of median values from the posterior samples, use the typical-argument. Furthermore, there is a print()-method, which allows printing more digits.
-
mediation(m2, typical = "mean", prob = .95) %>% print(digits = 4)
-#> 
-#> # Causal Mediation Analysis for Stan Model
-#> 
-#>   Treatment: treat
-#>    Mediator: job_seek
-#>    Response: depress2
-#> 
-#>                  Estimate        HDI (95%)
-#>   Direct effect:  -0.0395 [-0.1244 0.0450]
-#> Indirect effect:  -0.0158 [-0.0400 0.0086]
-#>    Total effect:  -0.0553 [-0.1482 0.0302]
-#> 
-#> Proportion mediated: 28.5975% [-178.1953% 235.3902%]
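The proportion mediated can be reproduced from the printed estimates, since it is simply the indirect effect divided by the total effect:

-0.0158 / -0.0553  # ~ 0.286, i.e. the 28.5975% shown above, up to rounding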
-
-As you can see, the results are similar to what the mediation package produces for non-Bayesian models.
-
-References
-
-Bürkner, P. C. (2017). brms: An R package for Bayesian multilevel models using Stan. Journal of Statistical Software, 80(1), 1-28
diff --git a/docs/articles/bayesian-statistics_files/figure-html/unnamed-chunk-6-1.png b/docs/articles/bayesian-statistics_files/figure-html/unnamed-chunk-6-1.png
deleted file mode 100644
index 478c0a0b..00000000
Binary files a/docs/articles/bayesian-statistics_files/figure-html/unnamed-chunk-6-1.png and /dev/null differ
diff --git a/docs/articles/bayesian-statistics_files/header-attrs-2.1/header-attrs.js b/docs/articles/bayesian-statistics_files/header-attrs-2.1/header-attrs.js
deleted file mode 100644
index dd57d92e..00000000
--- a/docs/articles/bayesian-statistics_files/header-attrs-2.1/header-attrs.js
+++ /dev/null
@@ -1,12 +0,0 @@
-[same Pandoc header-attrs helper as removed above for the anova article; duplicate omitted]
diff --git a/docs/articles/index.html b/docs/articles/index.html
deleted file mode 100644
index 429c2ef3..00000000
--- a/docs/articles/index.html
+++ /dev/null
@@ -1,175 +0,0 @@
-Articles • sjstats
diff --git a/docs/articles/mixedmodels-statistics.html b/docs/articles/mixedmodels-statistics.html
deleted file mode 100644
index 5fa1720d..00000000
--- a/docs/articles/mixedmodels-statistics.html
+++ /dev/null
@@ -1,247 +0,0 @@
-Statistics for Mixed Effects Models • sjstats
-Statistics and Measures for Mixed Effects Models
-
-This vignette demonstrates those functions of the sjstats-package that deal especially with mixed effects models: design_effect(), samplesize_mixed() and scale_weights().
-
-Before we start, we fit a simple linear mixed model:
-
-

-Sample Size Calculation for Mixed Models

-

The first two functions, design_effect() and samplesize_mixed(), can be used to approximately calculate the sample size in the context of power calculation. Calculating the sample size for simple linear models is pretty straightforward, however, for (linear) mixed models, statistical power is affected through the change of the variance of test statistics. This is what Hsieh et al. (2003) call a design effect (or variance inflation factor, VIF). Once this design effect is calculated, the sample size calculated for a standard design can be adjusted accordingly.

-
-

-Design Effect for Two-Level Mixed Models

-

design_effect() computes this design effect for linear mixed models with two-level design. It requires the approximated average number of observations per grouping cluster (i.e. level-2 unit) and the assumed intraclass correlation coefficient (ICC) for the multilevel-model. Typically, the minimum assumed value for the ICC is 0.05.

- -
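A minimal sketch of such a call, using an assumed average cluster size of 30 and the minimum ICC of 0.05 mentioned above (the underlying formula is the Kish design effect, 1 + (n - 1) * icc):

library(sjstats)
design_effect(n = 30, icc = 0.05)
#> [1] 2.45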
-
-

-Calculating the Sample Size for Linear Mixed Models

-

samplesize_mixed() combines the functions for power calculation from the pwr-package and design effect design_effect(). It computes an approximated sample size for linear mixed models (two-level-designs), based on power-calculation for standard design and adjusted for design effect for 2-level-designs.

-
-There are more ways to perform power calculations for multilevel models, however, most of these require very detailed knowledge about the sample characteristics and performing simulation studies. samplesize_mixed() is a more pragmatic alternative to these approaches.
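A hedged sketch of such a pragmatic calculation, where eff.size and k are the assumed argument names for the effect size and the number of clusters, and power, significance level and ICC fall back to package defaults:

samplesize_mixed(eff.size = 0.3, k = 30)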

-
-
-
-

-Rescale model weights for complex samples

-
-Most functions to fit multilevel and mixed effects models only allow specifying frequency weights, but not design (i.e. sampling or probability) weights, which should be used when analyzing complex samples and survey data.
-
-
-scale_weights() implements an algorithm proposed by Asparouhov (2006) and Carle (2009) to rescale design weights in survey data to account for the grouping structure of multilevel models, which then can be used for multilevel modelling.
-
-

To calculate a weight-vector that can be used in multilevel models, scale_weights() needs the data frame with survey data as x-argument. This data frame should contain 1) a cluster ID (argument cluster.id), which represents the strata of the survey data (the level-2-cluster variable) and 2) the probability weights (argument pweight), which represents the design or sampling weights of the survey data (level-1-weight).

-
-scale_weights() then returns the original data frame, including two new variables: svywght_a, where the sample weights pweight are adjusted by a factor that represents the proportion of cluster size divided by the sum of sampling weights within each cluster. The adjustment factor for svywght_b is the sum of sample weights within each cluster divided by the sum of squared sample weights within each cluster (see Carle (2009), Appendix B, for details).
-
- -
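A minimal sketch of such a call, using the nhanes_sample data assumed to ship with sjstats; cluster.id and pweight are the argument names described above:

data(nhanes_sample)
scale_weights(nhanes_sample, cluster.id = SDMVSTRA, pweight = WTINT2YR)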
-
-
-

-References

-
-Asparouhov T. 2006. General Multi-Level Modeling with Sampling Weights. Communications in Statistics—Theory and Methods (35): 439–460
-
-

Carle AC. 2009. Fitting multilevel models in complex survey data with design weights: Recommendations. BMC Medical Research Methodology 9(49): 1-13

-

Hsieh FY, Lavori PW, Cohen HJ, Feussner JR. 2003. An Overview of Variance Inflation Factors for Sample-Size Calculation. Evaluation & the Health Professions 26: 239–257. doi: 10.1177/0163278703255230

diff --git a/docs/authors.html b/docs/authors.html
deleted file mode 100644
index fb60f1a3..00000000
--- a/docs/authors.html
+++ /dev/null
@@ -1,171 +0,0 @@
-Citation and Authors • sjstats
-Lüdecke D (????). sjstats: Statistical Functions for Regression Models (Version 0.18.0). doi: 10.5281/zenodo.1284472, https://CRAN.R-project.org/package=sjstats.
-
-@Manual{,
-  title = {sjstats: Statistical Functions for Regression Models (Version 0.18.0)},
-  author = {Daniel Lüdecke},
-  url = {https://CRAN.R-project.org/package=sjstats},
-  doi = {10.5281/zenodo.1284472},
-}
diff --git a/docs/bootstrap-toc.css b/docs/bootstrap-toc.css
deleted file mode 100644
index 5a859415..00000000
--- a/docs/bootstrap-toc.css
+++ /dev/null
@@ -1,60 +0,0 @@
-[vendored Bootstrap TOC stylesheet; boilerplate omitted]
diff --git a/docs/bootstrap-toc.js b/docs/bootstrap-toc.js
deleted file mode 100644
index 1cdd573b..00000000
--- a/docs/bootstrap-toc.js
+++ /dev/null
@@ -1,159 +0,0 @@
-[vendored Bootstrap TOC script; boilerplate omitted]
diff --git a/docs/docsearch.css b/docs/docsearch.css
deleted file mode 100644
index e5f1fe1d..00000000
--- a/docs/docsearch.css
+++ /dev/null
@@ -1,148 +0,0 @@
-[pkgdown docsearch stylesheet; boilerplate omitted]
diff --git a/docs/docsearch.js b/docs/docsearch.js
deleted file mode 100644
index b35504cd..00000000
--- a/docs/docsearch.js
+++ /dev/null
@@ -1,85 +0,0 @@
-[pkgdown docsearch script; boilerplate omitted]
diff --git a/docs/favicon-16x16.png b/docs/favicon-16x16.png
deleted file mode 100644
index a081ac2e..00000000
Binary files a/docs/favicon-16x16.png and /dev/null differ
diff --git a/docs/favicon-32x32.png b/docs/favicon-32x32.png
deleted file mode 100644
index 1addd238..00000000
Binary files a/docs/favicon-32x32.png and /dev/null differ
diff --git a/docs/favicon.ico b/docs/favicon.ico
deleted file mode 100644
index e460caa7..00000000
Binary files a/docs/favicon.ico and /dev/null differ
diff --git a/docs/index.html b/docs/index.html
deleted file mode 100644
index f3d87cad..00000000
--- a/docs/index.html
+++ /dev/null
@@ -1,197 +0,0 @@
-Collection of Convenient Functions for Common Statistical Computations • sjstats
    -
    - - - - -
    -
    -
    - - -

    Collection of convenient functions for common statistical computations, which are not directly provided by R’s base or stats packages.

    -

This package aims to provide, first, shortcuts for statistical measures that otherwise could only be calculated with additional effort (like Cramer’s V, Phi, or effect size statistics like Eta or Omega squared), or for which no functions are currently available.

    -

Second, the package focuses on weighted variants of common statistical measures and tests, like weighted standard error, mean, t-test, correlation, and more.

    -

The tools include:

    -
      -
    • Especially for mixed models: design effect, sample size calculation
    • -
    • Especially for Bayesian models: mediation analysis
    • -
    • For anova-tables: Eta-squared, Partial Eta-squared, Omega-squared, Partial Omega-squared and Epsilon-squared statistics
    • -
    • Weighted statistics and tests for: mean, median, standard error, standard deviation, correlation, Chi-squared test, t-test, Mann-Whitney-U-test
    • -
    -
    -

    -Documentation

    -

    Please visit https://strengejacke.github.io/sjstats/ for documentation and vignettes.

    -
    -
    -

    -Installation

    -
    -

    -Latest development build

    -

To install the latest development snapshot (see latest changes below), type the following commands into the R console:

    -
    library(devtools)
    -devtools::install_github("strengejacke/sjstats")
    -
    -
    -

-Official, stable release

    -


    -

To install the latest stable release from CRAN, type the following command into the R console:

    -
    install.packages("sjstats")
    -
    -
    -
    -

    -Citation

    -

    In case you want / have to cite my package, please use citation('sjstats') for citation information.

    -


    -
    -
    -
    - - -
    - - - -
    - - - - - - diff --git a/docs/link.svg b/docs/link.svg deleted file mode 100644 index 88ad8276..00000000 --- a/docs/link.svg +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - diff --git a/docs/logo.png b/docs/logo.png deleted file mode 100644 index 7c24480b..00000000 Binary files a/docs/logo.png and /dev/null differ diff --git a/docs/news/index.html b/docs/news/index.html deleted file mode 100644 index 1a5db5f6..00000000 --- a/docs/news/index.html +++ /dev/null @@ -1,670 +0,0 @@ - - - - - - - - -Changelog • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    -sjstats 0.18.0 2020-05-06 -

    -
    -

    -General

    -
      -
    • Effect size computation functions (like eta_sq()) now internally call the related functions from the effectsize package.
    • -
• Remove packages from “Suggests” that have been removed from CRAN.
    • -
    -
    -
    -
    -

    -sjstats 0.17.9 2020-02-06 -

    -
    -

    -Bug fixes

    -
      -
    • Fixed documentation for chisq_gof().
    • -
    • Fixed issue in anova_stats() with incorrect effect sizes for certain Anova types (that included an intercept).
    • -
    -
    -
    -
    -

    -sjstats 0.17.8 2020-01-21 -

    -
    -

    -Deprecated and defunct

    -

    sjstats is being re-structured, and many functions are re-implemented in new packages that are part of a new project called easystats.

    -

Therefore, the following functions are now deprecated:

    - -
    -
    -

    -General

    - -
    -
    -
    -

    -sjstats 0.17.7 2019-11-14 -

    -
    -

    -Deprecated and defunct

    -

    sjstats is being re-structured, and many functions are re-implemented in new packages that are part of a new project called easystats. The aim of easystats is to provide a unifying and consistent framework to tame, discipline and harness the scary R statistics and their pesky models.

    -

Therefore, the following functions are now deprecated:

    - -
    -
    -

    -General

    -
      -
    • Revise some functions to cope with the forthcoming insight update.
    • -
    -
    -
    -
    -

    -sjstats 0.17.6 2019-09-08 -

    -
    -

    -General

    - -
    -
    -

    -New functions

    -
      -
    • -svyglm.zip() to fit zero-inflated Poisson models for survey-designs.
    • -
    -
    -
    -

    -Changes to functions

    -
      -
    • -phi() and cramer() can now compute confidence intervals.
    • -
    • -tidy_stan() removes prior parameters from output.
    • -
    • -tidy_stan() now also prints the probability of direction.
    • -
    -
    -
    -

    -Bug fixes

    - -
    -
    -
    -

    -sjstats 0.17.5 2019-06-04 -

    -
    -

    -New functions

    - -
    -
    -

    -Deprecated and defunct

    -

    sjstats is being re-structured, and many functions are re-implemented in new packages that are part of a new project called easystats. The aim of easystats is to provide a unifying and consistent framework to tame, discipline and harness the scary R statistics and their pesky models.

    -

Therefore, the following functions are now deprecated:

    - -
    -
    -

    -Changes to functions

    -
      -
    • Anova-stats functions (like eta_sq()) get a method-argument to define the method for computing confidence intervals from bootstrapping.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
    • In some situations, smpsize_lmm() could result in negative sample-size recommendations. This was fixed, and a warning is now shown indicating that the parameters for the power-calculation should be modified.
    • -
• Fixed issue with incorrectly calculated effect size r in mwu() if the group-factor contained more than two groups.
    • -
    -
    -
    -
    -

    -sjstats 0.17.4 2019-03-15 -

    -
    -

    -General

    -
      -
• The following models/objects are now supported by model-information functions like model_family(), link_inverse() or model_frame(): MixMod (package GLMMadaptive), MCMCglmm, mlogit and gmnl.
    • -
    • Reduce package dependencies.
    • -
    -
    -
    -

    -New functions

    -
      -
    • -cred_int(), to compute uncertainty intervals of Bayesian models. Mimics the behaviour and style of hdi() and is thus a convenient complement to functions like posterior_interval().
    • -
    -
    -
    -

    -Changes to functions

    -
      -
    • -equi_test() now finds better defaults for models with binomial outcome (like logistic regression models).
    • -
    • -r2() for mixed models now also should work properly for mixed models fitted with rstanarm.
    • -
• -anova_stats() and similar functions (e.g. eta_sq()) now all preserve original term names.
    • -
    • -model_family() now returns $is_count = TRUE, when model is a count-model, and $is_beta = TRUE for models with beta-family.
    • -
    • -pred_vars() checks that return value has only unique values.
    • -
    • -pred_vars() gets a zi-argument to return the variables from a model’s zero-inflation-formula.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
• Fix minor issues in wtd_sd() and wtd_mean() when weight was NULL (which usually shouldn’t be the case anyway).
    • -
    • Fix potential issue with deparse(), cutting off very long formulas in various functions.
    • -
    • Fix encoding issues in help-files.
    • -
    -
    -
    -
    -

    -sjstats 0.17.3 2019-01-07 -

    -
    -

    -General

    -
      -
    • Export dplyr::n(), to meet forthcoming changes in dplyr 0.8.0.
    • -
    -
    -
    -

    -Changes to functions

    -
      -
    • -boot_ci() gets a ci.lvl-argument.
    • -
    • The rotation-argument in pca_rotate() now supports all rotations from psych::principal().
    • -
    • -pred_vars() gets a fe.only-argument to return only fixed effects terms from mixed models, and a disp-argument to return the variables from a model’s dispersion-formula.
    • -
• -icc() for Bayesian models gets an adjusted-argument, to calculate adjusted and conditional ICC (however, only for Gaussian models).
    • -
    • For icc() for non-Gaussian Bayes-models, a message is printed that recommends setting argument ppd to TRUE.
    • -
    • -resp_val() and resp_var() now also work for brms-models with additional response information (like trial() in formula).
    • -
    • -resp_var() gets a combine-argument, to return either the name of the matrix-column or the original variable names for matrix-columns.
    • -
    • -model_frame() now also returns the original variables for matrix-column-variables.
    • -
    • -model_frame() now also returns the variable from the dispersion-formula of glmmTMB-models.
    • -
    • -model_family() and link_inverse() now supports glmmPQL, felm and lm_robust-models.
    • -
• -anova_stats() and similar functions (omega_sq() etc.) now support gam-models from package gam.
    • -
    • -p_value() now supports objects of class svyolr.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
    • Fix issue with se() and get_re_var() for objects returned by icc().
    • -
    • Fix issue with icc() for Stan-models.
    • -
    • -var_names() did not clear terms with log-log transformation, e.g. log(log(y)).
    • -
    • Fix issue in model_frame() for models with splines with only one column.
    • -
    -
    -
    -
    -

    -sjstats 0.17.2 2018-11-15 -

    -
    -

    -General

    -
      -
    • Revised help-files for r2() and icc(), also by adding more references.
    • -
    -
    -
    -

    -New functions

    -
      -
    • -re_grp_var() to find group factors of random effects in mixed models.
    • -
    -
    -
    -

    -Changes to functions

    -
      -
    • -omega_sq() and eta_sq() give more informative messages when using non-supported objects.
    • -
    • -r2() and icc() give more informative warnings and messages.
    • -
    • -tidy_stan() supports printing simplex parameters of monotonic effects of brms models.
    • -
• -grpmean() and mwu() get a file and an encoding argument, to save the HTML output as a file.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
    • -model_frame() now correctly names the offset-columns for terms provided as offset-argument (i.e. for models where the offset was not specified inside the formula).
    • -
    • Fixed issue with weights-argument in grpmean() when variable name was passed as character vector.
    • -
    • Fixed issue with r2() for glmmTMB models with ar1 random effects structure.
    • -
    -
    -
    -
    -

    -sjstats 0.17.1 2018-10-02 -

    -
    -

    -New functions

    -
      -
    • -wtd_chisqtest() to compute a weighted Chi-squared test.
    • -
    • -wtd_median() to compute the weighted median of variables.
    • -
    • -wtd_cor() to compute weighted correlation coefficients of variables.
    • -
    -
    -
    -

    -Changes to functions

    -
      -
    • -mediation() can now cope with models from different families, e.g. if the moderator or outcome is binary, while the treatment-effect is continuous.
    • -
    • -model_frame(), link_inverse(), pred_vars(), resp_var(), resp_val(), r2() and model_family() now support clm2-objects from package ordinal.
    • -
    • -anova_stats() gives a more informative message for non-supported models or ANOVA-options.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
    • Fixed issue with model_family() and link_inverse() for models fitted with pscl::hurdle() or pscl::zeroinfl().
    • -
    • Fixed issue with wrong title in grpmean() for grouped data frames, when grouping variable was an unlabelled factor.
    • -
    • Fix issue with model_frame() for coxph-models with polynomial or spline-terms.
    • -
    • Fix issue with mediation() for logical variables.
    • -
    -
    -
    -
    -

    -sjstats 0.17.0 2018-08-20 -

    -
    -

    -General

    -
      -
    • Reduce package dependencies.
    • -
    -
    -
    -

    -New functions

    -
      -
    • -wtd_ttest() to compute a weighted t-test.
    • -
    • -wtd_mwu() to compute a weighted Mann-Whitney-U or Kruskal-Wallis test.
    • -
    -
    -
    -

    -Changes to functions

    -
      -
• -robust() was revised, getting more arguments to specify different types of covariance-matrix estimation, and handling these more flexibly.
    • -
    • Improved print()-method for tidy_stan() for brmsfit-objects with categorical-families.
    • -
    • -se() now also computes standard errors for relative frequencies (proportions) of a vector.
    • -
    • -r2() now also computes r-squared values for glmmTMB-models from genpois-families.
    • -
    • -r2() gives more precise warnings for non-supported model-families.
    • -
    • -xtab_statistics() gets a weights-argument, to compute measures of association for contingency tables for weighted data.
    • -
    • The statistics-argument in xtab_statistics() gets a "fisher"-option, to force Fisher’s Exact Test to be used.
    • -
    • Improved variance calculation in icc() for generalized linear mixed models with Poisson or negative binomial families.
    • -
    • -icc() gets an adjusted-argument, to calculate the adjusted and conditional ICC for mixed models.
    • -
• To get consistent argument names across functions, the argument weight.by is now deprecated and renamed to weights.
    • -
    -
    -
    -

    -Bug fixes

    -
      -
    • Fix issues with effect size computation for repeated-measure Anova when using bootstrapping to compute confidence intervals.
    • -
• -grpmean() now also adjusts the n-column for weighted data.
    • -
    • -icc(), re_var() and get_re_var() now correctly compute the random-effect-variances for models with multiple random slopes per random effect term (e.g., (1 + rs1 + rs2 | grp)).
    • -
    • Fix issues in tidy_stan(), mcse(), hdi() and n_eff() for stan_polr()-models.
    • -
    • Plotting equi_test() did not work for intercept-only models.
    • -
    -
    -
    -
    - - - -
    - - - -
    - - - - - - - - diff --git a/docs/pkgdown.css b/docs/pkgdown.css deleted file mode 100644 index c01e5923..00000000 --- a/docs/pkgdown.css +++ /dev/null @@ -1,367 +0,0 @@ -/* Sticky footer */ - -/** - * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ - * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css - * - * .Site -> body > .container - * .Site-content -> body > .container .row - * .footer -> footer - * - * Key idea seems to be to ensure that .container and __all its parents__ - * have height set to 100% - * - */ - -html, body { - height: 100%; -} - -body { - position: relative; -} - -body > .container { - display: flex; - height: 100%; - flex-direction: column; -} - -body > .container .row { - flex: 1 0 auto; -} - -footer { - margin-top: 45px; - padding: 35px 0 36px; - border-top: 1px solid #e5e5e5; - color: #666; - display: flex; - flex-shrink: 0; -} -footer p { - margin-bottom: 0; -} -footer div { - flex: 1; -} -footer .pkgdown { - text-align: right; -} -footer p { - margin-bottom: 0; -} - -img.icon { - float: right; -} - -img { - max-width: 100%; -} - -/* Fix bug in bootstrap (only seen in firefox) */ -summary { - display: list-item; -} - -/* Typographic tweaking ---------------------------------*/ - -.contents .page-header { - margin-top: calc(-60px + 1em); -} - -dd { - margin-left: 3em; -} - -/* Section anchors ---------------------------------*/ - -a.anchor { - margin-left: -30px; - display:inline-block; - width: 30px; - height: 30px; - visibility: hidden; - - background-image: url(./link.svg); - background-repeat: no-repeat; - background-size: 20px 20px; - background-position: center center; -} - -.hasAnchor:hover a.anchor { - visibility: visible; -} - -@media (max-width: 767px) { - .hasAnchor:hover a.anchor { - visibility: hidden; - } -} - - -/* Fixes for fixed navbar --------------------------*/ - -.contents h1, .contents h2, .contents h3, .contents h4 { - padding-top: 60px; - margin-top: -40px; -} - -/* Navbar submenu --------------------------*/ - -.dropdown-submenu { - position: relative; -} - -.dropdown-submenu>.dropdown-menu { - top: 0; - left: 100%; - margin-top: -6px; - margin-left: -1px; - border-radius: 0 6px 6px 6px; -} - -.dropdown-submenu:hover>.dropdown-menu { - display: block; -} - -.dropdown-submenu>a:after { - display: block; - content: " "; - float: right; - width: 0; - height: 0; - border-color: transparent; - border-style: solid; - border-width: 5px 0 5px 5px; - border-left-color: #cccccc; - margin-top: 5px; - margin-right: -10px; -} - -.dropdown-submenu:hover>a:after { - border-left-color: #ffffff; -} - -.dropdown-submenu.pull-left { - float: none; -} - -.dropdown-submenu.pull-left>.dropdown-menu { - left: -100%; - margin-left: 10px; - border-radius: 6px 0 6px 6px; -} - -/* Sidebar --------------------------*/ - -#pkgdown-sidebar { - margin-top: 30px; - position: -webkit-sticky; - position: sticky; - top: 70px; -} - -#pkgdown-sidebar h2 { - font-size: 1.5em; - margin-top: 1em; -} - -#pkgdown-sidebar h2:first-child { - margin-top: 0; -} - -#pkgdown-sidebar .list-unstyled li { - margin-bottom: 0.5em; -} - -/* bootstrap-toc tweaks ------------------------------------------------------*/ - -/* All levels of nav */ - -nav[data-toggle='toc'] .nav > li > a { - padding: 4px 20px 4px 6px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; -} - -nav[data-toggle='toc'] .nav > li > a:hover, -nav[data-toggle='toc'] .nav > li > a:focus { - padding-left: 5px; - color: inherit; 
- border-left: 1px solid #878787; -} - -nav[data-toggle='toc'] .nav > .active > a, -nav[data-toggle='toc'] .nav > .active:hover > a, -nav[data-toggle='toc'] .nav > .active:focus > a { - padding-left: 5px; - font-size: 1.5rem; - font-weight: 400; - color: inherit; - border-left: 2px solid #878787; -} - -/* Nav: second level (shown on .active) */ - -nav[data-toggle='toc'] .nav .nav { - display: none; /* Hide by default, but at >768px, show it */ - padding-bottom: 10px; -} - -nav[data-toggle='toc'] .nav .nav > li > a { - padding-left: 16px; - font-size: 1.35rem; -} - -nav[data-toggle='toc'] .nav .nav > li > a:hover, -nav[data-toggle='toc'] .nav .nav > li > a:focus { - padding-left: 15px; -} - -nav[data-toggle='toc'] .nav .nav > .active > a, -nav[data-toggle='toc'] .nav .nav > .active:hover > a, -nav[data-toggle='toc'] .nav .nav > .active:focus > a { - padding-left: 15px; - font-weight: 500; - font-size: 1.35rem; -} - -/* orcid ------------------------------------------------------------------- */ - -.orcid { - font-size: 16px; - color: #A6CE39; - /* margins are required by official ORCID trademark and display guidelines */ - margin-left:4px; - margin-right:4px; - vertical-align: middle; -} - -/* Reference index & topics ----------------------------------------------- */ - -.ref-index th {font-weight: normal;} - -.ref-index td {vertical-align: top;} -.ref-index .icon {width: 40px;} -.ref-index .alias {width: 40%;} -.ref-index-icons .alias {width: calc(40% - 40px);} -.ref-index .title {width: 60%;} - -.ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top;} -.ref-arguments .name {width: 20%;} -.ref-arguments .desc {width: 80%;} - -/* Nice scrolling for wide elements --------------------------------------- */ - -table { - display: block; - overflow: auto; -} - -/* Syntax highlighting ---------------------------------------------------- */ - -pre { - word-wrap: normal; - word-break: normal; - border: 1px solid #eee; -} - -pre, code { - background-color: #f8f8f8; - color: #333; -} - -pre code { - overflow: auto; - word-wrap: normal; - white-space: pre; -} - -pre .img { - margin: 5px 0; -} - -pre .img img { - background-color: #fff; - display: block; - height: auto; -} - -code a, pre a { - color: #375f84; -} - -a.sourceLine:hover { - text-decoration: none; -} - -.fl {color: #1514b5;} -.fu {color: #000000;} /* function */ -.ch,.st {color: #036a07;} /* string */ -.kw {color: #264D66;} /* keyword */ -.co {color: #888888;} /* comment */ - -.message { color: black; font-weight: bolder;} -.error { color: orange; font-weight: bolder;} -.warning { color: #6A0366; font-weight: bolder;} - -/* Clipboard --------------------------*/ - -.hasCopyButton { - position: relative; -} - -.btn-copy-ex { - position: absolute; - right: 0; - top: 0; - visibility: hidden; -} - -.hasCopyButton:hover button.btn-copy-ex { - visibility: visible; -} - -/* headroom.js ------------------------ */ - -.headroom { - will-change: transform; - transition: transform 200ms linear; -} -.headroom--pinned { - transform: translateY(0%); -} -.headroom--unpinned { - transform: translateY(-100%); -} - -/* mark.js ----------------------------*/ - -mark { - background-color: rgba(255, 255, 51, 0.5); - border-bottom: 2px solid rgba(255, 153, 51, 0.3); - padding: 1px; -} - -/* vertical spacing after htmlwidgets */ -.html-widget { - margin-bottom: 10px; -} - -/* fontawesome ------------------------ */ - -.fab { - font-family: "Font Awesome 5 Brands" !important; -} - -/* don't display 
links in code chunks when printing */ -/* source: https://stackoverflow.com/a/10781533 */ -@media print { - code a:link:after, code a:visited:after { - content: ""; - } -} diff --git a/docs/pkgdown.js b/docs/pkgdown.js deleted file mode 100644 index 7e7048fa..00000000 --- a/docs/pkgdown.js +++ /dev/null @@ -1,108 +0,0 @@ -/* http://gregfranko.com/blog/jquery-best-practices/ */ -(function($) { - $(function() { - - $('.navbar-fixed-top').headroom(); - - $('body').css('padding-top', $('.navbar').height() + 10); - $(window).resize(function(){ - $('body').css('padding-top', $('.navbar').height() + 10); - }); - - $('[data-toggle="tooltip"]').tooltip(); - - var cur_path = paths(location.pathname); - var links = $("#navbar ul li a"); - var max_length = -1; - var pos = -1; - for (var i = 0; i < links.length; i++) { - if (links[i].getAttribute("href") === "#") - continue; - // Ignore external links - if (links[i].host !== location.host) - continue; - - var nav_path = paths(links[i].pathname); - - var length = prefix_length(nav_path, cur_path); - if (length > max_length) { - max_length = length; - pos = i; - } - } - - // Add class to parent
  • , and enclosing
  • if in dropdown - if (pos >= 0) { - var menu_anchor = $(links[pos]); - menu_anchor.parent().addClass("active"); - menu_anchor.closest("li.dropdown").addClass("active"); - } - }); - - function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); - } - - // Returns -1 if not found - function prefix_length(needle, haystack) { - if (needle.length > haystack.length) - return(-1); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0 ? 0 : -1); - } - - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(i); - } - - return(haystack.length); - } - - /* Clipboard --------------------------*/ - - function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); - } - - if(ClipboardJS.isSupported()) { - $(document).ready(function() { - var copyButton = ""; - - $(".examples, div.sourceCode").addClass("hasCopyButton"); - - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); - - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); - - // Initialize clipboard: - var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent; - } - }); - - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); - }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); - } -})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml deleted file mode 100644 index f852a138..00000000 --- a/docs/pkgdown.yml +++ /dev/null @@ -1,6 +0,0 @@ -pandoc: 2.9.2.1 -pkgdown: 1.5.1 -pkgdown_sha: ~ -articles: [] -last_built: 2020-05-28T09:16Z - diff --git a/docs/reference/auto_prior.html b/docs/reference/auto_prior.html deleted file mode 100644 index 362ac3a7..00000000 --- a/docs/reference/auto_prior.html +++ /dev/null @@ -1,264 +0,0 @@ - - - - - - - - -Create default priors for brms-models — auto_prior • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    This function creates default priors for brms-regression - models, based on the same automatic prior-scale adjustment as in - rstanarm.

    -
    - -
    auto_prior(formula, data, gaussian, locations = NULL)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    formula

A formula describing the model, which just needs to contain the model terms, but no notation for interactions, splines, etc. Usually, you want only those predictors in the formula for which automatic priors should be generated. Add informative priors afterwards to the returned brmsprior-object.

    data

    The data that will be used to fit the model.

    gaussian

    Logical, if the outcome is gaussian or not.

    locations

    A numeric vector with location values for the priors. If -locations = NULL, 0 is used as location parameter.

    - -

    Value

    - -

    A brmsprior-object.

    -

    Details

    - -

auto_prior() is a small, convenient function to create some default priors for brms-models with automatically adjusted prior scales, in a similar way as rstanarm does. The default scale for the intercept is 10, for coefficients 2.5. If the outcome is gaussian, both scales are multiplied by sd(y). For categorical variables, nothing more is changed. For numeric variables, the scales are divided by the standard deviation of the related variable.

All prior distributions are normal distributions. auto_prior() is intended to quickly create default priors with feasible scales. If more precise definitions of priors are necessary, this needs to be done directly with brms-functions like set_prior(). A minimal sketch of the scale adjustment follows below.
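As a rough illustration of the scale adjustment described above, here is a minimal sketch; the variable choices are made up for illustration, and auto_prior() performs these steps internally:

library(sjmisc)
data(efc)
y <- efc$neg_c_7   # outcome (gaussian)
x <- efc$c160age   # numeric predictor

# default scales: 10 for the intercept, 2.5 for coefficients;
# for a gaussian outcome, both are multiplied by sd(y)
scale_intercept <- 10 * sd(y, na.rm = TRUE)
scale_coef <- 2.5 * sd(y, na.rm = TRUE)

# for a numeric predictor, the scale is additionally divided by sd(x);
# for categorical predictors, the scale is left unchanged
scale_x <- scale_coef / sd(x, na.rm = TRUE)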

    -

    Note

    - -

    As auto_prior() also sets priors on the intercept, the model - formula used in brms::brm() must be rewritten to something like - y ~ 0 + intercept ..., see set_prior.
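Following that note, a minimal sketch of the rewritten call might look as below (commented out, as in the examples; the exact intercept notation may differ across brms versions):

# ap <- auto_prior(mf, efc, TRUE)
# m <- brms::brm(neg_c_7 ~ 0 + intercept + c161sex + c160age + c172code,
#                data = efc, prior = ap)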

    - -

    Examples

    -
    library(sjmisc) -data(efc) -efc$c172code <- as.factor(efc$c172code) -efc$c161sex <- to_label(efc$c161sex) - -mf <- formula(neg_c_7 ~ c161sex + c160age + c172code) - -if (requireNamespace("brms", quietly = TRUE)) - auto_prior(mf, efc, TRUE)
    #> prior class coef group resp dpar nlpar bound -#> 1 normal(0, 38.96) Intercept -#> 2 normal(0, 9.74) b c161sexFemale -#> 3 normal(0, 0.73) b c160age -#> 4 normal(0, 9.74) b c172code2 -#> 5 normal(0, 9.74) b c172code3
-## compare to -# library(rstanarm) -# m <- stan_glm(mf, data = efc, chains = 2, iter = 200) -# ps <- prior_summary(m) -# ps$prior_intercept$adjusted_scale -# ps$prior$adjusted_scale - -## usage -# ap <- auto_prior(mf, efc, TRUE) -# brm(mf, data = efc, prior = ap) - -# add informative priors -mf <- formula(neg_c_7 ~ c161sex + c172code) - -if (requireNamespace("brms", quietly = TRUE)) { - auto_prior(mf, efc, TRUE) + - brms::prior(normal(.1554, 40), class = "b", coef = "c160age") -}
    #> prior class coef group resp dpar nlpar bound -#> 1 normal(0, 38.95) Intercept -#> 2 normal(0, 9.74) b c161sexFemale -#> 3 normal(0, 9.74) b c172code2 -#> 4 normal(0, 9.74) b c172code3 -#> 5 normal(0.1554, 40) b c160age
    -# example with binary response -efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) -mf <- formula(neg_c_7d ~ c161sex + c160age + c172code + e17age) - -if (requireNamespace("brms", quietly = TRUE)) - auto_prior(mf, efc, FALSE)
    #> prior class coef group resp dpar nlpar bound -#> 1 normal(0, 10) Intercept -#> 2 normal(0, 2.5) b c161sexFemale -#> 3 normal(0, 0.19) b c160age -#> 4 normal(0, 2.5) b c172code2 -#> 5 normal(0, 2.5) b c172code3 -#> 6 normal(0, 0.31) b e17age
    -
    -
    - -
    - - - -
    - - - - - - - - diff --git a/docs/reference/boot_ci.html b/docs/reference/boot_ci.html deleted file mode 100644 index 05414ead..00000000 --- a/docs/reference/boot_ci.html +++ /dev/null @@ -1,308 +0,0 @@ - - - - - - - - -Standard error and confidence intervals for bootstrapped estimates — boot_ci • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    Compute nonparametric bootstrap estimate, standard error, - confidence intervals and p-value for a vector of bootstrap - replicate estimates.

    -
    - -
    boot_ci(data, ..., method = c("dist", "quantile"), ci.lvl = 0.95)
    -
    -boot_se(data, ...)
    -
    -boot_p(data, ...)
    -
    -boot_est(data, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    data

A data frame that contains the vector with bootstrapped estimates, or directly the vector (see 'Examples').

    ...

Optional, unquoted names of variables with bootstrapped estimates. Required if data is a data frame (and not a vector) and only selected variables from data should be processed. You may also use functions like : or tidyselect's select_helpers.

    method

    Character vector, indicating if confidence intervals should be -based on bootstrap standard error, multiplied by the value of the -quantile function of the t-distribution (default), or on sample -quantiles of the bootstrapped values. See 'Details' in boot_ci(). -May be abbreviated.

    ci.lvl

    Numeric, the level of the confidence intervals.

    - -

    Value

    - -

A tibble with either the bootstrap estimate, standard error, the lower and upper confidence intervals, or the p-value for all bootstrapped estimates.

    -

    Details

    - -

    The methods require one or more vectors of bootstrap replicate estimates - as input.

      -
    • boot_est() returns the bootstrapped estimate, simply by - computing the mean value of all bootstrap estimates.

    • -
    • boot_se() computes the nonparametric bootstrap standard - error by calculating the standard deviation of the input vector.

    • -
• The mean value of the input vector and its standard error are used by boot_ci() to calculate the lower and upper confidence interval, assuming a t-distribution of bootstrap estimate replicates (for method = "dist", the default, which is mean(x) +/- qt(.975, df = length(x) - 1) * sd(x)); for method = "quantile", 95% sample quantiles are used to compute the confidence intervals (quantile(x, probs = c(.025, .975))). Use ci.lvl to change the level for the confidence interval (see the sketch below).

    • -
    • P-values from boot_p() are also based on t-statistics, - assuming normal distribution.
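A minimal sketch of these computations, assuming x is a plain numeric vector of bootstrap replicate estimates; the toy data are made up, and the exact internals of boot_p() may differ in detail:

x <- rnorm(1000, mean = 1.5, sd = 0.2)  # stand-in for bootstrapped estimates
est <- mean(x)                          # boot_est()
se <- sd(x)                             # boot_se()
# method = "dist": t-distribution based on the bootstrap standard error
ci_dist <- est + c(-1, 1) * qt(.975, df = length(x) - 1) * se
# method = "quantile": sample quantiles of the bootstrapped values
ci_quantile <- quantile(x, probs = c(.025, .975))
# boot_p(): two-sided p-value from the t-statistic (an assumption
# matching the description above)
p <- 2 * pt(abs(est / se), df = length(x) - 1, lower.tail = FALSE)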

    • -
    - -

    References

    - -

Carpenter J, Bithell J. Bootstrap confidence intervals: when, which, what? A practical guide for medical statisticians. Statist. Med. 2000; 19:1141-1164

    -

    See also

    - -

    bootstrap to generate nonparametric bootstrap samples.

    - -

    Examples

    -
    library(dplyr) -library(purrr) -data(efc) -bs <- bootstrap(efc, 100) - -# now run models for each bootstrapped sample -bs$models <- map(bs$strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)) - -# extract coefficient "dependency" and "gender" from each model -bs$dependency <- map_dbl(bs$models, ~coef(.x)[2]) -bs$gender <- map_dbl(bs$models, ~coef(.x)[3]) - -# get bootstrapped confidence intervals -boot_ci(bs$dependency)
    #> term conf.low conf.high -#> 1 x 1.320903 1.77407
    -# compare with model fit -fit <- lm(neg_c_7 ~ e42dep + c161sex, data = efc) -confint(fit)[2, ]
    #> 2.5 % 97.5 % -#> 1.292945 1.796430
    -# alternative function calls. -boot_ci(bs$dependency)
    #> term conf.low conf.high -#> 1 x 1.320903 1.77407
    boot_ci(bs, dependency)
    #> term conf.low conf.high -#> 1 dependency 1.320903 1.77407
    boot_ci(bs, dependency, gender)
    #> term conf.low conf.high -#> 1 dependency 1.3209034 1.7740701 -#> 2 gender -0.1016646 0.9788897
    boot_ci(bs, dependency, gender, method = "q")
    #> term conf.low conf.high -#> 1 dependency 1.30501832 1.763890 -#> 2 gender -0.07012629 0.916922
    - -# compare coefficients -mean(bs$dependency)
    #> [1] 1.547487
    boot_est(bs$dependency)
    #> term estimate -#> 1 x 1.547487
    coef(fit)[2]
    #> e42dep -#> 1.544687
    - -# bootstrap() and boot_ci() work fine within pipe-chains -efc %>% - bootstrap(100) %>% - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex, data = .x)), - dependency = map_dbl(models, ~coef(.x)[2]) - ) %>% - boot_ci(dependency)
    #> term conf.low conf.high -#> 1 dependency 1.263093 1.779068
    -# check p-value -boot_p(bs$gender)
    #> term p.value -#> 1 x 0.1103975
    summary(fit)$coefficients[3, ]
    #> Estimate Std. Error t value Pr(>|t|) -#> 0.4339069 0.2818786 1.5393398 0.1240780
    -if (FALSE) { -# 'spread_coef()' from the 'sjmisc'-package makes it easy to generate -# bootstrapped statistics like confidence intervals or p-values -library(dplyr) -library(sjmisc) -efc %>% - # generate bootstrap replicates - bootstrap(100) %>% - # apply lm to all bootstrapped data sets - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) %>% - # spread model coefficient for all 100 models - spread_coef(models) %>% - # compute the CI for all bootstrapped model coefficients - boot_ci(e42dep, c161sex, c172code) - -# or... -efc %>% - # generate bootstrap replicates - bootstrap(100) %>% - # apply lm to all bootstrapped data sets - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c161sex + c172code, data = .x)) - ) %>% - # spread model coefficient for all 100 models - spread_coef(models, append = FALSE) %>% - # compute the CI for all bootstrapped model coefficients - boot_ci()}
    -
    - -
    - - - -
    - - - - - - - - diff --git a/docs/reference/bootstrap.html b/docs/reference/bootstrap.html deleted file mode 100644 index f6e496bd..00000000 --- a/docs/reference/bootstrap.html +++ /dev/null @@ -1,304 +0,0 @@ - - - - - - - - -Generate nonparametric bootstrap replications — bootstrap • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

Generates n bootstrap samples of data and returns the bootstrapped data frames as a list-variable.

    -
    - -
    bootstrap(data, n, size)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    data

    A data frame.

    n

    Number of bootstraps to be generated.

    size

    Optional, size of the bootstrap samples. May either be a number -between 1 and nrow(data) or a value between 0 and 1 to sample -a proportion of observations from data (see 'Examples').

    - -

    Value

    - -

    A data frame with one column: a list-variable - strap, which contains resample-objects of class sj_resample. - These resample-objects are lists with three elements:

      -
    1. the original data frame, data

    2. -
3. the row numbers id, i.e. row numbers of data, indicating the resampled rows with replacement

    4. -
    5. the resample.id, indicating the index of the resample (i.e. the position of the sj_resample-object in the list strap)

    6. -
    - -

    Details

    - -

By default, each bootstrap sample has the same number of observations as data. To generate bootstrap samples without resampling the same observations (i.e. sampling without replacement), use size to get bootstrapped data with a specific number of observations. However, specifying the size-argument is much less memory-efficient than the bootstrap with replacement. Hence, it is recommended to ignore the size-argument unless it is really needed (see the sketch below).
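As a rough sketch of the resampling logic described above, using a made-up data frame d; bootstrap() itself returns sj_resample-objects rather than plain index vectors:

d <- data.frame(x = rnorm(100), y = rnorm(100))
# default: sampling with replacement, same number of rows as the data
idx <- sample(nrow(d), size = nrow(d), replace = TRUE)
# with the size-argument: a smaller sample without replacement
idx_small <- sample(nrow(d), size = 70, replace = FALSE)
head(d[idx, ])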

    -

    Note

    - -

    This function applies nonparametric bootstrapping, i.e. the function - draws samples with replacement. -

    - There is an as.data.frame- and a print-method to get or - print the resampled data frames. See 'Examples'. The as.data.frame- - method automatically applies whenever coercion is done because a data - frame is required as input. See 'Examples' in boot_ci.

    -

    See also

    - -

    boot_ci to calculate confidence intervals from - bootstrap samples.

    - -

    Examples

    -
    data(efc) -bs <- bootstrap(efc, 5) - -# now run models for each bootstrapped sample -lapply(bs$strap, function(x) lm(neg_c_7 ~ e42dep + c161sex, data = x))
    #> [[1]] -#> -#> Call: -#> lm(formula = neg_c_7 ~ e42dep + c161sex, data = x) -#> -#> Coefficients: -#> (Intercept) e42dep c161sex -#> 6.9036 1.4385 0.4329 -#> -#> -#> [[2]] -#> -#> Call: -#> lm(formula = neg_c_7 ~ e42dep + c161sex, data = x) -#> -#> Coefficients: -#> (Intercept) e42dep c161sex -#> 7.8918 1.3693 -0.1371 -#> -#> -#> [[3]] -#> -#> Call: -#> lm(formula = neg_c_7 ~ e42dep + c161sex, data = x) -#> -#> Coefficients: -#> (Intercept) e42dep c161sex -#> 7.7762 1.3265 0.1469 -#> -#> -#> [[4]] -#> -#> Call: -#> lm(formula = neg_c_7 ~ e42dep + c161sex, data = x) -#> -#> Coefficients: -#> (Intercept) e42dep c161sex -#> 5.4334 1.7106 0.7536 -#> -#> -#> [[5]] -#> -#> Call: -#> lm(formula = neg_c_7 ~ e42dep + c161sex, data = x) -#> -#> Coefficients: -#> (Intercept) e42dep c161sex -#> 6.3228 1.5540 0.5277 -#> -#>
    -# generate bootstrap samples with 600 observations for each sample -bs <- bootstrap(efc, 5, 600) - -# generate bootstrap samples with 70% observations of the original sample size -bs <- bootstrap(efc, 5, .7) - -# compute standard error for a simple vector from bootstraps -# use the `as.data.frame()`-method to get the resampled -# data frame -bs <- bootstrap(efc, 100) -bs$c12hour <- unlist(lapply(bs$strap, function(x) { - mean(as.data.frame(x)$c12hour, na.rm = TRUE) -})) - -# or as tidyverse-approach -if (require("dplyr") && require("purrr")) { - bs <- efc %>% - bootstrap(100) %>% - mutate( - c12hour = map_dbl(strap, ~mean(as.data.frame(.x)$c12hour, na.rm = TRUE)) - ) - - # bootstrapped standard error - boot_se(bs, c12hour) -}
    #> Loading required package: dplyr
    #> -#> Attaching package: 'dplyr'
    #> The following objects are masked from 'package:stats': -#> -#> filter, lag
    #> The following objects are masked from 'package:base': -#> -#> intersect, setdiff, setequal, union
    #> Loading required package: purrr
    #> -#> Attaching package: 'purrr'
    #> The following object is masked from 'package:sjmisc': -#> -#> is_empty
    #> term std.err -#> 1 c12hour 1.6688
    -
    - -
    - - - -
    - - - - - - - - diff --git a/docs/reference/check_assumptions.html b/docs/reference/check_assumptions.html deleted file mode 100644 index b1a7dd55..00000000 --- a/docs/reference/check_assumptions.html +++ /dev/null @@ -1,431 +0,0 @@ - - - - - - - - -Check model assumptions — check_assumptions • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - -
    - -
    -
    - - -
    - -
      -
    • outliers() detects outliers in (generalized) linear models.

    • -
    • heteroskedastic() checks a linear model for (non-)constant error variance.

    • -
    • autocorrelation() checks for independence of errors.

    • -
    • normality() checks linear models for (non-)normality of residuals.

    • -
    • multicollin() checks predictors of linear models for multicollinearity.

    • -
    • check_assumptions() checks all of the above assumptions.

    • -
    - -
    - -
    check_assumptions(x, model.column = NULL, as.logical = FALSE, ...)
    -
    -outliers(x, iterations = 5)
    -
    -heteroskedastic(x, model.column = NULL)
    -
    -autocorrelation(x, model.column = NULL, ...)
    -
    -normality(x, model.column = NULL)
    -
    -multicollin(x, model.column = NULL)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    x

    Fitted lm (for outliers(), may also be a glm model), -or a (nested) data frame with a list-variable that contains fitted model -objects.

    model.column

Name or index of the list-variable that contains the fitted model objects. Only applies if x is a nested data frame (e.g. with models fitted to bootstrap replicates).

    as.logical

Logical, if TRUE, the values returned by check_assumptions() are TRUE or FALSE, indicating whether each model assumption is violated or not. If FALSE (the default), the p-value of the respective test statistic is returned.

    ...

    Other arguments, passed down to durbinWatsonTest.

    iterations

    Numeric, indicates the number of iterations to remove -outliers.

    - -

    Value

    - -

    A data frame with the respective statistics.

    - -

    Details

    - -

These functions are wrappers that compute various test statistics; however, each of them returns a tibble instead of a list of values. Furthermore, all functions can also be applied to multiple models stored in list-variables (see 'Examples').

outliers() wraps outlierTest and iteratively removes outliers up to iterations times, or stops earlier if the r-squared value (for glm: the AIC) did not improve after removing outliers. The function returns a tibble with r-squared and AIC statistics for the original and updated model, as well as the updated model itself ($updated.model), the number ($removed.count) and indices of the removed observations ($removed.obs).

heteroskedastic() wraps ncvTest and returns the p-value of the test statistic as a tibble. A p-value < 0.05 indicates non-constant error variance (heteroskedasticity).

autocorrelation() wraps durbinWatsonTest and returns the p-value of the test statistic as a tibble. A p-value < 0.05 indicates autocorrelated residuals. In such cases, robust standard errors (see robust) return more accurate results for the estimates, or a mixed model with an error term for the cluster groups should be used.

normality() calls shapiro.test and checks the standardized residuals for normal distribution. The p-value of the test statistic is returned as a tibble. A p-value < 0.05 indicates a significant deviation from normal distribution. Note that this formal test almost always yields significant results for the distribution of residuals, so visual inspection (e.g. qqplots) is preferable (see plot_model with type = "diag").

multicollin() wraps vif and returns the maximum vif-value from a model as a tibble. If this value is larger than about 4, multicollinearity exists; otherwise it does not. In case of multicollinearity, the names of the independent variables that contribute to multicollinearity are printed to the console (a sketch of this rule of thumb follows below).

check_assumptions() runs all of the above tests and returns a tibble with all test statistics included. In case the p-values are too confusing, use the as.logical argument, where all p-values are replaced with either TRUE (in case of violation) or FALSE (in case the model conforms to the assumptions of linear regression).
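A minimal sketch of the VIF rule of thumb mentioned above, assuming the car package is installed and the efc data is loaded:

data(efc)
fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc)
# rule of thumb from above: a maximum VIF larger than about 4
# suggests multicollinearity
max(car::vif(fit)) > 4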

    - -

    Note

    - -

These formal tests are very strict, and in most cases violations of model assumptions are flagged even though the model is actually fine. It is preferable to check model assumptions based on visual inspection (see plot_model with type = "diag").

    - - -

    Examples

    -
    data(efc) - -fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) -outliers(fit)
    #> No outliers detected.
    heteroskedastic(fit)
    #> Heteroscedasticity (non-constant error variance) detected: p = 0.000
    #> heteroskedastic -#> 1 3.885808e-07
    autocorrelation(fit)
    #> Autocorrelated residuals detected: p = 0.000
    #> autocorrelation -#> 1 0
    normality(fit)
    #> Non-normality of residuals detected: p = 0.000
    #> non.normality -#> 1 1.535796e-13
    check_assumptions(fit)
    #> -#> # Checking Model-Assumptions -#> -#> Model: barthtot ~ c160age + c12hour + c161sex + c172code -#> -#> violated statistic -#> Heteroskedasticity yes p = 0.000 -#> Non-normal residuals yes p = 0.000 -#> Autocorrelated residuals yes p = 0.000 -#> Multicollinearity no vif = 1.153
    -fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code + neg_c_7, - data = efc) -outliers(fit)
    #> 2 outliers removed in updated model.
    #> models adjusted.r2 aic -#> 1 original 0.3458095 7487.639 -#> 2 updated 0.3530485 7468.980
    check_assumptions(fit, as.logical = TRUE)
    #> heteroskedasticity multicollinearity non.normal.resid autocorrelation -#> 1 TRUE FALSE TRUE TRUE
    -# apply function to multiple models in list-variable -library(purrr) -library(dplyr) -tmp <- efc %>% - bootstrap(50) %>% - mutate( - models = map(strap, ~lm(neg_c_7 ~ e42dep + c12hour + c161sex, data = .x)) - ) - -# for list-variables, argument 'model.column' is the -# quoted name of the list-variable with fitted models -tmp %>% normality("models")
    #> non.normality -#> 1 3.230058e-19 -#> 2 3.827347e-16 -#> 3 9.098247e-22 -#> 4 9.981165e-19 -#> 5 6.428575e-18 -#> 6 3.224082e-20 -#> 7 2.118794e-19 -#> 8 1.288646e-16 -#> 9 2.328003e-19 -#> 10 8.999720e-21 -#> 11 6.353047e-21 -#> 12 1.026013e-19 -#> 13 2.390006e-19 -#> 14 6.701265e-19 -#> 15 2.551566e-19 -#> 16 3.057700e-18 -#> 17 2.117815e-19 -#> 18 1.077834e-17 -#> 19 7.594577e-18 -#> 20 9.239676e-19 -#> 21 3.149844e-21 -#> 22 5.519879e-20 -#> 23 3.637194e-18 -#> 24 1.764174e-21 -#> 25 6.623029e-19 -#> 26 1.181069e-19 -#> 27 3.168530e-19 -#> 28 4.854560e-18 -#> 29 2.001162e-20 -#> 30 7.352369e-22 -#> 31 1.018042e-20 -#> 32 7.376915e-19 -#> 33 1.207461e-18 -#> 34 4.070908e-16 -#> 35 7.510733e-18 -#> 36 2.049412e-20 -#> 37 1.243810e-19 -#> 38 2.813157e-19 -#> 39 3.095559e-22 -#> 40 8.518521e-21 -#> 41 5.261510e-16 -#> 42 1.156834e-18 -#> 43 5.360721e-21 -#> 44 3.306733e-18 -#> 45 8.478782e-20 -#> 46 1.000268e-20 -#> 47 3.684449e-21 -#> 48 1.301398e-17 -#> 49 3.625559e-21 -#> 50 8.619752e-18
    tmp %>% heteroskedastic("models")
    #> heteroskedastic -#> 1 1.117145e-06 -#> 2 4.136378e-16 -#> 3 2.304193e-11 -#> 4 2.303091e-13 -#> 5 2.989163e-09 -#> 6 1.617715e-06 -#> 7 2.462472e-06 -#> 8 5.650847e-11 -#> 9 1.949355e-05 -#> 10 9.487583e-12 -#> 11 8.865950e-05 -#> 12 1.001106e-11 -#> 13 9.380742e-09 -#> 14 2.692178e-11 -#> 15 2.777129e-12 -#> 16 1.156712e-08 -#> 17 1.142231e-09 -#> 18 3.003215e-05 -#> 19 3.513974e-10 -#> 20 8.914693e-07 -#> 21 5.075324e-07 -#> 22 1.443102e-04 -#> 23 3.889332e-08 -#> 24 2.962249e-05 -#> 25 1.108236e-11 -#> 26 1.494601e-11 -#> 27 9.264544e-11 -#> 28 2.394459e-08 -#> 29 3.738415e-03 -#> 30 3.710447e-07 -#> 31 2.980297e-12 -#> 32 4.978182e-15 -#> 33 1.403946e-07 -#> 34 1.833919e-14 -#> 35 2.311357e-05 -#> 36 1.506452e-13 -#> 37 3.740443e-08 -#> 38 2.266819e-07 -#> 39 9.948937e-11 -#> 40 8.844100e-08 -#> 41 3.306678e-14 -#> 42 4.618659e-10 -#> 43 4.902720e-07 -#> 44 8.314707e-12 -#> 45 8.818074e-11 -#> 46 8.447289e-10 -#> 47 5.206397e-09 -#> 48 3.063647e-15 -#> 49 1.658216e-08 -#> 50 1.059808e-12
    -# Durbin-Watson-Test from package 'car' takes a little bit longer due -# to simulation of p-values... -
    # NOT RUN { -tmp %>% check_assumptions("models", as.logical = TRUE, reps = 100) -# }
    -
    -
    - -
    - -
    - - -
    -


    -
    -
    -
    - - - - - - diff --git a/docs/reference/chisq_gof.html b/docs/reference/chisq_gof.html deleted file mode 100644 index 4a63755b..00000000 --- a/docs/reference/chisq_gof.html +++ /dev/null @@ -1,236 +0,0 @@ - - - - - - - - -Compute model quality — chisq_gof • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - - -
    - -
    -
    - - -
    -

    For logistic regression models, performs a Chi-squared - goodness-of-fit-test.

    -
    - -
    chisq_gof(x, prob = NULL, weights = NULL)
    - -

    Arguments

    - - - - - - - - - - - - - - -
    x

    A numeric vector or a glm-object.

    prob

Vector of probabilities (indicating the population probabilities) of the same length as the number of categories / factor levels of x. Use nrow(table(x)) to determine the number of necessary values for prob. Only used when x is a vector, and not a glm-object.

    weights

    Vector with weights, used to weight x.

    - -

    Value

    - -

For vectors, returns the object of the computed chisq.test. For glm-objects, an object of class chisq_gof with the following values: p.value, the p-value for the goodness-of-fit test; z.score, the standardized z-score for the goodness-of-fit test; rss, the residual sum of squares; and chisq, the Pearson chi-squared statistic.

    -

    Details

    - -

For vectors, this function is a convenient wrapper for chisq.test(), performing a goodness-of-fit test. For glm-objects, this function performs a goodness-of-fit test as well. A well-fitting model shows no significant difference between the model and the observed data, i.e. the reported p-values should be greater than 0.05 (a minimal sketch of the vector case follows below).
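For the vector case, a minimal base-R sketch of the equivalent chisq.test() call, mirroring the example below; chisq_gof() wraps this internally:

data(efc)
# goodness-of-fit test of e42dep against given population probabilities
chisq.test(table(efc$e42dep), p = c(0.3, 0.2, 0.22, 0.28))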

    -

    References

    - -

    Hosmer, D. W., & Lemeshow, S. (2000). Applied Logistic Regression. Hoboken, NJ, USA: John Wiley & Sons, Inc. doi: 10.1002/0471722146

    - -

    Examples

    -
    data(efc) -efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) -m <- glm( - neg_c_7d ~ c161sex + barthtot + c172code, - data = efc, - family = binomial(link = "logit") -) - -# goodness-of-fit test for logistic regression -chisq_gof(m)
    #> -#> # Chi-squared Goodness-of-Fit Test -#> -#> Chi-squared: 852.765 -#> z-score: 1.025 -#> p-value: 0.305 -#>
    #> Summary: model seems to fit well.
    -# goodness-of-fit test for vectors against probabilities -# differing from population -chisq_gof(efc$e42dep, c(0.3,0.2,0.22,0.28))
    #> -#> Chi-squared test for given probabilities -#> -#> data: dummy -#> X-squared = 234.76, df = 3, p-value < 2.2e-16 -#>
    -# equal to population -chisq_gof(efc$e42dep, prop.table(table(efc$e42dep)))
    #> -#> Chi-squared test for given probabilities -#> -#> data: dummy -#> X-squared = 0, df = 3, p-value = 1 -#>
    -
    -
    - -
    - - - -
    - - - - - - - - diff --git a/docs/reference/cod.html b/docs/reference/cod.html deleted file mode 100644 index 7fc5bded..00000000 --- a/docs/reference/cod.html +++ /dev/null @@ -1,347 +0,0 @@ - - - - - - - - -Goodness-of-fit measures for regression models — cod • sjstats - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    -
    - - - -
    - -
    -
    - - -
    - -

    Compute Goodness-of-fit measures for various regression models, - including mixed and Bayesian regression models.

    - -
    - -
    cod(x)
    -
    -r2(x, ...)
    -
    -# S3 method for lme
    -r2(x, n = NULL, ...)
    -
    -# S3 method for stanreg
    -r2(x, loo = FALSE, ...)
    -
    -# S3 method for brmsfit
    -r2(x, loo = FALSE, ...)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    x

Fitted model of class lm, glm, merMod, glmmTMB, lme, plm, stanreg or brmsfit. For method cod(), only a glm with binary response.

    ...

    Currently not used.

    n

    Optional, an lme object, representing the fitted null-model -(unconditional model) to x. If n is given, the pseudo-r-squared -for random intercept and random slope variances are computed -(Kwok et al. 2008) as well as the Omega squared value -(Xu 2003). See 'Examples' and 'Details'.

    loo

    Logical, if TRUE and x is a stanreg or -brmsfit object, a LOO-adjusted r-squared is calculated. Else, -a rather "unadjusted" r-squared will be returned by calling -rstantools::bayes_R2().

    - -

    Value

    - -

    For r2(), depending on the model, returns:

      -
    • For linear models, the r-squared and adjusted r-squared values.

    • -
    • For mixed models, the marginal and conditional r-squared values.

    • -
    • For glm objects, Cox & Snell's and Nagelkerke's pseudo r-squared values.

    • -
    • For brmsfit or stanreg objects, the Bayesian version of r-squared is computed, calling rstantools::bayes_R2().

    • -
    • If loo = TRUE, for brmsfit or stanreg objects a LOO-adjusted version of r-squared is returned.

    • -
    • Models that are not currently supported return NULL.

    • -

    For cod(), returns the D Coefficient of Discrimination, - also known as Tjur's R-squared value.

    - -

    Details

    - -

For linear models, the r-squared and adjusted r-squared values are returned, as provided by the summary-function.

    - For mixed models (from lme4 or glmmTMB) marginal and - conditional r-squared values are calculated, based on - Nakagawa et al. 2017. The distributional variance - (or observation-level variance) is based on lognormal approximation, - log(1+var(x)/mu^2). -

    - For lme-models, an r-squared approximation by computing the - correlation between the fitted and observed values, as suggested by - Byrnes (2008), is returned as well as a simplified version of - the Omega-squared value (1 - (residual variance / response variance), - Xu (2003), Nakagawa, Schielzeth 2013), unless n - is specified. -

    - If n is given, for lme-models pseudo r-squared measures based - on the variances of random intercept (tau 00, between-group-variance) - and random slope (tau 11, random-slope-variance), as well as the - r-squared statistics as proposed by Snijders and Bosker 2012 and - the Omega-squared value (1 - (residual variance full model / residual - variance null model)) as suggested by Xu (2003) are returned. -

    - For generalized linear models, Cox & Snell's and Nagelkerke's - pseudo r-squared values are returned. -

    - The ("unadjusted") r-squared value and its standard error for - brmsfit or stanreg objects are robust measures, i.e. - the median is used to compute r-squared, and the median absolute - deviation as the measure of variability. If loo = TRUE, - a LOO-adjusted r-squared is calculated, which comes conceptionally - closer to an adjusted r-squared measure.

    - -

    Note

    - -
    -
    cod()

This method calculates the Coefficient of Discrimination D for generalized linear (mixed) models for binary data. It is an alternative to other Pseudo-R-squared values like Nagelkerke's R2 or Cox-Snell R2. The Coefficient of Discrimination D can be read like any other (Pseudo-)R-squared value (a minimal sketch follows below).
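A minimal sketch of Tjur's D, assuming it is the difference between the mean predicted probabilities of the two outcome groups (Tjur 2009, cited below); the model mirrors the example at the end of this page, and cod() computes this internally:

data(efc)
efc$services <- ifelse(efc$tot_sc_e > 0, 1, 0)
fit <- glm(services ~ neg_c_7 + c161sex + e42dep,
           data = efc, family = binomial(link = "logit"))
pred <- fitted(fit)               # predicted probabilities
y <- model.frame(fit)$services    # response, aligned with fitted values
D <- mean(pred[y == 1]) - mean(pred[y == 0])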

    -
    r2()

    For mixed models, the marginal r-squared considers only the variance - of the fixed effects, while the conditional r-squared takes both - the fixed and random effects into account. -

    - For lme-objects, if n is given, the Pseudo-R2 statistic - is the proportion of explained variance in the random effect after - adding co-variates or predictors to the model, or in short: the - proportion of the explained variance in the random effect of the - full (conditional) model x compared to the null (unconditional) - model n. -

The Omega-squared statistic, if n is given, is 1 minus the proportion of the residual variance of the full model compared to the null model's residual variance; in short, the proportion of the residual variation explained by the covariates.

An alternative way to assess the "goodness of fit" is to compare the ICC of the null model with the ICC of the full model (see icc).

    References

• DRAFT r-sig-mixed-models FAQ

• Bolker B et al. (2017): GLMM FAQ

• Byrnes, J. 2008. Re: Coefficient of determination (R^2) when using lme() (https://stat.ethz.ch/pipermail/r-sig-mixed-models/2008q2/000713.html)

• Kwok OM, Underhill AT, Berry JW, Luo W, Elliott TR, Yoon M. 2008. Analyzing Longitudinal Data with Multilevel Models: An Example with Individuals Living with Lower Extremity Intra-Articular Fractures. Rehabilitation Psychology 53(3): 370-86. doi: 10.1037/a0012765

• Nakagawa S, Schielzeth H. 2013. A general and simple method for obtaining R2 from generalized linear mixed-effects models. Methods in Ecology and Evolution 4(2): 133-142. doi: 10.1111/j.2041-210x.2012.00261.x

• Nakagawa S, Johnson P, Schielzeth H. 2017. The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded. J. R. Soc. Interface 14. doi: 10.1098/rsif.2017.0213

• Rabe-Hesketh S, Skrondal A. 2012. Multilevel and longitudinal modeling using Stata. 3rd ed. College Station, Tex: Stata Press Publication

• Raudenbush SW, Bryk AS. 2002. Hierarchical linear models: applications and data analysis methods. 2nd ed. Thousand Oaks: Sage Publications

• Snijders TAB, Bosker RJ. 2012. Multilevel analysis: an introduction to basic and advanced multilevel modeling. 2nd ed. Los Angeles: Sage

• Xu, R. 2003. Measuring explained variation in linear mixed effects models. Statist. Med. 22: 3527-3541. doi: 10.1002/sim.1572

• Tjur T. 2009. Coefficients of determination in logistic regression models - a new proposal: The coefficient of discrimination. The American Statistician 63(4): 366-372

    Examples

data(efc)

# Tjur's R-squared value
efc$services <- ifelse(efc$tot_sc_e > 0, 1, 0)
fit <- glm(services ~ neg_c_7 + c161sex + e42dep,
  data = efc, family = binomial(link = "logit"))
cod(fit)
#>
#> R-Squared for (Generalized) Linear (Mixed) Model
#>
#> Tjur's D: 0.023

library(lme4)
#> Loading required package: Matrix
fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
r2(fit)
#>
#> R-Squared for (Generalized) Linear (Mixed) Model
#>
#> Family : gaussian (identity)
#> Formula: ~Days | Subject Reaction ~ Days NA
#>
#> Marginal R2: 0.279
#> Conditional R2: 0.799

fit <- lm(barthtot ~ c160age + c12hour, data = efc)
r2(fit)
#>
#> R-Squared for (Generalized) Linear (Mixed) Model
#>
#> R-squared: 0.256
#> adjusted R-squared: 0.254

# Pseudo-R-squared values
fit <- glm(services ~ neg_c_7 + c161sex + e42dep,
  data = efc, family = binomial(link = "logit"))
r2(fit)
#>
#> R-Squared for (Generalized) Linear (Mixed) Model
#>
#> Cox & Snell's R-squared: 0.023
#> Nagelkerke's R-squared: 0.030
diff --git a/docs/reference/converge_ok.html b/docs/reference/converge_ok.html
deleted file mode 100644
index b64e312d..00000000
--- a/docs/reference/converge_ok.html
+++ /dev/null
@@ -1,261 +0,0 @@

Convergence test for mixed effects models — converge_ok • sjstats

converge_ok() provides an alternative convergence test for merMod objects; is_singular() checks post-fitting convergence warnings. If the model fit is singular, warnings about negative eigenvalues of the Hessian can most likely be ignored.

converge_ok(x, tolerance = 0.001)

is_singular(x, tolerance = 1e-05, ...)

    Arguments

x

A merMod object. For is_singular(), this may also be a glmmTMB object.

tolerance

Indicates up to which value the convergence result is accepted. The smaller tolerance is, the stricter the test will be.

...

Currently not used.

Value

For converge_ok(), a logical vector, which is TRUE if convergence is fine and FALSE if convergence is suspicious. Additionally, the convergence value is returned as the return value's name. is_singular() returns TRUE if the model fit is singular.

Details

converge_ok() provides an alternative convergence test for merMod objects, as discussed here and suggested by Ben Bolker in this comment.
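The check boils down to the maximum absolute scaled gradient of the fit. A hedged sketch for a merMod object, following the recipe from lme4's convergence documentation (not necessarily sjstats' exact code):

# scaled gradient check: solve the Hessian against the gradient and
# compare the largest absolute value to the tolerance
converge_ok_sketch <- function(fit, tolerance = 0.001) {
  derivs <- fit@optinfo$derivs
  relgrad <- solve(derivs$Hessian, derivs$gradient)
  max(abs(relgrad)) < tolerance
}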

If a model is "singular", some dimensions of the variance-covariance matrix have been estimated as exactly zero. is_singular() checks whether a model fit is singular, and can be used in case of post-fitting convergence warnings, such as warnings about negative eigenvalues of the Hessian. If the fit is singular (i.e. is_singular() returns TRUE), these warnings can most likely be ignored.

There is no gold standard for how to deal with singularity and which random-effects specification to choose. Besides using fully Bayesian methods (with informative priors), proposals in a frequentist framework are:

• avoid fitting overly complex models, such that the variance-covariance matrices can be estimated precisely enough (Matuschek et al. 2017)

• use some form of model selection to choose a model that balances predictive accuracy and overfitting/type I error (Bates et al. 2015, Matuschek et al. 2017)

• “keep it maximal”, i.e. fit the most complex model consistent with the experimental design, removing only terms required to allow a non-singular fit (Barr et al. 2013)

    References

• Bates D, Kliegl R, Vasishth S, Baayen H. Parsimonious Mixed Models. arXiv:1506.04967, June 2015.

• Barr DJ, Levy R, Scheepers C, Tily HJ. Random effects structure for confirmatory hypothesis testing: Keep it maximal. Journal of Memory and Language 68(3): 255-278, April 2013.

• Matuschek H, Kliegl R, Vasishth S, Baayen H, Bates D. Balancing type I error and power in linear mixed models. Journal of Memory and Language 94: 305-315, 2017.

    Examples

library(sjmisc)
library(lme4)
data(efc)

# create binary response
efc$hi_qol <- dicho(efc$quol_5)
# prepare group variable
efc$grp <- as.factor(efc$e15relat)
# data frame for fitted model
mydf <- data.frame(
  hi_qol = as.factor(efc$hi_qol),
  sex = as.factor(efc$c161sex),
  c12hour = as.numeric(efc$c12hour),
  neg_c_7 = as.numeric(efc$neg_c_7),
  grp = efc$grp
)
# fit glmer
fit <- glmer(hi_qol ~ sex + c12hour + neg_c_7 + (1 | grp),
  data = mydf, family = binomial("logit"))

converge_ok(fit)
#> 1.19757278379967e-05
#> TRUE
diff --git a/docs/reference/crosstable_statistics.html b/docs/reference/crosstable_statistics.html
deleted file mode 100644
index 83c2cd90..00000000
--- a/docs/reference/crosstable_statistics.html
+++ /dev/null
@@ -1,353 +0,0 @@

Measures of association for contingency tables — cramer • sjstats

This function calculates various measures of association for contingency tables and returns the statistic and p-value. Supported measures are Cramer's V, Phi, Spearman's rho, Kendall's tau and Pearson's r.

cramer(tab, ...)

# S3 method for formula
cramer(
  formula,
  data,
  ci.lvl = NULL,
  n = 1000,
  method = c("dist", "quantile"),
  ...
)

phi(tab, ...)

crosstable_statistics(
  data,
  x1 = NULL,
  x2 = NULL,
  statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"),
  weights = NULL,
  ...
)

xtab_statistics(
  data,
  x1 = NULL,
  x2 = NULL,
  statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"),
  weights = NULL,
  ...
)

    Arguments

tab

A table or ftable. Tables of class xtabs and others will be coerced to ftable objects.

...

Other arguments, passed down to the statistic functions chisq.test, fisher.test or cor.test.

formula

A formula of the form lhs ~ rhs, where lhs is a numeric variable giving the data values and rhs a factor giving the corresponding groups.

data

A data frame or a table object. If a table object, x1 and x2 will be ignored. For Kendall's tau, Spearman's rho or Pearson's product-moment correlation coefficient, data needs to be a data frame. If x1 and x2 are not specified, the first two columns of the data frame are used as variables to compute the crosstab.

ci.lvl

Scalar between 0 and 1. If not NULL, returns a data frame including lower and upper confidence intervals.

n

Number of bootstraps to be generated.

method

Character vector, indicating whether confidence intervals should be based on the bootstrap standard error, multiplied by the value of the quantile function of the t-distribution (default), or on sample quantiles of the bootstrapped values. See 'Details' in boot_ci(). May be abbreviated.

x1

Name of the first variable that should be used to compute the contingency table. If data is a table object, this argument will be ignored.

x2

Name of the second variable that should be used to compute the contingency table. If data is a table object, this argument will be ignored.

statistics

Name of the measure of association that should be computed. May be one of "auto", "cramer", "phi", "spearman", "kendall", "pearson" or "fisher". See 'Details'.

weights

Name of the variable in x that indicates the vector of weights that will be applied to weight all observations. Default is NULL, so no weights are used.

Value

For phi(), the table's Phi value. For cramer(), the table's Cramer's V.

For crosstable_statistics(), a list with the following components:

estimate: the value of the estimated measure of association.

p.value: the p-value for the test.

statistic: the value of the test statistic.

stat.name: the name of the test statistic.

stat.html: if applicable, the name of the test statistic, in HTML format.

df: the degrees of freedom for the contingency table.

method: character string indicating the name of the measure of association.

method.html: if applicable, the name of the measure of association, in HTML format.

method.short: the short form of the association measure, equal to the statistics argument.

fisher: logical, if Fisher's exact test was used to calculate the p-value.

    Details

The p-values for Cramer's V and the Phi coefficient are based on chisq.test(). If any expected value of a table cell is smaller than 5, or smaller than 10 when the degrees of freedom equal 1, then fisher.test() is used to compute the p-value, unless statistics = "fisher"; in that case, the use of fisher.test() is forced to compute the p-value. The test statistic is calculated with cramer() or phi(), respectively.
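A minimal sketch of that decision rule in base R (an illustration of the rule as stated, not the package's internal code):

# choose the p-value source based on the expected cell counts
p_value_for_table <- function(tab) {
  test <- suppressWarnings(stats::chisq.test(tab))
  small <- any(test$expected < 5) ||
    (any(test$expected < 10) && test$parameter == 1)
  if (small) stats::fisher.test(tab)$p.value else test$p.value
}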

Both the test statistic and p-value for Spearman's rho, Kendall's tau and Pearson's r are calculated with cor.test().

When statistics = "auto", only Cramer's V or Phi is calculated, based on the dimension of the table (i.e. if the table has more than two rows or columns, Cramer's V is calculated, otherwise Phi).

    Examples

# Phi coefficient for 2x2 tables
tab <- table(sample(1:2, 30, TRUE), sample(1:2, 30, TRUE))
phi(tab)
#> [1] 0.1336306

# Cramer's V for nominal variables with more than 2 categories
tab <- table(sample(1:2, 30, TRUE), sample(1:3, 30, TRUE))
cramer(tab)
#> [1] 0.180269

# formula notation
data(efc)
cramer(e16sex ~ c161sex, data = efc)
#> [1] 0.05258249

# bootstrapped confidence intervals
cramer(e16sex ~ c161sex, data = efc, ci.lvl = .95, n = 100)
#>       cramer    conf.low conf.high
#> 1 0.05258249 -0.00860759  0.111614

# 2x2 table, compute Phi automatically
crosstable_statistics(efc, e16sex, c161sex)
#>
#> # Measure of Association for Contingency Tables
#>
#> Chi-squared: 2.2327
#>         Phi: 0.0526
#>     p-value: 0.1351

# more dimensions than 2x2, compute Cramer's V automatically
crosstable_statistics(efc, c172code, c161sex)
#>
#> # Measure of Association for Contingency Tables
#>
#> Chi-squared: 4.1085
#>  Cramer's V: 0.0699
#>     p-value: 0.1282

# ordinal data, use Kendall's tau
crosstable_statistics(efc, e42dep, quol_5, statistics = "kendall")
#>
#> # Measure of Association for Contingency Tables
#>
#>             z: -9.5951
#> Kendall's tau: -0.2496
#>       p-value: <0.001

# calculate Spearman's rho, with continuity correction
crosstable_statistics(efc,
  e42dep,
  quol_5,
  statistics = "spearman",
  exact = FALSE,
  continuity = TRUE
)
#>
#> # Measure of Association for Contingency Tables
#>
#>              S: 157974157.4198
#> Spearman's rho: -0.3177
#>        p-value: <0.001
diff --git a/docs/reference/cv.html b/docs/reference/cv.html
deleted file mode 100644
index 14f47380..00000000
--- a/docs/reference/cv.html
+++ /dev/null
@@ -1,190 +0,0 @@

Compute model quality — cv • sjstats

    Compute the coefficient of variation.

cv(x, ...)

    Arguments

    x

Fitted linear model of class lm, merMod (lme4) or lme (nlme).

    ...

More fitted model objects, to compute multiple coefficients of variation at once.

    Value

Numeric, the coefficient of variation.

Details

The advantage of the CV is that it is unitless. This allows coefficients of variation to be compared to each other in ways that other measures, like standard deviations or root mean squared residuals, cannot.
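For intuition, a hedged sketch of the quantity itself; the model analogue shown here (residual standard deviation relative to the mean response) is an assumption about the definition, not a quote of the package's code:

# coefficient of variation for a numeric vector
cv_vec <- function(x) sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE)

# a plausible model analogue: residual SD relative to the mean response
cv_model <- function(fit) sigma(fit) / mean(model.response(model.frame(fit)))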


    Examples

data(efc)
fit <- lm(barthtot ~ c160age + c12hour, data = efc)
cv(fit)
#> [1] 0.3948098
diff --git a/docs/reference/cv_error.html b/docs/reference/cv_error.html
deleted file mode 100644
index 3f01f6d1..00000000
--- a/docs/reference/cv_error.html
+++ /dev/null
@@ -1,219 +0,0 @@

Test and training error from model cross-validation — cv_error • sjstats

cv_error() computes the root mean squared error from a model fitted to k-fold cross-validated test-training data. cv_compare() does the same for multiple formulas at once (by calling cv_error() for each formula).

cv_error(data, formula, k = 5)

cv_compare(data, formulas, k = 5)

    Arguments

    data

    A data frame.

    formula

    The formula to fit the linear model for the test and training data.

    k

The number of folds for the k-fold cross-validation.

    formulas

    A list of formulas, to fit linear models for the test and training data.

Value

A data frame with the root mean squared errors for the training and test data.

Details

cv_error() first generates cross-validated test-training pairs, using crossv_kfold, and then fits a linear model, described by formula, to the training data. Then, predictions for the test data are computed, based on the trained models. The training error is the mean rmse of all trained models; the test error is the rmse based on all residuals from the test data.
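A self-contained sketch of the same idea in base R, without modelr (the helper name is illustrative, and the test error here is averaged per fold rather than pooled over all residuals):

# manual k-fold cross-validated train/test RMSE for a linear model
cv_error_sketch <- function(data, formula, k = 5) {
  data <- na.omit(data[all.vars(formula)])
  folds <- sample(rep(seq_len(k), length.out = nrow(data)))
  rmse <- function(e) sqrt(mean(e^2))
  train_err <- test_err <- numeric(k)
  for (i in seq_len(k)) {
    fit <- lm(formula, data = data[folds != i, ])
    test <- data[folds == i, ]
    train_err[i] <- rmse(residuals(fit))
    test_err[i] <- rmse(test[[all.vars(formula)[1]]] - predict(fit, newdata = test))
  }
  data.frame(train.error = mean(train_err), test.error = mean(test_err))
}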


    Examples

data(efc)
cv_error(efc, neg_c_7 ~ barthtot + c161sex)
#> Warning: unnest() has a new interface. See ?unnest for details.
#> Try `df %>% unnest(c(predicted, residuals))`, with `mutate()` if needed
#>                          model train.error test.error
#> 1 neg_c_7 ~ barthtot + c161sex      3.5065      3.519

cv_compare(efc, formulas = list(
  neg_c_7 ~ barthtot + c161sex,
  neg_c_7 ~ barthtot + c161sex + e42dep,
  neg_c_7 ~ barthtot + c12hour
))
#> Warning: unnest() has a new interface. See ?unnest for details.
#> Try `df %>% unnest(c(predicted, residuals))`, with `mutate()` if needed
#> (the warning is repeated once per formula)
#>                                   model train.error test.error
#> 1          neg_c_7 ~ barthtot + c161sex      3.5066     3.5223
#> 2 neg_c_7 ~ barthtot + c161sex + e42dep      3.4865     3.5089
#> 3          neg_c_7 ~ barthtot + c12hour      3.5028     3.5205
diff --git a/docs/reference/deff.html b/docs/reference/deff.html
deleted file mode 100644
index 47609cea..00000000
--- a/docs/reference/deff.html
+++ /dev/null
@@ -1,214 +0,0 @@

Design effects for two-level mixed models — deff • sjstats

Compute the design effect (also called Variance Inflation Factor) for mixed models with two-level design.

deff(n, icc = 0.05)

    Arguments

    n

    Average number of observations per grouping cluster (i.e. level-2 unit).

    icc

    Assumed intraclass correlation coefficient for multilevel-model.

Value

    The design effect (Variance Inflation Factor) for the two-level model.

Details

The formula for the design effect is simply (1 + (n - 1) * icc).
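That formula is a one-liner in R (shown here as a standalone sketch):

# design effect for a two-level design
design_effect_sketch <- function(n, icc = 0.05) 1 + (n - 1) * icc

design_effect_sketch(n = 30)  # 2.45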

References

Bland JM. 2000. Sample size in guidelines trials. Fam Pract. (17), 17-20.

Hsieh FY, Lavori PW, Cohen HJ, Feussner JR. 2003. An Overview of Variance Inflation Factors for Sample-Size Calculation. Evaluation and the Health Professions 26: 239-257. doi: 10.1177/0163278703255230

Snijders TAB. 2005. Power and Sample Size in Multilevel Linear Models. In: Everitt BS, Howell DC (eds.). Encyclopedia of Statistics in Behavioral Science. Chichester, UK: John Wiley and Sons, Ltd. doi: 10.1002/0470013192.bsa492

Thompson DM, Fernald DH, Mold JW. 2012. Intraclass Correlation Coefficients Typical of Cluster-Randomized Studies: Estimates From the Robert Wood Johnson Prescription for Health Projects. The Annals of Family Medicine 10(3): 235-40. doi: 10.1370/afm.1347


    Examples

# Design effect for two-level model with 30 observations per
# cluster group (level-2 unit) and an assumed intraclass
# correlation coefficient of 0.05.
deff(n = 30)
#> [1] 2.45

# Design effect for two-level model with 24 observations per cluster
# group and an assumed intraclass correlation coefficient of 0.2.
deff(n = 24, icc = 0.2)
#> [1] 5.6
diff --git a/docs/reference/design_effect.html b/docs/reference/design_effect.html
deleted file mode 100644
index a7cc0bde..00000000
--- a/docs/reference/design_effect.html
+++ /dev/null
@@ -1,202 +0,0 @@

Design effects for two-level mixed models — design_effect • sjstats
Compute the design effect (also called Variance Inflation Factor) for mixed models with two-level design.

design_effect(n, icc = 0.05)

    Arguments

    n

    Average number of observations per grouping cluster (i.e. level-2 unit).

    icc

    Assumed intraclass correlation coefficient for multilevel-model.

Value

The design effect (Variance Inflation Factor) for the two-level model.

Details

    The formula for the design effect is simply (1 + (n - 1) * icc).

References

Bland JM. 2000. Sample size in guidelines trials. Fam Pract. (17), 17-20.

Hsieh FY, Lavori PW, Cohen HJ, Feussner JR. 2003. An Overview of Variance Inflation Factors for Sample-Size Calculation. Evaluation and the Health Professions 26: 239-257. doi: 10.1177/0163278703255230

Snijders TAB. 2005. Power and Sample Size in Multilevel Linear Models. In: Everitt BS, Howell DC (eds.). Encyclopedia of Statistics in Behavioral Science. Chichester, UK: John Wiley and Sons, Ltd. doi: 10.1002/0470013192.bsa492

Thompson DM, Fernald DH, Mold JW. 2012. Intraclass Correlation Coefficients Typical of Cluster-Randomized Studies: Estimates From the Robert Wood Johnson Prescription for Health Projects. The Annals of Family Medicine 10(3): 235-40. doi: 10.1370/afm.1347

Examples

# Design effect for two-level model with 30 observations per
# cluster group (level-2 unit) and an assumed intraclass
# correlation coefficient of 0.05.
design_effect(n = 30)
#> [1] 2.45

# Design effect for two-level model with 24 observations per cluster
# group and an assumed intraclass correlation coefficient of 0.2.
design_effect(n = 24, icc = 0.2)
#> [1] 5.6
diff --git a/docs/reference/dot-badlink.html b/docs/reference/dot-badlink.html
deleted file mode 100644
index de078495..00000000
--- a/docs/reference/dot-badlink.html
+++ /dev/null
@@ -1,163 +0,0 @@

helper-function, telling user if model is supported or not — .badlink • sjstats
Helper function, telling the user if the model is supported or not.

.badlink(link, family)
diff --git a/docs/reference/dot-collapse_cond.html b/docs/reference/dot-collapse_cond.html
deleted file mode 100644
index 017fc371..00000000
--- a/docs/reference/dot-collapse_cond.html
+++ /dev/null
@@ -1,163 +0,0 @@

glmmTMB returns a list of model information, one for conditional and one for zero-inflated part, so here we "unlist" it — .collapse_cond • sjstats
glmmTMB returns a list of model information, one for the conditional and one for the zero-inflated model part, so here we "unlist" it.

.collapse_cond(x)
diff --git a/docs/reference/dot-get_variance_beta.html b/docs/reference/dot-get_variance_beta.html
deleted file mode 100644
index 270ca22f..00000000
--- a/docs/reference/dot-get_variance_beta.html
+++ /dev/null
@@ -1,163 +0,0 @@

Get distributional variance for beta-family — .get_variance_beta • sjstats
Get distributional variance for the beta family.

.get_variance_beta(mu, phi)
diff --git a/docs/reference/dot-get_variance_dispersion.html b/docs/reference/dot-get_variance_dispersion.html
deleted file mode 100644
index c89c6a51..00000000
--- a/docs/reference/dot-get_variance_dispersion.html
+++ /dev/null
@@ -1,163 +0,0 @@

Get dispersion-specific variance — .get_variance_dispersion • sjstats
Get dispersion-specific variance.

.get_variance_dispersion(x, vals, faminfo, obs.terms)
diff --git a/docs/reference/dot-get_variance_fixed.html b/docs/reference/dot-get_variance_fixed.html
deleted file mode 100644
index 628a9ad5..00000000
--- a/docs/reference/dot-get_variance_fixed.html
+++ /dev/null
@@ -1,163 +0,0 @@

Get fixed effects variance — .get_variance_fixed • sjstats
Get fixed effects variance.

.get_variance_fixed(vals)
diff --git a/docs/reference/dot-get_variance_random.html b/docs/reference/dot-get_variance_random.html
deleted file mode 100644
index c01edb9e..00000000
--- a/docs/reference/dot-get_variance_random.html
+++ /dev/null
@@ -1,163 +0,0 @@

Compute variance associated with a random-effects term (Johnson 2014) — .get_variance_random • sjstats
Compute the variance associated with a random-effects term (Johnson 2014).

.get_variance_random(terms, x, vals)
diff --git a/docs/reference/dot-get_variance_residual.html b/docs/reference/dot-get_variance_residual.html
deleted file mode 100644
index 6340ec4e..00000000
--- a/docs/reference/dot-get_variance_residual.html
+++ /dev/null
@@ -1,163 +0,0 @@

Get residual (distribution specific) variance from random effects — .get_variance_residual • sjstats
Get residual (distribution-specific) variance from random effects.

.get_variance_residual(x, var.cor, faminfo, name)
diff --git a/docs/reference/efc.html b/docs/reference/efc.html
deleted file mode 100644
index 5b83250f..00000000
--- a/docs/reference/efc.html
+++ /dev/null
@@ -1,164 +0,0 @@

Sample dataset from the EUROFAMCARE project — efc • sjstats
German data set from the European study on family care of older people.

References

Lamura G, Döhner H, Kofahl C, editors. Family carers of older people in Europe: a six-country comparative study. Münster: LIT, 2008.
diff --git a/docs/reference/eta_sq.html b/docs/reference/eta_sq.html
deleted file mode 100644
index 9a8d66d0..00000000
--- a/docs/reference/eta_sq.html
+++ /dev/null
@@ -1,248 +0,0 @@

Effect size statistics for anova — anova_stats • sjstats

Returns the (partial) eta-squared, (partial) omega-squared or epsilon-squared statistic, or Cohen's F, for all terms in an ANOVA model. anova_stats() returns a tidy summary, including all these statistics and the power for each term.

anova_stats(model, digits = 3)

epsilon_sq(model, partial = FALSE, ci.lvl = NULL)

eta_sq(model, partial = FALSE, ci.lvl = NULL)

omega_sq(model, partial = FALSE, ci.lvl = NULL)

    Arguments

model

A fitted ANOVA model of class aov or anova. Other models are coerced to anova.

digits

Number of digits for returned values.

partial

Logical, if TRUE, the partial eta-squared is returned.

ci.lvl

Scalar between 0 and 1. If not NULL, returns a data frame with effect sizes including lower and upper confidence intervals.

Value

A data frame with the term name(s) and effect size statistics; if ci.lvl is not NULL, a data frame including lower and upper confidence intervals is returned. For anova_stats(), a tidy data frame with all statistics is returned (excluding confidence intervals).

Details

    See details in eta_squared.

References

Levine TR, Hullett CR (2002): Eta Squared, Partial Eta Squared, and Misreporting of Effect Size in Communication Research (pdf)

Tippey K, Longnecker MT (2016): An Ad Hoc Method for Computing Pseudo-Effect Size for Mixed Model (pdf)

    Examples

# load sample data
data(efc)

# fit linear model
fit <- aov(
  c12hour ~ as.factor(e42dep) + as.factor(c172code) + c160age,
  data = efc
)

eta_sq(fit)
#>                  term etasq
#> 1   as.factor(e42dep) 0.266
#> 2 as.factor(c172code) 0.005
#> 3             c160age 0.048
omega_sq(fit)
#>                  term omegasq
#> 1   as.factor(e42dep)   0.263
#> 2 as.factor(c172code)   0.004
#> 3             c160age   0.048
eta_sq(fit, partial = TRUE)
#>                  term partial.etasq
#> 1   as.factor(e42dep)         0.281
#> 2 as.factor(c172code)         0.008
#> 3             c160age         0.066
eta_sq(fit, partial = TRUE, ci.lvl = .8)
#>                  term partial.etasq conf.low conf.high
#> 1   as.factor(e42dep)         0.281    0.248     0.311
#> 2 as.factor(c172code)         0.008    0.001     0.016
#> 3             c160age         0.066    0.047     0.089

anova_stats(car::Anova(fit, type = 2))
#> Registered S3 methods overwritten by 'car':
#>   method                          from
#>   influence.merMod                lme4
#>   cooks.distance.influence.merMod lme4
#>   dfbeta.influence.merMod         lme4
#>   dfbetas.influence.merMod        lme4
#>                  term       sumsq     meansq  df statistic p.value etasq
#> 1   as.factor(e42dep)  426461.571 142153.857   3    80.299   0.000 0.212
#> 2 as.factor(c172code)    7352.049   3676.025   2     2.076   0.126 0.004
#> 3             c160age  105169.595 105169.595   1    59.408   0.000 0.052
#> 4           Residuals 1476436.343   1770.307 834        NA      NA    NA
#>   partial.etasq omegasq partial.omegasq epsilonsq cohens.f power
#> 1         0.224   0.209           0.221     0.209    0.537 1.000
#> 2         0.005   0.002           0.003     0.002    0.071 0.429
#> 3         0.066   0.051           0.065     0.051    0.267 1.000
#> 4            NA      NA              NA        NA       NA    NA
diff --git a/docs/reference/figures/logo.png b/docs/reference/figures/logo.png
deleted file mode 100644
index 7c24480b..00000000
Binary files a/docs/reference/figures/logo.png and /dev/null differ
diff --git a/docs/reference/find_beta-1.png b/docs/reference/find_beta-1.png
deleted file mode 100644
index 5bdecac5..00000000
Binary files a/docs/reference/find_beta-1.png and /dev/null differ
diff --git a/docs/reference/find_beta-2.png b/docs/reference/find_beta-2.png
deleted file mode 100644
index 8bbe844e..00000000
Binary files a/docs/reference/find_beta-2.png and /dev/null differ
diff --git a/docs/reference/find_beta-3.png b/docs/reference/find_beta-3.png
deleted file mode 100644
index 0a721d2e..00000000
Binary files a/docs/reference/find_beta-3.png and /dev/null differ
diff --git a/docs/reference/find_beta-4.png b/docs/reference/find_beta-4.png
deleted file mode 100644
index 03413a91..00000000
Binary files a/docs/reference/find_beta-4.png and /dev/null differ
diff --git a/docs/reference/find_beta.html b/docs/reference/find_beta.html
deleted file mode 100644
index f749ec10..00000000
--- a/docs/reference/find_beta.html
+++ /dev/null
@@ -1,292 +0,0 @@

Determining distribution parameters — find_beta • sjstats

find_beta(), find_normal() and find_cauchy() find the shape parameters, the mean and standard deviation, or the location and scale parameters, respectively, to describe the beta, normal or Cauchy distribution, based on two percentiles. find_beta2() finds the shape parameters for a beta distribution, based on a probability value and its standard error or confidence interval.

find_beta(x1, p1, x2, p2)

find_beta2(x, se, ci, n)

find_cauchy(x1, p1, x2, p2)

find_normal(x1, p1, x2, p2)

    Arguments

x1

Value for the first percentile.

p1

Probability of the first percentile.

x2

Value for the second percentile.

p2

Probability of the second percentile.

x

Numeric, a probability value between 0 and 1. Typically indicates a prevalence rate of an outcome of interest, or an integer value with the number of observed events. In the latter case, specify n to indicate the total number of observations.

se

The standard error of x. Either se or ci must be specified.

ci

The upper limit of the confidence interval of x. Either se or ci must be specified.

n

Numeric, number of total observations. Needs to be specified if x is an integer (the number of observed events) and not a probability. See 'Examples'.

Value

A list of length two, with the two distribution parameters that can be used to define the distribution that (best) describes the shape for the given input parameters.

Details

These functions can be used to find the parameters of various distributions, in order to define prior probabilities for Bayesian analyses. x1, p1, x2 and p2 are parameters that describe two quantiles. Given this knowledge, the distribution parameters are returned.

Use find_beta2() if the known parameters are, e.g., a prevalence rate or similar probability and its standard deviation or confidence interval. In this case, x should be a probability, for example a prevalence rate of a certain event. se then needs to be the standard error of this probability. Alternatively, ci can be specified, which should indicate the upper limit of the confidence interval of the probability (prevalence rate) x. If the number of events out of a total number of trials is known (e.g. 12 heads out of 30 coin tosses), x can also be the number of observed events, while n indicates the total number of trials (in the above example, the function call would be: find_beta2(x = 12, n = 30)).
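For intuition, a hedged method-of-moments sketch for recovering beta shape parameters from a probability m and its standard error s (one standard route; the package's search-based solution may differ):

# Beta(a, b): mean m = a / (a + b), variance v = m * (1 - m) / (a + b + 1)
beta_shapes_mom <- function(m, s) {
  k <- m * (1 - m) / s^2 - 1   # a + b
  list(shape1 = m * k, shape2 = (1 - m) * k)
}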

References

    Cook JD. Determining distribution parameters from quantiles. 2010: Department of Biostatistics, Texas (PDF)


    Examples

# example from blogpost:
# https://www.johndcook.com/blog/2010/01/31/parameters-from-percentiles/
# 10% of patients respond within 30 days of treatment
# and 80% respond within 90 days of treatment
find_normal(x1 = 30, p1 = .1, x2 = 90, p2 = .8)
#> $mean
#> [1] 53.78387
#>
#> $sd
#> [1] 30.48026

find_cauchy(x1 = 30, p1 = .1, x2 = 90, p2 = .8)
#> $location
#> [1] 48.54102
#>
#> $scale
#> [1] 57.06339

parms <- find_normal(x1 = 30, p1 = .1, x2 = 90, p2 = .8)
curve(
  dnorm(x, mean = parms$mean, sd = parms$sd),
  from = 0, to = 200
)

parms <- find_cauchy(x1 = 30, p1 = .1, x2 = 90, p2 = .8)
curve(
  dcauchy(x, location = parms$location, scale = parms$scale),
  from = 0, to = 200
)

find_beta2(x = .25, ci = .5)
#> $shape1
#> [1] 2.860267
#>
#> $shape2
#> [1] 7.93757

shapes <- find_beta2(x = .25, ci = .5)
curve(dbeta(x, shapes[[1]], shapes[[2]]))

# find Beta distribution for 3 events out of 20 observations
find_beta2(x = 3, n = 20)
#> $shape1
#> [1] 4.157811
#>
#> $shape2
#> [1] 22.03272

shapes <- find_beta2(x = 3, n = 20)
curve(dbeta(x, shapes[[1]], shapes[[2]]))
diff --git a/docs/reference/fish.html b/docs/reference/fish.html
deleted file mode 100644
index 860c1863..00000000
--- a/docs/reference/fish.html
+++ /dev/null
@@ -1,164 +0,0 @@

Sample dataset — fish • sjstats
Sample data from the UCLA idre website.

References

https://stats.idre.ucla.edu/r/dae/zip/
diff --git a/docs/reference/gmd.html b/docs/reference/gmd.html
deleted file mode 100644
index e1090b55..00000000
--- a/docs/reference/gmd.html
+++ /dev/null
@@ -1,198 +0,0 @@

Gini's Mean Difference — gmd • sjstats

gmd() computes Gini's mean difference for a numeric vector or for all numeric vectors in a data frame.

gmd(x, ...)

    Arguments

    x

    A vector or data frame.

    ...

Optional, unquoted names of variables that should be selected for further processing. Required if x is a data frame (and not a vector) and only selected variables from x should be processed. You may also use functions like : or tidyselect's select_helpers.

Value

For numeric vectors, Gini's mean difference. For non-numeric vectors or vectors of length < 2, returns NA.

Note

Gini's mean difference is defined as the mean absolute difference between any two distinct elements of a vector. Missing values from x are silently removed.
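A direct, if O(n^2), sketch of that definition:

# naive Gini's mean difference: mean |xi - xj| over all distinct pairs
gmd_naive <- function(x) {
  x <- x[!is.na(x)]
  n <- length(x)
  if (!is.numeric(x) || n < 2) return(NA_real_)
  sum(abs(outer(x, x, "-"))) / (n * (n - 1))
}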

References

    David HA. Gini's mean difference rediscovered. Biometrika 1968(55): 573-575


    Examples

data(efc)
gmd(efc$e17age)
#> [1] 9.297005
gmd(efc, e17age, c160age, c12hour)
#> # A tibble: 1 x 3
#>   e17age c160age c12hour
#>    <dbl>   <dbl>   <dbl>
#> 1   9.30    15.2    47.9
diff --git a/docs/reference/grpmean.html b/docs/reference/grpmean.html
deleted file mode 100644
index 64bc99f2..00000000
--- a/docs/reference/grpmean.html
+++ /dev/null
@@ -1,354 +0,0 @@

Summary of mean values by group — grpmean • sjstats

Computes mean, sd and se for each sub-group (indicated by grp) of dv.

grpmean(
  x,
  dv,
  grp,
  weights = NULL,
  digits = 2,
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

means_by_group(
  x,
  dv,
  grp,
  weights = NULL,
  digits = 2,
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

    Arguments

x

A (grouped) data frame.

dv

Name of the dependent variable, for which the mean value, grouped by grp, is computed.

grp

Factor with the cross-classifying variable, where dv is grouped into the categories represented by grp. Numeric vectors are coerced to factors.

weights

Name of the variable in x that indicates the vector of weights that will be applied to weight all observations. Default is NULL, so no weights are used.

digits

Numeric, number of digits after the decimal point when rounding estimates and values.

out

Character vector, indicating whether the results should be printed to the console (out = "txt"), shown as an HTML table in the viewer pane (out = "viewer") or browser (out = "browser"), or whether the results should be plotted (out = "plot", only applies to certain functions). May be abbreviated.

encoding

Character vector, indicating the charset encoding used for variable and value labels. Default is "UTF-8". Only used when out is not "txt".

file

Destination file, if the output should be saved as a file. Only used when out is not "txt".

Value

For non-grouped data frames, grpmean() returns a data frame with the following columns: term, mean, N, std.dev, std.error and p.value. For grouped data frames, it returns a list of such data frames.

Details

This function performs a one-way ANOVA with dv as the dependent and grp as the independent variable, by calling lm(dv ~ as.factor(grp)). Then contrast is called to get p-values for each sub-group. The p-values indicate whether each group mean is significantly different from the total mean.
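A hedged sketch of that computation using emmeans' "eff" contrasts, which compare each group mean against the grand mean (this mirrors the description above, not necessarily sjstats' exact call):

library(emmeans)

fit <- lm(Sepal.Width ~ Species, data = iris)
contrast(emmeans(fit, "Species"), method = "eff")  # each mean vs. grand mean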


    Examples

data(efc)
grpmean(efc, c12hour, e42dep)
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#>                   term  mean   N std.dev std.error p.value
#> 1          independent  9.91  66    8.01      0.99  <0.001
#> 2   slightly dependent 17.54 225   17.74      1.18  <0.001
#> 3 moderately dependent 34.52 306   41.54      2.37    0.98
#> 4   severely dependent 75.90 304   61.72      3.54  <0.001
#> 5                Total 42.44 901   50.82      1.69
#>
#> Anova: R2=0.245; adj.R2=0.242; F=96.908; p=0.000

data(iris)
grpmean(iris, Sepal.Width, Species)
#>
#> # Grouped Means for Sepal.Width by Species
#>
#>         term mean   N std.dev std.error p.value
#> 1     setosa 3.43  50    0.38      0.05  <0.001
#> 2 versicolor 2.77  50    0.31      0.04  <0.001
#> 3  virginica 2.97  50    0.32      0.05    0.04
#> 4      Total 3.06 150    0.44      0.04
#>
#> Anova: R2=0.401; adj.R2=0.393; F=49.160; p=0.000

# also works for grouped data frames
library(dplyr)
efc %>%
  group_by(c172code) %>%
  grpmean(c12hour, e42dep)
#>
#> Grouped by:
#> carer's level of education: low level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#>                   term  mean   N std.dev std.error p.value
#> 1          independent 16.33  12   10.74      3.10    0.02
#> 2   slightly dependent 15.38  42    9.55      1.47  <0.001
#> 3 moderately dependent 42.05  61   46.53      5.96    0.70
#> 4   severely dependent 85.52  65   56.42      7.00  <0.001
#> 5                Total 49.81 180   52.24      3.89
#>
#> Anova: R2=0.307; adj.R2=0.295; F=25.955; p=0.000
#>
#> Grouped by:
#> carer's level of education: intermediate level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#>                   term  mean   N std.dev std.error p.value
#> 1          independent  7.96  45    3.91      0.58  <0.001
#> 2   slightly dependent 17.12 135   16.52      1.42  <0.001
#> 3 moderately dependent 33.55 163   41.05      3.22    0.75
#> 4   severely dependent 79.71 163   63.13      4.94  <0.001
#> 5                Total 41.76 506   51.42      2.29
#>
#> Anova: R2=0.284; adj.R2=0.280; F=66.374; p=0.000
#>
#> Grouped by:
#> carer's level of education: high level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#>                   term  mean   N std.dev std.error p.value
#> 1          independent 15.20   5   18.43      8.24    0.36
#> 2   slightly dependent 18.08  39   12.98      2.08    0.15
#> 3 moderately dependent 28.42  62   35.64      4.53    0.67
#> 4   severely dependent 63.38  50   62.69      8.87  <0.001
#> 5                Total 36.62 156   46.38      3.71
#>
#> Anova: R2=0.167; adj.R2=0.151; F=10.155; p=0.000

# weighting
efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
grpmean(efc, c12hour, e42dep, weights = weight)
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#>                   term  mean   N std.dev std.error p.value
#> 1          independent  9.97  70    8.62      1.06  <0.001
#> 2   slightly dependent 17.25 225   16.84      1.12  <0.001
#> 3 moderately dependent 36.25 304   43.38      2.48    0.76
#> 4   severely dependent 78.32 313   63.24      3.63  <0.001
#> 5                Total 43.98 901   52.55      1.75
#>
#> Anova: R2=0.250; adj.R2=0.248; F=99.732; p=0.000
diff --git a/docs/reference/hdi.html b/docs/reference/hdi.html
deleted file mode 100644
index 2fc57f7d..00000000
--- a/docs/reference/hdi.html
+++ /dev/null
@@ -1,508 +0,0 @@

Compute statistics for MCMC samples and Stan models — hdi • sjstats

hdi() computes the highest density interval for values from MCMC samples, while cred_int() computes the credible interval (or uncertainty interval). rope() calculates the proportion of a posterior distribution that lies within a region of practical equivalence. equi_test() combines these two functions and performs a "HDI+ROPE decision rule" (test for practical equivalence; Kruschke 2018) to check whether parameter values should be accepted or rejected against the background of a formulated null hypothesis. n_eff() calculates the number of effective samples (effective sample size). mcse() returns the Monte Carlo standard error. mediation() is a short summary for multivariate-response mediation models.

hdi(x, ...)

# S3 method for stanreg
hdi(x, prob = 0.9, trans = NULL, type = c("fixed", "random", "all"), ...)

# S3 method for brmsfit
hdi(x, prob = 0.9, trans = NULL, type = c("fixed", "random", "all"), ...)

cred_int(x, ...)

# S3 method for stanreg
cred_int(x, prob = 0.9, trans = NULL, type = c("fixed", "random", "all"), ...)

# S3 method for brmsfit
cred_int(x, prob = 0.9, trans = NULL, type = c("fixed", "random", "all"), ...)

equi_test(x, ...)

# S3 method for stanreg
equi_test(x, rope, eff_size, out = c("txt", "viewer", "browser", "plot"), ...)

# S3 method for brmsfit
equi_test(x, rope, eff_size, out = c("txt", "viewer", "browser", "plot"), ...)

mcse(x, ...)

# S3 method for brmsfit
mcse(x, type = c("fixed", "random", "all"), ...)

# S3 method for stanreg
mcse(x, type = c("fixed", "random", "all"), ...)

mediation(x, ...)

# S3 method for brmsfit
mediation(x, treatment, mediator, prob = 0.9, typical = "median", ...)

n_eff(x, ...)

# S3 method for stanreg
n_eff(x, type = c("fixed", "random", "all"), ...)

# S3 method for brmsfit
n_eff(x, type = c("fixed", "random", "all"), ...)

rope(x, rope, ...)

# S3 method for stanreg
rope(x, rope, trans = NULL, type = c("fixed", "random", "all"), ...)

# S3 method for brmsfit
rope(x, rope, trans = NULL, type = c("fixed", "random", "all"), ...)

    Arguments

x

A stanreg, stanfit, or brmsfit object. For hdi() and rope(), this may also be a data frame or a vector of values from a probability distribution (e.g., posterior probabilities from MCMC sampling).

...

Further arguments passed down to equi_test() when plot = TRUE:

• colors: Color of the density regions for the 95% distribution of the posterior samples.

• rope.color and rope.alpha: Fill color and alpha value of the ROPE (region of practical equivalence).

• x.title: Title for the x-axis of the plot.

• legend.title: Title for the plot legend.

• labels: Character vector of the same length as the terms plotted on the y-axis, to give the axis labels user-defined labels.

prob

Vector of scalars between 0 and 1, indicating the mass within the credible interval that is to be estimated. See hdi.

trans

Name of a function or character vector naming a function, used to apply transformations to the returned HDI values or, for rope(), to the values of the posterior distribution before calculating the rope based on the boundaries given in rope. Note that the values in rope are not transformed.

type

For mixed effects models, specify the type of effects that should be returned. type = "fixed" returns fixed effects only, type = "random" the random effects, and type = "all" returns both fixed and random effects.

rope

Vector of length two, indicating the lower and upper limit of a range around zero that indicates the region of practical equivalence. Values of the posterior distribution within this range are considered as being "practically equivalent to zero".

eff_size

A scalar indicating the effect size (the size of a negligible effect) that is used to calculate the limits of the ROPE for the test of practical equivalence. If not specified, an effect size of .1 is used for linear models, as suggested by Kruschke 2018 (see 'Details'). If rope is specified, this argument will be ignored.

out

Character vector, indicating whether the results should be printed to the console (out = "txt"), shown as an HTML table in the viewer pane (out = "viewer") or browser (out = "browser"), or whether the results should be plotted (out = "plot", only applies to certain functions). May be abbreviated.

treatment

Character, name of the treatment variable (or direct effect) in a (multivariate response) mediator model. If missing, mediation() tries to find the treatment variable automatically; however, this may fail.

mediator

Character, name of the mediator variable in a (multivariate response) mediator model. If missing, mediation() tries to find the mediator variable automatically; however, this may fail.

typical

The typical value that will represent the Bayesian point estimate. By default, the posterior median is returned. See typical_value for possible values for this argument.

    Value

For hdi(), if x is a vector, returns a vector of length two with the lower and upper limits of the HDI; if x is a stanreg, stanfit or brmsfit object, returns a tibble with lower and upper HDI limits for each predictor. To distinguish multiple HDI values, the column names for the HDI get a suffix when prob has more than one element.

For rope(), returns a tibble with two columns: the proportions of values from x that are within and outside the boundaries of rope.

equi_test() returns a tibble with a column decision that indicates whether or not a parameter value is accepted/rejected; inside.rope, which indicates the proportion of the whole posterior distribution that lies inside the ROPE (not just the proportion of values from the 95% HDI); and the lower and upper interval from the 95% HDI.

mcse() and n_eff() return a tibble with two columns: one with the term names and one with the related statistic or effective sample size, respectively.

mediation() returns a data frame with the direct, indirect, mediator and total effect of a multivariate-response mediation model, as well as the proportion mediated. The effect sizes are mean values of the posterior samples.

    Details

HDI

Computation of the HDI is based on the code from Kruschke 2015, pp. 727f. For default sampling in Stan (4,000 samples), the 90% intervals for the HDI are more stable than, for instance, 95% intervals. An effective sample size (see nsamples) of at least 10,000 is recommended if 95% intervals should be computed (see Kruschke 2015, p. 183ff).
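A compact sketch of that computation for a vector of posterior draws, following Kruschke's sorted-window approach (the function name is illustrative):

# highest density interval: the narrowest window containing `prob`
# of the sorted draws
hdi_vec <- function(x, prob = 0.9) {
  x <- sort(x)
  n <- length(x)
  m <- ceiling(prob * n)                # draws inside the interval
  widths <- x[m:n] - x[1:(n - m + 1)]   # width of each candidate window
  i <- which.min(widths)
  c(lower = x[i], upper = x[i + m - 1])
}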

    Credible Intervals

Credible intervals (or uncertainty intervals) are simply the quantiles for a given probability of the posterior draws. See posterior_interval for more details.

MCSE

The Monte Carlo standard error is another useful measure of the accuracy of the chains. It is defined as the standard deviation of the chains divided by the square root of their effective sample size (the formula for mcse() is from Kruschke 2015, p. 187). The MCSE “provides a quantitative suggestion of how big the estimation noise is”.
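In code, under that definition (a sketch; draws and ess are assumed inputs):

# Monte Carlo standard error: SD of the draws over sqrt(ESS)
mcse_sketch <- function(draws, ess) sd(draws) / sqrt(ess)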

Number of Effective Samples

The effective sample size divides the actual sample size by the amount of autocorrelation. It is a measure of “how much independent information there is in autocorrelated chains”, or: “What would be the sample size of a completely non-autocorrelated chain that yielded the same information?” (Kruschke 2015, p. 182-3). The ratio of the effective number of samples to the total number of samples (provided in tidy_stan()) ranges from 0 to 1 and should be close to 1; the closer this ratio comes to zero, the more inefficient the chains may be, though they are possibly still okay.

ROPE

There are no fixed rules for setting the limits of the region of practical equivalence; however, Kruschke (2018) describes some conventions for specifying the limits of the ROPE. One convention for linear models is to set the limits to about .1 SD of the dependent variable around zero (i.e. 0 +/- .1 * sd(y)), where .1 stands for half of a small effect size. Another, more conservative convention is to set the ROPE limits to a range of half a standard deviation around zero (see Norman et al. 2003), which indicates a clinically relevant effect (i.e. 0 +/- .25 * sd(y) or even 0 +/- .5 * sd(y)).

Test for Practical Equivalence

equi_test() computes the 95% HDI for x and checks whether a model predictor's HDI lies completely outside, completely inside or partially inside the ROPE. If the HDI is completely outside the ROPE, the "null hypothesis" for this parameter is "rejected". If the ROPE completely covers the HDI, i.e. all of the most credible values of a parameter are inside the region of practical equivalence, the null hypothesis is accepted. Otherwise, it is undecided whether to accept or reject the null hypothesis. In short, desirable results are low proportions inside the ROPE (the closer to zero the better), and the H0 should be rejected.

If neither the rope nor the eff_size argument is specified, the effect size (the size of a negligible effect) will be set to 0.1, and the ROPE is 0 +/- .1 * sd(y) for linear models. This is the suggested way to specify the ROPE limits according to Kruschke (2018). For models with a binary outcome, there is no direct way to specify the effect size that defines the ROPE limits. Two examples from Kruschke suggest that a negligible change is about .05 on the logit scale. In these cases, it is recommended to specify the rope argument; however, if it is not specified, the ROPE limits are calculated as suggested by Kruschke: the effect size is the probability of "success" for the outcome, divided by pi. For all other models, 0 +/- .1 * sd(intercept) is used to determine the ROPE limits.

If eff_size is specified but rope is not, the same formulas apply, except that .1 is replaced by the value in eff_size. If rope is specified, eff_size will be ignored. See also the section ROPE in 'Details'.

The advantage of Bayesian testing for practical equivalence over classical frequentist null hypothesis significance testing is that discrete decisions are avoided, “because such decisions encourage people to ignore the magnitude of the parameter value and its uncertainty” (Kruschke 2018).

    -
    Mediation Analysis

    mediation() returns a data frame with information on the - direct effect (mean value of posterior samples from treatment - of the outcome model), mediator effect (mean value of posterior - samples from mediator of the outcome model), indirect effect - (mean value of the multiplication of the posterior samples from - mediator of the outcome model and the posterior samples from - treatment of the mediation model) and the total effect (mean - value of sums of posterior samples used for the direct and indirect - effect). The proportion mediated is the indirect effect divided - by the total effect. -

For all values, the 90% HDIs are calculated by default. Use prob to calculate a different interval.

The arguments treatment and mediator do not necessarily need to be specified. If missing, mediation() tries to find the treatment and mediator variable automatically. If this does not work, specify these variables.


    Note


Since equi_test() computes the 95% HDI, a number of 10,000 samples produces more stable results (see Kruschke 2015, p. 183ff).


    References


Kruschke JK. Doing Bayesian Data Analysis: A Tutorial with R, JAGS, and Stan. 2nd edition. Academic Press, 2015

Kruschke JK. Rejecting or Accepting Parameter Values in Bayesian Estimation. Advances in Methods and Practices in Psychological Science. 2018; doi: 10.1177/2515245918771304

Norman GR, Sloan JA, Wyrwich KW. Interpretation of Changes in Health-related Quality of Life: The Remarkable Universality of Half a Standard Deviation. Medical Care. 2003;41: 582-592. doi: 10.1097/01.MLR.0000062554.74615.4C


    Examples

# NOT RUN {
if (require("rstanarm")) {
  fit <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1)
  hdi(fit)

  # return multiple intervals
  hdi(fit, prob = c(.5, .7, .9))

  # fit logistic regression model
  fit <- stan_glm(
    vs ~ wt + am,
    data = mtcars,
    family = binomial("logit"),
    chains = 1
  )
  # compute hdi, transform on "odds ratio scale"
  hdi(fit, trans = exp)

  # compute rope, on scale of linear predictor. finds proportion
  # of posterior distribution values between -1 and 1.
  rope(fit, rope = c(-1, 1))

  # compute rope, boundaries as "odds ratios". finds proportion of
  # posterior distribution values, which - after being exponentiated -
  # are between .8 and 1.25 (about -.22 and .22 on linear scale)
  rope(fit, rope = c(.8, 1.25), trans = exp)

  # Test for Practical Equivalence
  equi_test(fit)
  equi_test(fit, out = "plot")
}
# }

diff --git a/docs/reference/icc.html b/docs/reference/icc.html
deleted file mode 100644
index 54b71a56..00000000
--- a/docs/reference/icc.html
+++ /dev/null
@@ -1,478 +0,0 @@

Intraclass-Correlation Coefficient — icc • sjstats

This function calculates the intraclass-correlation (icc) - sometimes also called variance partition coefficient (vpc) - for random intercepts of mixed effects models. Currently, merMod, glmmTMB, stanreg and brmsfit objects are supported.

icc(x, ...)

# S3 method for merMod
icc(x, adjusted = FALSE, ...)

# S3 method for glmmTMB
icc(x, adjusted = FALSE, ...)

# S3 method for stanreg
icc(x, re.form = NULL, typical = "mean",
  prob = 0.89, ppd = FALSE, adjusted = FALSE, ...)

# S3 method for brmsfit
icc(x, re.form = NULL, typical = "mean",
  prob = 0.89, ppd = FALSE, ...)

    Arguments

x

    Fitted mixed effects model (of class merMod, glmmTMB, stanreg or brmsfit).

...

    Currently not used.

adjusted

    Logical, if TRUE, the adjusted (and conditional) ICC is calculated, which reflects the uncertainty of all random effects (see 'Details'). For Bayesian models, if ppd = TRUE, adjusted will be ignored.

re.form

    Formula containing group-level effects to be considered in the prediction. If NULL (default), include all group-level effects. Else, for instance for nested models, name a specific group-level effect to calculate the ICC for this group-level. Only applies if ppd = TRUE.

typical

    Character vector, naming the function that will be used as measure of central tendency for the ICC. The default is "mean". See typical_value for options.

prob

    Vector of scalars between 0 and 1, indicating the mass within the credible interval that is to be estimated. See hdi.

ppd

    Logical, if TRUE, variance decomposition is based on the posterior predictive distribution, which is the correct way for Bayesian non-Gaussian models. By default, ppd is set to TRUE for non-Gaussian models. If adjusted = TRUE and ppd = FALSE, variance decomposition is approximated following the suggestion by Nakagawa et al. 2017 (see 'Details'); however, this is currently only implemented for Gaussian models.

    Value


A numeric vector with all random intercept intraclass-correlation-coefficients. Furthermore, if adjusted = FALSE, between- and within-group variances as well as random-slope variance are returned as attributes.

For stanreg or brmsfit objects, the HDI for each statistic is also included as attribute.


    Details


    The "simple" ICC (with both ppd and adjusted set to - FALSE) is calculated by dividing the between-group-variance (random - intercept variance) by the total variance (i.e. sum of between-group-variance - and within-group (residual) variance).

The calculation of the ICC for generalized linear mixed models with binary outcome is based on Wu et al. (2012). For other distributions (negative binomial, poisson, ...), the calculation is based on Nakagawa et al. 2017; however, for non-Gaussian models it is recommended to compute the adjusted ICC (with adjusted = TRUE, see below).

ICC for unconditional and conditional models

Usually, the ICC is calculated for the null model ("unconditional model"). However, according to Raudenbush and Bryk (2002) or Rabe-Hesketh and Skrondal (2012) it is also feasible to compute the ICC for full models with covariates ("conditional models") and compare how much a level-2 variable explains the portion of variation in the grouping structure (random intercept).

ICC for random-slope models

Caution: For models with random slopes and random intercepts, the ICC would differ at each unit of the predictors. Hence, the ICC for these kinds of models cannot be understood simply as proportion of variance (see Goldstein et al. 2010). For convenience reasons, as the icc() function also extracts the different random effects variances, the ICC for random-slope-intercept-models is reported nonetheless, but it is usually no meaningful summary of the proportion of variances.

To get a meaningful ICC also for models with random slopes, use adjusted = TRUE. The adjusted ICC uses the mean random effect variance, which is based on the random effect variances for each value of the random slope (see Johnson et al. 2014).

ICC for models with multiple or nested random effects

Caution: By default, for three-level-models, depending on the nested structure of the model, or for models with multiple random effects, icc() only reports the proportion of variance explained for each grouping level. Use adjusted = TRUE to calculate the adjusted and conditional ICC, which condition on all random effects.

Adjusted and conditional ICC

If adjusted = TRUE, an adjusted and a conditional ICC are calculated, which take all sources of uncertainty (of all random effects) into account to report an "adjusted" ICC, as well as the conditional ICC. The latter also takes the fixed effects variances into account (see Nakagawa et al. 2017). If random effects are not nested and not cross-classified, the adjusted (adjusted = TRUE) and unadjusted (adjusted = FALSE) ICC are identical. adjusted = TRUE returns a meaningful ICC for models with random slopes. Furthermore, the adjusted ICC is recommended for models with other distributions than Gaussian.

ICC for specific group-levels

The proportion of variance for specific levels related to each other (e.g., similarity of level-1-units within level-2-units, or level-2-units within level-3-units) must be computed manually. Use get_re_var to get the between-group-variances and residual variance of the model, and calculate the ICC for the various level correlations.

For example, for the ICC between level 1 and 2:
sum(get_re_var(fit)) / (sum(get_re_var(fit)) + get_re_var(fit, "sigma_2"))

or for the ICC between level 2 and 3:
get_re_var(fit)[2] / sum(get_re_var(fit))

ICC for Bayesian models

If ppd = TRUE, icc() calculates a variance decomposition based on the posterior predictive distribution. In this case, first, draws from the posterior predictive distribution not conditioned on group-level terms (posterior_predict(..., re.form = NA)) are taken, as well as draws from this distribution conditioned on all random effects (by default, unless specified else in re.form). Second, the variances for each of these draws are calculated. The "ICC" is then the ratio between these two variances. This is the recommended way to analyse random-effect-variances for non-Gaussian models. It is then possible to compare variances across models, also by specifying different group-level terms via the re.form-argument.

Sometimes, when the variance of the posterior predictive distribution is very large, the variance ratio in the output makes no sense, e.g. because it is negative. In such cases, it might help to use a more robust measure to calculate the central tendency of the variances. For example, use typical = "median".


    Note


    Some notes on why the ICC is useful, based on Grace-Martin:

• It can help you determine whether or not a linear mixed model is even necessary. If you find that the correlation is zero, that means the observations within clusters are no more similar than observations from different clusters. Go ahead and use a simpler analysis technique.

• It can be theoretically meaningful to understand how much of the overall variation in the response is explained simply by clustering. For example, in a repeated measures psychological study you can tell to what extent mood is a trait (varies among people, but not within a person on different occasions) or state (varies little on average among people, but varies a lot across occasions).

• It can also be meaningful to see how the ICC (as well as the between and within cluster variances) changes as variables are added to the model.

In short, the ICC can be interpreted as “the proportion of the variance explained by the grouping structure in the population” (Hox 2002: 15).

The random effect variances indicate the between- and within-group variances as well as random-slope variance and random-slope-intercept correlation. The components are denoted as follows:

• Within-group (residual) variance: sigma_2

• Between-group-variance: tau.00 (variation between individual intercepts and average intercept)

• Random-slope-variance: tau.11 (variation between individual slopes and average slope)

• Random-Intercept-Slope-covariance: tau.01

• Random-Intercept-Slope-correlation: rho.01

    References

• Aguinis H, Gottfredson RK, Culpepper SA. 2013. Best-Practice Recommendations for Estimating Cross-Level Interaction Effects Using Multilevel Modeling. Journal of Management 39(6): 1490-1528 (doi: 10.1177/0149206313478188)

• Goldstein H, Browne W, Rasbash J. 2010. Partitioning Variation in Multilevel Models. Understanding Statistics, 1:4, 223-231 (doi: 10.1207/S15328031US0104_02)

• Grace-Martin K. The Intraclass Correlation Coefficient in Mixed Models, web

• Hox J. 2002. Multilevel analysis: techniques and applications. Mahwah, NJ: Erlbaum

• Johnson PC, O'Hara RB. 2014. Extension of Nakagawa &amp; Schielzeth's R2GLMM to random slopes models. Methods Ecol Evol, 5: 944-946 (doi: 10.1111/2041-210X.12225)

• Nakagawa S, Johnson P, Schielzeth H. 2017. The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded. J. R. Soc. Interface 14 (doi: 10.1098/rsif.2017.0213)

• Rabe-Hesketh S, Skrondal A. 2012. Multilevel and longitudinal modeling using Stata. 3rd ed. College Station, Tex: Stata Press Publication

• Raudenbush SW, Bryk AS. 2002. Hierarchical linear models: applications and data analysis methods. 2nd ed. Thousand Oaks: Sage Publications

• Wu S, Crespi CM, Wong WK. 2012. Comparison of methods for estimating the intraclass correlation coefficient for binary responses in cancer prevention cluster randomized trials. Contemporary Clinical Trials 33: 869-880 (doi: 10.1016/j.cct.2012.05.004)


    Examples

library(lme4)
fit0 <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy)
icc(fit0)
#>
#> Intraclass Correlation Coefficient for Linear mixed model
#>
#> Family : gaussian (identity)
#> Formula: Reaction ~ 1 + (1 | Subject)
#>
#> ICC (Subject): 0.3949
#>

# note: ICC for random-slope-intercept model usually not
# meaningful, unless you use "adjusted = TRUE" - see 'Note'.
fit1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
icc(fit1)
#> Caution! ICC for random-slope-intercept models usually not meaningful. Use `adjusted = TRUE` to use the mean random effect variance to calculate the ICC. See 'Note' in `?icc`.
#>
#> Intraclass Correlation Coefficient for Linear mixed model
#>
#> Family : gaussian (identity)
#> Formula: Reaction ~ Days + (Days | Subject)
#>
#> ICC (Subject): 0.4830
#>

icc(fit1, adjusted = TRUE)
#>
#> Intraclass Correlation Coefficient for Generalized Linear Mixed Model
#>
#> Family : gaussian (identity)
#> Formula: ~Days | Subject Reaction ~ Days NA
#>
#> Adjusted ICC: 0.7217
#> Conditional ICC: 0.5206
#>

sleepstudy$mygrp <- sample(1:45, size = 180, replace = TRUE)
fit2 <- lmer(Reaction ~ Days + (1 | mygrp) + (1 | Subject), sleepstudy)

icc1 <- icc(fit1)
#> Caution! ICC for random-slope-intercept models usually not meaningful. Use `adjusted = TRUE` to use the mean random effect variance to calculate the ICC. See 'Note' in `?icc`.
icc2 <- icc(fit2)

print(icc1, comp = "var")
#>
#> Intraclass Correlation Coefficient for Linear mixed model
#>
#> Family : gaussian (identity)
#> Formula: Reaction ~ Days + (Days | Subject)
#>
#> Within-group-variance: 654.941
#> Between-group-variance: 611.898 (Subject)
#> Random-slope-variance: 35.081 (Subject.Days)
#> Slope-Intercept-covariance: 9.614 (Subject.(Intercept))
#> Slope-Intercept-correlation: 0.066 (Subject)
#>

print(icc2, comp = "var")
#>
#> Intraclass Correlation Coefficient for Linear mixed model
#>
#> Family : gaussian (identity)
#> Formula: Reaction ~ Days + (1 | mygrp) + (1 | Subject)
#>
#> Within-group-variance: 946.474
#> Between-group-variance: 14.082 (mygrp)
#> Between-group-variance: 1381.597 (Subject)
#>

# NOT RUN {
# compute ICC for Bayesian mixed model, with an ICC for each
# sample of the posterior. The print()-method then shows
# the median ICC as well as 89% HDI for the ICC.
# Change interval with print-method:
# print(icc(m, posterior = TRUE), prob = .5)

if (requireNamespace("brms", quietly = TRUE)) {
  library(dplyr)
  sleepstudy$mygrp <- sample(1:5, size = 180, replace = TRUE)
  sleepstudy <- sleepstudy %>%
    group_by(mygrp) %>%
    mutate(mysubgrp = sample(1:30, size = n(), replace = TRUE))
  m <- brms::brm(
    Reaction ~ Days + (1 | mygrp / mysubgrp) + (1 | Subject),
    data = sleepstudy
  )

  # by default, 89% interval
  icc(m)

  # show 50% interval
  icc(m, prob = .5)

  # variances based on posterior predictive distribution
  icc(m, ppd = TRUE)
}
# }
diff --git a/docs/reference/index.html b/docs/reference/index.html
deleted file mode 100644
index e56b6186..00000000
--- a/docs/reference/index.html
+++ /dev/null
@@ -1,467 +0,0 @@

Function reference • sjstats

Regression Models

svyglm.nb()
    Survey-weighted negative binomial generalised linear model

svyglm.zip()
    Survey-weighted zero-inflated Poisson model

Bootstrapping

bootstrap()
    Generate nonparametric bootstrap replications

boot_ci() boot_se() boot_p() boot_est()
    Standard error and confidence intervals for bootstrapped estimates

Effect Size Statistics for Anova

anova_stats() epsilon_sq() eta_sq() omega_sq()
    Effect size statistics for anova

Statistics for Crosstables

cramer() phi() crosstable_statistics() xtab_statistics()
    Measures of association for contingency tables

table_values()
    Expected and relative table values

Weighted Statistics

weight() weight2()
    Weight a variable

survey_median() weighted_chisqtest() weighted_correlation() weighted_mean() weighted_median() weighted_mannwhitney() weighted_sd() wtd_sd() weighted_se() weighted_ttest()
    Weighted statistics for tests and variables

Other (Summary) Statistics

gmd()
    Gini's Mean Difference

mwu() mannwhitney()
    Mann-Whitney-U-Test

mean_n()
    Row means with min amount of valid values

means_by_group() grpmean()
    Summary of mean values by group

var_pop() sd_pop()
    Calculate population variance and standard deviation

Tools for Regression Models

chisq_gof()
    Compute model quality

cv()
    Compute model quality

cv_error() cv_compare()
    Test and training error from model cross-validation

Tools for Mixed Models

design_effect()
    Design effects for two-level mixed models

samplesize_mixed() smpsize_lmm()
    Sample size for linear mixed models

se_ybar()
    Standard error of sample mean for mixed models

Tools for Bayesian Models

auto_prior()
    Create default priors for brms-models

mediation()
    Summary of Bayesian multivariate-response mediation-models

Find Parameters of Distribution

find_beta() find_beta2() find_cauchy() find_normal()
    Determining distribution parameters

Miscellaneous

inequ_trend()
    Compute trends in status inequalities

is_prime()
    Find prime numbers

odds_to_rr() or_to_rr()
    Get relative risks estimates from logistic regressions or odds ratio values

prop() props()
    Proportions of values in a vector
diff --git a/docs/reference/inequ_trend-1.png b/docs/reference/inequ_trend-1.png
deleted file mode 100644
index cd9014d8..00000000
Binary files a/docs/reference/inequ_trend-1.png and /dev/null differ
diff --git a/docs/reference/inequ_trend-2.png b/docs/reference/inequ_trend-2.png
deleted file mode 100644
index 2802b9cc..00000000
Binary files a/docs/reference/inequ_trend-2.png and /dev/null differ
diff --git a/docs/reference/inequ_trend.html b/docs/reference/inequ_trend.html
deleted file mode 100644
index 6296ae53..00000000
--- a/docs/reference/inequ_trend.html
+++ /dev/null
@@ -1,275 +0,0 @@

Compute trends in status inequalities — inequ_trend • sjstats

This method computes the proportional change of absolute (rate differences) and relative (rate ratios) inequalities of prevalence rates for two different status groups, as proposed by Mackenbach et al. (2015).

inequ_trend(data, prev.low, prev.hi)

    Arguments

data

    A data frame that contains the variables with prevalence rates for both low and high status groups (see 'Examples').

prev.low

    The name of the variable with the prevalence rates for the low status groups.

prev.hi

    The name of the variable with the prevalence rates for the high status groups.

    Value


A data frame with the prevalence rates as well as the values for the proportional change in absolute (rd) and relative (rr) inequalities.


    Details


Given the time trend of prevalence rates of an outcome for two status groups (e.g. the mortality rates for people with lower and higher socioeconomic status over 40 years), this function computes the proportional change of absolute and relative inequalities, expressed in changes in rate differences and rate ratios. The function implements the algorithm proposed by Mackenbach et al. 2015.
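A sketch of the two quantities being tracked (using the prevalence vectors lo and hi for the low and high status group, as in the example below):

rr <- lo / hi   # relative inequality: rate ratio
rd <- lo - hi   # absolute inequality: rate difference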


    References


    Mackenbach JP, Martikainen P, Menvielle G, de Gelder R. 2015. The Arithmetic of Reducing Relative and Absolute Inequalities in Health: A Theoretical Analysis Illustrated with European Mortality Data. Journal of Epidemiology and Community Health 70(7): 730-36. doi: 10.1136/jech-2015-207018


    Examples

# This example reproduces Fig. 1 of Mackenbach et al. 2015, p.5

# 40 simulated time points, with an initial rate ratio of 2 and
# a rate difference of 100 (i.e. low status group starts with a
# prevalence rate of 200, the high status group with 100)

# annual decline of prevalence is 1% for the low, and 3% for the
# high status group

n <- 40
time <- seq(1, n, by = 1)
lo <- rep(200, times = n)
for (i in 2:n) lo[i] <- lo[i - 1] * .99

hi <- rep(100, times = n)
for (i in 2:n) hi[i] <- hi[i - 1] * .97

prev.data <- data.frame(lo, hi)

# print values
inequ_trend(prev.data, lo, hi)
#> $data
#> lo hi rr rd
#> 1 200.0000 100.00000 2.000000 100.0000
#> 2 198.0000 97.00000 2.041237 101.0000
#> 3 196.0200 94.09000 2.083324 101.9300
#> 4 194.0598 91.26730 2.126280 102.7925
#> 5 192.1192 88.52928 2.170120 103.5899
#> 6 190.1980 85.87340 2.214865 104.3246
#> 7 188.2960 83.29720 2.260533 104.9988
#> 8 186.4131 80.79828 2.307141 105.6148
#> 9 184.5489 78.37434 2.354711 106.1746
#> 10 182.7034 76.02311 2.403262 106.6803
#> 11 180.8764 73.74241 2.452814 107.1340
#> 12 179.0677 71.53014 2.503387 107.5375
#> 13 177.2770 69.38424 2.555004 107.8927
#> 14 175.5042 67.30271 2.607684 108.2015
#> 15 173.7492 65.28363 2.661451 108.4655
#> 16 172.0117 63.32512 2.716326 108.6866
#> 17 170.2916 61.42537 2.772333 108.8662
#> 18 168.5886 59.58260 2.829494 109.0060
#> 19 166.9028 57.79513 2.887834 109.1076
#> 20 165.2337 56.06127 2.947377 109.1725
#> 21 163.5814 54.37943 3.008148 109.2020
#> 22 161.9456 52.74805 3.070172 109.1975
#> 23 160.3261 51.16561 3.133474 109.1605
#> 24 158.7229 49.63064 3.198082 109.0922
#> 25 157.1356 48.14172 3.264022 108.9939
#> 26 155.5643 46.69747 3.331321 108.8668
#> 27 154.0086 45.29655 3.400008 108.7121
#> 28 152.4685 43.93765 3.470111 108.5309
#> 29 150.9439 42.61952 3.541660 108.3243
#> 30 149.4344 41.34093 3.614684 108.0935
#> 31 147.9401 40.10071 3.689214 107.8394
#> 32 146.4607 38.89769 3.765280 107.5630
#> 33 144.9961 37.73076 3.842915 107.2653
#> 34 143.5461 36.59883 3.922150 106.9473
#> 35 142.1106 35.50087 4.003019 106.6098
#> 36 140.6895 34.43584 4.085555 106.2537
#> 37 139.2826 33.40277 4.169794 105.8799
#> 38 137.8898 32.40068 4.255769 105.4891
#> 39 136.5109 31.42866 4.343517 105.0823
#> 40 135.1458 30.48580 4.433074 104.6600
#>
#> attr(,"class")
#> [1] "sj_inequ_trend"

# plot trends - here we see that the relative inequalities
# are increasing over time, while the absolute inequalities
# are first increasing as well, but later are decreasing
# (while rel. inequ. are still increasing)
plot(inequ_trend(prev.data, lo, hi))
diff --git a/docs/reference/is_prime.html b/docs/reference/is_prime.html
deleted file mode 100644
index 373dba74..00000000
--- a/docs/reference/is_prime.html
+++ /dev/null
@@ -1,178 +0,0 @@

Find prime numbers — is_prime • sjstats

This function checks whether a number is, or numbers in a vector are, prime numbers.

is_prime(x)

    Arguments

x

    An integer, or a vector of integers.

    Value


    TRUE for each prime number in x, FALSE otherwise.


    Examples

is_prime(89)
#> [1] TRUE
is_prime(15)
#> [1] FALSE
is_prime(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
#> [1] TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE FALSE FALSE
diff --git a/docs/reference/mean_n.html b/docs/reference/mean_n.html
deleted file mode 100644
index 9cb392d1..00000000
--- a/docs/reference/mean_n.html
+++ /dev/null
@@ -1,223 +0,0 @@

Row means with min amount of valid values — mean_n • sjstats

This function is similar to the SPSS MEAN.n function and computes row means from a data.frame or matrix if at least n values of a row are valid (and not NA).

mean_n(dat, n, digits = 2)

    Arguments

dat

    A data frame with at least two columns, where row means are applied.

n

    May either be

    • a numeric value that indicates the amount of valid values per row to calculate the row mean;

    • or a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean (see 'Details').

    If a row's sum of valid values is less than n, NA will be returned as row mean value.

digits

    Numeric value indicating the number of decimal places to be used for rounding mean value. Negative values are allowed (see 'Details').

    Value


A vector with row mean values of dat for those rows with at least n valid values. Else, NA is returned.


    Details


Rounding to a negative number of digits means rounding to a power of ten, so for example mean_n(df, 3, digits = -2) rounds to the nearest hundred.

n must be a numeric value from 0 to ncol(dat). If a row in dat has at least n non-missing values, the row mean is returned. If n is a non-integer value from 0 to 1, n is considered to indicate the proportion of necessary non-missing values per row. E.g., if n = .75, a row must have at least ncol(dat) * n non-missing values for the row mean to be calculated. See 'Examples'.


    References


    r4stats.com


    Examples

dat <- data.frame(c1 = c(1,2,NA,4),
                  c2 = c(NA,2,NA,5),
                  c3 = c(NA,4,NA,NA),
                  c4 = c(2,3,7,8))

# needs at least 4 non-missing values per row
mean_n(dat, 4) # 1 valid return value
#> [1] NA 2.75 NA NA

# needs at least 3 non-missing values per row
mean_n(dat, 3) # 2 valid return values
#> [1] NA 2.75 NA 5.67

# needs at least 2 non-missing values per row
mean_n(dat, 2)
#> [1] 1.50 2.75 NA 5.67

# needs at least 1 non-missing value per row
mean_n(dat, 1) # all means are shown
#> [1] 1.50 2.75 7.00 5.67

# needs at least 50% of non-missing values per row
mean_n(dat, .5) # 3 valid return values
#> [1] 1.50 2.75 NA 5.67

# needs at least 75% of non-missing values per row
mean_n(dat, .75) # 2 valid return values
#> [1] NA 2.75 NA 5.67
diff --git a/docs/reference/means_by_group.html b/docs/reference/means_by_group.html
deleted file mode 100644
index f82f20de..00000000
--- a/docs/reference/means_by_group.html
+++ /dev/null
@@ -1,340 +0,0 @@

Summary of mean values by group — means_by_group • sjstats

Computes mean, sd and se for each sub-group (indicated by grp) of dv.

means_by_group(
  x,
  dv,
  grp,
  weights = NULL,
  digits = 2,
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

grpmean(
  x,
  dv,
  grp,
  weights = NULL,
  digits = 2,
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

    Arguments

x

    A (grouped) data frame.

dv

    Name of the dependent variable, for which the mean value, grouped by grp, is computed.

grp

    Factor with the cross-classifying variable, where dv is grouped into the categories represented by grp. Numeric vectors are coerced to factors.

weights

    Name of variable in x that indicates the vector of weights that will be applied to weight all observations. Default is NULL, so no weights are used.

digits

    Numeric, amount of digits after decimal point when rounding estimates and values.

out

    Character vector, indicating whether the results should be printed to console (out = "txt"), as HTML-table in the viewer-pane (out = "viewer") or browser (out = "browser"), or if the results should be plotted (out = "plot", only applies to certain functions). May be abbreviated.

encoding

    Character vector, indicating the charset encoding used for variable and value labels. Default is "UTF-8". Only used when out is not "txt".

file

    Destination file, if the output should be saved as file. Only used when out is not "txt".

    Value


For non-grouped data frames, means_by_group() returns a data frame with the following columns: term, mean, N, std.dev, std.error and p.value. For grouped data frames, it returns a list of such data frames.


    Details


This function performs a One-Way-Anova with dv as dependent and grp as independent variable, by calling lm(count ~ as.factor(grp)). Then contrast is called to get p-values for each sub-group. P-values indicate whether each group-mean is significantly different from the total mean.
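The overall ANOVA statistics in the output can be reproduced with a plain linear model; a sketch using the iris example below:

fit <- lm(Sepal.Width ~ as.factor(Species), data = iris)
summary(fit)$r.squared       # 0.401, the "Anova: R2" in the output
summary(fit)$fstatistic[1]   # 49.16, the F value in the output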


    Examples

data(efc)
means_by_group(efc, c12hour, e42dep)
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------------------
#> independent | 9.91 | 66 | 8.01 | 0.99 | <0.001
#> slightly dependent | 17.54 | 225 | 17.74 | 1.18 | <0.001
#> moderately dependent | 34.52 | 306 | 41.54 | 2.37 | 0.98
#> severely dependent | 75.90 | 304 | 61.72 | 3.54 | <0.001
#> Total | 42.44 | 901 | 50.82 | 1.69 |
#>
#> Anova: R2=0.245; adj.R2=0.242; F=96.908; p=0.000

data(iris)
means_by_group(iris, Sepal.Width, Species)
#>
#> # Grouped Means for Sepal.Width by Species
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------
#> setosa | 3.43 | 50 | 0.38 | 0.05 | <0.001
#> versicolor | 2.77 | 50 | 0.31 | 0.04 | <0.001
#> virginica | 2.97 | 50 | 0.32 | 0.05 | 0.04
#> Total | 3.06 | 150 | 0.44 | 0.04 |
#>
#> Anova: R2=0.401; adj.R2=0.393; F=49.160; p=0.000

# also works for grouped data frames
if (require("dplyr")) {
  efc %>%
    group_by(c172code) %>%
    means_by_group(c12hour, e42dep)
}
#>
#> Grouped by:
#> carer's level of education: low level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------------------
#> independent | 16.33 | 12 | 10.74 | 3.10 | 0.02
#> slightly dependent | 15.38 | 42 | 9.55 | 1.47 | <0.001
#> moderately dependent | 42.05 | 61 | 46.53 | 5.96 | 0.70
#> severely dependent | 85.52 | 65 | 56.42 | 7.00 | <0.001
#> Total | 49.81 | 180 | 52.24 | 3.89 |
#>
#> Anova: R2=0.307; adj.R2=0.295; F=25.955; p=0.000
#>
#>
#> Grouped by:
#> carer's level of education: intermediate level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------------------
#> independent | 7.96 | 45 | 3.91 | 0.58 | <0.001
#> slightly dependent | 17.12 | 135 | 16.52 | 1.42 | <0.001
#> moderately dependent | 33.55 | 163 | 41.05 | 3.22 | 0.75
#> severely dependent | 79.71 | 163 | 63.13 | 4.94 | <0.001
#> Total | 41.76 | 506 | 51.42 | 2.29 |
#>
#> Anova: R2=0.284; adj.R2=0.280; F=66.374; p=0.000
#>
#>
#> Grouped by:
#> carer's level of education: high level of education
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------------------
#> independent | 15.20 | 5 | 18.43 | 8.24 | 0.36
#> slightly dependent | 18.08 | 39 | 12.98 | 2.08 | 0.15
#> moderately dependent | 28.42 | 62 | 35.64 | 4.53 | 0.67
#> severely dependent | 63.38 | 50 | 62.69 | 8.87 | <0.001
#> Total | 36.62 | 156 | 46.38 | 3.71 |
#>
#> Anova: R2=0.167; adj.R2=0.151; F=10.155; p=0.000
#>
#>

# weighting
efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
means_by_group(efc, c12hour, e42dep, weights = weight)
#>
#> # Grouped Means for average number of hours of care per week by elder's dependency
#>
#> Category | Mean | N | SD | SE | p
#> ----------------------------------------------------------
#> independent | 8.86 | 72 | 6.09 | 0.75 | <0.001
#> slightly dependent | 17.28 | 225 | 15.09 | 1.01 | <0.001
#> moderately dependent | 35.26 | 296 | 41.66 | 2.38 | 0.75
#> severely dependent | 76.53 | 297 | 61.73 | 3.54 | <0.001
#> Total | 42.35 | 901 | 50.62 | 1.69 |
#>
#> Anova: R2=0.256; adj.R2=0.254; F=103.078; p=0.000
diff --git a/docs/reference/mediation.html b/docs/reference/mediation.html
deleted file mode 100644
index 1e0d1ccc..00000000
--- a/docs/reference/mediation.html
+++ /dev/null
@@ -1,228 +0,0 @@

Summary of Bayesian multivariate-response mediation-models — mediation • sjstats

mediation() is a short summary for multivariate-response mediation-models.

mediation(x, ...)

# S3 method for brmsfit
mediation(x, treatment, mediator, prob = 0.9, typical = "median", ...)

    Arguments

x

    A stanreg, stanfit, or brmsfit object.

...

    Not used.

treatment

    Character, name of the treatment variable (or direct effect) in a (multivariate response) mediator-model. If missing, mediation() tries to find the treatment variable automatically; however, this may fail.

mediator

    Character, name of the mediator variable in a (multivariate response) mediator-model. If missing, mediation() tries to find the mediator variable automatically; however, this may fail.

prob

    Vector of scalars between 0 and 1, indicating the mass within the credible interval that is to be estimated.

typical

    The typical value that will represent the Bayesian point estimate. By default, the posterior median is returned. See typical_value for possible values for this argument.

    Value


A data frame with direct, indirect, mediator and total effect of a multivariate-response mediation-model, as well as the proportion mediated. The effect sizes are mean values of the posterior samples.


    Details


mediation() returns a data frame with information on the direct effect (mean value of posterior samples from treatment of the outcome model), mediator effect (mean value of posterior samples from mediator of the outcome model), indirect effect (mean value of the multiplication of the posterior samples from mediator of the outcome model and the posterior samples from treatment of the mediation model) and the total effect (mean value of sums of posterior samples used for the direct and indirect effect). The proportion mediated is the indirect effect divided by the total effect.

For all values, the 90% HDIs are calculated by default. Use prob to calculate a different interval.

The arguments treatment and mediator do not necessarily need to be specified. If missing, mediation() tries to find the treatment and mediator variable automatically. If this does not work, specify these variables.
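A sketch of the effect arithmetic on hypothetical vectors of posterior draws (the b_* objects below are stand-ins, not actual sjstats objects):

# b_treat_outcome:  posterior samples of treatment, outcome model
# b_med_outcome:    posterior samples of mediator, outcome model
# b_treat_mediator: posterior samples of treatment, mediation model
direct   <- mean(b_treat_outcome)
indirect <- mean(b_med_outcome * b_treat_mediator)
total    <- mean(b_treat_outcome + b_med_outcome * b_treat_mediator)
prop.mediated <- indirect / total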

diff --git a/docs/reference/mwu.html b/docs/reference/mwu.html
deleted file mode 100644
index ae5377aa..00000000
--- a/docs/reference/mwu.html
+++ /dev/null
@@ -1,299 +0,0 @@

Mann-Whitney-U-Test — mwu • sjstats

This function performs a Mann-Whitney-U-Test (or Wilcoxon rank sum test, see wilcox.test and wilcox_test) for x, for each group indicated by grp. If grp has more than two categories, a comparison between each combination of two groups is performed.

The function reports U, p and Z-values as well as effect size r and group-rank-means.

mwu(
  data,
  x,
  grp,
  distribution = "asymptotic",
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

mannwhitney(
  data,
  x,
  grp,
  distribution = "asymptotic",
  out = c("txt", "viewer", "browser"),
  encoding = "UTF-8",
  file = NULL
)

    Arguments

data

    A data frame.

x

    Bare (unquoted) variable name, or a character vector with the variable name.

grp

    Bare (unquoted) name of the cross-classifying variable, where x is grouped into the categories represented by grp, or a character vector with the variable name.

distribution

    Indicates how the null distribution of the test statistic should be computed. May be one of "exact", "approximate" or "asymptotic" (default). See wilcox_test for details.

out

    Character vector, indicating whether the results should be printed to console (out = "txt"), as HTML-table in the viewer-pane (out = "viewer") or browser (out = "browser"), or if the results should be plotted (out = "plot", only applies to certain functions). May be abbreviated.

encoding

    Character vector, indicating the charset encoding used for variable and value labels. Default is "UTF-8". Only used when out is not "txt".

file

    Destination file, if the output should be saved as file. Only used when out is not "txt".

    Value


(Invisibly) returns a data frame with U, p and Z-values for each group-comparison as well as effect-size r; additionally, group-labels and groups' n's are also included.


    Note


This function calls wilcox_test with a formula. If grp has more than two groups, a Kruskal-Wallis-Test (see kruskal.test) is additionally performed.

Interpretation of effect sizes, as a rule-of-thumb:

• small effect >= 0.1

• medium effect >= 0.3

• large effect >= 0.5
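The reported effect size corresponds to r = |Z| / sqrt(n1 + n2); checking this against the first group comparison in the example below:

abs(-3.020) / sqrt(65 + 224)   # = 0.178, the effect-size r in the output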

    Examples

data(efc)
# Mann-Whitney-U-Tests for elder's age by elder's dependency.
mwu(efc, e17age, e42dep)
#>
#> # Mann-Whitney-U-Test
#>
#> Groups 1 = independent (n = 65) | 2 = slightly dependent (n = 224):
#> U = 7635.000, W = 5490.000, p = 0.003, Z = -3.020
#> effect-size r = 0.178
#> rank-mean(1) = 117.46
#> rank-mean(2) = 152.99
#>
#> Groups 1 = independent (n = 65) | 3 = moderately dependent (n = 304):
#> U = 8692.000, W = 6547.000, p < 0.001, Z = -4.273
#> effect-size r = 0.222
#> rank-mean(1) = 133.72
#> rank-mean(3) = 195.96
#>
#> Groups 1 = independent (n = 65) | 4 = severely dependent (n = 297):
#> U = 7905.500, W = 5760.500, p < 0.001, Z = -5.096
#> effect-size r = 0.268
#> rank-mean(1) = 121.62
#> rank-mean(4) = 194.60
#>
#> Groups 2 = slightly dependent (n = 224) | 3 = moderately dependent (n = 304):
#> U = 54664.500, W = 29464.500, p = 0.008, Z = -2.647
#> effect-size r = 0.115
#> rank-mean(2) = 244.04
#> rank-mean(3) = 279.58
#>
#> Groups 2 = slightly dependent (n = 224) | 4 = severely dependent (n = 297):
#> U = 51007.500, W = 25807.500, p < 0.001, Z = -4.386
#> effect-size r = 0.192
#> rank-mean(2) = 227.71
#> rank-mean(4) = 286.11
#>
#> Groups 3 = moderately dependent (n = 304) | 4 = severely dependent (n = 297):
#> U = 87819.500, W = 41459.500, p = 0.083, Z = -1.732
#> effect-size r = 0.071
#> rank-mean(3) = 288.88
#> rank-mean(4) = 313.41
#>
#> # Kruskal-Wallis-Test
#>
#> chi-squared = 38.476
#> df = 3
#> p < 0.001
diff --git a/docs/reference/nhanes_sample.html b/docs/reference/nhanes_sample.html
deleted file mode 100644
index f389f06b..00000000
--- a/docs/reference/nhanes_sample.html
+++ /dev/null
@@ -1,168 +0,0 @@

Sample dataset from the National Health and Nutrition Examination Survey — nhanes_sample • sjstats

Selected variables from the National Health and Nutrition Examination Survey that are used in the example from Lumley (2010), Appendix E. See svyglm.nb for examples.


    References


    Lumley T (2010). Complex Surveys: a guide to analysis using R. Wiley

diff --git a/docs/reference/odds_to_rr.html b/docs/reference/odds_to_rr.html
deleted file mode 100644
index 01688bc8..00000000
--- a/docs/reference/odds_to_rr.html
+++ /dev/null
@@ -1,256 +0,0 @@

Get relative risks estimates from logistic regressions or odds ratio values — odds_to_rr • sjstats

odds_to_rr() converts odds ratios from a logistic regression model (including mixed models) into relative risks; or_to_rr() converts a single odds ratio estimate into a relative risk estimate.

odds_to_rr(fit)

or_to_rr(or, p0)

    Arguments

fit

    A fitted binomial generalized linear (mixed) model with logit-link function (logistic (multilevel) regression model).

or

    Numeric, an odds ratio estimate.

p0

    Numeric, the risk of having a positive outcome in the control or unexposed group (reference group), i.e. the number of outcomes or "successes" in the control group divided by the total number of observations in the control group.

    Value


A data frame with relative risks and lower/upper confidence interval for the relative risk estimates; for or_to_rr(), the risk ratio estimate.


    Details


This function extracts the odds ratios (exponentiated model coefficients) from logistic regressions (fitted with glm or glmer) and their related confidence intervals, and transforms these values into relative risks (and their related confidence intervals).

The formula for transformation is based on Zhang and Yu (1998), Wang (2013) and Grant (2014): RR <- OR / (1 - P0 + (P0 * OR)), where OR is the odds ratio and P0 indicates the proportion of the incidence in the outcome variable for the control group (reference group).
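Plugging in the values that are replicated at the end of the example below (OR ~ 1.914, p0 ~ .44) gives the same risk ratio:

or <- 1.914
p0 <- 0.1055 / (.1324 + .1055)
or / (1 - p0 + p0 * or)   # ~ 1.362, as returned by or_to_rr()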


    References


Grant RL. 2014. Converting an odds ratio to a range of plausible relative risks for better communication of research findings. BMJ 348:f7450. doi: 10.1136/bmj.f7450

Wang Z. 2013. Converting Odds Ratio to Relative Risk in Cohort Studies with Partial Data Information. J Stat Soft 2013;55. doi: 10.18637/jss.v055.i05

Zhang J, Yu KF. 1998. What's the Relative Risk? A Method of Correcting the Odds Ratio in Cohort Studies of Common Outcomes. JAMA; 280(19): 1690-1. doi: 10.1001/jama.280.19.1690


    Examples

library(sjmisc)
library(lme4)
#> Loading required package: Matrix

# create binary response
sleepstudy$Reaction.dicho <- dicho(sleepstudy$Reaction, dich.by = "median")
# fit model
fit <- glmer(Reaction.dicho ~ Days + (Days | Subject),
             data = sleepstudy, family = binomial("logit"))
# convert to relative risks
odds_to_rr(fit)
#> Parameter Odds Ratio Risk Ratio CI_low CI_high
#> 1 (Intercept) 0.02201714 0.04308565 0.004411356 0.3597023
#> 2 Days 2.43719045 1.41812942 1.212151446 1.5885357

data(efc)
# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)
# create data frame for fitted model
mydf <- data.frame(
  y = as.factor(y),
  sex = to_factor(efc$c161sex),
  dep = to_factor(efc$e42dep),
  barthel = efc$barthtot,
  education = to_factor(efc$c172code)
)
# fit model
fit <- glm(y ~ ., data = mydf, family = binomial(link = "logit"))
# convert to relative risks
odds_to_rr(fit)
#> Waiting for profiling to be done...
#> Parameter Odds Ratio Risk Ratio CI_low CI_high
#> 1 (Intercept) 2.014220 1.2942650 0.8005943 1.6101037
#> 2 sex2 1.913887 1.3686646 1.1632760 1.5625768
#> 3 dep2 1.624596 1.4468228 0.8511852 2.3180826
#> 4 dep3 3.080617 2.1859172 1.4029261 3.1216957
#> 5 dep4 2.484804 1.9230853 1.0500095 3.0354848
#> 6 barthel 0.970638 0.9865251 0.9822535 0.9906641
#> 7 education2 1.254234 1.1027736 0.9233955 1.2698368
#> 8 education3 1.327901 1.1280723 0.9057139 1.3294549

# replicate OR/RR for coefficient "sex" from above regression
# p0 ~ .44, or ~ 1.914
prop.table(table(mydf$y, mydf$sex))
#>
#> 1 2
#> 0 0.1324355 0.3153760
#> 1 0.1054994 0.4466891
or_to_rr(1.914, 0.1055 / (.1324 + .1055))
#> [1] 1.361962
diff --git a/docs/reference/overdisp-1.png b/docs/reference/overdisp-1.png
deleted file mode 100644
index 24d2460b..00000000
Binary files a/docs/reference/overdisp-1.png and /dev/null differ
diff --git a/docs/reference/overdisp.html b/docs/reference/overdisp.html
deleted file mode 100644
index eabecf15..00000000
--- a/docs/reference/overdisp.html
+++ /dev/null
@@ -1,209 +0,0 @@

Deprecated functions — overdisp • sjstats

    A list of deprecated functions.

overdisp(x, ...)

zero_count(x, ...)

pca(x, ...)

pca_rotate(x, ...)

r2(x)

icc(x)

p_value(x, ...)

se(x, ...)

    Arguments

x

    An object.

...

    Currently not used.

    Value


    Nothing.

diff --git a/docs/reference/p_value.html b/docs/reference/p_value.html
deleted file mode 100644
index 4eeb008f..00000000
--- a/docs/reference/p_value.html
+++ /dev/null
@@ -1,251 +0,0 @@

Get p-values from regression model objects — p_value • sjstats

    This function returns the p-values for fitted model objects.

p_value(fit, ...)

# S3 method for lmerMod
p_value(fit, p.kr = FALSE, ...)

    Arguments

fit

    A model object.

...

    Currently not used.

p.kr

    Logical, if TRUE, the computation of p-values is based on conditional F-tests with Kenward-Roger approximation for the df (see 'Details').

    Value


A data.frame with the model coefficients' names (term), p-values (p.value) and standard errors (std.error).


    Details


For linear mixed models (lmerMod-objects), the computation of p-values (if p.kr = TRUE) is based on conditional F-tests with Kenward-Roger approximation for the df, using the pbkrtest-package. If pbkrtest is not available or p.kr = FALSE, or if x is a glmerMod-object, computation of p-values is based on normal-distribution assumption, treating the t-statistics as Wald z-statistics.
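A sketch of that normal-approximation fallback, using the t-statistics shown in the example output below:

t_stat <- c(25.742, 12.838)
2 * pnorm(abs(t_stat), lower.tail = FALSE)   # two-sided Wald z p-values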

If p-values have already been computed (e.g. for merModLmerTest-objects from the lmerTest-package), these will be returned.

The print()-method has a summary-argument that - in case p.kr = TRUE - also prints information on the approximated degrees of freedom (see 'Examples'). A shortcut is the summary()-method, which simply calls print(..., summary = TRUE).


    Examples

data(efc)
# linear model fit
fit <- lm(neg_c_7 ~ e42dep + c172code, data = efc)
p_value(fit)
#> term p.value std.error
#> 1 (Intercept) 0.000 0.566
#> 2 e42dep 0.000 0.133
#> 3 c172code 0.207 0.198

# Generalized Least Squares fit
library(nlme)
#>
#> Attaching package: 'nlme'
#> The following object is masked from 'package:lme4':
#>
#>     lmList
#> The following object is masked from 'package:dplyr':
#>
#>     collapse
fit <- gls(follicles ~ sin(2*pi*Time) + cos(2*pi*Time), Ovary,
           correlation = corAR1(form = ~ 1 | Mare))
p_value(fit)
#> term p.value std.error
#> 1 (Intercept) 0.000 0.665
#> 2 sin(2 * pi * Time) 0.000 0.645
#> 3 cos(2 * pi * Time) 0.198 0.698

# lme4-fit
library(lme4)
sleepstudy$mygrp <- sample(1:45, size = 180, replace = TRUE)
fit <- lmer(Reaction ~ Days + (1 | mygrp) + (1 | Subject), sleepstudy)
#> boundary (singular) fit: see ?isSingular
pv <- p_value(fit, p.kr = TRUE)
#> Computing p-values via Kenward-Roger approximation. Use `p.kr = FALSE` if computation takes too long.

# normal output
pv
#> term p.value std.error
#> 1 (Intercept) 0 9.766
#> 2 Days 0 0.815

# add information on df and t-statistic
print(pv, summary = TRUE)
#> term p.value std.error df statistic
#> 1 (Intercept) 0 9.766 22.785 25.742
#> 2 Days 0 0.815 160.682 12.838
# or
summary(pv)
#> term p.value std.error df statistic
#> 1 (Intercept) 0 9.766 22.785 25.742
#> 2 Days 0 0.815 160.682 12.838
diff --git a/docs/reference/pca.html b/docs/reference/pca.html
deleted file mode 100644
index c6bb7f88..00000000
--- a/docs/reference/pca.html
+++ /dev/null
@@ -1,242 +0,0 @@

Tidy summary of Principal Component Analysis — pca • sjstats

    ...

pca(x)

pca_rotate(x, nf = NULL, rotation = c("varimax", "quartimax", "promax",
  "oblimin", "simplimax", "cluster", "none"))

    Arguments

x

    A data frame or a prcomp object.

nf

    Number of components to extract. If rotation = "varimax" and nf = NULL, the number of components is based on the Kaiser-criteria.

rotation

    Rotation of the factor loadings. May be one of "varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster" or "none".

    Value


A tidy data frame with either all loadings of principal components (for pca()) or a rotated loadings matrix (for pca_rotate()).


    Details


The print()-method for pca_rotate() has a cutoff-argument, which is a scalar between 0 and 1, indicating which (absolute) values from the loadings should be blank in the output. By default, all loadings below .1 (or -.1) are not shown.
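For instance, to blank out all loadings with an absolute value below .3 (reusing the efc COPE items from the example below):

print(pca_rotate(efc[, start:end]), cutoff = 0.3)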


    Examples

data(efc)
# receive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")
# receive last item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")

# extract principal components
pca(efc[, start:end])
#> PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
#> Standard deviation 1.7976 1.1311 0.9665 0.8609 0.8156 0.7951 0.7426 0.7251
#> Eigenvalue 3.2314 1.2793 0.9342 0.7412 0.6651 0.6322 0.5515 0.5258
#> Proportion variance 0.3590 0.1421 0.1038 0.0824 0.0739 0.0702 0.0613 0.0584
#> Cumulative variance 0.3590 0.5012 0.6050 0.6873 0.7612 0.8315 0.8928 0.9512
#> PC9
#> Standard deviation 0.6627
#> Eigenvalue 0.4392
#> Proportion variance 0.0488
#> Cumulative variance 1.0000

# extract principal components, varimax-rotation.
# number of components based on Kaiser-criteria
pca_rotate(efc[, start:end])
#> variable PC1 PC2
#> 1 c82cop1 0.2911 0.5964
#> 2 c83cop2 -0.5976 -0.4235
#> 3 c84cop3 -0.6885 -0.1564
#> 4 c85cop4 -0.726 -0.119
#> 5 c86cop5 -0.6426
#> 6 c87cop6 -0.6934 0.1213
#> 7 c88cop7 -0.6768 -0.3796
#> 8 c89cop8 0.6364
#> 9 c90cop9 0.7542
#>
#> PC1 PC2
#> Proportion variance 0.312 0.190
#> Cumulative variance 0.312 0.501
#> Proportion explained 0.622 0.378
#> Cumulative explained 0.622 1.000
diff --git a/docs/reference/pred_accuracy.html b/docs/reference/pred_accuracy.html
deleted file mode 100644
index 84c1bbca..00000000
--- a/docs/reference/pred_accuracy.html
+++ /dev/null
@@ -1,254 +0,0 @@

Accuracy of predictions from model fit — pred_accuracy • sjstats

This function calculates the predictive accuracy of linear or logistic regression models.

pred_accuracy(data, fit, method = c("cv", "boot"), k = 5, n = 1000)

    Arguments

data

    A data frame.

fit

    Fitted model object of class lm or glm, the latter being a logistic regression model (binary response).

method

    Character string, indicating whether crossvalidation (method = "cv") or bootstrapping (method = "boot") is used to compute the accuracy values.

k

    The number of folds for the k-fold crossvalidation.

n

    Number of bootstraps to be generated.

    Value


A list with two values: the accuracy of the model predictions, i.e. the proportion of accurately predicted values from the model, and its standard error, std.error.

Details

For linear models, the accuracy is the correlation coefficient between the actual and the predicted value of the outcome. For logistic regression models, the accuracy corresponds to the AUC-value, calculated with the auc-function.

The accuracy is the mean value of multiple correlation or AUC values, which are either computed with cross-validation or nonparametric bootstrapping (see argument method). The standard error is the standard deviation of the computed correlation or AUC values.
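A minimal sketch of this cross-validation logic, assuming a simple 5-fold split (illustrative only, not the package's exact implementation):

data(efc)
d <- na.omit(efc[, c("neg_c_7", "barthtot", "c161sex")])
folds <- sample(rep(1:5, length.out = nrow(d)))
acc <- vapply(1:5, function(i) {
  m <- lm(neg_c_7 ~ barthtot + c161sex, data = d[folds != i, ])
  # correlation between observed and predicted values in the hold-out fold
  cor(d$neg_c_7[folds == i], predict(m, newdata = d[folds == i, ]))
}, numeric(1))
mean(acc) # accuracy
sd(acc)   # its standard error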


    Examples

data(efc)
fit <- lm(neg_c_7 ~ barthtot + c161sex, data = efc)

# accuracy for linear model, with cross-validation
pred_accuracy(efc, fit)
#>
#> # Accuracy of Model Predictions
#>
#> Accuracy: 41.12%
#>       SE: 6.56%-points
#>   Method: Correlation between observed and predicted

# accuracy for linear model, with bootstrapping
pred_accuracy(efc, fit, method = "boot", n = 100)
#>
#> # Accuracy of Model Predictions
#>
#> Accuracy: 41.30%
#>       SE: 2.78%-points
#>   Method: Correlation between observed and predicted

# accuracy for logistic regression, with cross-validation
efc$services <- sjmisc::dicho(efc$tot_sc_e, dich.by = 0, as.num = TRUE)
fit <- glm(services ~ neg_c_7 + c161sex + e42dep,
           data = efc, family = binomial(link = "logit"))
pred_accuracy(efc, fit)
#>
#> # Accuracy of Model Predictions
#>
#> Accuracy: 58.38%
#>       SE: 3.92%-points
#>   Method: Area under Curve
diff --git a/docs/reference/pred_vars.html b/docs/reference/pred_vars.html
deleted file mode 100644
index 4adf9d68..00000000
--- a/docs/reference/pred_vars.html
+++ /dev/null
@@ -1,417 +0,0 @@
Access information from model objects — link_inverse • sjstats

    Several functions to retrieve information from model objects, - like variable names, link-inverse function, model frame, - model family etc., in a tidy and consistent way.

link_inverse(x, multi.resp = FALSE, mv = FALSE)

model_family(x, multi.resp = FALSE, mv = FALSE)

model_frame(x, fe.only = TRUE)

pred_vars(x, ...)

# S3 method for default
pred_vars(x, fe.only = FALSE, ...)

# S3 method for glmmTMB
pred_vars(x, fe.only = FALSE, zi = FALSE, disp = FALSE, ...)

# S3 method for MixMod
pred_vars(x, fe.only = FALSE, zi = FALSE, ...)

re_grp_var(x)

grp_var(x)

resp_val(x)

resp_var(x, combine = TRUE)

var_names(x)

    Arguments

    x

A fitted model; for var_names(), x may also be a character vector.

    mv, multi.resp

Logical, if TRUE and model is a multivariate response model from a brmsfit object or of class stanmvreg, then a list of values (one for each regression) is returned.

    fe.only

Logical, if TRUE and x is a mixed effects model, model_frame() returns the model frame for fixed effects only, and pred_vars() returns only fixed effects terms. Note that the default for model_frame() is fe.only = TRUE, while for pred_vars() the default is fe.only = FALSE.

    ...

    Currently not used.

    zi

Logical, if TRUE and model has a zero-inflation formula, the variable(s) used in this formula are also returned.

    disp

Logical, if TRUE and model is of class glmmTMB and has a dispersion formula, the variable(s) used in the dispersion formula are also returned.

    combine

Logical, if TRUE and the response is a matrix-column, the name of the response matches the notation in formula, and would for instance also contain patterns like "cbind(...)". Else, the original variable names from the matrix-column are returned. See 'Examples'.

Value

For pred_vars() and resp_var(), the name(s) of the response or predictor variables from x as character vector. resp_val() returns the values from x's response vector. re_grp_var() returns the group factor of random effects in mixed models, or NULL if x has no such random effects term (grp_var() is an alias for re_grp_var()).

link_inverse() returns, if known, the inverse link function from x; else NULL for those models where the inverse link function can't be identified.

model_frame() is similar to model.frame(), but should also work for model objects that don't have a S3-generic for model.frame().

var_names() returns the "cleaned" variable names, i.e. things like s() for splines or log() are removed.

model_family() returns a list with information about the model family (see 'Details').

Details

model_family() returns a list with information about the model family for many different model objects. The following information is returned, where all values starting with is_ are logicals.

• is_bin: family is binomial (but not negative binomial)
• is_pois: family is either poisson or negative binomial
• is_negbin: family is negative binomial
• is_count: model is a count model (i.e. family is either poisson or negative binomial)
• is_beta: family is beta
• is_logit: model has logit link
• is_linear: family is gaussian
• is_ordinal: family is ordinal or cumulative link
• is_categorical: family is categorical link
• is_zeroinf: model has zero-inflation component
• is_multivariate: model is a multivariate response model (currently only works for brmsfit objects)
• is_trial: model response contains additional information about the trials
• link.fun: the link-function
• family: the family-object
model_frame() slightly differs from model.frame(), especially for spline terms and matrix-variables created with cbind() (for example in binomial models, where the response is a combination of successes and trials). Where model.frame() returns a matrix for splines, model_frame() returns the data of the original variable and uses the same column name as in the data-argument from the model-function. This makes it easier, for instance, to get data that should be used as new data in predict(). For matrix-variables created with cbind(), model_frame() returns the original variable as matrix and additionally each column as own variable. See 'Examples'.


    Examples

data(efc)
fit <- lm(neg_c_7 ~ e42dep + c161sex, data = efc)

pred_vars(fit)
    #> [1] "e42dep" "c161sex"
    resp_var(fit)
    #> [1] "neg_c_7"
    resp_val(fit)
    #> [1] 12 20 11 10 12 19 15 11 15 10 28 18 13 18 16 13 11 11 13 17 11 9 8 14 11 -#> [26] 23 11 15 11 25 9 15 20 9 10 19 8 17 16 17 14 14 16 19 17 15 16 19 17 10 -#> [51] 14 14 9 12 25 17 22 13 15 7 19 11 15 13 11 14 17 7 15 11 19 10 10 20 10 -#> [76] 12 15 7 13 12 16 10 15 15 15 25 11 10 11 14 10 10 13 10 11 18 14 12 10 9 -#> [101] 13 14 10 10 13 13 12 12 18 7 13 14 11 16 15 15 9 17 17 22 16 14 9 13 9 -#> [126] 17 17 9 13 14 12 18 7 10 12 20 12 14 12 10 12 11 14 11 13 10 12 12 10 9 -#> [151] 15 12 11 14 16 18 11 11 14 14 12 10 9 12 8 10 11 10 11 13 7 10 11 12 15 -#> [176] 10 16 13 20 7 12 17 14 10 12 9 7 16 13 14 8 8 20 7 15 7 9 14 11 12 -#> [201] 11 12 18 8 13 16 8 13 14 11 8 12 24 11 11 13 9 13 20 12 16 15 20 10 12 -#> [226] 12 12 11 10 9 10 8 10 12 10 9 11 7 9 11 11 12 11 14 12 12 19 12 15 11 -#> [251] 17 8 13 11 10 8 10 19 10 18 8 11 9 10 13 11 9 9 8 9 8 8 11 9 10 -#> [276] 12 9 17 20 12 7 9 7 8 8 14 7 10 8 16 9 16 13 8 20 16 9 9 8 15 -#> [301] 16 19 8 12 17 12 14 11 9 11 9 8 10 8 10 15 13 8 10 10 12 14 12 7 8 -#> [326] 16 9 16 7 8 13 9 7 9 9 8 17 7 8 9 7 10 10 11 18 9 10 13 8 12 -#> [351] 9 7 10 8 7 7 12 12 9 8 10 18 16 11 15 10 9 9 12 18 12 13 17 9 8 -#> [376] 7 16 12 14 15 10 9 17 17 21 17 17 15 9 12 12 22 11 14 11 9 8 12 13 13 -#> [401] 9 10 12 9 11 13 11 17 10 18 10 16 10 10 14 11 11 10 11 8 15 12 10 13 13 -#> [426] 13 12 9 13 10 15 18 11 14 11 12 12 14 15 8 10 9 7 8 18 7 7 7 11 8 -#> [451] 11 11 16 13 14 14 7 9 7 17 7 10 9 9 7 12 14 7 10 20 7 8 9 11 10 -#> [476] 14 7 8 8 10 8 12 10 14 11 8 11 17 10 22 8 9 19 11 18 16 18 15 19 10 -#> [501] 13 15 7 8 22 8 20 19 10 7 25 9 11 7 11 9 8 12 9 20 7 12 9 9 8 -#> [526] 10 8 17 12 9 9 8 7 8 9 17 17 8 9 9 10 9 7 8 27 25 14 28 16 11 -#> [551] 15 7 9 7 7 8 13 19 15 14 20 20 14 10 11 15 7 14 11 13 16 13 10 17 10 -#> [576] 12 11 7 8 15 13 11 7 18 17 12 18 17 13 10 19 7 8 10 18 17 19 8 12 10 -#> [601] 14 10 13 9 8 8 9 15 11 7 8 11 21 8 11 10 10 11 10 11 9 13 17 9 8 -#> [626] 8 9 13 14 14 9 12 8 11 10 11 11 10 10 10 12 13 7 8 12 8 8 13 10 12 -#> [651] 16 8 10 13 10 9 10 12 11 9 10 9 13 10 9 10 8 7 8 7 7 9 8 11 9 -#> [676] 10 12 11 7 16 12 10 8 12 23 10 10 18 13 12 18 9 12 13 9 7 10 7 8 17 -#> [701] 11 14 11 23 14 8 7 15 8 12 9 15 17 13 13 10 20 10 11 25 10 12 10 12 10 -#> [726] 8 14 8 18 8 15 11 12 10 7 10 13 14 7 7 14 11 11 11 9 7 15 9 9 18 -#> [751] 8 15 7 8 13 8 8 9 7 7 9 8 8 13 10 11 13 11 8 12 8 9 16 11 19 -#> [776] 12 12 9 10 10 9 13 7 11 13 10 10 13 9 14 15 15 9 10 8 8 9 9 9 9 -#> [801] 9 13 9 12 14 12 8 10 7 22 18 16 13 15 24 11 14 12 11 10 7 10 10 12 10 -#> [826] 7 9 16 14 12 9 10 8 9 7 8 10 9 8 10 10 7 11 8 10 11 14 7 8 10 -#> [851] 10 11 11 8 8 9 11 7 7 8 9 9 7 13 15 11 24 8 9 7 10 15 18 22 18 -#> [876] 9 11 14 7 9 17 23 12 13 15 8 8 14 10 10 -#> attr(,"label") -#> [1] "Negative impact with 7 items"
link_inverse(fit)(2.3)
#> [1] 2.3

# example from ?stats::glm
counts <- c(18, 17, 15, 20, 10, 20, 25, 13, 12)
outcome <- gl(3, 1, 9)
treatment <- gl(3, 3)
m <- glm(counts ~ outcome + treatment, family = poisson())

link_inverse(m)(.3)
#> [1] 1.349859
# same as
exp(.3)
#> [1] 1.349859

outcome <- as.numeric(outcome)
m <- glm(counts ~ log(outcome) + as.factor(treatment), family = poisson())
var_names(m)
#> [1] "counts"    "outcome"   "treatment"

# model.frame and model_frame behave slightly differently
library(splines)
m <- lm(neg_c_7 ~ e42dep + ns(c160age, knots = 2), data = efc)
head(model.frame(m))
#>   neg_c_7 e42dep ns(c160age, knots = 2).1 ns(c160age, knots = 2).2
#> 1      12      3               0.49465270               0.08689310
#> 2      20      3               0.49766116               0.04922034
#> 3      11      3               0.45855117               0.53896628
#> 4      10      4               0.47509770               0.33176607
#> 5      12      4               0.50819077              -0.08263434
#> 6      19      4               0.49465270               0.08689310
head(model_frame(m))
#>   neg_c_7 e42dep c160age
#> 1      12      3      56
#> 2      20      3      54
#> 3      11      3      80
#> 4      10      4      69
#> 5      12      4      47
#> 6      19      4      56

library(lme4)
data(cbpp)
cbpp$trials <- cbpp$size - cbpp$incidence
m <- glm(cbind(incidence, trials) ~ period, data = cbpp, family = binomial)
head(model.frame(m))
#>   cbind(incidence, trials).incidence cbind(incidence, trials).trials period
#> 1                                  2                              12      1
#> 2                                  3                               9      2
#> 3                                  4                               5      3
#> 4                                  0                               5      4
#> 5                                  3                              19      1
#> 6                                  1                              17      2
head(model_frame(m))
#>   cbind(incidence, trials).incidence cbind(incidence, trials).trials period
#> 1                                  2                              12      1
#> 2                                  3                               9      2
#> 3                                  4                               5      3
#> 4                                  0                               5      4
#> 5                                  3                              19      1
#> 6                                  1                              17      2
#>   incidence trials
#> 1         2     12
#> 2         3      9
#> 3         4      5
#> 4         0      5
#> 5         3     19
#> 6         1     17

resp_var(m, combine = TRUE)
#> [1] "cbind(incidence, trials)"
resp_var(m, combine = FALSE)
#> [1] "incidence" "trials"

# get random effects grouping factor from mixed models
library(lme4)
data(sleepstudy)
m <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)
re_grp_var(m)
#> [1] "Subject"

# get model predictors, with and w/o dispersion formula
# NOT RUN {
library(glmmTMB)
data("Salamanders")
m <- glmmTMB(
  count ~ spp + cover + mined + poly(DOP, 3) + (1 | site),
  ziformula = ~spp + mined,
  dispformula = ~DOY,
  data = Salamanders,
  family = nbinom2
)

pred_vars(m)
pred_vars(m, fe.only = TRUE)
pred_vars(m, disp = TRUE)
# }
diff --git a/docs/reference/prop.html b/docs/reference/prop.html
deleted file mode 100644
index de6fb551..00000000
--- a/docs/reference/prop.html
+++ /dev/null
@@ -1,294 +0,0 @@
Proportions of values in a vector — prop • sjstats

prop() calculates the proportion of a value or category in a variable. props() does the same, but allows for multiple logical conditions in one statement. It is similar to mean() with logical predicates, however, both prop() and props() work with grouped data frames.

prop(data, ..., weights = NULL, na.rm = TRUE, digits = 4)

props(data, ..., na.rm = TRUE, digits = 4)

    Arguments

    data

    A data frame. May also be a grouped data frame (see 'Examples').

    ...

One or more value pairs of comparisons (logical predicates). Put variable names on the left-hand side and values to match on the right-hand side. Expressions may be quoted or unquoted. See 'Examples'.

    weights

Vector of weights that will be applied to weight all observations. Must be a vector of same length as the input vector. Default is NULL, so no weights are used.

    na.rm

Logical, whether to remove NA values from the vector when the proportion is calculated. na.rm = FALSE gives you the raw percentage of a value in a vector, na.rm = TRUE the valid percentage.

    digits

    Amount of digits for returned values.

Value

For one condition, a numeric value with the proportion of the values inside a vector. For more than one condition, a tibble with one column of conditions and one column with proportions. For grouped data frames, returns a tibble with one column per group with grouping categories, followed by one column with proportions per condition.

Details

prop() only allows one logical statement per comparison, while props() allows multiple logical statements per comparison. However, prop() supports weighting of variables before calculating proportions, and comparisons may also be quoted. Hence, prop() also processes comparisons which are passed as character vector (see 'Examples' and the sketch below).

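The weighting works like a weighted relative frequency; a minimal sketch of the underlying idea (illustrative only, not the package's exact code):

x <- c(1, 1, 2, 2, 2)
w <- c(2, 1, 1, 1, 1)
# weighted proportion of value 1: sum of weights for matching
# cases divided by the total sum of weights
sum(w[x == 1]) / sum(w)
#> [1] 0.5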

    Examples

data(efc)

# proportion of value 1 in e42dep
prop(efc, e42dep == 1)
    #> [1] 0.0733
# expression may also be completely quoted
prop(efc, "e42dep == 1")
#> [1] 0.0733

# use "props()" for multiple logical statements
props(efc, e17age > 70 & e17age < 80)
#> [1] 0.3199

# proportion of value 1 in e42dep, and all values greater
# than 2 in e42dep, including missing values. will return a tibble
prop(efc, e42dep == 1, e42dep > 2, na.rm = FALSE)
#>   condition   prop
#> 1 e42dep==1 0.0727
#> 2  e42dep>2 0.6718

# for factors or character vectors, use quoted or unquoted values
library(sjmisc)
# convert numeric to factor, using labels as factor levels
efc$e16sex <- to_label(efc$e16sex)
efc$n4pstu <- to_label(efc$n4pstu)

# get proportion of female older persons
prop(efc, e16sex == female)
#> [1] 0.6715

# get proportion of male older persons
prop(efc, e16sex == "male")
#> [1] 0.3285

# "props()" needs quotes around non-numeric factor levels
props(efc,
  e17age > 70 & e17age < 80,
  n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3'
)
#>                               condition   prop
#> 1                   e17age>70&e17age<80 0.3199
#> 2 n4pstu==CareLevel1|n4pstu==CareLevel3 0.3137

# also works with pipe-chains
library(dplyr)
efc %>% prop(e17age > 70)
#> [1] 0.8092
efc %>% prop(e17age > 70, e16sex == 1)
#>   condition   prop
#> 1 e17age>70 0.8092
#> 2 e16sex==1 0.0000

# and with group_by
efc %>%
  group_by(e16sex) %>%
  prop(e42dep > 2)
#> Warning: Factor `e16sex` contains implicit NA, consider using `forcats::fct_explicit_na`
#> Warning: Factor `e16sex` contains implicit NA, consider using `forcats::fct_explicit_na`
#>   elder's gender e42dep>2
#> 1           male   0.6847
#> 2         female   0.6744

efc %>%
  select(e42dep, c161sex, c172code, e16sex) %>%
  group_by(c161sex, c172code) %>%
  prop(e42dep > 2, e16sex == 1)
#>   carer's gender      carer's level of education e42dep>2 e16sex==1
#> 1           Male          low level of education   0.6829         0
#> 5           Male intermediate level of education   0.6590         0
#> 3           Male         high level of education   0.7872         0
#> 4         Female          low level of education   0.7101         0
#> 2         Female intermediate level of education   0.5929         0
#> 6         Female         high level of education   0.6881         0

# same for "props()"
efc %>%
  select(e42dep, c161sex, c172code, c12hour, n4pstu) %>%
  group_by(c161sex, c172code) %>%
  props(
    e42dep > 2,
    c12hour > 20 & c12hour < 40,
    n4pstu == 'Care Level 1' | n4pstu == 'Care Level 3'
  )
#>   carer's gender      carer's level of education e42dep>2 c12hour>20&c12hour<40
#> 1           Male          low level of education   0.6829                0.2439
#> 5           Male intermediate level of education   0.6590                0.1756
#> 3           Male         high level of education   0.7872                0.1489
#> 4         Female          low level of education   0.7101                0.1957
#> 2         Female intermediate level of education   0.5929                0.1504
#> 6         Female         high level of education   0.6881                0.2018
#>   n4pstu==CareLevel1|n4pstu==CareLevel3
#> 1                                0.2250
#> 5                                0.3111
#> 3                                0.3191
#> 4                                0.3433
#> 2                                0.3540
#> 6                                0.2752
diff --git a/docs/reference/r2.html b/docs/reference/r2.html
deleted file mode 100644
index 93598f82..00000000
--- a/docs/reference/r2.html
+++ /dev/null
@@ -1,189 +0,0 @@
Deprecated functions — r2 • sjstats

    A list of deprecated functions.

r2(x)

icc(x)

p_value(x, ...)

se(x, ...)

cohens_f(x, ...)

std_beta(x, ...)

robust(x, ...)

    Arguments

    x

    An object.

    ...

    Currently not used.

Value

    Nothing.

diff --git a/docs/reference/re_var.html b/docs/reference/re_var.html
deleted file mode 100644
index c2959834..00000000
--- a/docs/reference/re_var.html
+++ /dev/null
@@ -1,288 +0,0 @@
Random effect variances — re_var • sjstats

These functions extract random effect variances as well as the random-intercept-slope-correlation of mixed effects models. Currently, merMod, glmmTMB, stanreg and brmsfit objects are supported.

re_var(x, adjusted = FALSE)

get_re_var(x, comp = c("tau.00", "tau.01", "tau.11", "rho.01", "sigma_2"))

    Arguments

    x

Fitted mixed effects model (of class merMod, glmmTMB, stanreg or brmsfit). get_re_var() also accepts an object returned by the icc function.

    adjusted

Logical, if TRUE, returns the variance of the fixed and random effects as well as of the additive dispersion and distribution-specific variance, which are used to calculate the adjusted and conditional r2 and icc.

    comp

    Name of the variance component to be returned. See 'Details'.

Value

get_re_var() returns the value of the requested variance component, re_var() returns all random effects variances.

Details

The random effect variances indicate the between- and within-group variances as well as the random-slope variance and random-slope-intercept correlation. Use the following values for comp to get the particular variance component (see the sketch at the end of this section for where these components live in an lme4 model):

"sigma_2"
Within-group (residual) variance

"tau.00"
Between-group-variance (variation between individual intercepts and average intercept)

"tau.11"
Random-slope-variance (variation between individual slopes and average slope)

"tau.01"
Random-Intercept-Slope-covariance

"rho.01"
Random-Intercept-Slope-correlation

The within-group-variance is affected by factors at level one, i.e. by the lower-level direct effects. Level two factors (i.e. cross-level direct effects) affect the between-group-variance. Cross-level interaction effects are group-level factors that explain the variance in random slopes (Aguinis et al. 2013).

If adjusted = TRUE, the variance of the fixed and random effects as well as of the additive dispersion and distribution-specific variance are returned (see Johnson et al. 2014 and Nakagawa et al. 2017):

"fixed"
variance attributable to the fixed effects

"random"
(mean) variance of random effects

"dispersion"
variance due to additive dispersion

"distribution"
distribution-specific variance

"residual"
sum of dispersion and distribution
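A minimal sketch of where two of these components live in an lme4 model (illustrative only, not the package's exact extraction code):

library(lme4)
fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
vc <- as.data.frame(VarCorr(fit))
# tau.00: variance of the random intercepts for "Subject"
vc$vcov[vc$grp == "Subject" & vc$var1 == "(Intercept)" & is.na(vc$var2)]
# sigma_2: residual (within-group) variance
sigma(fit)^2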

    References

• Aguinis H, Gottfredson RK, Culpepper SA. 2013. Best-Practice Recommendations for Estimating Cross-Level Interaction Effects Using Multilevel Modeling. Journal of Management 39(6): 1490-1528 (doi: 10.1177/0149206313478188)
• Johnson PC, O'Hara RB. 2014. Extension of Nakagawa & Schielzeth's R2GLMM to random slopes models. Methods Ecol Evol, 5: 944-946. (doi: 10.1111/2041-210X.12225)
• Nakagawa S, Johnson P, Schielzeth H (2017) The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded. J. R. Soc. Interface 14. doi: 10.1098/rsif.2017.0213


    Examples

library(lme4)
fit1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)

# all random effect variance components
re_var(fit1)
#> Within-group-variance:       654.941
#> Between-group-variance:      611.898 (Subject)
#> Random-slope-variance:        35.081 (Subject.Days)
#> Slope-Intercept-covariance:    9.614 (Subject.(Intercept))
#> Slope-Intercept-correlation:   0.066 (Subject)
re_var(fit1, adjusted = TRUE)
#>
#> Variance Components of Mixed Models
#>
#> Family : gaussian (identity)
#> Formula: list(conditional = Reaction ~ Days, random = ~Days | Subject)
#>
#>         fixed:  908.953
#>        random: 1698.233
#>      residual:  654.941
#>    dispersion:    0.000
#>  distribution:  654.941

# just the rand. slope-intercept covariance
get_re_var(fit1, "tau.01")
#> Subject.(Intercept)
#>            9.613886

sleepstudy$mygrp <- sample(1:45, size = 180, replace = TRUE)
fit2 <- lmer(Reaction ~ Days + (1 | mygrp) + (Days | Subject), sleepstudy)
re_var(fit2)
#> Within-group-variance:       605.912
#> Between-group-variance:       44.917 (mygrp)
#> Between-group-variance:      615.511 (Subject)
#> Random-slope-variance:        38.301 (Subject.Days)
#> Slope-Intercept-covariance:    1.108 (Subject.(Intercept))
#> Slope-Intercept-correlation:   0.007 (Subject)
diff --git a/docs/reference/reexports.html b/docs/reference/reexports.html
deleted file mode 100644
index 778e83cf..00000000
--- a/docs/reference/reexports.html
+++ /dev/null
@@ -1,187 +0,0 @@
Objects exported from other packages — reexports • sjstats

These objects are imported from other packages. Follow the links below to see their documentation.

bayestestR
ci, equivalence_test

insight
link_inverse

magrittr
%>%

performance
mse, rmse

sjmisc
typical_value
diff --git a/docs/reference/reliab_test.html b/docs/reference/reliab_test.html
deleted file mode 100644
index 6bf2ef4d..00000000
--- a/docs/reference/reliab_test.html
+++ /dev/null
@@ -1,372 +0,0 @@
Check internal consistency of a test or questionnaire — reliab_test • sjstats

These functions compute various measures of internal consistency for tests or item-scales of questionnaires.

reliab_test(x, scale.items = FALSE, digits = 3, out = c("txt", "viewer", "browser"))

split_half(x, digits = 3)

cronb(x)

difficulty(x)

mic(x, cor.method = c("pearson", "spearman", "kendall"))

    Arguments

    x

Depending on the function, x may be a matrix as returned by the cor-function, or a data frame with items (e.g. from a test or questionnaire).

    scale.items

Logical, if TRUE, the data frame's vectors will be scaled. Recommended when the variables have different measures / scales.

    digits

    Amount of digits for returned values.

    out

Character vector, indicating whether the results should be printed to console (out = "txt"), shown as HTML-table in the viewer-pane (out = "viewer") or browser (out = "browser"), or whether the results should be plotted (out = "plot", only applies to certain functions). May be abbreviated.

    cor.method

Correlation computation method. May be one of "pearson" (default), "spearman" or "kendall". You may use initial letter only.

Value
reliab_test()
A data frame with the corrected item-total correlations (item discrimination, column item.discr) and Cronbach's alpha (if item deleted, column alpha.if.deleted) for each item of the scale, or NULL if the data frame had too few columns.

split_half()
A list with two values: the split-half reliability splithalf and the Spearman-Brown corrected split-half reliability spearmanbrown.

cronb()
The Cronbach's Alpha value for x.

mic()
The mean inter-item-correlation value for x.

difficulty()
The item difficulty value for x.

    Details

reliab_test()
This function calculates the item discriminations (corrected item-total correlations for each item of x with the remaining items) and the Cronbach's alpha for each item, if it was deleted from the scale. The absolute value of the item discrimination indices should be above 0.1. An index between 0.1 and 0.3 is considered as "fair", while an index above 0.3 (or below -0.3) is "good". Items with low discrimination indices are often ambiguously worded and should be examined. Items with negative indices should be examined to determine why a negative value was obtained (e.g. reversed answer categories regarding positive and negative poles).

split_half()
This function calculates the split-half reliability for items in the data frame x, including the Spearman-Brown adjustment. Splitting is done by selecting odd versus even columns in x. A value closer to 1 indicates greater internal consistency (see the sketch at the end of this section).

cronb()
The Cronbach's Alpha value for x. A value closer to 1 indicates greater internal consistency, where usually the following rule of thumb is applied to interpret the results: α < 0.5 is unacceptable, 0.5 < α < 0.6 is poor, 0.6 < α < 0.7 is questionable, 0.7 < α < 0.8 is acceptable, and everything > 0.8 is good or excellent.

mic()
This function calculates a mean inter-item-correlation, i.e. a correlation matrix of x will be computed (unless x is already a matrix as returned by the cor-function) and the mean of the sum of all item's correlation values is returned. Requires either a data frame or a computed cor-object.

"Ideally, the average inter-item correlation for a set of items should be between .20 and .40, suggesting that while the items are reasonably homogenous, they do contain sufficiently unique variance so as to not be isomorphic with each other. When values are lower than .20, then the items may not be representative of the same content domain. If values are higher than .40, the items may be only capturing a small bandwidth of the construct." (Piedmont 2014)

difficulty()
This function calculates the item difficulty, which should range between 0.2 and 0.8. Lower values are a signal for more difficult items, while higher values close to one are a sign for easier items. The ideal value for item difficulty is p + (1 - p) / 2, where p = 1 / max(x). In most cases, the ideal item difficulty lies between 0.5 and 0.8.
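Minimal sketches of the split-half and Cronbach's Alpha computations described above, assuming a numeric data frame x of items (illustrative only, not the package's exact implementations):

# split-half reliability: correlate sums of odd and even items,
# then apply the Spearman-Brown correction 2r / (1 + r)
odd  <- rowSums(x[, seq(1, ncol(x), by = 2)], na.rm = TRUE)
even <- rowSums(x[, seq(2, ncol(x), by = 2)], na.rm = TRUE)
r <- cor(odd, even, use = "complete.obs")
2 * r / (1 + r)

# Cronbach's Alpha: k / (k - 1) * (1 - sum of item variances / total variance)
k <- ncol(x)
total <- rowSums(x, na.rm = TRUE)
k / (k - 1) * (1 - sum(apply(x, 2, var, na.rm = TRUE)) / var(total, na.rm = TRUE))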

    References

Spearman C. 1910. Correlation calculated from faulty data. British Journal of Psychology (3): 271-295. doi: 10.1111/j.2044-8295.1910.tb00206.x

Brown W. 1910. Some experimental results in the correlation of mental abilities. British Journal of Psychology (3): 296-322. doi: 10.1111/j.2044-8295.1910.tb00207.x

Piedmont RL. 2014. Inter-item Correlations. In: Michalos AC (eds) Encyclopedia of Quality of Life and Well-Being Research. Dordrecht: Springer, 3303-3304. doi: 10.1007/978-94-007-0753-5_1493

    Examples

library(sjlabelled)
# Data from the EUROFAMCARE sample dataset
data(efc)

# retrieve variable and value labels
varlabs <- get_label(efc)

# receive first item of COPE-index scale
start <- which(colnames(efc) == "c82cop1")
# receive last item of COPE-index scale
end <- which(colnames(efc) == "c90cop9")

# create data frame with COPE-index scale
x <- efc[, c(start:end)]
colnames(x) <- varlabs[c(start:end)]

# reliability tests
reliab_test(x)
    #> term -#> 1 do you feel you cope well as caregiver? -#> 2 do you find caregiving too demanding? -#> 3 does caregiving cause difficulties in your relationship with your friends? -#> 4 does caregiving have negative effect on your physical health? -#> 5 does caregiving cause difficulties in your relationship with your family? -#> 6 does caregiving cause financial difficulties? -#> 7 do you feel trapped in your role as caregiver? -#> 8 do you feel supported by friends/neighbours? -#> 9 do you feel caregiving worthwhile? -#> alpha.if.deleted item.discr -#> 1 0.539 -0.241 -#> 2 0.384 0.329 -#> 3 0.339 0.408 -#> 4 0.324 0.441 -#> 5 0.380 0.357 -#> 6 0.366 0.416 -#> 7 0.353 0.368 -#> 8 0.534 -0.029 -#> 9 0.556 -0.112
# split-half-reliability
split_half(x)
#>
#> # Internal Consistency
#>
#>    Split-Half Reliability: 0.410
#> Spearman-Brown Adjustment: 0.581

# cronbach's alpha
cronb(x)
#> [1] 0.459369

# mean inter-item-correlation
mic(x)
#> [1] 0.09176831

# item difficulty
difficulty(x)
    #> -#> # Item Difficulty -#> -#> difficulty ideal -#> do you feel you cope well as caregiver? 0.78 0.62 -#> do you find caregiving too demanding? 0.51 0.62 -#> does caregiving cause difficulties in your relationship with your friends? 0.41 0.62 -#> does caregiving have negative effect on your physical health? 0.44 0.62 -#> does caregiving cause difficulties in your relationship with your family? 0.35 0.62 -#> does caregiving cause financial difficulties? 0.32 0.62 -#> do you feel trapped in your role as caregiver? 0.48 0.62 -#> do you feel supported by friends/neighbours? 0.54 0.62 -#> do you feel caregiving worthwhile? 0.73 0.62
# NOT RUN {
library(sjPlot)
sjt.df(reliab_test(x), describe = FALSE, show.cmmn.row = TRUE,
       string.cmmn = sprintf("Cronbach's &alpha;=%.2f", cronb(x)))

# Compute PCA on Cope-Index, and perform a
# reliability check on each extracted factor.
factors <- sjt.pca(x)$factor.index
findex <- sort(unique(factors))
library(sjPlot)
for (i in seq_len(length(findex))) {
  rel.df <- subset(x, select = which(factors == findex[i]))
  if (ncol(rel.df) >= 3) {
    sjt.df(reliab_test(rel.df), describe = FALSE, show.cmmn.row = TRUE,
           use.viewer = FALSE, title = "Item-Total-Statistic",
           string.cmmn = sprintf("Scale's overall Cronbach's &alpha;=%.2f",
                                 cronb(rel.df)))
  }
}
# }
diff --git a/docs/reference/rmse-1.png b/docs/reference/rmse-1.png
deleted file mode 100644
index 9b9a407b..00000000
Binary files a/docs/reference/rmse-1.png and /dev/null differ
diff --git a/docs/reference/rmse-2.png b/docs/reference/rmse-2.png
deleted file mode 100644
index f1f2bcf6..00000000
Binary files a/docs/reference/rmse-2.png and /dev/null differ
diff --git a/docs/reference/rmse.html b/docs/reference/rmse.html
deleted file mode 100644
index ba06ba35..00000000
--- a/docs/reference/rmse.html
+++ /dev/null
@@ -1,416 +0,0 @@
Compute model quality — cv • sjstats

Compute various measures or tests to assess the model quality, like root mean squared error, residual standard error or mean square error of fitted linear (mixed effects) models. For logistic regression models, or mixed models with binary outcome, the error rate, binned residuals, Chi-square goodness-of-fit-test or the Hosmer-Lemeshow Goodness-of-fit-test can be performed.

cv(x, ...)

chisq_gof(x, prob = NULL, weights = NULL)

hoslem_gof(x, n.bins = 10)

rmse(x, normalized = FALSE)

rse(x)

mse(x)

error_rate(x)

binned_resid(x, term = NULL, n.bins = NULL)

    Arguments

    x

Fitted linear model of class lm, merMod (lme4) or lme (nlme). For error_rate(), hoslem_gof() and binned_resid(), a glm-object with binomial-family. For chisq_gof(), a numeric vector or a glm-object.

    ...

More fitted model objects, to compute multiple coefficients of variation at once.

    prob

Vector of probabilities (indicating the population probabilities) of the same length as x's amount of categories / factor levels. Use nrow(table(x)) to determine the amount of necessary values for prob. Only used when x is a vector, and not a glm-object.

    weights

    Vector with weights, used to weight x.

    n.bins

Numeric, the number of bins to divide the data. For hoslem_gof(), the default is 10. For binned_resid(), if n.bins = NULL, the square root of the number of observations is taken.

    normalized

    Logical, use TRUE if normalized rmse should be returned.

    term

Name of independent variable from x. If not NULL, average residuals for the categories of term are plotted; else, average residuals for the estimated probabilities of the response are plotted.

Value
rmse(), rse(), mse(), cv()
These functions return a number, the requested statistic.

error_rate()
A list with four values: the error rate of the full and the null model, as well as the chi-squared and p-value from the Likelihood-Ratio-Test between the full and null model.

binned_resid()
A data frame representing the data that is mapped to the plot, which is automatically plotted. In case all residuals are inside the error bounds, points are black. If some of the residuals are outside the error bounds (indicated by the grey-shaded area), blue points indicate residuals that are OK, while red points indicate model under- or overfitting for the related range of estimated probabilities.

chisq_gof()
For vectors, returns the object of the computed chisq.test. For glm-objects, an object of class chisq_gof with following values: p.value, the p-value for the goodness-of-fit test; z.score, the standardized z-score for the goodness-of-fit test; rss, the residual sums of squares term and chisq, the pearson chi-squared statistic.

hoslem_gof()
An object of class hoslem_test with following values: chisq, the Hosmer-Lemeshow chi-squared statistic; df, degrees of freedom and p.value the p-value for the goodness-of-fit test.

    Details

Root Mean Square Error
The RMSE is the square root of the variance of the residuals and indicates the absolute fit of the model to the data (difference between observed data and the model's predicted values). "RMSE can be interpreted as the standard deviation of the unexplained variance, and has the useful property of being in the same units as the response variable. Lower values of RMSE indicate better fit. RMSE is a good measure of how accurately the model predicts the response, and is the most important criterion for fit if the main purpose of the model is prediction." (Grace-Martin K: Assessing the Fit of Regression Models)

The normalized RMSE is the proportion of the RMSE related to the range of the response variable. Hence, lower values indicate less residual variance. (The sketch after this section shows how these quantities can be computed by hand.)

Residual Standard Error
The residual standard error is the square root of the residual sum of squares divided by the residual degrees of freedom.

Mean Square Error
The mean square error is the mean of the sum of squared residuals, i.e. it measures the average of the squares of the errors. Lower values (closer to zero) indicate better fit.

Coefficient of Variation
The advantage of the cv is that it is unitless. This allows coefficients of variation to be compared to each other in ways that other measures, like standard deviations or root mean squared residuals, cannot be.

"It is interesting to note the differences between a model's CV and R-squared values. Both are unitless measures that are indicative of model fit, but they define model fit in two different ways: CV evaluates the relative closeness of the predictions to the actual values while R-squared evaluates how much of the variability in the actual values is explained by the model." (source: UCLA-FAQ)

Error Rate
The error rate is a crude measure of model fit for logistic regression models. It is defined as the proportion of cases for which the deterministic prediction is wrong, i.e. the proportion where the predicted probability is above 0.5, although y = 0 (and vice versa). In general, the error rate should be below 0.5 (i.e. 50%); the closer to zero, the better. Furthermore, the error rate of the full model should be considerably below the null model's error rate (cf. Gelman and Hill 2007, pp. 99). The print()-method also prints the results from the Likelihood-Ratio-Test, comparing the full to the null model.

Binned Residuals
Binned residual plots are achieved by "dividing the data into categories (bins) based on their fitted values, and then plotting the average residual versus the average fitted value for each bin." (Gelman, Hill 2007: 97). If the model were true, one would expect about 95% of the residuals to fall inside the error bounds.

If term is not NULL, one can compare the residuals in relation to a specific model predictor. This may be helpful to check if a term would fit better when transformed, e.g. a rising and falling pattern of residuals along the x-axis (the pattern is indicated by a green line) is a signal to consider taking the logarithm of the predictor (cf. Gelman and Hill 2007, pp. 97ff).

Chi-squared Goodness-of-Fit Test
For vectors, this function is a convenient wrapper around chisq.test(), performing a goodness-of-fit test. For glm-objects, this function performs a goodness-of-fit test. A well-fitting model shows no significant difference between the model and the observed data, i.e. the reported p-values should be greater than 0.05.

Hosmer-Lemeshow Goodness-of-Fit Test
A well-fitting model shows no significant difference between the model and the observed data, i.e. the reported p-value should be greater than 0.05.
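Minimal sketches of the definitions above (illustrative only, not the package's exact implementations):

data(efc)
fit <- lm(barthtot ~ c160age + c12hour, data = efc)
r <- residuals(fit)
y <- model.frame(fit)$barthtot

sqrt(mean(r^2))                   # RMSE
sqrt(mean(r^2)) / diff(range(y))  # normalized RMSE
sqrt(sum(r^2) / df.residual(fit)) # residual standard error
mean(r^2)                         # mean square error

# error rate of a logistic regression model: proportion of cases
# where the deterministic prediction (probability > 0.5) is wrong
m <- glm(am ~ mpg, data = mtcars, family = binomial)
mean((fitted(m) > 0.5) != mtcars$am)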

    References

Gelman A, Hill J (2007) Data Analysis Using Regression and Multilevel/Hierarchical Models. Cambridge, New York: Cambridge University Press

Everitt, Brian (1998). The Cambridge Dictionary of Statistics. Cambridge, UK New York: Cambridge University Press

Hosmer, D. W., & Lemeshow, S. (2000). Applied Logistic Regression. Hoboken, NJ, USA: John Wiley & Sons, Inc. doi: 10.1002/0471722146

Grace-Martin K: Assessing the Fit of Regression Models

    See also

r2 for R-squared or pseudo-R-squared values.

    Examples

data(efc)
fit <- lm(barthtot ~ c160age + c12hour, data = efc)
rmse(fit)
    #> [1] 25.46792
    rse(fit)
    #> [1] 25.51134
    cv(fit)
    #> [1] 0.3948098
library(lme4)
fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
rmse(fit)
    #> [1] 23.43815
    mse(fit)
    #> [1] 549.3468
    cv(fit)
    #> [1] 0.07851768
# normalized RMSE
library(nlme)
fit <- lme(distance ~ age, data = Orthodont)
rmse(fit, normalized = TRUE)
    #> [1] 0.07242178
# coefficient of variation for variable
cv(efc$e17age)
    #> [1] 0.1023027
# Error Rate
efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1)
m <- glm(
  neg_c_7d ~ c161sex + barthtot + c172code,
  data = efc,
  family = binomial(link = "logit")
)
error_rate(m)
#>
#> # Error Rate of Logistic Regression Model
#>
#>   Full model: 29.45%
#>   Null model: 45.15%
#>
#> # Likelihood-Ratio-Test
#>
#>   Chi-squared: 166.050
#>       p-value:   0.000
#>

# Binned residuals
binned_resid(m)
binned_resid(m, "barthtot")

# goodness-of-fit test for logistic regression
chisq_gof(m)
#>
#> # Chi-squared Goodness-of-Fit Test
#>
#>   Chi-squared: 852.765
#>       z-score:   1.025
#>       p-value:   0.305
#>
#> Summary: model seems to fit well.

# goodness-of-fit test for logistic regression
hoslem_gof(m)
#>
#> # Hosmer-Lemeshow Goodness-of-Fit Test
#>
#>   Chi-squared: 18.707
#>            df: 8
#>       p-value: 0.017
#>
#> Summary: model does not fit well.

# goodness-of-fit test for vectors against probabilities
# differing from population
chisq_gof(efc$e42dep, c(0.3, 0.2, 0.22, 0.28))
#>
#> 	Chi-squared test for given probabilities
#>
#> data:  dummy
#> X-squared = 234.76, df = 3, p-value < 2.2e-16
#>
# equal to population
chisq_gof(efc$e42dep, prop.table(table(efc$e42dep)))
#>
#> 	Chi-squared test for given probabilities
#>
#> data:  dummy
#> X-squared = 0, df = 3, p-value = 1
#>
diff --git a/docs/reference/robust.html b/docs/reference/robust.html
deleted file mode 100644
index 251e3a34..00000000
--- a/docs/reference/robust.html
+++ /dev/null
@@ -1,351 +0,0 @@
Robust standard errors for regression models — robust • sjstats

robust() computes robust standard errors for regression models. This method calls one of the vcov*()-functions from the sandwich-package for robust covariance matrix estimators. Results are returned as tidy data frame (a minimal sketch of this idea appears before the examples below).

svy() is intended to compute standard errors for survey designs (complex samples) fitted with regular lm or glm functions, as alternative to the survey-package. It simulates sampling weights by adjusting the residual degrees of freedom based on the precision weights used to fit x, and then calls robust() with the adjusted model.

    -
    - -
robust(
  x,
  vcov.fun = "vcovHC",
  vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5"),
  vcov.args = NULL,
  conf.int = FALSE,
  exponentiate = FALSE
)

svy(
  x,
  vcov.fun = "vcovHC",
  vcov.type = c("HC1", "const", "HC", "HC0", "HC3", "HC2", "HC4", "HC4m", "HC5"),
  vcov.args = NULL,
  conf.int = FALSE,
  exponentiate = FALSE
)

    Arguments

    x

A fitted model of any class that is supported by the vcov*()-functions from the sandwich package. For svy(), x must be an lm object, fitted with weights.

    vcov.fun

String, indicating the name of the vcov*()-function from the sandwich-package, e.g. vcov.fun = "vcovCL".

    vcov.type

Character vector, specifying the estimation type for the robust covariance matrix estimation (see vcovHC for details).

    vcov.args

List of named vectors, used as additional arguments that are passed down to vcov.fun.

    conf.int

Logical, TRUE if confidence intervals based on robust standard errors should be included.

    exponentiate

Logical, whether to exponentiate the coefficient estimates and confidence intervals (typical for logistic regression).

Value

A summary of the model, including estimates, robust standard errors, p-values and, optionally, the confidence intervals.

Note
svy() simply calls robust(), but first adjusts the residual degrees of freedom based on the model weights. Hence, for svy(), x should be fitted with weights. This simulates sampling weights like in survey designs, though lm and glm implement precision weights. The results from svy() are usually more accurate than simple weighted standard errors for complex samples. However, results from the survey package are still more exact, especially regarding the estimates.

vcov.type for svy() defaults to "HC1", because standard errors with this estimation type come closest to the standard errors from the survey-package.

Currently, svy() only works for objects of class lm.
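Roughly, robust() combines the model's coefficients with a robust covariance matrix from sandwich; a minimal sketch of the same idea using sandwich and lmtest directly (assuming both packages are installed):

library(sandwich)
library(lmtest)
data(efc)
fit <- lm(barthtot ~ c160age + c12hour, data = efc)
# coefficient test with an HC3 robust covariance matrix
coeftest(fit, vcov. = vcovHC(fit, type = "HC3"))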


    Examples

data(efc)
fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc)
summary(fit)
    #> -#> Call: -#> lm(formula = barthtot ~ c160age + c12hour + c161sex + c172code, -#> data = efc) -#> -#> Residuals: -#> Min 1Q Median 3Q Max -#> -74.639 -15.246 4.251 19.009 73.327 -#> -#> Coefficients: -#> Estimate Std. Error t value Pr(>|t|) -#> (Intercept) 90.06448 6.17237 14.592 <2e-16 *** -#> c160age -0.22156 0.07111 -3.116 0.0019 ** -#> c12hour -0.27810 0.01865 -14.915 <2e-16 *** -#> c161sex -0.26178 2.08649 -0.125 0.9002 -#> c172code -0.76215 1.41971 -0.537 0.5915 -#> --- -#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 -#> -#> Residual standard error: 25.35 on 816 degrees of freedom -#> (87 observations deleted due to missingness) -#> Multiple R-squared: 0.2696, Adjusted R-squared: 0.266 -#> F-statistic: 75.28 on 4 and 816 DF, p-value: < 2.2e-16 -#>
    robust(fit)
    #> term estimate std.error statistic p.value -#> 1 (Intercept) 90.0644792 5.58174936 16.1355291 4.832660e-51 -#> 2 c160age -0.2215581 0.07106417 -3.1177189 1.886499e-03 -#> 3 c12hour -0.2781004 0.02119537 -13.1208077 8.012747e-36 -#> 4 c161sex -0.2617783 1.91864853 -0.1364389 8.915080e-01 -#> 5 c172code -0.7621525 1.39456606 -0.5465159 5.848608e-01
confint(fit)
    #> 2.5 % 97.5 % -#> (Intercept) 77.9488902 102.18006829 -#> c160age -0.3611297 -0.08198647 -#> c12hour -0.3146997 -0.24150107 -#> c161sex -4.3573007 3.83374416 -#> c172code -3.5488594 2.02455439
    robust(fit, conf.int = TRUE)
    #> term estimate std.error conf.low conf.high statistic -#> 1 (Intercept) 90.0644792 5.58174936 79.1082006 101.02075786 16.1355291 -#> 2 c160age -0.2215581 0.07106417 -0.3610482 -0.08206799 -3.1177189 -#> 3 c12hour -0.2781004 0.02119537 -0.3197042 -0.23649650 -13.1208077 -#> 4 c161sex -0.2617783 1.91864853 -4.0278463 3.50428977 -0.1364389 -#> 5 c172code -0.7621525 1.39456606 -3.4995120 1.97520692 -0.5465159 -#> p.value -#> 1 4.832660e-51 -#> 2 1.886499e-03 -#> 3 8.012747e-36 -#> 4 8.915080e-01 -#> 5 5.848608e-01
    robust(fit, vcov.type = "HC1", conf.int = TRUE) # "HC1" should be Stata default
    #> term estimate std.error conf.low conf.high statistic -#> 1 (Intercept) 90.0644792 5.55391758 79.1628309 100.96612755 16.2163874 -#> 2 c160age -0.2215581 0.07066514 -0.3602650 -0.08285124 -3.1353241 -#> 3 c12hour -0.2781004 0.02103734 -0.3193941 -0.23680669 -13.2193675 -#> 4 c161sex -0.2617783 1.90939743 -4.0096876 3.48613101 -0.1370999 -#> 5 c172code -0.7621525 1.38747118 -3.4855856 1.96128056 -0.5493105 -#> p.value -#> 1 1.788307e-51 -#> 2 1.778136e-03 -#> 3 2.730414e-36 -#> 4 8.909856e-01 -#> 5 5.829427e-01
library(sjmisc)
# dichotomize service usage by "service usage yes/no"
efc$services <- sjmisc::dicho(efc$tot_sc_e, dich.by = 0)
fit <- glm(services ~ neg_c_7 + c161sex + e42dep,
           data = efc, family = binomial(link = "logit"))

robust(fit)
    #> term estimate std.error statistic p.value -#> 1 (Intercept) -0.5198021 0.38048112 -1.366171 0.172232067 -#> 2 neg_c_7 0.0419026 0.02035790 2.058296 0.039853669 -#> 3 c161sex -0.2189336 0.16263979 -1.346126 0.178606278 -#> 4 e42dep 0.2134784 0.07970607 2.678320 0.007536269
    robust(fit, conf.int = TRUE, exponentiate = TRUE)
    #> term estimate std.error conf.low conf.high statistic p.value -#> 1 (Intercept) 0.5946382 0.38048112 0.2818017 1.254764 -1.366171 0.172232067 -#> 2 neg_c_7 1.0427929 0.02035790 1.0019492 1.085302 2.058296 0.039853669 -#> 3 c161sex 0.8033751 0.16263979 0.5838345 1.105470 -1.346126 0.178606278 -#> 4 e42dep 1.2379767 0.07970607 1.0587020 1.447609 2.678320 0.007536269
diff --git a/docs/reference/samplesize_mixed.html b/docs/reference/samplesize_mixed.html
deleted file mode 100644
index 964bcab1..00000000
--- a/docs/reference/samplesize_mixed.html
+++ /dev/null
@@ -1,262 +0,0 @@
Sample size for linear mixed models — samplesize_mixed • sjstats

Compute an approximated sample size for linear mixed models (two-level-designs), based on power-calculation for standard design and adjusted for design effect for 2-level-designs.

samplesize_mixed(
  eff.size,
  df.n = NULL,
  power = 0.8,
  sig.level = 0.05,
  k,
  n,
  icc = 0.05
)

smpsize_lmm(
  eff.size,
  df.n = NULL,
  power = 0.8,
  sig.level = 0.05,
  k,
  n,
  icc = 0.05
)

    Arguments

    eff.size

    Effect size.

    df.n

    Optional argument for the degrees of freedom for numerator. See 'Details'.

    power

    Power of test (1 minus Type II error probability).

    sig.level

    Significance level (Type I error probability).

    k

    Number of cluster groups (level-2-unit) in multilevel-design.

    n

Optional, number of observations per cluster groups (level-2-unit) in multilevel-design.

    icc

    Expected intraclass correlation coefficient for multilevel-model.

Value

A list with two values: the number of subjects per cluster, and the total sample size for the linear mixed model.

Details

The sample size calculation is based on a power-calculation for the standard design. If df.n is not specified, a power-calculation for an unpaired two-sample t-test will be computed (using pwr.t.test of the pwr-package). If df.n is given, a power-calculation for general linear models will be computed (using pwr.f2.test of the pwr-package). The sample size of the standard design is then adjusted for the design effect of two-level-designs (see design_effect). Thus, the sample size calculation is appropriate in particular for two-level-designs (see Snijders 2005). Models that additionally include repeated measures (three-level-designs) may work as well, however, the computed sample size may be less accurate (see the sketch below).

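A rough sketch of this calculation, assuming the common design effect formula deff = 1 + (n - 1) * icc (see design_effect; illustrative only, not the package's exact code):

library(pwr)
# sample size per group for an unpaired two-sample t-test
n_standard <- pwr.t.test(d = 0.3, power = 0.8, sig.level = 0.05)$n
# adjust for the design effect of a two-level design
# (here: 27 observations per cluster, ICC = 0.05)
deff <- 1 + (27 - 1) * 0.05
ceiling(n_standard * deff)

Doubled for two groups, this comes close to the total sample size in the first example below.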

    References

Cohen J. 1988. Statistical power analysis for the behavioral sciences (2nd ed.). Hillsdale, NJ: Lawrence Erlbaum.

Hsieh FY, Lavori PW, Cohen HJ, Feussner JR. 2003. An Overview of Variance Inflation Factors for Sample-Size Calculation. Evaluation and the Health Professions 26: 239-257. doi: 10.1177/0163278703255230

Snijders TAB. 2005. Power and Sample Size in Multilevel Linear Models. In: Everitt BS, Howell DC (Hrsg.). Encyclopedia of Statistics in Behavioral Science. Chichester, UK: John Wiley and Sons, Ltd. doi: 10.1002/0470013192.bsa492

    Examples

# Sample size for multilevel model with 30 cluster groups and a small to
# medium effect size (Cohen's d) of 0.3. 27 subjects per cluster and
# hence a total sample size of about 802 observations is needed.
samplesize_mixed(eff.size = .3, k = 30)
#> $`Subjects per Cluster`
#> [1] 27
#>
#> $`Total Sample Size`
#> [1] 802
#>

# Sample size for multilevel model with 20 cluster groups and a medium
# to large effect size for linear models of 0.2. Five subjects per cluster and
# hence a total sample size of about 107 observations is needed.
samplesize_mixed(eff.size = .2, df.n = 5, k = 20, power = .9)
#> $`Subjects per Cluster`
#> [1] 5
#>
#> $`Total Sample Size`
#> [1] 107
#>
diff --git a/docs/reference/scale_weights.html b/docs/reference/scale_weights.html
deleted file mode 100644
index bbb9f62b..00000000
--- a/docs/reference/scale_weights.html
+++ /dev/null
@@ -1,276 +0,0 @@
Rescale design weights for multilevel analysis — scale_weights • sjstats

Most functions to fit multilevel and mixed effects models only allow to specify frequency weights, but not design (i.e. sampling or probability) weights, which should be used when analyzing complex samples and survey data. scale_weights() implements an algorithm proposed by Asparouhov (2006) and Carle (2009) to rescale design weights in survey data to account for the grouping structure of multilevel models, which then can be used for multilevel modelling.

    scale_weights(x, cluster.id, pweight)
Arguments
    x

    A data frame.

    cluster.id

Variable indicating the grouping structure (strata) of the survey data (level-2-cluster variable).

    pweight

Variable indicating the probability (design or sampling) weights of the survey data (level-1-weight).

Value

x, with two new variables: svywght_a and svywght_b, which represent the rescaled design weights to use in multilevel models (use these variables for the weights argument).

Details

Rescaling is based on two methods: For svywght_a, the sample weights pweight are adjusted by a factor that represents the proportion of cluster size divided by the sum of sampling weights within each cluster. The adjustment factor for svywght_b is the sum of sample weights within each cluster divided by the sum of squared sample weights within each cluster (see Carle (2009), Appendix B, and the sketch after this section).

Regarding the choice between scaling methods A and B, Carle suggests that "analysts who wish to discuss point estimates should report results based on weighting method A. For analysts more interested in residual between-cluster variance, method B may generally provide the least biased estimates". In general, it is recommended to fit a non-weighted model and weighted models with both scaling methods and when comparing the models, see whether the "inferential decisions converge", to gain confidence in the results.

Though the bias of scaled weights decreases with increasing cluster size, method A is preferred when insufficient or low cluster size is a concern.

    - The cluster ID and probably PSU may be used as random effects (e.g. - nested design, or cluster and PSU as varying intercepts), depending - on the survey design that should be mimicked.

    -
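The two adjustment factors described above can be written down directly. The following is a minimal sketch of the formulas, not the package's implementation, assuming a hypothetical data frame d with a cluster variable cid and sampling weights w:

    # method A: weight * (cluster size / sum of weights within cluster)
    # method B: weight * (sum of weights / sum of squared weights within cluster)
    w_sum  <- ave(d$w, d$cid, FUN = sum)
    w_sqs  <- ave(d$w^2, d$cid, FUN = sum)
    n_clus <- ave(d$w, d$cid, FUN = length)
    d$svywght_a <- d$w * n_clus / w_sum
    d$svywght_b <- d$w * w_sum / w_sqs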

References

Carle AC. 2009. Fitting multilevel models in complex survey data with design weights: Recommendations. BMC Medical Research Methodology 9(49): 1-13.

Asparouhov T. 2006. General Multi-Level Modeling with Sampling Weights. Communications in Statistics - Theory and Methods 35: 439-460.

    Examples

    data(nhanes_sample)
    scale_weights(nhanes_sample, SDMVSTRA, WTINT2YR)
    #> This function will be removed in a future update. There's an enhanced implementation in 'parameters::rescale_weights()' that should be used instead.
    #> # A tibble: 2,992 x 9
    #>    total   age RIAGENDR RIDRETH1 SDMVPSU SDMVSTRA WTINT2YR svywght_a svywght_b
    #>    <dbl> <dbl>    <dbl>    <dbl>   <dbl>    <dbl>    <dbl>     <dbl>     <dbl>
    #>  1     1  2.2         1        3       2       31   97594.     1.57      1.20
    #>  2     7  2.08        2        3       1       29   39599.     0.623     0.525
    #>  3     3  1.48        2        1       2       42   26620.     0.898     0.544
    #>  4     4  1.32        2        4       2       33   34999.     0.708     0.550
    #>  5     1  2           2        1       1       41   14746.     0.422     0.312
    #>  6     6  2.2         2        4       1       38   28232.     0.688     0.516
    #>  7   350  1.6         1        3       2       33   93162.     1.89      1.46
    #>  8    NA  1.48        2        3       1       29   82276.     1.29      1.09
    #>  9     3  2.28        2        4       1       41   24726.     0.707     0.523
    #> 10    30  0.84        1        3       2       35   39895.     0.760     0.594
    #> # ... with 2,982 more rows

    if (require("lme4")) {
      nhanes_sample <- scale_weights(nhanes_sample, SDMVSTRA, WTINT2YR)
      glmer(
        total ~ factor(RIAGENDR) * (log(age) + factor(RIDRETH1)) + (1 | SDMVPSU),
        family = poisson(),
        data = nhanes_sample,
        weights = svywght_a
      )
    }
    #> This function will be removed in a future update. There's an enhanced implementation in 'parameters::rescale_weights()' that should be used instead.
    #> Generalized linear mixed model fit by maximum likelihood (Laplace
    #>   Approximation) [glmerMod]
    #>  Family: poisson  ( log )
    #> Formula: total ~ factor(RIAGENDR) * (log(age) + factor(RIDRETH1)) + (1 | SDMVPSU)
    #>    Data: nhanes_sample
    #> Weights: svywght_a
    #>       AIC       BIC    logLik  deviance  df.resid
    #>  78844.27  78920.47 -39409.14  78818.27      2582
    #> Random effects:
    #>  Groups  Name        Std.Dev.
    #>  SDMVPSU (Intercept) 0.1018
    #> Number of obs: 2595, groups:  SDMVPSU, 2
    #> Fixed Effects:
    #>                         (Intercept)                    factor(RIAGENDR)2
    #>                            2.491801                            -1.021308
    #>                            log(age)                    factor(RIDRETH1)2
    #>                            0.838726                            -0.088627
    #>                   factor(RIDRETH1)3                    factor(RIDRETH1)4
    #>                           -0.013333                             0.722511
    #>                   factor(RIDRETH1)5           factor(RIAGENDR)2:log(age)
    #>                           -0.106521                            -1.012695
    #> factor(RIAGENDR)2:factor(RIDRETH1)2  factor(RIAGENDR)2:factor(RIDRETH1)3
    #>                           -0.009086                             0.732985
    #> factor(RIAGENDR)2:factor(RIDRETH1)4  factor(RIAGENDR)2:factor(RIDRETH1)5
    #>                            0.275967                             0.542074

diff --git a/docs/reference/se.html b/docs/reference/se.html
deleted file mode 100644
index 09d8418f..00000000
--- a/docs/reference/se.html
+++ /dev/null
@@ -1,383 +0,0 @@

Standard Error for variables or coefficients — se • sjstats

Compute standard error for a variable, for all variables of a data frame, for joint random and fixed effects coefficients of (non-/linear) mixed models, the adjusted standard errors for generalized linear (mixed) models, or for intraclass correlation coefficients (ICC).

    se(x, ...)

Arguments

x: (Numeric) vector, a data frame, an lm, glm, merMod (lme4) or stanreg model object, an ICC object (as obtained by the icc function), a table or xtabs object, or a list with estimate and p-value. For the latter case, the list must contain elements named estimate and p.value (see 'Examples' and 'Details').

...: Currently not used.

Value

The standard error of x.

Details

Standard error for variables

For variables and data frames, the standard error is the square root of the variance divided by the number of observations (length of vector).

Standard error for mixed models

For linear mixed models and generalized linear mixed models, this function computes the standard errors for joint (sums of) random and fixed effects coefficients (unlike se.coef, which returns the standard error for fixed and random effects separately). Hence, se() returns the appropriate standard errors for coef.merMod.

Standard error for generalized linear models

For generalized linear models, approximated standard errors, using the delta method for transformed regression parameters, are returned (Oehlert 1992).

Standard error for proportions and mean value

To compute the standard error for relative frequencies (i.e. proportions, or the mean value if x has only two categories), this vector must be supplied as a table, e.g. se(table(iris$Species)). se() then computes the relative frequencies (proportions) for each value and the related standard error for each value. This might be useful to add standard errors or confidence intervals to descriptive statistics. If standard errors for weighted variables are required, use xtabs(), e.g. se(xtabs(weights ~ variable)).

Standard error for regression coefficient and p-value

se() also returns the standard error of an estimate (regression coefficient) and p-value, assuming a normal distribution to compute the z-score from the p-value (formula in short: b / qnorm(p / 2)). See 'Examples'.
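That last computation is a one-liner; a minimal sketch for illustration, using the formula given above (the helper name is hypothetical, not part of the package):

    # b / qnorm(p / 2), taking the absolute value of the z-score
    se_from_p <- function(estimate, p.value) abs(estimate / qnorm(p.value / 2))
    se_from_p(0.3, 0.002)  # ~ 0.097, matching the example below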

Note

Computation of standard errors for coefficients of mixed models is based on this code. Standard errors for generalized linear (mixed) models, if type = "re", are approximations based on the delta method (Oehlert 1992).

A remark on standard errors: "Standard error represents variation in the point estimate, but confidence interval has usual Bayesian interpretation only with flat prior." (Gelman 2017)

References

Oehlert GW. 1992. A note on the delta method. American Statistician 46(1).

Gelman A. 2017. How to interpret confidence intervals? http://andrewgelman.com/2017/03/04/interpret-confidence-intervals/

    Examples

    library(lme4)
    library(sjmisc)

    # compute standard error for vector
    se(rnorm(n = 100, mean = 3))
    #> [1] 0.09589874

    # compute standard error for each variable in a data frame
    data(efc)
    se(efc[, 1:3])
    #>    c12hour   e15relat     e16sex
    #> 1.69162290 0.06942207 0.01565588

    # compute standard error for merMod-coefficients
    fit <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
    se(fit)
    #> $Subject
    #>    (Intercept)     Days
    #> 1     13.86572 2.775269
    #> 2     13.86572 2.775269
    #> 3     13.86572 2.775269
    #> 4     13.86572 2.775269
    #> 5     13.86572 2.775269
    #> 6     13.86572 2.775269
    #> 7     13.86572 2.775269
    #> 8     13.86572 2.775269
    #> 9     13.86572 2.775269
    #> 10    13.86572 2.775269
    #> 11    13.86572 2.775269
    #> 12    13.86572 2.775269
    #> 13    13.86572 2.775269
    #> 14    13.86572 2.775269
    #> 15    13.86572 2.775269
    #> 16    13.86572 2.775269
    #> 17    13.86572 2.775269
    #> 18    13.86572 2.775269

    # compute odds-ratio adjusted standard errors, based on delta method
    # with first-order Taylor approximation.
    data(efc)
    efc$services <- sjmisc::dicho(efc$tot_sc_e, dich.by = 0)
    fit <- glm(
      services ~ neg_c_7 + c161sex + e42dep,
      data = efc,
      family = binomial(link = "logit")
    )
    se(fit)
    #> # A tibble: 4 x 3
    #>   term        estimate std.error
    #>   <chr>          <dbl>     <dbl>
    #> 1 (Intercept)    0.595    0.224
    #> 2 neg_c_7        1.04     0.0204
    #> 3 c161sex        0.803    0.130
    #> 4 e42dep         1.24     0.0972

    # compute odds-ratio adjusted standard errors for generalized
    # linear mixed model, also based on delta method

    # create binary response
    sleepstudy$Reaction.dicho <- dicho(sleepstudy$Reaction, dich.by = "median")
    fit <- glmer(
      Reaction.dicho ~ Days + (Days | Subject),
      data = sleepstudy,
      family = binomial("logit")
    )
    se(fit)
    #> $Subject
    #>    (Intercept)      Days
    #> 1     1.859251 0.4700877
    #> 2     2.622321 0.4115004
    #> 3     2.622321 0.4115004
    #> 4     1.633616 0.3289753
    #> 5     1.745522 0.4826391
    #> 6     1.714059 0.3560052
    #> 7     1.715195 0.4646524
    #> 8     2.219966 0.4133501
    #> 9     2.622321 0.4115004
    #> 10    1.806673 0.5077360
    #> 11    2.314690 0.3959941
    #> 12    2.111495 0.4335945
    #> 13    1.830037 0.3388438
    #> 14    1.686571 0.4933028
    #> 15    1.988285 0.4534415
    #> 16    2.167228 0.4013762
    #> 17    2.314690 0.3959941
    #> 18    1.778644 0.4321839

    # compute standard error for proportions
    efc$e42dep <- to_label(efc$e42dep)
    se(table(efc$e42dep))
    #>                  value proportion   std.error
    #> 1          independent 0.07325194 0.008680166
    #> 2   slightly dependent 0.24972253 0.014420404
    #> 3 moderately dependent 0.33962264 0.015777276
    #> 4   severely dependent 0.33740289 0.015752039

    # including weights
    efc$weights <- rnorm(nrow(efc), 1, .25)
    se(xtabs(efc$weights ~ efc$e42dep))
    #>                  value proportion   std.error
    #> 1          independent 0.07061821 0.008552969
    #> 2   slightly dependent 0.25359865 0.014525167
    #> 3 moderately dependent 0.33761265 0.015787989
    #> 4   severely dependent 0.33817049 0.015794372

    # compute standard error from regression coefficient and p-value
    se(list(estimate = .3, p.value = .002))
    #> [1] 0.09708008

    # NOT RUN {
    # compute standard error of ICC for the linear mixed model
    icc(fit)
    se(icc(fit))

    # the standard error for the ICC can be computed manually in this way,
    # taking the fitted model example from above
    library(dplyr)
    library(purrr)
    dummy <- sleepstudy %>%
      # generate 100 bootstrap replicates of dataset
      bootstrap(100) %>%
      # run mixed effects regression on each bootstrap replicate
      # and compute ICC for each "bootstrapped" regression
      mutate(
        models = map(strap, ~lmer(Reaction ~ Days + (Days | Subject), data = .x)),
        icc = map_dbl(models, ~icc(.x))
      )

    # now compute SE and p-values for the bootstrapped ICC, values
    # may differ from above example due to random seed
    boot_se(dummy, icc)
    boot_p(dummy, icc)
    # }

diff --git a/docs/reference/se_ybar.html b/docs/reference/se_ybar.html
deleted file mode 100644
index 969a20d2..00000000
--- a/docs/reference/se_ybar.html
+++ /dev/null
@@ -1,188 +0,0 @@

Standard error of sample mean for mixed models — se_ybar • sjstats

Compute the standard error for the sample mean for mixed models, regarding the extent to which clustering affects the standard errors. May be used as part of the multilevel power calculation for cluster sampling (see Gelman and Hill 2007, 447ff).

    se_ybar(fit)

Arguments

fit: Fitted mixed effects model (of class merMod).

Value

The standard error of the sample mean of fit.

References

Gelman A, Hill J. 2007. Data analysis using regression and multilevel/hierarchical models. Cambridge, New York: Cambridge University Press.

    Examples

    if (require("lme4")) {
      fit <- lmer(Reaction ~ 1 + (1 | Subject), sleepstudy)
      se_ybar(fit)
    }
    #>  Subject
    #> 9.049936
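The value above can be reproduced by hand from the variance components, following the Gelman and Hill formula se = sqrt(sigma_alpha^2 / J + sigma_y^2 / (J * n)), with J clusters of average size n. A minimal sketch under that assumption, not the package's exact code:

    vc <- as.data.frame(lme4::VarCorr(fit))
    J  <- lme4::ngrps(fit)[["Subject"]]        # number of clusters
    n  <- nobs(fit) / J                        # average cluster size
    sqrt(vc$vcov[1] / J + vc$vcov[2] / (J * n))  # ~ 9.05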
diff --git a/docs/reference/sjstats-package.html b/docs/reference/sjstats-package.html
deleted file mode 100644
index 925573e0..00000000
--- a/docs/reference/sjstats-package.html
+++ /dev/null
@@ -1,209 +0,0 @@

Collection of Convenient Functions for Common Statistical Computations — sjstats-package • sjstats

    Collection of convenient functions for common statistical computations, which are not directly provided by R's base or stats packages.

This package aims at providing, first, shortcuts for statistical measures, which otherwise could only be calculated with additional effort (like standard errors or root mean squared errors).

Second, these shortcut functions are generic (if appropriate), and can be applied not only to vectors, but to other objects as well (e.g., the Coefficient of Variation can be computed for vectors, linear models, or linear mixed models; the r2() function returns the r-squared value for lm, glm, merMod, glmmTMB, lme and other objects).

Most functions of this package are designed as summary functions, i.e. they do not transform the input vector; rather, they return a summary, which is sometimes a vector and sometimes a tidy data frame. The focus of most functions lies on summary statistics or fit measures for regression models, including generalized linear models, mixed effects models and Bayesian models. However, some of the functions also deal with other statistical measures, like Cronbach's Alpha, Cramer's V, Phi etc.

The comprised tools include:

• For regression and mixed models: Coefficient of Variation, Root Mean Squared Error, Residual Standard Error, Coefficient of Discrimination, R-squared and pseudo-R-squared values, standardized beta values
• Especially for mixed models: Design effect, ICC, sample size calculation and convergence tests
• Especially for Bayesian models: Highest Density Interval, region of practical equivalence (ROPE), Monte Carlo Standard Errors, ratio of number of effective samples, mediation analysis, Test for Practical Equivalence
• Fit and accuracy measures for regression models: Overdispersion tests, accuracy of predictions, test/training-error comparisons, error rate and binned residual plots for logistic regression models
• For anova-tables: Eta-squared, Partial Eta-squared, Omega-squared and Partial Omega-squared statistics

Furthermore, sjstats has functions to access information from model objects, which either support more model objects than their stats counterparts, or provide easy access to model attributes, like:

• model_frame() to get the model frame
• model_family() to get information about the model family, link functions etc.
• link_inverse() to get the link-inverse function
• pred_vars() and resp_var() to get the names of either the dependent or independent variables, or
• var_names() to get the "cleaned" variable names from a model object (cleaned means that things like s() or log() are removed from the returned character vector of variable names)

Other statistics:

• Cramer's V, Cronbach's Alpha, Mean Inter-Item-Correlation, Mann-Whitney-U-Test, Item-scale reliability tests
diff --git a/docs/reference/smpsize_lmm.html b/docs/reference/smpsize_lmm.html
deleted file mode 100644
index 2da74000..00000000
--- a/docs/reference/smpsize_lmm.html
+++ /dev/null
@@ -1,260 +0,0 @@

Sample size for linear mixed models — smpsize_lmm • sjstats

Compute an approximated sample size for linear mixed models (two-level designs), based on power calculation for standard designs and adjusted for the design effect of two-level designs.

    smpsize_lmm(eff.size, df.n = NULL, power = 0.8, sig.level = 0.05, k, n, icc = 0.05)

Arguments

eff.size: Effect size.

df.n: Optional argument for the degrees of freedom for numerator. See 'Details'.

power: Power of test (1 minus Type II error probability).

sig.level: Significance level (Type I error probability).

k: Number of cluster groups (level-2 units) in the multilevel design.

n: Optional, number of observations per cluster group (level-2 unit) in the multilevel design.

icc: Expected intraclass correlation coefficient for the multilevel model.

Value

A list with two values: the number of subjects per cluster, and the total sample size for the linear mixed model.

Details

The sample size calculation is based on a power calculation for the standard design. If df.n is not specified, a power calculation for an unpaired two-sample t-test will be computed (using pwr.t.test of the pwr package). If df.n is given, a power calculation for general linear models will be computed (using pwr.f2.test of the pwr package). The sample size of the standard design is then adjusted for the design effect of two-level designs (see deff). Thus, the sample size calculation is appropriate in particular for two-level designs (see Snijders 2005). Models that additionally include repeated measures (three-level designs) may work as well, but the computed sample size may be less accurate.
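A hedged sketch of that adjustment, assuming the common two-level design effect deff = 1 + (n - 1) * icc (Hsieh et al. 2003). It reproduces the first documented example below, but is not necessarily the package's exact code:

    library(pwr)
    eff.size <- 0.3; k <- 30; icc <- 0.05
    # standard-design sample size for an unpaired two-sample t-test (both groups)
    n.std <- 2 * pwr.t.test(d = eff.size, power = 0.8, sig.level = 0.05)$n
    # solve k * n = n.std * (1 + (n - 1) * icc) for n, the cluster size
    n.per.cluster <- n.std * (1 - icc) / (k - n.std * icc)
    ceiling(n.per.cluster)    # about 27 subjects per cluster
    round(k * n.per.cluster)  # about 802 observations in total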

References

Cohen J. 1988. Statistical power analysis for the behavioral sciences (2nd ed.). Hillsdale, NJ: Lawrence Erlbaum.

Hsieh FY, Lavori PW, Cohen HJ, Feussner JR. 2003. An Overview of Variance Inflation Factors for Sample-Size Calculation. Evaluation and the Health Professions 26: 239-257. doi: 10.1177/0163278703255230

Snijders TAB. 2005. Power and Sample Size in Multilevel Linear Models. In: Everitt BS, Howell DC (eds.). Encyclopedia of Statistics in Behavioral Science. Chichester, UK: John Wiley and Sons, Ltd. doi: 10.1002/0470013192.bsa492

    Examples

    # Sample size for multilevel model with 30 cluster groups and a small to
    # medium effect size (Cohen's d) of 0.3. 27 subjects per cluster and
    # hence a total sample size of about 802 observations is needed.
    smpsize_lmm(eff.size = .3, k = 30)
    #> $`Subjects per Cluster`
    #> [1] 27
    #>
    #> $`Total Sample Size`
    #> [1] 802

    # Sample size for multilevel model with 20 cluster groups and a medium
    # to large effect size for linear models of 0.2. Five subjects per cluster and
    # hence a total sample size of about 107 observations is needed.
    smpsize_lmm(eff.size = .2, df.n = 5, k = 20, power = .9)
    #> $`Subjects per Cluster`
    #> [1] 5
    #>
    #> $`Total Sample Size`
    #> [1] 107

diff --git a/docs/reference/std_beta.html b/docs/reference/std_beta.html
deleted file mode 100644
index 5702cf9b..00000000
--- a/docs/reference/std_beta.html
+++ /dev/null
@@ -1,272 +0,0 @@

Standardized beta coefficients and CI of linear and mixed models — std_beta • sjstats

Returns the standardized beta coefficients, standard error and confidence intervals of fitted linear (mixed) models.

    std_beta(fit, ...)

    # S3 method for merMod
    std_beta(fit, ci.lvl = 0.95, ...)

    # S3 method for lm
    std_beta(fit, type = "std", ci.lvl = 0.95, ...)

    # S3 method for gls
    std_beta(fit, type = "std", ci.lvl = 0.95, ...)

Arguments

fit: Fitted linear (mixed) model of class lm, merMod (lme4 package), gls or stanreg.

...: Currently not used.

ci.lvl: Numeric, the level of the confidence intervals.

type: If fit is of class lm, normal standardized coefficients are computed by default. Use type = "std2" to follow Gelman's (2008) suggestion, rescaling the estimates by dividing them by two standard deviations, so that the resulting coefficients are directly comparable for untransformed binary predictors.

Value

A tibble with term names, standardized beta coefficients, standard error and confidence intervals of fit.

Details

"Standardized coefficients refer to how many standard deviations a dependent variable will change, per standard deviation increase in the predictor variable. Standardization of the coefficient is usually done to answer the question of which of the independent variables have a greater effect on the dependent variable in a multiple regression analysis, when the variables are measured in different units of measurement (for example, income measured in dollars and family size measured in number of individuals)" (Source: Wikipedia)
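In the simplest case (an lm model), this definition boils down to rescaling each coefficient by the ratio of predictor to response standard deviations. A minimal sketch for illustration, using the example model from below on complete cases only; this is not necessarily the package's exact implementation:

    # b_std = b * sd(x) / sd(y), here for the Wind term
    fit <- lm(Ozone ~ Wind + Temp + Solar.R, data = airquality)
    d <- na.omit(airquality[c("Ozone", "Wind", "Temp", "Solar.R")])
    coef(fit)[["Wind"]] * sd(d$Wind) / sd(d$Ozone)  # approx. -0.356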

Note

For gls objects, standardized beta coefficients may be wrong for categorical variables (factors), because the model.matrix for gls objects returns the original data of the categorical vector, and not the 'dummy' coded vectors as for other classes. See, as example:

    head(model.matrix(lm(neg_c_7 ~ as.factor(e42dep), data = efc, na.action = na.omit)))

and

    head(model.matrix(nlme::gls(neg_c_7 ~ as.factor(e42dep), data = efc, na.action = na.omit)))

In such cases, use to_dummy() to create dummies from factors.

References

Wikipedia: Standardized coefficient

Gelman A. 2008. Scaling regression inputs by dividing by two standard deviations. Statistics in Medicine 27: 2865-2873. http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf

    Examples

    # fit linear model
    fit <- lm(Ozone ~ Wind + Temp + Solar.R, data = airquality)
    # print std. beta coefficients
    std_beta(fit)
    #>      term std.estimate  std.error   conf.low  conf.high
    #> 1    Wind   -0.3564122 0.06996619 -0.4935434 -0.2192810
    #> 2    Temp    0.4731461 0.07260889  0.3308353  0.6154569
    #> 3 Solar.R    0.1638655 0.06351430  0.0393798  0.2883513

    # print std. beta coefficients and ci, using
    # 2 sd and center binary predictors
    std_beta(fit, type = "std2")
    #>      term std.estimate std.error   conf.low conf.high
    #> 1    Wind    -23.71992  4.656386 -32.846272 -14.59358
    #> 2    Temp     31.48879  4.832262  22.017729  40.95985
    #> 3 Solar.R     10.90557  4.226999   2.620802  19.19034

    # std. beta for mixed models
    library(lme4)
    fit1 <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy)
    std_beta(fit1)

diff --git a/docs/reference/svyglm.nb.html b/docs/reference/svyglm.nb.html
deleted file mode 100644
index 190254da..00000000
--- a/docs/reference/svyglm.nb.html
+++ /dev/null
@@ -1,275 +0,0 @@

Survey-weighted negative binomial generalised linear model — svyglm.nb • sjstats

svyglm.nb() is an extension to the survey package to fit survey-weighted negative binomial models. It uses svymle to fit sampling-weighted maximum likelihood estimates, based on starting values provided by glm.nb, as proposed by Lumley (2010, pp. 249).

    svyglm.nb(formula, design, ...)

    Arguments

formula: An object of class formula, i.e. a symbolic description of the model to be fitted. See 'Details' in glm.

design: An object of class svydesign, providing a specification of the survey design.

...: Other arguments passed down to glm.nb.

Value

An object of class svymle and svyglm.nb, with some additional information about the model.

Details

For details on the computation method, see Lumley (2010), Appendix E (especially pp. 254ff.).

sjstats implements the following S3 methods for svyglm.nb objects: family(), model.frame(), formula(), print(), predict() and residuals(). However, these functions have some limitations:

• family() simply returns the family object from the underlying glm.nb model.
• The predict() method just re-fits the svyglm.nb model with glm.nb, overwrites the $coefficients from this model object with the coefficients from the returned svymle object, and finally calls predict.glm to compute the predicted values.
• residuals() re-fits the svyglm.nb model with glm.nb and then computes the Pearson residuals from the glm.nb object.

    References

Lumley T. 2010. Complex Surveys: A Guide to Analysis Using R. Wiley.

    Examples

    # ------------------------------------------
    # This example reproduces the results from
    # Lumley 2010, figure E.7 (Appendix E, p256)
    # ------------------------------------------
    if (require("survey")) {
      data(nhanes_sample)

      # create survey design
      des <- svydesign(
        id = ~SDMVPSU,
        strat = ~SDMVSTRA,
        weights = ~WTINT2YR,
        nest = TRUE,
        data = nhanes_sample
      )

      # fit negative binomial regression
      fit <- svyglm.nb(total ~ factor(RIAGENDR) * (log(age) + factor(RIDRETH1)), des)

      # print coefficients and standard errors
      fit
    }
    #> Loading required package: survey
    #> Loading required package: grid
    #> Loading required package: survival
    #>
    #> Attaching package: 'survey'
    #> The following object is masked from 'package:sjstats':
    #>
    #>     cv
    #> The following object is masked from 'package:graphics':
    #>
    #>     dotchart
    #>                                   term    irr std.error conf.low conf.high
    #> 2                          (Intercept) 9.8463    0.1556   7.2578   13.3580
    #> 3                    factor(RIAGENDR)2 0.4511    0.1805   0.3167    0.6426
    #> 4                             log(age) 2.9163    0.2331   1.8467    4.6056
    #> 5                    factor(RIDRETH1)2 1.0859    0.1477   0.8130    1.4504
    #> 6                    factor(RIDRETH1)3 1.0977    0.1779   0.7746    1.5556
    #> 7                    factor(RIDRETH1)4 2.2686    0.2974   1.2665    4.0634
    #> 8                    factor(RIDRETH1)5 1.0589    0.3789   0.5039    2.2250
    #> 9           factor(RIAGENDR)2:log(age) 0.2947    0.2651   0.1753    0.4955
    #> 10 factor(RIAGENDR)2:factor(RIDRETH1)2 0.8314    0.2611   0.4984    1.3870
    #> 11 factor(RIAGENDR)2:factor(RIDRETH1)3 1.8285    0.1931   1.2523    2.6698
    #> 12 factor(RIAGENDR)2:factor(RIDRETH1)4 1.0668    0.3747   0.5119    2.2232
    #> 13 factor(RIAGENDR)2:factor(RIDRETH1)5 1.4564    0.4427   0.6116    3.4680
    #>    p.value
    #> 2   <0.001 ***
    #> 3   <0.001 ***
    #> 4   <0.001 ***
    #> 5   0.5769
    #> 6   0.6003
    #> 7   0.0059 **
    #> 8   0.8800
    #> 9   <0.001 ***
    #> 10  0.4795
    #> 11  0.0018 **
    #> 12  0.8630
    #> 13  0.3957
    #>
    #> Dispersion parameter Theta: 0.8062
    #> Standard Error of Theta: 0.0216
    #>
    #> Showing robust standard errors on link-scale (untransformed).

diff --git a/docs/reference/svyglm.zip.html b/docs/reference/svyglm.zip.html
deleted file mode 100644
index bb9fd14b..00000000
--- a/docs/reference/svyglm.zip.html
+++ /dev/null
@@ -1,232 +0,0 @@

Survey-weighted zero-inflated Poisson model — svyglm.zip • sjstats

svyglm.zip() is an extension to the survey package to fit survey-weighted zero-inflated Poisson models. It uses svymle to fit sampling-weighted maximum likelihood estimates, based on starting values provided by zeroinfl.

    svyglm.zip(formula, design, ...)

    Arguments

formula: An object of class formula, i.e. a symbolic description of the model to be fitted. See 'Details' in zeroinfl.

design: An object of class svydesign, providing a specification of the survey design.

...: Other arguments passed down to zeroinfl.

Value

An object of class svymle and svyglm.zip, with some additional information about the model.

Details

Code modified from https://notstatschat.rbind.io/2015/05/26/zero-inflated-poisson-from-complex-samples/.

    Examples

    if (require("survey")) {
      data(nhanes_sample)
      set.seed(123)
      nhanes_sample$malepartners <- rpois(nrow(nhanes_sample), 2)
      nhanes_sample$malepartners[sample(1:2992, 400)] <- 0

      # create survey design
      des <- svydesign(
        id = ~SDMVPSU,
        strat = ~SDMVSTRA,
        weights = ~WTINT2YR,
        nest = TRUE,
        data = nhanes_sample
      )

      # fit zero-inflated Poisson regression
      fit <- svyglm.zip(
        malepartners ~ age + factor(RIDRETH1) | age + factor(RIDRETH1),
        des
      )

      # print coefficients and standard errors
      fit
    }
    #> Warning: non-integer #successes in a binomial glm!
    #>                    term estimate std.error conf.low conf.high p.value
    #> 2                   age   0.0149    0.0354   0.9469    1.0879  0.6745
    #> 3     factor(RIDRETH1)2   0.0185    0.0754   0.8787    1.1810  0.8062
    #> 4     factor(RIDRETH1)3  -0.0449    0.0284   0.9043    1.0107  0.1133
    #> 5     factor(RIDRETH1)4  -0.0240    0.0276   0.9250    1.0305  0.3843
    #> 6     factor(RIDRETH1)5   0.0371    0.0617   0.9197    1.1712  0.5470
    #> 7        tp.(Intercept)  -1.6694    0.4717   0.0747    0.4748  <0.001 ***
    #> 8                tp.age  -0.0333    0.2831   0.5553    1.6848  0.9064
    #> 9  tp.factor(RIDRETH1)2   0.1548    0.2571   0.7053    1.9323  0.5472
    #> 10 tp.factor(RIDRETH1)3  -0.3969    0.2111   0.4446    1.0169  0.0601 .
    #> 11 tp.factor(RIDRETH1)4  -0.2330    0.3050   0.4357    1.4402  0.4450
    #> 12 tp.factor(RIDRETH1)5  -0.3303    0.4744   0.2836    1.8214  0.4863
    #>
    #> Showing robust standard errors on link-scale (untransformed).

diff --git a/docs/reference/table_values.html b/docs/reference/table_values.html
deleted file mode 100644
index f040d6a0..00000000
--- a/docs/reference/table_values.html
+++ /dev/null
@@ -1,199 +0,0 @@

Expected and relative table values — table_values • sjstats

This function calculates a table's cell, row and column percentages as well as expected values, and returns all results as lists of tables.

    table_values(tab, digits = 2)

    Arguments

tab: Simple table or ftable of which cell, row and column percentages as well as expected values are calculated. Tables of class xtabs and other will be coerced to ftable objects.

digits: Amount of digits for the table percentage values.

Value

(Invisibly) returns a list with four tables:

1. cell: a table with cell percentages of tab
2. row: a table with row percentages of tab
3. col: a table with column percentages of tab
4. expected: a table with expected values of tab

    Examples

    tab <- table(sample(1:2, 30, TRUE), sample(1:3, 30, TRUE))
    # show expected values
    table_values(tab)$expected
    #>   A B C
    #> A 4 5 4
    #> B 5 7 5
    # show cell percentages
    table_values(tab)$cell
    #>       1     2     3
    #>
    #> 1 16.67 10.00 13.33
    #> 2 13.33 30.00 16.67
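The expected values shown above follow the usual contingency-table definition; a minimal sketch for illustration:

    # E[i, j] = rowSums(tab)[i] * colSums(tab)[j] / sum(tab)
    outer(rowSums(tab), colSums(tab)) / sum(tab)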
diff --git a/docs/reference/tidy_stan.html b/docs/reference/tidy_stan.html
deleted file mode 100644
index bbceb104..00000000
--- a/docs/reference/tidy_stan.html
+++ /dev/null
@@ -1,265 +0,0 @@

Tidy summary output for stan models — tidy_stan • sjstats

    Returns a tidy summary output for stan models.

    tidy_stan(
      x,
      prob = 0.89,
      typical = "median",
      trans = NULL,
      effects = c("all", "fixed", "random"),
      component = c("all", "conditional", "zero_inflated", "zi"),
      digits = 2
    )

Arguments

x: A stanreg, stanfit or brmsfit object.

prob: Vector of scalars between 0 and 1, indicating the mass within the credible interval that is to be estimated.

typical: The typical value that will represent the Bayesian point estimate. By default, the posterior median is returned. See typical_value for possible values for this argument.

trans: Name of a function or character vector naming a function, used to apply transformations on the estimates and uncertainty intervals. The values for standard errors are not transformed! If trans is not NULL, credible intervals instead of HDI are computed, due to the possible asymmetry of the HDI.

effects: Should results for fixed effects, random effects or both be returned? Only applies to mixed models. May be abbreviated.

component: Should results for all parameters, parameters for the conditional model or the zero-inflated part of the model be returned? May be abbreviated. Only applies to brms-models.

digits: Amount of digits to round numerical values in the output.

Value

A data frame, summarizing x, with consistent column names. To distinguish multiple HDI values, column names for the HDI get a suffix when prob has more than one element.

    Details

The returned data frame has an additional class attribute, tidy_stan, to pass the result to its own print() method. The print() method creates a cleaner output, especially for multilevel, zero-inflated or multivariate response models, where, for instance, the conditional part of a model is printed separately from the zero-inflated part, or random and fixed effects are printed separately.

The returned data frame gives information on:

• The Bayesian point estimate (column estimate, which is by default the posterior median; other statistics are also possible, see argument typical).
• The standard error (which is actually the median absolute deviation).
• The HDI. Computation for HDI is based on the code from Kruschke 2015, pp. 727f.
• The Probability of Direction (pd), which is an index for "effect significance" (see Makowski et al. 2019). A value of 95% or higher indicates a "significant" (i.e. statistically clear) effect.
• The effective number of samples, ESS.
• The Rhat statistic. When Rhat is above 1, it usually indicates that the chain has not yet converged, and the drawn samples might not be trustworthy. Drawing more iterations may solve this issue.
• The Monte Carlo standard error (see mcse, and the sketch after this list). It is defined as the standard deviation of the chains divided by the square root of their effective sample size and "provides a quantitative suggestion of how big the estimation noise is" (Kruschke 2015, p. 187).
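The MCSE definition in the last bullet is compact enough to state directly. A minimal sketch (the helper name is hypothetical), assuming you already have a vector of posterior draws and their effective sample size:

    # mcse = sd(draws) / sqrt(ess)  (Kruschke 2015, p. 187)
    mcse_sketch <- function(draws, ess) sd(draws) / sqrt(ess)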

    References

Kruschke JK. 2015. Doing Bayesian Data Analysis: A Tutorial with R, JAGS, and Stan. 2nd edition. Academic Press.

Gelman A, Carlin JB, Stern HS, Dunson DB, Vehtari A, Rubin DB. 2013. Bayesian data analysis. 3rd ed. Boca Raton: Chapman and Hall/CRC.

Gelman A, Rubin DB. 1992. Inference from iterative simulation using multiple sequences. Statistical Science 7: 457-511.

Makowski D, Ben-Shachar MS, Lüdecke D. 2019. bayestestR: Describing Effects and their Uncertainty, Existence and Significance within the Bayesian Framework. Journal of Open Source Software 4: 1541. doi: 10.21105/joss.01541

McElreath R. 2015. Statistical Rethinking. A Bayesian Course with Examples in R and Stan. Chapman and Hall.

    Examples

    if (FALSE) {
      if (require("rstanarm")) {
        fit <- stan_glm(mpg ~ wt + am, data = mtcars, chains = 1)
        tidy_stan(fit)
        tidy_stan(fit, prob = c(.89, .5))
      }
    }

diff --git a/docs/reference/typical_value.html b/docs/reference/typical_value.html
deleted file mode 100644
index ab60fec5..00000000
--- a/docs/reference/typical_value.html
+++ /dev/null
@@ -1,270 +0,0 @@

Return the typical value of a vector — typical_value • sjstats

    This function returns the "typical" value of a variable.

    typical_value(x, fun = "mean", weights = NULL, ...)

Arguments

x: A variable.

fun: Character vector, naming the function to be applied to x. Currently, "mean", "weighted.mean", "median" and "mode" are supported, which call the corresponding R functions (except "mode", which calls an internal function to compute the most common value). "zero" simply returns 0. Note: By default, if x is a factor, only fun = "mode" is applicable; for all other functions (including the default, "mean"), the reference level of x is returned. For character vectors, only the mode is returned. You can use a named vector to apply different functions to numeric and categorical x, where factors are first converted to numeric vectors, e.g. fun = c(numeric = "median", factor = "mean"). See 'Examples'.

weights: Name of variable in x that indicates the vector of weights that will be applied to weight all observations. Default is NULL, so no weights are used.

...: Further arguments, passed down to fun.

    Value

The "typical" value of x.

    Details

By default, for numeric variables, typical_value() returns the mean value of x (unless changed with the fun argument).

For factors, the reference level is returned, or the most common value (if fun = "mode"), unless fun is a named vector. If fun is a named vector, specify the function for numeric and categorical variables as element names, e.g. fun = c(numeric = "median", factor = "mean"). In this case, factors are converted to numeric values (using to_value) and the related function is applied. You may abbreviate the names: fun = c(n = "median", f = "mean"). See also 'Examples'.

For character vectors, the most common value (mode) is returned.
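The "mode" mentioned above (the most common value) has no base-R one-liner; a minimal sketch of such a helper, purely for illustration and not the package's internal function:

    mode_sketch <- function(x) names(which.max(table(x)))
    mode_sketch(c("a", "b", "b", "c"))  # "b"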


    Examples

    data(iris)
    typical_value(iris$Sepal.Length)
    #> [1] 5.843333

    library(purrr)
    map(iris, ~ typical_value(.x))
    #> $Sepal.Length
    #> [1] 5.843333
    #>
    #> $Sepal.Width
    #> [1] 3.057333
    #>
    #> $Petal.Length
    #> [1] 3.758
    #>
    #> $Petal.Width
    #> [1] 1.199333
    #>
    #> $Species
    #> [1] "setosa"

    # example from ?stats::weighted.mean
    wt <- c(5, 5, 4, 1) / 15
    x <- c(3.7, 3.3, 3.5, 2.8)

    typical_value(x, fun = "weighted.mean")
    #> [1] 3.325
    typical_value(x, fun = "weighted.mean", weights = wt)
    #> [1] 3.453333

    # for factors, return either reference level or mode value
    set.seed(123)
    x <- sample(iris$Species, size = 30, replace = TRUE)
    typical_value(x)
    #> [1] "setosa"
    typical_value(x, fun = "mode")
    #> [1] "virginica"

    # for factors, use a named vector to apply other functions than "mode"
    map(iris, ~ typical_value(.x, fun = c(n = "median", f = "mean")))
    #> $Sepal.Length
    #> [1] 5.8
    #>
    #> $Sepal.Width
    #> [1] 3
    #>
    #> $Petal.Length
    #> [1] 4.35
    #>
    #> $Petal.Width
    #> [1] 1.3
    #>
    #> $Species
    #> [1] 2

diff --git a/docs/reference/var_pop.html b/docs/reference/var_pop.html
deleted file mode 100644
index 3e4770e2..00000000
--- a/docs/reference/var_pop.html
+++ /dev/null
@@ -1,190 +0,0 @@

Calculate population variance and standard deviation — var_pop • sjstats

    Calculate the population variance or standard deviation of a vector.

    var_pop(x)

    sd_pop(x)

Arguments

x: (Numeric) vector.

Value

The population variance or standard deviation of x.

    Details

Unlike var, which returns the sample variance, var_pop() returns the population variance. sd_pop() returns the standard deviation based on the population variance.
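The relationship between the two is a simple rescaling; a minimal sketch for illustration, assuming the efc data from the examples below:

    # population variance = sample variance * (n - 1) / n
    x <- na.omit(efc$c12hour)
    var(x) * (length(x) - 1) / length(x)        # same value as var_pop(efc$c12hour)
    sqrt(var(x) * (length(x) - 1) / length(x))  # same value as sd_pop(efc$c12hour)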


    Examples

    data(efc)

    # sampling variance
    var(efc$c12hour, na.rm = TRUE)
    #> [1] 2581.152
    # population variance
    var_pop(efc$c12hour)
    #> [1] 2578.291

    # sampling sd
    sd(efc$c12hour, na.rm = TRUE)
    #> [1] 50.80504
    # population sd
    sd_pop(efc$c12hour)
    #> [1] 50.77687

diff --git a/docs/reference/weight.html b/docs/reference/weight.html
deleted file mode 100644
index 3558dfb6..00000000
--- a/docs/reference/weight.html
+++ /dev/null
@@ -1,222 +0,0 @@

Weight a variable — weight • sjstats

These functions weight the variable x by a specific vector of weights.

    weight(x, weights, digits = 0)

    weight2(x, weights)

Arguments

x: (Unweighted) variable.

weights: Vector with same length as x, which contains weight factors. Each value of x has a specific assigned weight in weights.

digits: Numeric value indicating the number of decimal places to be used for rounding the weighted values. By default, this value is 0, i.e. the returned values are integer values.

Value

The weighted x.

    Details

weight2() sums up all weight values of the associated categories of x, whereas weight() uses an xtabs formula to weight cases. Thus, weight() may return a vector of different length than x.
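The idea behind weight2() can be sketched in a few lines; this is an illustration of the principle only, with a hypothetical helper name, not the package's exact code:

    # sum the weights within each category, round, and expand back into a vector
    w2_sketch <- function(x, weights) {
      counts <- round(tapply(weights, x, sum))
      rep(names(counts), counts)
    }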

Note

The values of the returned vector are in sorted order, whereas the values' order of the original x may be spread randomly. Hence, x can't be used, for instance, for further cross tabulation. In case you want to have weighted contingency tables or (grouped) box plots etc., use the weightBy argument of most functions.

    Examples

    v <- sample(1:4, 20, TRUE)
    table(v)
    #> v
    #> 1 2 3 4
    #> 3 6 5 6
    w <- abs(rnorm(20))
    table(weight(v, w))
    #>
    #> 1 2 3 4
    #> 2 5 5 3
    table(weight2(v, w))
    #>
    #> 1 2 3 4
    #> 2 5 5 3

    set.seed(1)
    x <- sample(letters[1:5], size = 20, replace = TRUE)
    w <- runif(n = 20)

    table(x)
    #> x
    #> a b c d e
    #> 6 4 3 1 6
    table(weight(x, w))
    #>
    #> a b c e
    #> 3 3 2 3

diff --git a/docs/reference/weighted_sd.html b/docs/reference/weighted_sd.html
deleted file mode 100644
index 196b69f9..00000000
--- a/docs/reference/weighted_sd.html
+++ /dev/null
@@ -1,402 +0,0 @@

Weighted statistics for tests and variables — survey_median • sjstats

Weighted statistics for variables

weighted_sd(), weighted_se(), weighted_mean() and weighted_median() compute the weighted standard deviation, standard error, mean or median for a variable or for all variables of a data frame. survey_median() computes the median for a variable in a survey design (see svydesign). weighted_correlation() computes a weighted correlation for a two-sided alternative hypothesis.

Weighted tests

weighted_ttest() computes a weighted t-test, while weighted_mannwhitney() computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test (for more than two groups). weighted_chisqtest() computes a weighted Chi-squared test for contingency tables.

    survey_median(x, design)

    weighted_chisqtest(data, ...)

    # S3 method for default
    weighted_chisqtest(data, x, y, weights, ...)

    # S3 method for formula
    weighted_chisqtest(formula, data, ...)

    weighted_correlation(data, ...)

    # S3 method for default
    weighted_correlation(data, x, y, weights, ci.lvl = 0.95, ...)

    # S3 method for formula
    weighted_correlation(formula, data, ci.lvl = 0.95, ...)

    weighted_mean(x, weights = NULL)

    weighted_median(x, weights = NULL)

    weighted_mannwhitney(data, ...)

    # S3 method for default
    weighted_mannwhitney(data, x, grp, weights, ...)

    # S3 method for formula
    weighted_mannwhitney(formula, data, ...)

    weighted_sd(x, weights = NULL)

    wtd_sd(x, weights = NULL)

    weighted_se(x, weights = NULL)

    weighted_ttest(data, ...)

    # S3 method for default
    weighted_ttest(data, x, y = NULL, weights, mu = 0, paired = FALSE,
      ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...)

    # S3 method for formula
    weighted_ttest(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95,
      alternative = c("two.sided", "less", "greater"), ...)

    Arguments

x: (Numeric) vector or a data frame. For survey_median(), weighted_ttest(), weighted_mannwhitney() and weighted_chisqtest(), the bare (unquoted) variable name, or a character vector with the variable name.

design: An object of class svydesign, providing a specification of the survey design.

data: A data frame.

...: For weighted_ttest() and weighted_mannwhitney(), currently not used. For weighted_chisqtest(), further arguments passed down to chisq.test.

y: Optional, bare (unquoted) variable name, or a character vector with the variable name.

weights: Bare (unquoted) variable name, or a character vector with the variable name of the numeric vector of weights. If weights = NULL, an unweighted statistic is reported.

formula: A formula of the form lhs ~ rhs1 + rhs2 where lhs is a numeric variable giving the data values, rhs1 a factor with two levels giving the corresponding groups, and rhs2 a variable with weights.

ci.lvl: Confidence level of the interval.

grp: Bare (unquoted) name of the cross-classifying variable, where x is grouped into the categories represented by grp, or a character vector with the variable name.

mu: A number indicating the true value of the mean (or difference in means if you are performing a two sample test).

paired: Logical, whether to compute a paired t-test.

alternative: A character string specifying the alternative hypothesis; must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.

    Value

The weighted (test) statistic.

Note

weighted_chisq() is a convenient wrapper for crosstable_statistics. For a weighted one-way ANOVA, use means_by_group() with the weights argument.

weighted_ttest() assumes unequal variance between the two groups.

    Examples

    # weighted sd and se ----

    weighted_sd(rnorm(n = 100, mean = 3), runif(n = 100))
    #> [1] 0.8498705

    data(efc)
    weighted_sd(efc[, 1:3], runif(n = nrow(efc)))
    #>    c12hour   e15relat     e16sex
    #> 51.7876181  2.0540843  0.4699551
    weighted_se(efc[, 1:3], runif(n = nrow(efc)))
    #>    c12hour   e15relat     e16sex
    #> 1.66065784 0.06942749 0.01562877
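As a point of reference for these functions, the weighted mean, and one common frequency-weight flavour of the weighted standard deviation built on it, can be sketched in a few lines. This is an illustration with hypothetical helper names, not necessarily the package's exact implementation:

    # weighted mean: sum(w * x) / sum(w)
    w_mean <- function(x, w) sum(w * x) / sum(w)
    # weighted sd: sqrt(sum(w * (x - w_mean)^2) / (sum(w) - 1))
    w_sd <- function(x, w) sqrt(sum(w * (x - w_mean(x, w))^2) / (sum(w) - 1))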
    # survey_median ----

    # median for variables from weighted survey designs
    if (require("survey")) {
      data(nhanes_sample)

      des <- svydesign(
        id = ~SDMVPSU,
        strat = ~SDMVSTRA,
        weights = ~WTINT2YR,
        nest = TRUE,
        data = nhanes_sample
      )

      survey_median(total, des)
      survey_median("total", des)
    }
    #> [1] 6

    # weighted t-test ----

    efc$weight <- abs(rnorm(nrow(efc), 1, .3))
    weighted_ttest(efc, e17age, weights = weight)
    #>
    #> One Sample t-test (two.sided)
    #> # t=292.68  df=890  p-value=0.000
    #>
    #> mean of e17age: 79.189 [78.658, 79.720]

    weighted_ttest(efc, e17age, c160age, weights = weight)
    #>
    #> Two-Sample t-test (two.sided)
    #>
    #> # comparison between e17age and c160age
    #> # t=49.92  df=1469  p-value=0.000
    #>
    #> mean of e17age    : 79.187
    #> mean of c160age   : 53.208
    #> difference of mean: 25.980 [24.959 27.001]

    weighted_ttest(e17age ~ e16sex + weight, efc)
    #>
    #> Two-Sample t-test (two.sided)
    #>
    #> # comparison of e17age by e16sex
    #> # t=-7.46  df=604  p-value=0.000
    #>
    #> mean in group [1] male  : 76.401
    #> mean in group [2] female: 80.518
    #> difference of mean      : -4.117 [-5.201 -3.034]

    # weighted Mann-Whitney-U-test ----

    weighted_mannwhitney(c12hour ~ c161sex + weight, efc)
    #>
    #> Weighted Mann-Whitney-U test (two.sided)
    #>
    #> # comparison of c12hour by c161sex
    #> # Chisq=3.26  df=899  p-value=0.001
    #>
    #> difference in mean rank score: 0.075

    # weighted Chi-squared-test ----

    weighted_chisqtest(efc, c161sex, e16sex, weights = weight, correct = FALSE)
    #>
    #> # Measure of Association for Contingency Tables
    #>
    #>   Chi-squared: 2.0566
    #>           Phi: 0.0479
    #>       p-value: 0.1515

    weighted_chisqtest(c172code ~ c161sex + weight, efc)
    #>
    #> # Measure of Association for Contingency Tables
    #>
    #>   Chi-squared: 4.8005
    #>    Cramer's V: 0.0758
    #>       p-value: 0.0907

diff --git a/docs/reference/wtd_sd.html b/docs/reference/wtd_sd.html
deleted file mode 100644
index 1c55bf4b..00000000
--- a/docs/reference/wtd_sd.html
+++ /dev/null
@@ -1,421 +0,0 @@

Weighted statistics for tests and variables — svy_md • sjstats

Weighted statistics for variables

wtd_sd(), wtd_se(), wtd_mean() and wtd_median() compute the weighted standard deviation, standard error, mean or median for a variable or for all variables of a data frame. svy_md() computes the median for a variable in a survey design (see svydesign). wtd_cor() computes a weighted correlation for a two-sided alternative hypothesis.

Weighted tests

wtd_ttest() computes a weighted t-test, while wtd_mwu() computes a weighted Mann-Whitney-U test or a Kruskal-Wallis test (for more than two groups). wtd_chisqtest() computes a weighted Chi-squared test for contingency tables.

    svy_md(x, design)

    survey_median(x, design)

    wtd_chisqtest(data, ...)

    # S3 method for default
    wtd_chisqtest(data, x, y, weights, ...)

    # S3 method for formula
    wtd_chisqtest(formula, data, ...)

    wtd_cor(data, ...)

    # S3 method for default
    wtd_cor(data, x, y, weights, ci.lvl = 0.95, ...)

    # S3 method for formula
    wtd_cor(formula, data, ci.lvl = 0.95, ...)

    wtd_mean(x, weights = NULL)

    wtd_median(x, weights = NULL)

    wtd_mwu(data, ...)

    # S3 method for default
    wtd_mwu(data, x, grp, weights, ...)

    # S3 method for formula
    wtd_mwu(formula, data, ...)

    wtd_sd(x, weights = NULL)

    wtd_se(x, weights = NULL)

    wtd_ttest(data, ...)

    # S3 method for default
    wtd_ttest(data, x, y = NULL, weights, mu = 0, paired = FALSE,
      ci.lvl = 0.95, alternative = c("two.sided", "less", "greater"), ...)

    # S3 method for formula
    wtd_ttest(formula, data, mu = 0, paired = FALSE, ci.lvl = 0.95,
      alternative = c("two.sided", "less", "greater"), ...)

    Arguments

x: (Numeric) vector or a data frame. For svy_md(), wtd_ttest(), wtd_mwu() and wtd_chisqtest(), the bare (unquoted) variable name, or a character vector with the variable name.

design: An object of class svydesign, providing a specification of the survey design.

data: A data frame.

...: For wtd_ttest() and wtd_mwu(), currently not used. For wtd_chisqtest(), further arguments passed down to chisq.test.

y: Optional, bare (unquoted) variable name, or a character vector with the variable name.

weights: Bare (unquoted) variable name, or a character vector with the variable name of the numeric vector of weights. If weights = NULL, an unweighted statistic is reported.

formula: A formula of the form lhs ~ rhs1 + rhs2 where lhs is a numeric variable giving the data values, rhs1 a factor with two levels giving the corresponding groups, and rhs2 a variable with weights.

ci.lvl: Confidence level of the interval.

grp: Bare (unquoted) name of the cross-classifying variable, where x is grouped into the categories represented by grp, or a character vector with the variable name.

mu: A number indicating the true value of the mean (or difference in means if you are performing a two sample test).

paired: Logical, whether to compute a paired t-test.

alternative: A character string specifying the alternative hypothesis; must be one of "two.sided" (default), "greater" or "less". You can specify just the initial letter.

    Value

The weighted (test) statistic.

Note

wtd_chisq() is a convenient wrapper for xtab_statistics. For a weighted one-way ANOVA, use grpmean() with the weights argument.

wtd_ttest() assumes unequal variance between the two groups.

    Examples

    # weighted sd and se ----

    wtd_sd(rnorm(n = 100, mean = 3), runif(n = 100))
    #> [1] 0.8498705

    data(efc)
    wtd_sd(efc[, 1:3], runif(n = nrow(efc)))
    #>    c12hour   e15relat     e16sex
    #> 51.7876181  2.0540843  0.4699551
    wtd_se(efc[, 1:3], runif(n = nrow(efc)))
    #>    c12hour   e15relat     e16sex
    #> 1.66065784 0.06942749 0.01562877

    # svy_md ----

    # median for variables from weighted survey designs
    library(survey)
    data(nhanes_sample)

    des <- svydesign(
      id = ~SDMVPSU,
      strat = ~SDMVSTRA,
      weights = ~WTINT2YR,
      nest = TRUE,
      data = nhanes_sample
    )

    svy_md(total, des)
    #> [1] 6
    svy_md("total", des)
    #> [1] 6

    # weighted t-test ----

    efc$weight <- abs(rnorm(nrow(efc), 1, .3))
    wtd_ttest(efc, e17age, weights = weight)
    #>
    #> One Sample t-test (two.sided)
    #> # t=292.68  df=890  p-value=0.000
    #>
    #> mean of e17age: 79.189 [78.658, 79.720]

    wtd_ttest(efc, e17age, c160age, weights = weight)
    #>
    #> Two-Sample t-test (two.sided)
    #>
    #> # comparison between e17age and c160age
    #> # t=49.92  df=1469  p-value=0.000
    #>
    #> mean of e17age    : 79.187
    #> mean of c160age   : 53.208
    #> difference of mean: 25.980 [24.959 27.001]

    wtd_ttest(e17age ~ e16sex + weight, efc)
    #>
    #> Two-Sample t-test (two.sided)
    #>
    #> # comparison of e17age by e16sex
    #> # t=-7.46  df=604  p-value=0.000
    #>
    #> mean in group [1] male  : 76.401
    #> mean in group [2] female: 80.518
    #> difference of mean      : -4.117 [-5.201 -3.034]

    # weighted Mann-Whitney-U-test ----

    wtd_mwu(c12hour ~ c161sex + weight, efc)
    #>
    #> Weighted Mann-Whitney-U test (two.sided)
    #>
    #> # comparison of c12hour by c161sex
    #> # Chisq=3.26  df=899  p-value=0.001
    #>
    #> difference in mean rank score: 0.075

    # weighted Chi-squared-test ----

    wtd_chisqtest(efc, c161sex, e16sex, weights = weight, correct = FALSE)
    #>
    #> # Measure of Association for Contingency Tables
    #>
    #>   Chi-squared: 2.0566
    #>           Phi: 0.0479
    #>       p-value: 0.1515

    wtd_chisqtest(c172code ~ c161sex + weight, efc)
    #>
    #> # Measure of Association for Contingency Tables
    #>
    #>   Chi-squared: 4.8005
    #>    Cramer's V: 0.0758
    #>       p-value: 0.0907

diff --git a/docs/reference/xtab_statistics.html b/docs/reference/xtab_statistics.html
deleted file mode 100644
index b86a8ce4..00000000
--- a/docs/reference/xtab_statistics.html
+++ /dev/null
@@ -1,373 +0,0 @@

Measures of association for contingency tables — cramer • sjstats

This function calculates various measures of association for contingency tables and returns the statistic and p-value. Supported measures are Cramer's V, Phi, Spearman's rho, Kendall's tau and Pearson's r.

Usage

cramer(tab, ...)

# S3 method for formula
cramer(
  formula,
  data,
  ci.lvl = NULL,
  n = 1000,
  method = c("dist", "quantile"),
  ...
)

phi(tab, ...)

xtab_statistics(
  data,
  x1 = NULL,
  x2 = NULL,
  statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"),
  weights = NULL,
  ...
)

crosstable_statistics(
  data,
  x1 = NULL,
  x2 = NULL,
  statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"),
  weights = NULL,
  ...
)

    Arguments

tab

A table or ftable. Tables of class xtabs and other classes will be coerced to ftable objects.

...

Other arguments, passed down to the statistic functions chisq.test, fisher.test or cor.test.

formula

A formula of the form lhs ~ rhs where lhs is a numeric variable giving the data values and rhs a factor giving the corresponding groups.

data

A data frame or a table object. If a table object, x1 and x2 will be ignored. For Kendall's tau, Spearman's rho or Pearson's product moment correlation coefficient, data needs to be a data frame. If x1 and x2 are not specified, the first two columns of the data frame are used as variables to compute the crosstab.

ci.lvl

Scalar between 0 and 1. If not NULL, returns a data frame including lower and upper confidence intervals.

n

Number of bootstraps to be generated.

method

Character vector, indicating whether confidence intervals should be based on the bootstrap standard error, multiplied by the value of the quantile function of the t-distribution (default), or on sample quantiles of the bootstrapped values. See 'Details' in boot_ci(). May be abbreviated.

x1

Name of the first variable used to compute the contingency table. If data is a table object, this argument will be ignored.

x2

Name of the second variable used to compute the contingency table. If data is a table object, this argument will be ignored.

statistics

Name of the measure of association that should be computed. May be one of "auto", "cramer", "phi", "spearman", "kendall", "pearson" or "fisher". See 'Details'.

weights

Name of the variable in x that indicates the vector of weights applied to weight all observations. Default is NULL, so no weights are used.

    Value

For phi(), the table's Phi value. For cramer(), the table's Cramer's V.

For crosstable_statistics(), a list with the following components:

estimate: the value of the estimated measure of association.
p.value: the p-value for the test.
statistic: the value of the test statistic.
stat.name: the name of the test statistic.
stat.html: if applicable, the name of the test statistic, in HTML format.
df: the degrees of freedom for the contingency table.
method: character string indicating the name of the measure of association.
method.html: if applicable, the name of the measure of association, in HTML format.
method.short: the short form of the association measure; equals the statistics argument.
fisher: logical, whether Fisher's exact test was used to calculate the p-value.

    Details

The p-values for Cramer's V and the Phi coefficient are based on chisq.test(). If any expected value of a table cell is smaller than 5, or smaller than 10 when the df is 1, then fisher.test() is used to compute the p-value, unless statistics = "fisher"; in this case, the use of fisher.test() is forced. The test statistic is calculated with cramer() or phi(), respectively.

Both test statistic and p-value for Spearman's rho, Kendall's tau and Pearson's r are calculated with cor.test().

When statistics = "auto", only Cramer's V or Phi are calculated, based on the dimension of the table (i.e. if the table has more than two rows or columns, Cramer's V is calculated, otherwise Phi).
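For intuition, Cramer's V can be derived directly from the chi-squared statistic via the textbook formula V = sqrt(X^2 / (n * (min(dim) - 1))); a small sketch (illustrative only, not the package's internal code):

cramer_manual <- function(tab) {
  # chi-squared statistic without continuity correction
  x2 <- suppressWarnings(chisq.test(tab, correct = FALSE)$statistic)
  unname(sqrt(x2 / (sum(tab) * (min(dim(tab)) - 1))))
}
tab <- table(sample(1:2, 30, TRUE), sample(1:3, 30, TRUE))
cramer_manual(tab)  # for 2x2 tables this reduces to Phi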

    Examples

# Phi coefficient for 2x2 tables
tab <- table(sample(1:2, 30, TRUE), sample(1:2, 30, TRUE))
phi(tab)
#> [1] 0.1443376

# Cramer's V for nominal variables with more than 2 categories
tab <- table(sample(1:2, 30, TRUE), sample(1:3, 30, TRUE))
cramer(tab)
#> [1] 0.3795188

# formula notation
data(efc)
cramer(e16sex ~ c161sex, data = efc)
#> [1] 0.05258249

# bootstrapped confidence intervals
cramer(e16sex ~ c161sex, data = efc, ci.lvl = .95, n = 100)
#>       cramer     conf.low conf.high
#> 1 0.05258249 -0.005303236 0.1088931

# 2x2 table, compute Phi automatically
crosstable_statistics(efc, e16sex, c161sex)
#>
#> # Measure of Association for Contingency Tables
#>
#>   Chi-squared: 2.2327
#>           Phi: 0.0526
#>       p-value: 0.1351

# more dimensions than 2x2, compute Cramer's V automatically
crosstable_statistics(efc, c172code, c161sex)
#>
#> # Measure of Association for Contingency Tables
#>
#>   Chi-squared: 4.1085
#>    Cramer's V: 0.0699
#>       p-value: 0.1282

# ordinal data, use Kendall's tau
crosstable_statistics(efc, e42dep, quol_5, statistics = "kendall")
#>
#> # Measure of Association for Contingency Tables
#>
#>              z: -9.5951
#>  Kendall's tau: -0.2496
#>        p-value: <0.001

# calculate Spearman's rho, with continuity correction
crosstable_statistics(efc,
  e42dep,
  quol_5,
  statistics = "spearman",
  exact = FALSE,
  continuity = TRUE
)
#>
#> # Measure of Association for Contingency Tables
#>
#>              S: 157974157.4198
#> Spearman's rho: -0.3177
#>        p-value: <0.001
diff --git a/man/chi_squared_test.Rd b/man/chi_squared_test.Rd
new file mode 100644
index 00000000..f6dd5d77
--- /dev/null
+++ b/man/chi_squared_test.Rd
@@ -0,0 +1,87 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/chi_squared_test.R
+\name{chi_squared_test}
+\alias{chi_squared_test}
+\title{Chi-Squared test}
+\usage{
+chi_squared_test(
+  data,
+  select = NULL,
+  by = NULL,
+  probabilities = NULL,
+  weights = NULL,
+  paired = FALSE,
+  ...
+)
+}
+\arguments{
+\item{data}{A data frame.}
+
+\item{select}{Name of the dependent variable (as string) to be used for the
+test. \code{select} can also be a character vector, specifying the names of
+multiple continuous variables. In this case, \code{by} is ignored and variables
+specified in \code{select} are used to compute the test. This can be useful if
+the data is in wide-format and no grouping variable is available.}
+
+\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
+not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
+\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+
+\item{probabilities}{A numeric vector of probabilities for each cell in the
+contingency table. The length of the vector must match the number of cells
+in the table, i.e. the number of unique levels of the variable specified
+in \code{select}. If \code{probabilities} is provided, a chi-squared test for given
+probabilities is conducted. Furthermore, if \code{probabilities} is given, \code{by}
+must be \code{NULL}. The probabilities must sum to 1.}
+
+\item{weights}{Name of an (optional) weighting variable to be used for the test.}
+
+\item{paired}{Logical, if \code{TRUE}, a McNemar test is conducted for 2x2 tables.
+Note that \code{paired} only works for 2x2 tables.}
+
+\item{...}{Additional arguments passed down to \code{\link[=chisq.test]{chisq.test()}}.}
+}
+\value{
+A data frame with test results. The returned effect sizes are
+Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+given probabilities.
+}
+\description{
+This function performs a \eqn{\chi^2} test for contingency
+tables or tests for given probabilities. The returned effect sizes are
+Cramer's V for tables with more than two rows and columns, Phi (\eqn{\phi})
+for 2x2 tables, and \ifelse{latex}{\eqn{Fei}}{פ (Fei)} for tests against
+given probabilities (see \emph{Ben-Shachar et al. 2023}).
+}
+\details{
+The function is a wrapper around \code{\link[=chisq.test]{chisq.test()}} and
+\code{\link[=fisher.test]{fisher.test()}} (for small expected values) for contingency tables, and
+\code{chisq.test()} for given probabilities. When \code{probabilities} are provided,
+these are rescaled to sum to 1 (i.e. \code{rescale.p = TRUE}). When \code{fisher.test()}
+is called, simulated p-values are returned (i.e. \code{simulate.p.value = TRUE},
+see \code{?fisher.test}). If \code{paired = TRUE} and a 2x2 table is provided,
+a McNemar test (see \code{\link[=mcnemar.test]{mcnemar.test()}}) is conducted.
+
+The weighted version of the chi-squared test is based on a weighted
+table, using \code{\link[=xtabs]{xtabs()}} as input for \code{chisq.test()}.
+}
+\examples{
+data(efc)
+efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
+
+# Chi-squared test
+chi_squared_test(efc, "c161sex", by = "e16sex")
+
+# weighted Chi-squared test
+chi_squared_test(efc, "c161sex", by = "e16sex", weights = "weight")
+
+# Chi-squared test for given probabilities
+chi_squared_test(efc, "c161sex", probabilities = c(0.3, 0.7))
+}
+\references{
+Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+\doi{10.3390/math11091982}
+}
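As a sketch of the weighted variant described in the details above (illustrative; rounding the weighted table to whole counts is an assumption here, the package's internal handling may differ):

data(efc, package = "sjstats")
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
# weighted contingency table as input for chisq.test()
tab <- round(xtabs(weight ~ c161sex + e16sex, data = efc))
chisq.test(tab)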
diff --git a/man/crosstable_statistics.Rd b/man/crosstable_statistics.Rd
index 358c283a..cbea412f 100644
--- a/man/crosstable_statistics.Rd
+++ b/man/crosstable_statistics.Rd
@@ -1,16 +1,19 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/cramer.R, R/phi.R, R/xtab_statistics.R
-\name{cramer}
+\name{cramers_v}
+\alias{cramers_v}
 \alias{cramer}
-\alias{cramer.formula}
+\alias{cramers_v.formula}
 \alias{phi}
 \alias{crosstable_statistics}
 \alias{xtab_statistics}
 \title{Measures of association for contingency tables}
 \usage{
+cramers_v(tab, ...)
+
 cramer(tab, ...)
 
-\method{cramer}{formula}(
+\method{cramers_v}{formula}(
   formula,
   data,
   ci.lvl = NULL,
@@ -40,24 +43,21 @@ xtab_statistics(
 )
 }
 \arguments{
-\item{tab}{A \code{\link{table}} or \code{\link[stats]{ftable}}. Tables of class
-\code{\link[stats]{xtabs}} and other will be coerced to \code{ftable}
-objects.}
+\item{tab}{A \code{\link[=table]{table()}} or \code{\link[=ftable]{ftable()}}. Tables of class \code{\link[=xtabs]{xtabs()}} and
+other classes will be coerced to \code{ftable} objects.}
 
 \item{...}{Other arguments, passed down to the statistic functions
-\code{\link[stats]{chisq.test}}, \code{\link[stats]{fisher.test}} or
-\code{\link[stats]{cor.test}}.}
+\code{\link[=chisq.test]{chisq.test()}}, \code{\link[=fisher.test]{fisher.test()}} or \code{\link[=cor.test]{cor.test()}}.}
 
 \item{formula}{A formula of the form \code{lhs ~ rhs} where \code{lhs} is a
 numeric variable giving the data values and \code{rhs} a factor giving the
 corresponding groups.}
 
 \item{data}{A data frame or a table object. If a table object, \code{x1} and
-\code{x2} will be ignored. For Kendall's \emph{tau}, Spearman's \emph{rho}
-or Pearson's product moment correlation coefficient, \code{data} needs
-to be a data frame. If \code{x1} and \code{x2} are not specified,
-the first two columns of the data frames are used as variables
-to compute the crosstab.}
+\code{x2} will be ignored. For Kendall's \emph{tau}, Spearman's \emph{rho} or Pearson's
+product moment correlation coefficient, \code{data} needs to be a data frame.
+If \code{x1} and \code{x2} are not specified, the first two columns of the data
+frame are used as variables to compute the crosstab.}
 
 \item{ci.lvl}{Scalar between 0 and 1. If not \code{NULL}, returns a data
 frame including lower and upper confidence intervals.}
@@ -71,37 +71,37 @@ quantiles of the bootstrapped values. See 'Details' in \code{boot_ci()}.
 May be abbreviated.}
 
 \item{x1}{Name of first variable that should be used to compute the
-contingency table. If \code{data} is a table object, this argument
-will be irgnored.}
+contingency table. If \code{data} is a table object, this argument will be
+ignored.}
 
 \item{x2}{Name of second variable that should be used to compute the
-contingency table. If \code{data} is a table object, this argument
-will be irgnored.}
+contingency table. If \code{data} is a table object, this argument will be
+ignored.}
 
 \item{statistics}{Name of measure of association that should be computed. May
-be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"},
-\code{"kendall"}, \code{"pearson"} or \code{"fisher"}. See 'Details'.}
+be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"}, \code{"kendall"},
+\code{"pearson"} or \code{"fisher"}. See 'Details'.}
 
-\item{weights}{Name of variable in \code{x} that indicated the vector of
-weights that will be applied to weight all observations. Default is
-\code{NULL}, so no weights are used.}
+\item{weights}{Name of variable in \code{x} that indicates the vector of weights
+that will be applied to weight all observations. Default is \code{NULL}, so no
+weights are used.}
 }
 \value{
-For \code{phi()}, the table's Phi value. For \code{cramer()}, the
+For \code{\link[=phi]{phi()}}, the table's Phi value. For \code{cramers_v()}, the
 table's Cramer's V.
-\cr \cr
+
 For \code{crosstable_statistics()}, a list with following components:
-\describe{
-\item{\code{estimate}}{the value of the estimated measure of association.}
-\item{\code{p.value}}{the p-value for the test.}
-\item{\code{statistic}}{the value of the test statistic.}
-\item{\code{stat.name}}{the name of the test statistic.}
-\item{\code{stat.html}}{if applicable, the name of the test statistic, in HTML-format.}
-\item{\code{df}}{the degrees of freedom for the contingency table.}
-\item{\code{method}}{character string indicating the name of the measure of association.}
-\item{\code{method.html}}{if applicable, the name of the measure of association, in HTML-format.}
-\item{\code{method.short}}{the short form of association measure, equals the \code{statistics}-argument.}
-\item{\code{fisher}}{logical, if Fisher's exact test was used to calculate the p-value.}
+\itemize{
+\item \code{estimate}: the value of the estimated measure of association.
+\item \code{p.value}: the p-value for the test.
+\item \code{statistic}: the value of the test statistic.
+\item \code{stat.name}: the name of the test statistic.
+\item \code{stat.html}: if applicable, the name of the test statistic, in HTML-format.
+\item \code{df}: the degrees of freedom for the contingency table.
+\item \code{method}: character string indicating the name of the measure of association.
+\item \code{method.html}: if applicable, the name of the measure of association, in HTML-format.
+\item \code{method.short}: the short form of association measure, equals the \code{statistics}-argument.
+\item \code{fisher}: logical, if Fisher's exact test was used to calculate the p-value.
 }
 }
 \description{
@@ -112,19 +112,18 @@
 \details{
 The p-value for Cramer's V and the Phi coefficient are based
-on \code{chisq.test()}. If any expected value of a table cell is
-smaller than 5, or smaller than 10 and the df is 1, then \code{fisher.test()}
-is used to compute the p-value, unless \code{statistics = "fisher"}; in
-this case, the use of \code{fisher.test()} is forced to compute the
-p-value. The test statistic is calculated with \code{cramer()} resp.
-\code{phi()}.
-\cr \cr
-Both test statistic and p-value for Spearman's rho, Kendall's tau
-and Pearson's r are calculated with \code{cor.test()}.
-\cr \cr
-When \code{statistics = "auto"}, only Cramer's V or Phi are calculated,
-based on the dimension of the table (i.e. if the table has more than
-two rows or columns, Cramer's V is calculated, else Phi).
+on \code{chisq.test()}. If any expected value of a table cell is smaller than 5,
+or smaller than 10 and the df is 1, then \code{fisher.test()} is used to compute
+the p-value, unless \code{statistics = "fisher"}; in this case, the use of
+\code{fisher.test()} is forced to compute the p-value. The test statistic is
+calculated with \code{cramers_v()} or \code{phi()}, respectively.
+
+Both test statistic and p-value for Spearman's rho, Kendall's tau and
+Pearson's r are calculated with \code{cor.test()}.
+
+When \code{statistics = "auto"}, only Cramer's V or Phi are calculated, based on
+the dimension of the table (i.e. if the table has more than two rows or
+columns, Cramer's V is calculated, otherwise Phi).
 }
 \examples{
 # Phi coefficient for 2x2 tables
@@ -160,3 +159,9 @@ crosstable_statistics(efc,
   continuity = TRUE
 )
 }
+\references{
+Ben-Shachar, M.S., Patil, I., Thériault, R., Wiernik, B.M.,
+Lüdecke, D. (2023). Phi, Fei, Fo, Fum: Effect Sizes for Categorical Data
+That Use the Chi‑Squared Statistic. Mathematics, 11, 1982.
+\doi{10.3390/math11091982}
+}
diff --git a/man/find_beta.Rd b/man/find_beta.Rd
index 16d28385..ccd5dcc1 100644
--- a/man/find_beta.Rd
+++ b/man/find_beta.Rd
@@ -54,21 +54,20 @@
 \details{
 These functions can be used to find parameter for various distributions,
-to define prior probabilities for Bayesian analyses. \code{x1},
-\code{p1}, \code{x2} and \code{p2} are parameters that describe two
-quantiles. Given this knowledge, the distribution parameters are
-returned. \cr \cr
-Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence
-rate or similar probability, and its standard deviation or confidence
-interval. In this case. \code{x} should be a probability,
-for example a prevalence rate of a certain event. \code{se} then
-needs to be the standard error for this probability. Alternatively,
-\code{ci} can be specified, which should indicate the upper limit
-of the confidence interval od the probability (prevalence rate) \code{x}.
-If the number of events out of a total number of trials is known
-(e.g. 12 heads out of 30 coin tosses), \code{x} can also be the number
-of observed events, while \code{n} indicates the total amount of trials
-(in the above example, the function call would be: \code{find_beta2(x = 12, n = 30)}).
+to define prior probabilities for Bayesian analyses. \code{x1}, \code{p1}, \code{x2} and
+\code{p2} are parameters that describe two quantiles. Given this knowledge, the
+distribution parameters are returned.
+
+Use \code{find_beta2()}, if the known parameters are, e.g. a prevalence rate or
+similar probability, and its standard deviation or confidence interval. In
+this case, \code{x} should be a probability, for example a prevalence rate of a
+certain event. \code{se} then needs to be the standard error for this probability.
+Alternatively, \code{ci} can be specified, which should indicate the upper limit
+of the confidence interval of the probability (prevalence rate) \code{x}. If the
+number of events out of a total number of trials is known (e.g. 12 heads out
+of 30 coin tosses), \code{x} can also be the number of observed events, while \code{n}
+indicates the total number of trials (in the above example, the function
+call would be: \code{find_beta2(x = 12, n = 30)}).
 }
 \examples{
 # example from blogpost:
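For intuition on what find_beta2() solves for, a hedged method-of-moments sketch (one common way to map a probability and its standard error to Beta shape parameters; the package may use a different numerical approach):

beta_from_moments <- function(m, s) {
  # nu = m * (1 - m) / s^2 - 1; shape1 = m * nu; shape2 = (1 - m) * nu
  nu <- m * (1 - m) / s^2 - 1
  c(shape1 = m * nu, shape2 = (1 - m) * nu)
}
beta_from_moments(m = 0.4, s = 0.05)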
diff --git a/man/kruskal_wallis_test.Rd b/man/kruskal_wallis_test.Rd
new file mode 100644
index 00000000..d3033c42
--- /dev/null
+++ b/man/kruskal_wallis_test.Rd
@@ -0,0 +1,58 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/kruskal_wallis_test.R
+\name{kruskal_wallis_test}
+\alias{kruskal_wallis_test}
+\title{Kruskal-Wallis test}
+\usage{
+kruskal_wallis_test(data, select = NULL, by = NULL, weights = NULL)
+}
+\arguments{
+\item{data}{A data frame.}
+
+\item{select}{Name of the dependent variable (as string) to be used for the
+test. \code{select} can also be a character vector, specifying the names of
+multiple continuous variables. In this case, \code{by} is ignored and variables
+specified in \code{select} are used to compute the test. This can be useful if
+the data is in wide-format and no grouping variable is available.}
+
+\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
+not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
+\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+
+\item{weights}{Name of an (optional) weighting variable to be used for the test.}
+}
+\value{
+A data frame with test results.
+}
+\description{
+This function performs a Kruskal-Wallis rank sum test, to test
+the null hypothesis that the population medians of all groups are
+equal. The alternative is that at least one group differs.
+}
+\details{
+The function is simply a wrapper around \code{\link[=kruskal.test]{kruskal.test()}}. The
+weighted version of the Kruskal-Wallis test is based on the \strong{survey} package,
+using \code{\link[survey:svyranktest]{survey::svyranktest()}}.
+}
+\examples{
+data(efc)
+# Kruskal-Wallis test for elder's age by education
+kruskal_wallis_test(efc, "e17age", by = "c172code")
+
+# when data is in wide-format, specify all relevant continuous
+# variables in `select` and omit `by`
+set.seed(123)
+wide_data <- data.frame(
+  scale1 = runif(20),
+  scale2 = runif(20),
+  scale3 = runif(20)
+)
+kruskal_wallis_test(wide_data, select = c("scale1", "scale2", "scale3"))
+
+# same as if we had data in long format, with grouping variable
+long_data <- data.frame(
+  scales = c(wide_data$scale1, wide_data$scale2, wide_data$scale3),
+  groups = rep(c("A", "B", "C"), each = 20)
+)
+kruskal_wallis_test(long_data, select = "scales", by = "groups")
+}
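A minimal sketch of the survey-based route for the weighted Kruskal-Wallis test mentioned in the details (illustrative; assumes weight is a sampling weight and drops missing values first):

library(survey)
data(efc, package = "sjstats")
efc$weight <- abs(rnorm(nrow(efc), 1, 0.3))
d <- na.omit(efc[c("e17age", "c172code", "weight")])
des <- svydesign(ids = ~1, data = d, weights = ~weight)
# Kruskal-Wallis-type rank test on the weighted design
svyranktest(e17age ~ factor(c172code), design = des, test = "KruskalWallis")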
diff --git a/man/mann_whitney_test.Rd b/man/mann_whitney_test.Rd
new file mode 100644
index 00000000..ab376af7
--- /dev/null
+++ b/man/mann_whitney_test.Rd
@@ -0,0 +1,80 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mann_whitney_test.R
+\name{mann_whitney_test}
+\alias{mann_whitney_test}
+\title{Mann-Whitney-Test}
+\usage{
+mann_whitney_test(
+  data,
+  select = NULL,
+  by = NULL,
+  weights = NULL,
+  distribution = "asymptotic"
+)
+}
+\arguments{
+\item{data}{A data frame.}
+
+\item{select}{Name of the dependent variable (as string) to be used for the
+test. \code{select} can also be a character vector, specifying the names of
+multiple continuous variables. In this case, \code{by} is ignored and variables
+specified in \code{select} are used to compute the test. This can be useful if
+the data is in wide-format and no grouping variable is available.}
+
+\item{by}{Name of the grouping variable to be used for the test. If \code{by} is
+not a factor, it will be coerced to a factor. For \code{chi_squared_test()}, if
+\code{probabilities} is provided, \code{by} must be \code{NULL}.}
+
+\item{weights}{Name of an (optional) weighting variable to be used for the test.}
+
+\item{distribution}{Indicates how the null distribution of the test statistic
+should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{"asymptotic"}
+(default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.}
+}
+\value{
+A data frame with test results. The function returns p and Z-values
+as well as effect size r and group-rank-means.
+}
+\description{
+This function performs a Mann-Whitney-Test (or Wilcoxon rank
+sum test) for \emph{unpaired} samples.
+
+A Mann-Whitney-Test is a non-parametric test for the null hypothesis that two
+independent samples have identical continuous distributions. It can be used
+when the two continuous variables are not normally distributed.
+}
+\details{
+This function is based on \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}}
+(the latter to extract effect sizes). The weighted version of the test is
+based on \code{\link[survey:svyranktest]{survey::svyranktest()}}.
+
+Interpretation of the effect size \strong{r}, as a rule-of-thumb:
+\itemize{
+\item small effect >= 0.1
+\item medium effect >= 0.3
+\item large effect >= 0.5
+}
+
+\strong{r} is calculated as:
+
+\if{html}{\out{<div class="sourceCode">}}\preformatted{r = |Z| / sqrt(n1 + n2)
+}\if{html}{\out{</div>}}
+}
+\examples{
+data(efc)
+# Mann-Whitney-U-Tests for elder's age by elder's sex.
+mann_whitney_test(efc, "e17age", by = "e16sex")
+
+# when data is in wide-format, specify all relevant continuous
+# variables in `select` and omit `by`
+set.seed(123)
+wide_data <- data.frame(scale1 = runif(20), scale2 = runif(20))
+mann_whitney_test(wide_data, select = c("scale1", "scale2"))
+
+# same as if we had data in long format, with grouping variable
+long_data <- data.frame(
+  scales = c(wide_data$scale1, wide_data$scale2),
+  groups = rep(c("A", "B"), each = 20)
+)
+mann_whitney_test(long_data, select = "scales", by = "groups")
+}
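The effect size r documented above can be reproduced from coin's standardized statistic; a hedged sketch (illustrative, not the package's exact extraction code):

library(coin)
data(efc, package = "sjstats")
d <- na.omit(efc[c("e17age", "e16sex")])
wt <- wilcox_test(e17age ~ factor(e16sex), data = d)
z <- as.numeric(statistic(wt, type = "standardized"))
abs(z) / sqrt(nrow(d))  # r = |Z| / sqrt(n1 + n2)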
diff --git a/man/mean_n.Rd b/man/mean_n.Rd
deleted file mode 100644
index 35f6915e..00000000
--- a/man/mean_n.Rd
+++ /dev/null
@@ -1,69 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mean_n.R
-\name{mean_n}
-\alias{mean_n}
-\title{Row means with min amount of valid values}
-\usage{
-mean_n(dat, n, digits = 2)
-}
-\arguments{
-\item{dat}{A data frame with at least two columns, where row means are applied.}
-
-\item{n}{May either be
-\itemize{
-\item a numeric value that indicates the amount of valid values per row to calculate the row mean;
-\item or a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean (see 'Details').
-}
-If a row's sum of valid values is less than \code{n}, \code{NA} will be returned as row mean value.}
-
-\item{digits}{Numeric value indicating the number of decimal places to be used for rounding mean
-value. Negative values are allowed (see 'Details').}
-}
-\value{
-A vector with row mean values of \code{df} for those rows with at least \code{n}
-valid values. Else, \code{NA} is returned.
-}
-\description{
-This function is similar to the SPSS \code{MEAN.n} function and computes
-row means from a \code{data.frame} or \code{matrix} if at least \code{n}
-values of a row are valid (and not \code{NA}).
-}
-\details{
-Rounding to a negative number of \code{digits} means rounding to a power of
-ten, so for example mean_n(df, 3, digits = -2) rounds to the
-nearest hundred. \cr \cr
-For \code{n}, must be a numeric value from \code{0} to \code{ncol(dat)}. If
-a \emph{row} in \code{dat} has at least \code{n} non-missing values, the
-row mean is returned. If \code{n} is a non-integer value from 0 to 1,
-\code{n} is considered to indicate the proportion of necessary non-missing
-values per row. E.g., if \code{n = .75}, a row must have at least \code{ncol(dat) * n}
-non-missing values for the row mean to be calculated. See 'Examples'.
-}
-\examples{
-dat <- data.frame(c1 = c(1,2,NA,4),
-                  c2 = c(NA,2,NA,5),
-                  c3 = c(NA,4,NA,NA),
-                  c4 = c(2,3,7,8))
-
-# needs at least 4 non-missing values per row
-mean_n(dat, 4) # 1 valid return value
-
-# needs at least 3 non-missing values per row
-mean_n(dat, 3) # 2 valid return values
-
-# needs at least 2 non-missing values per row
-mean_n(dat, 2)
-
-# needs at least 1 non-missing value per row
-mean_n(dat, 1) # all means are shown
-
-# needs at least 50\% of non-missing values per row
-mean_n(dat, .5) # 3 valid return values
-
-# needs at least 75\% of non-missing values per row
-mean_n(dat, .75) # 2 valid return values
-
-}
-\references{
-\href{https://r4stats.com/2014/09/03/adding-the-spss-mean-n-function-to-r/}{r4stats.com}
-}
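mean_n() is now defunct in favor of datawizard::row_means(); for reference, a base-R sketch of the same rule (hypothetical helper name; using ceiling() for the proportion case is an assumption):

row_mean_n <- function(dat, n, digits = 2) {
  # translate a proportion (e.g. 0.75) into a count of required columns
  if (n < 1) n <- ceiling(ncol(dat) * n)
  out <- round(rowMeans(dat, na.rm = TRUE), digits)
  # rows with fewer than n valid values get NA
  out[rowSums(!is.na(dat)) < n] <- NA
  out
}
dat <- data.frame(c1 = c(1, 2, NA, 4), c2 = c(NA, 2, NA, 5))
row_mean_n(dat, 2)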
diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd
deleted file mode 100644
index ad82ba3e..00000000
--- a/man/means_by_group.Rd
+++ /dev/null
@@ -1,94 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/grpmean.R
-\name{means_by_group}
-\alias{means_by_group}
-\alias{grpmean}
-\title{Summary of mean values by group}
-\usage{
-means_by_group(
-  x,
-  dv,
-  grp,
-  weights = NULL,
-  digits = 2,
-  out = c("txt", "viewer", "browser"),
-  encoding = "UTF-8",
-  file = NULL
-)
-
-grpmean(
-  x,
-  dv,
-  grp,
-  weights = NULL,
-  digits = 2,
-  out = c("txt", "viewer", "browser"),
-  encoding = "UTF-8",
-  file = NULL
-)
-}
-\arguments{
-\item{x}{A (grouped) data frame.}
-
-\item{dv}{Name of the dependent variable, for which the mean value, grouped
-by \code{grp}, is computed.}
-
-\item{grp}{Factor with the cross-classifying variable, where \code{dv} is
-grouped into the categories represented by \code{grp}. Numeric vectors
-are coerced to factors.}
-
-\item{weights}{Name of variable in \code{x} that indicated the vector of
-weights that will be applied to weight all observations. Default is
-\code{NULL}, so no weights are used.}
-
-\item{digits}{Numeric, amount of digits after decimal point when rounding
-estimates and values.}
-
-\item{out}{Character vector, indicating whether the results should be printed
-to console (\code{out = "txt"}) or as HTML-table in the viewer-pane
-(\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the
-results should be plotted (\code{out = "plot"}, only applies to certain
-functions). May be abbreviated.}
-
-\item{encoding}{Character vector, indicating the charset encoding used
-for variable and value labels. Default is \code{"UTF-8"}. Only used
-when \code{out} is not \code{"txt"}.}
-
-\item{file}{Destination file, if the output should be saved as file.
-Only used when \code{out} is not \code{"txt"}.}
-}
-\value{
-For non-grouped data frames, \code{means_by_group()} returns a data frame with
-following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev},
-\code{std.error} and \code{p.value}. For grouped data frames, returns
-a list of such data frames.
-}
-\description{
-Computes mean, sd and se for each sub-group (indicated by \code{grp})
-of \code{dv}.
-}
-\details{
-This function performs a One-Way-Anova with \code{dv} as dependent
-and \code{grp} as independent variable, by calling
-\code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}}
-is called to get p-values for each sub-group. P-values indicate whether
-each group-mean is significantly different from the total mean.
-}
-\examples{
-data(efc)
-means_by_group(efc, c12hour, e42dep)
-
-data(iris)
-means_by_group(iris, Sepal.Width, Species)
-
-# also works for grouped data frames
-if (require("dplyr")) {
-  efc \%>\%
-    group_by(c172code) \%>\%
-    means_by_group(c12hour, e42dep)
-}
-
-# weighting
-efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
-means_by_group(efc, c12hour, e42dep, weights = weight)
-}
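The ANOVA-plus-contrasts logic from the deleted details section can be reproduced with emmeans directly; a hedged sketch ("eff" contrasts test each group mean against the grand mean, which appears to match the behavior described above):

library(emmeans)
data(efc, package = "sjstats")
efc$e42dep <- factor(efc$e42dep)
m <- lm(c12hour ~ e42dep, data = efc)
# p-values: is each group mean different from the grand mean?
contrast(emmeans(m, ~e42dep), method = "eff")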
diff --git a/man/mwu.Rd b/man/mwu.Rd
deleted file mode 100644
index d8fe669d..00000000
--- a/man/mwu.Rd
+++ /dev/null
@@ -1,104 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mwu.R
-\name{mwu}
-\alias{mwu}
-\alias{mwu.default}
-\alias{mwu.formula}
-\alias{mannwhitney}
-\title{Mann-Whitney-U-Test}
-\usage{
-mwu(data, ...)
-
-\method{mwu}{default}(
-  data,
-  x,
-  grp,
-  distribution = "asymptotic",
-  out = c("txt", "viewer", "browser"),
-  encoding = "UTF-8",
-  file = NULL,
-  ...
-)
-
-\method{mwu}{formula}(
-  formula,
-  data,
-  distribution = "asymptotic",
-  out = c("txt", "viewer", "browser"),
-  encoding = "UTF-8",
-  file = NULL,
-  ...
-)
-
-mannwhitney(data, ...)
-}
-\arguments{
-\item{data}{A data frame.}
-
-\item{...}{For \code{weighted_ttest()} and \code{weighted_mannwhitney()}, currently not used.
-For \code{weighted_chisqtest()}, further arguments passed down to
-\code{\link[stats]{chisq.test}}.}
-
-\item{x}{Bare (unquoted) variable name, or a character vector with the variable name.}
-
-\item{grp}{Bare (unquoted) name of the cross-classifying variable, where
-\code{x} is grouped into the categories represented by \code{grp},
-or a character vector with the variable name.}
-
-\item{distribution}{Indicates how the null distribution of the test statistic
-should be computed. May be one of \code{"exact"}, \code{"approximate"} or \code{"asymptotic"}
-(default). See \code{\link[coin:LocationTests]{coin::wilcox_test()}} for details.}
-
-\item{out}{Character vector, indicating whether the results should be printed
-to console (\code{out = "txt"}) or as HTML-table in the viewer-pane
-(\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the
-results should be plotted (\code{out = "plot"}, only applies to certain
-functions). May be abbreviated.}
-
-\item{encoding}{Character vector, indicating the charset encoding used
-for variable and value labels. Default is \code{"UTF-8"}. Only used
-when \code{out} is not \code{"txt"}.}
-
-\item{file}{Destination file, if the output should be saved as file.
-Only used when \code{out} is not \code{"txt"}.}
-
-\item{formula}{A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a
-numeric variable giving the data values and \code{rhs1} a factor with two
-levels giving the corresponding groups and \code{rhs2} a variable with weights.}
-}
-\value{
-(Invisibly) returns a data frame with U, p and Z-values for each group-comparison
-as well as effect-size r; additionally, group-labels and groups' n's are
-also included.
-}
-\description{
-This function performs a Mann-Whitney-U-Test (or Wilcoxon rank
-sum test for \emph{unpaired} samples, see \code{\link[=wilcox.test]{wilcox.test()}} and \code{\link[coin:LocationTests]{coin::wilcox_test()}})
-comparing \code{x} by each group indicated by \code{grp}. If \code{grp} has more than two
-categories, a comparison between each combination of two groups is performed.
-
-The function reports U, p and Z-values as well as effect size r and group-rank-means.
-}
-\note{
-This function calls the \code{\link[coin]{wilcox_test}} with formula. If \code{grp}
-has more than two groups, additionally a Kruskal-Wallis-Test (see \code{\link{kruskal.test}})
-is performed. \cr \cr
-Interpretation of effect sizes, as a rule-of-thumb:
-\itemize{
-\item small effect >= 0.1
-\item medium effect >= 0.3
-\item large effect >= 0.5
-}
-}
-\examples{
-data(efc)
-# Mann-Whitney-U-Tests for elder's age by elder's sex.
-mwu(efc, e17age, e16sex)
-
-# using formula interface
-mwu(e17age ~ e16sex, efc)
-
-# Mann-Whitney-Tests for elder's age by each level elder's dependency.
-mwu(efc, e17age, e42dep)
-
-}
diff --git a/man/r2.Rd b/man/r2.Rd
index 4b64705e..d31c90e3 100644
--- a/man/r2.Rd
+++ b/man/r2.Rd
@@ -11,6 +11,8 @@
 \alias{icc}
 \alias{p_value}
 \alias{se}
+\alias{means_by_group}
+\alias{mean_n}
 \title{Deprecated functions}
 \usage{
 r2(x)
@@ -32,6 +34,10 @@ icc(x)
 p_value(x, ...)
 
 se(x, ...)
+
+means_by_group(x, ...)
+
+mean_n(x, ...)
 }
 \arguments{
 \item{x}{An object.}
diff --git a/man/weighted_sd.Rd b/man/weighted_sd.Rd
index 3044601e..c553f998 100644
--- a/man/weighted_sd.Rd
+++ b/man/weighted_sd.Rd
@@ -1,12 +1,8 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/svy_median.R, R/wtd_chisqtest.R, R/wtd_cor.R,
-% R/wtd_mean.R, R/wtd_median.R, R/wtd_mwu.R, R/wtd_sd.R, R/wtd_se.R,
-% R/wtd_ttest.R
+% Please edit documentation in R/svy_median.R, R/wtd_cor.R, R/wtd_mean.R,
+% R/wtd_median.R, R/wtd_mwu.R, R/wtd_sd.R, R/wtd_se.R, R/wtd_ttest.R
 \name{survey_median}
 \alias{survey_median}
-\alias{weighted_chisqtest}
-\alias{weighted_chisqtest.default}
-\alias{weighted_chisqtest.formula}
 \alias{weighted_correlation}
 \alias{weighted_correlation.default}
 \alias{weighted_correlation.formula}
@@ -26,12 +22,6 @@
 \usage{
 survey_median(x, design)
 
-weighted_chisqtest(data, ...)
-
-\method{weighted_chisqtest}{default}(data, x, y, weights, ...)
-
-\method{weighted_chisqtest}{formula}(formula, data, ...)
-
 weighted_correlation(data, ...)
 
 \method{weighted_correlation}{default}(data, x, y, weights, ci.lvl = 0.95, ...)
@@ -101,12 +91,12 @@ the variable name of the numeric vector of weights.
 If \code{weights = NULL}, unweighted statistic is reported.}
 
+\item{ci.lvl}{Confidence level of the interval.}
+
 \item{formula}{A formula of the form \code{lhs ~ rhs1 + rhs2} where \code{lhs} is a
 numeric variable giving the data values and \code{rhs1} a factor with two
 levels giving the corresponding groups and \code{rhs2} a variable with weights.}
 
-\item{ci.lvl}{Confidence level of the interval.}
-
 \item{grp}{Bare (unquoted) name of the cross-classifying variable, where
 \code{x} is grouped into the categories represented by \code{grp},
 or a character vector with the variable name.}
@@ -177,14 +167,4 @@ efc$weight <- abs(rnorm(nrow(efc), 1, .3))
 weighted_ttest(efc, e17age, weights = weight)
 weighted_ttest(efc, e17age, c160age, weights = weight)
 weighted_ttest(e17age ~ e16sex + weight, efc)
-
-# weighted Mann-Whitney-U-test ----
-weighted_mannwhitney(c12hour ~ c161sex + weight, efc)
-
-# weighted Chi-squared-test ----
-weighted_chisqtest(efc, c161sex, e16sex, weights = weight, correct = FALSE)
-weighted_chisqtest(c172code ~ c161sex + weight, efc)
-
-# weighted Chi-squared-test for given probabilities ----
-weighted_chisqtest(c172code ~ weight, efc, p = c(.33, .33, .34))
 }
diff --git a/pkgdown/extra.scss b/pkgdown/extra.scss
new file mode 100644
index 00000000..a6d05312
--- /dev/null
+++ b/pkgdown/extra.scss
@@ -0,0 +1,22 @@
+// font size for text
+body {
+  font-size: 0.90rem !important;
+}
+
+// color for links
+$link-color: #03638e;
+
+a {
+  text-decoration: none !important;
+  color: $link-color
+}
+
+code a:any-link {
+  text-decoration: none !important;
+  color: $link-color
+}
+
+.nav-text.text-default.me-auto {
+  color: rgba(0,0,0,0.9) !important;
+}
+
diff --git a/tests/testthat/test-grpmean.R b/tests/testthat/test-grpmean.R
deleted file mode 100644
index 354ef69a..00000000
--- a/tests/testthat/test-grpmean.R
+++ /dev/null
@@ -1,30 +0,0 @@
-if (require("testthat") && require("sjstats") && require("dplyr")) {
-  data(efc)
-  set.seed(123)
-  efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5))
-  efc_grouped <- group_by(efc, c172code)
-
-  test_that("means_by_group", {
-    expect_equal(means_by_group(efc, c12hour, e42dep)$mean, c(9.90909, 17.53778, 34.51961, 75.90132, 42.4384), tolerance = 1e-3)
-  })
-
-  test_that("means_by_group, weighting", {
-    w <- "weight"
-    expect_equal(means_by_group(efc, c12hour, e42dep, weights = weight)$mean, c(9.43932, 17.28629, 35.16486, 79.23457, 43.0544), tolerance = 1e-3)
-    expect_equal(means_by_group(efc, c12hour, e42dep, weights = "weight")$mean, c(9.43932, 17.28629, 35.16486, 79.23457, 43.0544), tolerance = 1e-3)
-    expect_equal(means_by_group(efc, c12hour, e42dep, weights = w)$mean, c(9.43932, 17.28629, 35.16486, 79.23457, 43.0544), tolerance = 1e-3)
-  })
-
-  test_that("means_by_group, grouping", {
-    m <- means_by_group(efc_grouped, c12hour, e42dep)
-    expect_equal(length(m), 3)
-  })
-
-  test_that("means_by_group, grouped weighting", {
-    w <- "weight"
-    means_by_group(efc_grouped, c12hour, e42dep, weights = weight)
-    means_by_group(efc_grouped, c12hour, e42dep, weights = "weight")
-    means_by_group(efc_grouped, c12hour, e42dep, weights = w)
-  })
-
-}
diff --git a/tests/testthat/test-wtd.R b/tests/testthat/test-wtd.R
index 21077b3b..7642253b 100644
--- a/tests/testthat/test-wtd.R
+++ b/tests/testthat/test-wtd.R
@@ -17,21 +17,6 @@ if (require("testthat") && require("sjstats")) {
     expect_equal(weighted_median(efc$c12hour, weights = NULL), 20, tolerance = 1e-5)
   })
 
-  test_that("weighted_chisqtest", {
-    w <- weighted_chisqtest(efc, c161sex, c172code, weights = weight)
-    expect_equal(w$estimate, 0.06668895, tolerance = 1e-5)
-    expect_equal(w$p.value, 0.1517221, tolerance = 1e-5)
-
-    w <- weighted_chisqtest(c161sex ~ c172code + weight, efc)
-    expect_equal(w$estimate, 0.06668895, tolerance = 1e-5)
-    expect_equal(w$p.value, 0.1517221, tolerance = 1e-5)
-  })
-
-  test_that("weighted_mannwhitney", {
-    weighted_mannwhitney(efc, c12hour, c161sex, weights = weight)
-    weighted_mannwhitney(c12hour ~ c161sex + weight, efc)
-  })
-
   test_that("weighted_ttest", {
     weighted_ttest(efc, e17age, weights = weight)
     weighted_ttest(efc, e17age, c160age, weights = weight)