diff --git a/NAMESPACE b/NAMESPACE index f586c75..7947a77 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -82,7 +82,6 @@ export(find_beta2) export(find_cauchy) export(find_normal) export(gmd) -export(grpmean) export(icc) export(inequ_trend) export(is_prime) @@ -131,14 +130,10 @@ importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,group_vars) importFrom(dplyr,mutate) -importFrom(dplyr,n_distinct) -importFrom(dplyr,pull) importFrom(dplyr,quos) importFrom(dplyr,select) importFrom(dplyr,select_if) importFrom(dplyr,summarise) -importFrom(emmeans,contrast) -importFrom(emmeans,emmeans) importFrom(insight,export_table) importFrom(insight,find_formula) importFrom(insight,find_response) @@ -156,7 +151,6 @@ importFrom(performance,rmse) importFrom(purrr,flatten_df) importFrom(purrr,map) importFrom(purrr,map2) -importFrom(purrr,map_chr) importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_lgl) @@ -165,15 +159,10 @@ importFrom(rlang,.data) importFrom(rlang,enquo) importFrom(rlang,quo_name) importFrom(sjlabelled,as_numeric) -importFrom(sjlabelled,drop_labels) -importFrom(sjlabelled,get_label) -importFrom(sjlabelled,get_labels) -importFrom(sjmisc,add_variables) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_float) importFrom(sjmisc,is_num_fac) importFrom(sjmisc,str_contains) -importFrom(sjmisc,to_value) importFrom(sjmisc,trim) importFrom(sjmisc,typical_value) importFrom(stats,approx) diff --git a/R/Deprecated.R b/R/Deprecated.R index 862b8da..94a27e2 100644 --- a/R/Deprecated.R +++ b/R/Deprecated.R @@ -83,3 +83,19 @@ se <- function(x, ...) { .Defunct("parameters::standard_error()") parameters::standard_error(x) } + + +#' @rdname r2 +#' @export +means_by_group <- function(x, ...) { + .Defunct("datawizard::means_by_group()") + datawizard::means_by_group(x, ...) +} + + +#' @rdname r2 +#' @export +mean_n <- function(x, ...) { + .Defunct("datawizard::row_means()") + datawizard::row_means(x, ...) +} diff --git a/R/grpmean.R b/R/grpmean.R deleted file mode 100644 index ab2eb2d..0000000 --- a/R/grpmean.R +++ /dev/null @@ -1,335 +0,0 @@ -#' @title Summary of mean values by group -#' @name means_by_group -#' -#' @description Computes mean, sd and se for each sub-group (indicated by \code{grp}) -#' of \code{dv}. -#' -#' @param x A (grouped) data frame. -#' @param dv Name of the dependent variable, for which the mean value, grouped -#' by \code{grp}, is computed. -#' @param grp Factor with the cross-classifying variable, where \code{dv} is -#' grouped into the categories represented by \code{grp}. Numeric vectors -#' are coerced to factors. -#' @param weights Name of variable in \code{x} that indicated the vector of -#' weights that will be applied to weight all observations. Default is -#' \code{NULL}, so no weights are used. -#' @param digits Numeric, amount of digits after decimal point when rounding -#' estimates and values. -#' @param file Destination file, if the output should be saved as file. -#' Only used when \code{out} is not \code{"txt"}. -#' @param encoding Character vector, indicating the charset encoding used -#' for variable and value labels. Default is \code{"UTF-8"}. Only used -#' when \code{out} is not \code{"txt"}. -#' @param out Character vector, indicating whether the results should be printed -#' to console (\code{out = "txt"}) or as HTML-table in the viewer-pane -#' (\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the -#' results should be plotted (\code{out = "plot"}, only applies to certain -#' functions). May be abbreviated. -#' -#' @return For non-grouped data frames, \code{means_by_group()} returns a data frame with -#' following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev}, -#' \code{std.error} and \code{p.value}. For grouped data frames, returns -#' a list of such data frames. -#' -#' @details This function performs a One-Way-Anova with \code{dv} as dependent -#' and \code{grp} as independent variable, by calling -#' \code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}} -#' is called to get p-values for each sub-group. P-values indicate whether -#' each group-mean is significantly different from the total mean. -#' -#' @examples -#' data(efc) -#' means_by_group(efc, c12hour, e42dep) -#' -#' data(iris) -#' means_by_group(iris, Sepal.Width, Species) -#' -#' # also works for grouped data frames -#' if (require("dplyr")) { -#' efc %>% -#' group_by(c172code) %>% -#' means_by_group(c12hour, e42dep) -#' } -#' -#' # weighting -#' efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) -#' means_by_group(efc, c12hour, e42dep, weights = weight) -#' @importFrom sjlabelled get_label drop_labels get_labels -#' @importFrom stats lm na.omit sd weighted.mean -#' @importFrom purrr map_chr map_df -#' @importFrom sjmisc to_value is_empty -#' @importFrom rlang enquo .data quo_name -#' @export -means_by_group <- function(x, - dv, - grp, - weights = NULL, - digits = 2, - out = c("txt", "viewer", "browser"), - encoding = "UTF-8", - file = NULL) { - - out <- match.arg(out) - - if (out != "txt" && !requireNamespace("sjPlot", quietly = TRUE)) { - message("Package `sjPlot` needs to be loaded to print HTML tables.") - out <- "txt" - } - - # create quosures - grp.name <- rlang::quo_name(rlang::enquo(grp)) - dv.name <- rlang::quo_name(rlang::enquo(dv)) - - # weights need extra checking, might be NULL - if (!missing(weights)) { - .weights <- try(rlang::quo_name(rlang::enquo(weights)), silent = TRUE) - if (inherits(.weights, "try-error")) .weights <- NULL - - w.string <- try(eval(weights), silent = TRUE) - if (!inherits(w.string, "try-error") && !is.null(w.string) && is.character(w.string)) .weights <- w.string - - if (sjmisc::is_empty(.weights) || .weights == "NULL") .weights <- NULL - } else - .weights <- NULL - - - # create string with variable names - vars <- c(grp.name, dv.name, .weights) - - # get data - x <- suppressMessages(dplyr::select(x, !! vars)) - - # set value and row labels - varGrpLabel <- sjlabelled::get_label(x[[grp.name]], def.value = grp.name) - varCountLabel <- sjlabelled::get_label(x[[dv.name]], def.value = dv.name) - - # first, drop unused labels - x[[grp.name]] <- sjlabelled::drop_labels(x[[grp.name]], drop.na = TRUE) - - # now get valid value labels - value.labels <- sjlabelled::get_labels( - x[[grp.name]], attr.only = F, values = "n", non.labelled = TRUE - ) - - # return values - dataframes <- list() - - # do we have a grouped data frame? - if (inherits(x, "grouped_df")) { - # get grouped data - grps <- get_grouped_data(x) - - # now plot everything - for (i in seq_len(nrow(grps))) { - # copy back labels to grouped data frame - tmp <- sjlabelled::copy_labels(grps$data[[i]], x) - - # get grouped means table - dummy <- means_by_group_helper( - x = tmp, - dv = dv.name, - grp = grp.name, - weight.by = .weights, - value.labels = value.labels, - varCountLabel = varCountLabel, - varGrpLabel = varGrpLabel - ) - - attr(dummy, "group") <- get_grouped_title(x, grps, i, sep = "\n") - - # save data frame for return value - dataframes[[length(dataframes) + 1]] <- dummy - } - - # add class-attr for print-method() - if (out == "txt") - class(dataframes) <- c("sj_grpmeans", "list") - else - class(dataframes) <- c("sjt_grpmeans", "list") - - } else { - dataframes <- means_by_group_helper( - x = x, - dv = dv.name, - grp = grp.name, - weight.by = .weights, - value.labels = value.labels, - varCountLabel = varCountLabel, - varGrpLabel = varGrpLabel - ) - - # add class-attr for print-method() - if (out == "txt") - class(dataframes) <- c("sj_grpmean", class(dataframes)) - else - class(dataframes) <- c("sjt_grpmean", class(dataframes)) - } - - # save how to print output - attr(dataframes, "print") <- out - attr(dataframes, "encoding") <- encoding - attr(dataframes, "file") <- file - attr(dataframes, "digits") <- digits - - dataframes -} - - -#' @importFrom stats pf lm weighted.mean na.omit sd -#' @importFrom sjmisc to_value add_variables -#' @importFrom emmeans emmeans contrast -#' @importFrom dplyr pull select n_distinct -#' @importFrom purrr map_chr -#' @importFrom rlang .data -means_by_group_helper <- function(x, dv, grp, weight.by, value.labels, varCountLabel, varGrpLabel) { - # copy vectors from data frame - dv <- x[[dv]] - grp <- x[[grp]] - - if (!is.null(weight.by)) - weight.by <- x[[weight.by]] - else - weight.by <- 1 - - # convert values to numeric - dv <- sjmisc::to_value(dv) - - # create data frame, for emmeans - mydf <- stats::na.omit(data.frame( - dv = dv, - grp = as.factor(grp), - weight.by = weight.by - )) - - # compute anova statistics for mean table - fit <- stats::lm(dv ~ grp, weights = weight.by, data = mydf) - - # p-values of contrast-means - means.p <- fit %>% - emmeans::emmeans(specs = "grp") %>% - emmeans::contrast(method = "eff") %>% - summary() %>% - dplyr::pull("p.value") - - ## TODO - # efc %>% - # group_by(c172code, c161sex) %>% - # means_by_group(c12hour, e42dep) - - - # check if value labels length matches group count - if (dplyr::n_distinct(mydf$grp) != length(value.labels)) { - # get unique factor levels and check if these are numeric. - # if so, we match the values from value labels and the remaining - # factor levels, so we get the correct value labels for printing - nl <- unique(mydf$grp) - if (sjmisc::is_num_fac(nl)) - value.labels <- value.labels[names(value.labels) %in% levels(nl)] - else - value.labels <- nl - } - - - # create summary - dat <- mydf %>% - dplyr::group_by(.data$grp) %>% - summarise( - mean = stats::weighted.mean(.data$dv, w = .data$weight.by, na.rm = TRUE), - N = round(sum(.data$weight.by)), - std.dev = weighted_sd(.data$dv, .data$weight.by), - std.error = weighted_se(.data$dv, .data$weight.by) - ) %>% - mutate(p.value = means.p) %>% - dplyr::select(-.data$grp) - - # finally, add total-row - dat <- dplyr::bind_rows( - dat, - data_frame( - mean = stats::weighted.mean(mydf$dv, w = mydf$weight.by, na.rm = TRUE), - N = nrow(mydf), - std.dev = weighted_sd(mydf$dv, mydf$weight.by), - std.error = weighted_se(mydf$dv, mydf$weight.by), - p.value = NA - ) - ) - - - # add row labels - dat <- sjmisc::add_variables( - dat, - term = c(unname(value.labels), "Total"), - .after = -1 - ) - - - # get anova statistics for mean table - sum.fit <- summary(fit) - - # r-squared values - r2 <- sum.fit$r.squared - r2.adj <- sum.fit$adj.r.squared - - # F-statistics - fstat <- sum.fit$fstatistic - pval <- stats::pf(fstat[1], fstat[2], fstat[3], lower.tail = F) - - - # copy as attributes - attr(dat, "r2") <- r2 - attr(dat, "adj.r2") <- r2.adj - attr(dat, "fstat") <- fstat[1] - attr(dat, "p.value") <- pval - attr(dat, "dv.label") <- varCountLabel - attr(dat, "grp.label") <- varGrpLabel - - dat -} - - -get_grouped_title <- function(x, grps, i, sep = "\n") { - # create title for first grouping level - tp <- get_title_part(x, grps, 1, i) - title <- sprintf("%s: %s", tp[1], tp[2]) - - # do we have another groupng variable? - if (length(dplyr::group_vars(x)) > 1) { - tp <- get_title_part(x, grps, 2, i) - title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2]) - } - - # return title - title -} - - -get_title_part <- function(x, grps, level, i) { - # prepare title for group - var.name <- colnames(grps)[level] - - # get values from value labels - vals <- sjlabelled::get_values(x[[var.name]]) - # if we have no value labels, get values directly - if (is.null(vals)) { - vals <- unique(x[[var.name]]) - lab.pos <- i - } else { - # find position of value labels for current group - lab.pos <- which(vals == grps[[var.name]][i]) - } - - # get variable and value labels - t1 <- sjlabelled::get_label(x[[var.name]], def.value = var.name) - t2 <- sjlabelled::get_labels(x[[var.name]])[lab.pos] - - # if we have no value label, use value instead - if (is.null(t2)) t2 <- vals[lab.pos] - - # generate title - c(t1, t2) -} - - -#' @rdname means_by_group -#' @export -grpmean <- means_by_group diff --git a/R/mean_n.R b/R/mean_n.R deleted file mode 100644 index 5d6a2e0..0000000 --- a/R/mean_n.R +++ /dev/null @@ -1,78 +0,0 @@ -#' @title Row means with min amount of valid values -#' @name mean_n -#' @description This function is similar to the SPSS \code{MEAN.n} function and computes -#' row means from a \code{data.frame} or \code{matrix} if at least \code{n} -#' values of a row are valid (and not \code{NA}). -#' -#' @param dat A data frame with at least two columns, where row means are applied. -#' @param n May either be -#' \itemize{ -#' \item a numeric value that indicates the amount of valid values per row to calculate the row mean; -#' \item or a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean (see 'Details'). -#' } -#' If a row's sum of valid values is less than \code{n}, \code{NA} will be returned as row mean value. -#' @param digits Numeric value indicating the number of decimal places to be used for rounding mean -#' value. Negative values are allowed (see 'Details'). -#' -#' @return A vector with row mean values of \code{df} for those rows with at least \code{n} -#' valid values. Else, \code{NA} is returned. -#' -#' @details Rounding to a negative number of \code{digits} means rounding to a power of -#' ten, so for example mean_n(df, 3, digits = -2) rounds to the -#' nearest hundred. \cr \cr -#' For \code{n}, must be a numeric value from \code{0} to \code{ncol(dat)}. If -#' a \emph{row} in \code{dat} has at least \code{n} non-missing values, the -#' row mean is returned. If \code{n} is a non-integer value from 0 to 1, -#' \code{n} is considered to indicate the proportion of necessary non-missing -#' values per row. E.g., if \code{n = .75}, a row must have at least \code{ncol(dat) * n} -#' non-missing values for the row mean to be calculated. See 'Examples'. -#' -#' @references \href{https://r4stats.com/2014/09/03/adding-the-spss-mean-n-function-to-r/}{r4stats.com} -#' -#' @examples -#' dat <- data.frame(c1 = c(1,2,NA,4), -#' c2 = c(NA,2,NA,5), -#' c3 = c(NA,4,NA,NA), -#' c4 = c(2,3,7,8)) -#' -#' # needs at least 4 non-missing values per row -#' mean_n(dat, 4) # 1 valid return value -#' -#' # needs at least 3 non-missing values per row -#' mean_n(dat, 3) # 2 valid return values -#' -#' # needs at least 2 non-missing values per row -#' mean_n(dat, 2) -#' -#' # needs at least 1 non-missing value per row -#' mean_n(dat, 1) # all means are shown -#' -#' # needs at least 50% of non-missing values per row -#' mean_n(dat, .5) # 3 valid return values -#' -#' # needs at least 75% of non-missing values per row -#' mean_n(dat, .75) # 2 valid return values -#' -#' @export -mean_n <- function(dat, n, digits = 2) { - # is 'n' indicating a proportion? - digs <- n %% 1 - if (digs != 0) n <- round(ncol(dat) * digs) - - # coerce matrix to data frame - if (is.matrix(dat)) dat <- as.data.frame(dat) - - # check if we have a data framme with at least two columns - if (!is.data.frame(dat) || ncol(dat) < 2) { - warning("`dat` must be a data frame with at least two columns.", call. = TRUE) - return(NA) - } - - # n may not be larger as df's amount of columns - if (ncol(dat) < n) { - warning("`n` must be smaller or equal to number of columns in data frame.", call. = TRUE) - return(NA) - } - - round(apply(dat, 1, function(x) ifelse(sum(!is.na(x)) >= n, mean(x, na.rm = TRUE), NA)), digits) -} diff --git a/_pkgdown.yml b/_pkgdown.yml index cf57a80..bfe8987 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -32,8 +32,6 @@ reference: - gmd - mann_whitney_test - chi_squared_test - - mean_n - - means_by_group - var_pop - title: "Tools for Regression Models" diff --git a/man/crosstable_statistics.Rd b/man/crosstable_statistics.Rd index 358c283..20b2f03 100644 --- a/man/crosstable_statistics.Rd +++ b/man/crosstable_statistics.Rd @@ -81,10 +81,6 @@ will be irgnored.} \item{statistics}{Name of measure of association that should be computed. May be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"}, \code{"kendall"}, \code{"pearson"} or \code{"fisher"}. See 'Details'.} - -\item{weights}{Name of variable in \code{x} that indicated the vector of -weights that will be applied to weight all observations. Default is -\code{NULL}, so no weights are used.} } \value{ For \code{phi()}, the table's Phi value. For \code{cramer()}, the diff --git a/man/mean_n.Rd b/man/mean_n.Rd deleted file mode 100644 index 35f6915..0000000 --- a/man/mean_n.Rd +++ /dev/null @@ -1,69 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mean_n.R -\name{mean_n} -\alias{mean_n} -\title{Row means with min amount of valid values} -\usage{ -mean_n(dat, n, digits = 2) -} -\arguments{ -\item{dat}{A data frame with at least two columns, where row means are applied.} - -\item{n}{May either be -\itemize{ -\item a numeric value that indicates the amount of valid values per row to calculate the row mean; -\item or a value between 0 and 1, indicating a proportion of valid values per row to calculate the row mean (see 'Details'). -} -If a row's sum of valid values is less than \code{n}, \code{NA} will be returned as row mean value.} - -\item{digits}{Numeric value indicating the number of decimal places to be used for rounding mean -value. Negative values are allowed (see 'Details').} -} -\value{ -A vector with row mean values of \code{df} for those rows with at least \code{n} -valid values. Else, \code{NA} is returned. -} -\description{ -This function is similar to the SPSS \code{MEAN.n} function and computes -row means from a \code{data.frame} or \code{matrix} if at least \code{n} -values of a row are valid (and not \code{NA}). -} -\details{ -Rounding to a negative number of \code{digits} means rounding to a power of -ten, so for example mean_n(df, 3, digits = -2) rounds to the -nearest hundred. \cr \cr -For \code{n}, must be a numeric value from \code{0} to \code{ncol(dat)}. If -a \emph{row} in \code{dat} has at least \code{n} non-missing values, the -row mean is returned. If \code{n} is a non-integer value from 0 to 1, -\code{n} is considered to indicate the proportion of necessary non-missing -values per row. E.g., if \code{n = .75}, a row must have at least \code{ncol(dat) * n} -non-missing values for the row mean to be calculated. See 'Examples'. -} -\examples{ -dat <- data.frame(c1 = c(1,2,NA,4), - c2 = c(NA,2,NA,5), - c3 = c(NA,4,NA,NA), - c4 = c(2,3,7,8)) - -# needs at least 4 non-missing values per row -mean_n(dat, 4) # 1 valid return value - -# needs at least 3 non-missing values per row -mean_n(dat, 3) # 2 valid return values - -# needs at least 2 non-missing values per row -mean_n(dat, 2) - -# needs at least 1 non-missing value per row -mean_n(dat, 1) # all means are shown - -# needs at least 50\% of non-missing values per row -mean_n(dat, .5) # 3 valid return values - -# needs at least 75\% of non-missing values per row -mean_n(dat, .75) # 2 valid return values - -} -\references{ -\href{https://r4stats.com/2014/09/03/adding-the-spss-mean-n-function-to-r/}{r4stats.com} -} diff --git a/man/means_by_group.Rd b/man/means_by_group.Rd deleted file mode 100644 index ad82ba3..0000000 --- a/man/means_by_group.Rd +++ /dev/null @@ -1,94 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/grpmean.R -\name{means_by_group} -\alias{means_by_group} -\alias{grpmean} -\title{Summary of mean values by group} -\usage{ -means_by_group( - x, - dv, - grp, - weights = NULL, - digits = 2, - out = c("txt", "viewer", "browser"), - encoding = "UTF-8", - file = NULL -) - -grpmean( - x, - dv, - grp, - weights = NULL, - digits = 2, - out = c("txt", "viewer", "browser"), - encoding = "UTF-8", - file = NULL -) -} -\arguments{ -\item{x}{A (grouped) data frame.} - -\item{dv}{Name of the dependent variable, for which the mean value, grouped -by \code{grp}, is computed.} - -\item{grp}{Factor with the cross-classifying variable, where \code{dv} is -grouped into the categories represented by \code{grp}. Numeric vectors -are coerced to factors.} - -\item{weights}{Name of variable in \code{x} that indicated the vector of -weights that will be applied to weight all observations. Default is -\code{NULL}, so no weights are used.} - -\item{digits}{Numeric, amount of digits after decimal point when rounding -estimates and values.} - -\item{out}{Character vector, indicating whether the results should be printed -to console (\code{out = "txt"}) or as HTML-table in the viewer-pane -(\code{out = "viewer"}) or browser (\code{out = "browser"}), of if the -results should be plotted (\code{out = "plot"}, only applies to certain -functions). May be abbreviated.} - -\item{encoding}{Character vector, indicating the charset encoding used -for variable and value labels. Default is \code{"UTF-8"}. Only used -when \code{out} is not \code{"txt"}.} - -\item{file}{Destination file, if the output should be saved as file. -Only used when \code{out} is not \code{"txt"}.} -} -\value{ -For non-grouped data frames, \code{means_by_group()} returns a data frame with -following columns: \code{term}, \code{mean}, \code{N}, \code{std.dev}, -\code{std.error} and \code{p.value}. For grouped data frames, returns -a list of such data frames. -} -\description{ -Computes mean, sd and se for each sub-group (indicated by \code{grp}) -of \code{dv}. -} -\details{ -This function performs a One-Way-Anova with \code{dv} as dependent -and \code{grp} as independent variable, by calling -\code{lm(count ~ as.factor(grp))}. Then \code{\link[emmeans]{contrast}} -is called to get p-values for each sub-group. P-values indicate whether -each group-mean is significantly different from the total mean. -} -\examples{ -data(efc) -means_by_group(efc, c12hour, e42dep) - -data(iris) -means_by_group(iris, Sepal.Width, Species) - -# also works for grouped data frames -if (require("dplyr")) { - efc \%>\% - group_by(c172code) \%>\% - means_by_group(c12hour, e42dep) -} - -# weighting -efc$weight <- abs(rnorm(n = nrow(efc), mean = 1, sd = .5)) -means_by_group(efc, c12hour, e42dep, weights = weight) -} diff --git a/man/r2.Rd b/man/r2.Rd index 4b64705..d31c90e 100644 --- a/man/r2.Rd +++ b/man/r2.Rd @@ -11,6 +11,8 @@ \alias{icc} \alias{p_value} \alias{se} +\alias{means_by_group} +\alias{mean_n} \title{Deprecated functions} \usage{ r2(x) @@ -32,6 +34,10 @@ icc(x) p_value(x, ...) se(x, ...) + +means_by_group(x, ...) + +mean_n(x, ...) } \arguments{ \item{x}{An object.}