diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..ad6679c Binary files /dev/null and b/.DS_Store differ diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index c0fe40c..e712a21 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -1,4 +1,4 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help # # NOTE: This workflow is overkill for most R packages and @@ -12,6 +12,8 @@ on: name: R-CMD-check +permissions: read-all + jobs: R-CMD-check: runs-on: ${{ matrix.config.os }} @@ -22,31 +24,40 @@ jobs: fail-fast: false matrix: config: - - {os: macOS-latest, r: 'release'} + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} - # Use older ubuntu to maximise backward compatibility - - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-18.04, r: 'release'} - - {os: ubuntu-18.04, r: 'oldrel-1'} - - {os: ubuntu-18.04, r: 'oldrel-2'} + # use 4.1 to check with rtools40's older compiler + - {os: windows-latest, r: '4.1'} + + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + - {os: ubuntu-latest, r: 'oldrel-2'} + - {os: ubuntu-latest, r: 'oldrel-3'} + - {os: ubuntu-latest, r: 'oldrel-4'} env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} R_KEEP_PKG_SOURCE: yes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - - uses: r-lib/actions/setup-pandoc@v1 + - uses: r-lib/actions/setup-pandoc@v2 - - uses: r-lib/actions/setup-r@v1 + - uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} http-user-agent: ${{ matrix.config.http-user-agent }} use-public-rspm: true - - uses: r-lib/actions/setup-r-dependencies@v1 + - uses: r-lib/actions/setup-r-dependencies@v2 with: - extra-packages: rcmdcheck + extra-packages: any::rcmdcheck + needs: check - - uses: r-lib/actions/check-r-package@v1 + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/check-full.yaml b/.github/workflows/check-full.yaml deleted file mode 100644 index 5801cb5..0000000 --- a/.github/workflows/check-full.yaml +++ /dev/null @@ -1,58 +0,0 @@ -# Workflow derived from https://github.com/r-lib/actions/tree/master/examples -# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help -# -# NOTE: This workflow is overkill for most R packages and -# check-standard.yaml is likely a better choice. -# usethis::use_github_action("check-standard") will install it. -on: - push: - branches: [main, master] - pull_request: - branches: [main, master] - -name: R-CMD-full - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: macOS-latest, r: 'release'} - - - {os: windows-latest, r: 'release'} - # Use 3.6 to trigger usage of RTools35 - - {os: windows-latest, r: '3.6'} - - # Use older ubuntu to maximise backward compatibility - - {os: ubuntu-18.04, r: 'devel', http-user-agent: 'release'} - - {os: ubuntu-18.04, r: 'release'} - - {os: ubuntu-18.04, r: 'oldrel-1'} - - {os: ubuntu-18.04, r: 'oldrel-2'} - - {os: ubuntu-18.04, r: 'oldrel-3'} - - {os: ubuntu-18.04, r: 'oldrel-4'} - - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - R_KEEP_PKG_SOURCE: yes - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-pandoc@v1 - - - uses: r-lib/actions/setup-r@v1 - with: - r-version: ${{ matrix.config.r }} - http-user-agent: ${{ matrix.config.http-user-agent }} - use-public-rspm: true - - - uses: r-lib/actions/setup-r-dependencies@v1 - with: - extra-packages: rcmdcheck - - - uses: r-lib/actions/check-r-package@v1 diff --git a/DESCRIPTION b/DESCRIPTION index 6bf2d2b..56359f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,15 +16,17 @@ License: GPL (>= 3) Encoding: UTF-8 ByteCompile: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 +RoxygenNote: 7.3.1 Depends: R (>= 3.5.0) Imports: data.table (>= 1.14.0), glue (>= 1.4.2), - nortest (>= 1.0-4), effectsize (>= 0.4.5), afex (>= 0.28-1), PMCMRplus (>= 1.9.0), WRS2 (>= 1.1-1), lifecycle +Suggests: + testthat (>= 3.0.0) +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 64cebf0..3e1a333 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,7 +3,6 @@ S3method(as.data.frame,writR) S3method(print,writR) export(aov_r) -export(autest) export(cent_disp) export(clean_data) export(contingency) @@ -14,24 +13,18 @@ export(lablr) export(one_sample) export(pairs_two_sample) export(pairwise_test) +export(rm_anova) +export(sphericity) export(sphericity_check) export(style.p) export(two_sample) importFrom(PMCMRplus,durbinAllPairsTest) importFrom(PMCMRplus,gamesHowellTest) importFrom(PMCMRplus,kwAllPairsDunnTest) -importFrom(WRS2,dep.effect) importFrom(WRS2,lincon) -importFrom(WRS2,rmanova) importFrom(WRS2,rmmcp) -importFrom(WRS2,t1way) -importFrom(WRS2,trimse) -importFrom(WRS2,wmcpAKP) -importFrom(WRS2,yuen) -importFrom(WRS2,yuen.effect.ci) -importFrom(WRS2,yuend) -importFrom(afex,aov_ez) importFrom(data.table,"%chin%") +importFrom(data.table,"%like%") importFrom(data.table,":=") importFrom(data.table,.N) importFrom(data.table,.SD) @@ -42,40 +35,8 @@ importFrom(data.table,fcase) importFrom(data.table,fifelse) importFrom(data.table,melt) importFrom(data.table,rbindlist) -importFrom(data.table,rowidv) -importFrom(data.table,setnames) -importFrom(effectsize,cohens_d) -importFrom(effectsize,cohens_g) -importFrom(effectsize,cramers_v) -importFrom(effectsize,eta_squared) -importFrom(effectsize,hedges_g) -importFrom(effectsize,kendalls_w) -importFrom(effectsize,oddsratio) -importFrom(effectsize,omega_squared) -importFrom(effectsize,pearsons_c) -importFrom(effectsize,rank_biserial) -importFrom(effectsize,rank_epsilon_squared) -importFrom(glue,glue) importFrom(lifecycle,deprecated) -importFrom(nortest,lillie.test) -importFrom(stats,IQR) -importFrom(stats,anova) -importFrom(stats,as.formula) -importFrom(stats,chisq.test) -importFrom(stats,complete.cases) -importFrom(stats,fisher.test) -importFrom(stats,friedman.test) -importFrom(stats,kruskal.test) -importFrom(stats,lm) -importFrom(stats,mcnemar.test) -importFrom(stats,median) importFrom(stats,na.omit) -importFrom(stats,oneway.test) importFrom(stats,p.adjust) importFrom(stats,pairwise.t.test) -importFrom(stats,pchisq) -importFrom(stats,sd) -importFrom(stats,shapiro.test) -importFrom(stats,t.test) -importFrom(stats,wilcox.test) importFrom(utils,combn) diff --git a/R/aov_r.R b/R/aov_r.R index 05a3187..404a8f4 100644 --- a/R/aov_r.R +++ b/R/aov_r.R @@ -11,17 +11,15 @@ #' @param effsize.type The effect size used to estimate the effects of the factors on the response variable. Possible values are "eta" ("biased") or "omega" ("unbiased", the default). #' @param sphericity If "none", then sphericity assumption is assumed to be met for within-subject(s) factor(s). "GG": applies Greenhouse-Geisser correction. "HF": applies Hyunh-Feldt correction. 'auto' (Default) choose the appropiate correction based on Mauchly test of sphericity (p-value > 0.05) #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. #' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). -#' @importFrom data.table %chin% as.data.table .SD fcase .N -#' @importFrom utils combn -#' @importFrom stats anova -#' @importFrom effectsize eta_squared omega_squared +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. #' @return A list with statistical results. +#' #' @export aov_r <- function(data, - response = NULL, + response, between = NULL, within = NULL, rowid = NULL, @@ -29,30 +27,31 @@ aov_r <- function(data, effsize.type = "unbiased", sphericity = "auto", conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL) { + character.only = FALSE, + markdown) { `Pr(>F)` <- rn <- `num Df` <- `den Df` <- NULL - # Auxiliary functions - is_empty <- function(i) length(i) == 0; - is_not_empty <- function(i) length(i) > 0; - is_not_null <- function(i) !is.null(i); - - # Argument checking - if (is.null(between) && is.null(within)) stop("between and within can't both be NULL", call. = FALSE) - if (is.null(response)) stop("response can't be NULL", call. = FALSE) - if (is_not_empty(within) && is.null(rowid)) stop("If within is provided, rowid can't be NULL", call. = FALSE) + # Argument checking - Response + if (missing(response)) stop("response can't be NULL", call. = FALSE) # Transform data to data.table - if (!"data.table" %chin% class(data)) { - data <- data.table::as.data.table(data) - } + if (!"data.table" %chin% class(data)) data <- data.table::as.data.table(data) + + # Transform arguments to character + response <- deparser(response, character.only) + between <- deparser(between, character.only) + within <- deparser(within, character.only) + rowid <- deparser(rowid, character.only) + + # Argument checking - arguments + if (is.null(between) && is.null(within)) stop("between and within can't both be NULL", call. = FALSE) + if (!is.null(within) && is.null(rowid)) stop("if within is provided, rowid can't be NULL", call. = FALSE) # Get only variables of interest - data <- droplevels(data[j = .SD, .SDcols = c(rowid, response, between, within)]) + data <- droplevels(data[, .SD, .SDcols = c(rowid, response, between, within)]) - if (is_empty(within) && is.null(rowid) && is_not_empty(between)) { + if (length(within) == 0 && is.null(rowid) && length(between) > 0) { rowid <- "rowid" data[, rowid := seq_len(.N)] } @@ -75,7 +74,7 @@ aov_r <- function(data, n_obs <- nrow(model$data$wide) # Get sphericity correction - if (is_empty(within)) { + if (length(within) == 0) { sphericity <- NULL } else { sphericity <- sphericity_check(model) @@ -100,11 +99,11 @@ aov_r <- function(data, # Set sphericity correction (if available) within_method <- NA_character_ - if (is_not_null(within)) { + if (!is.null(within)) { within_method <- data.table::fcase( - sphericity == "none", "Fisher's repeated measures ANOVA", - sphericity == "GG", "Repeated measures ANOVA with GG correction", - sphericity == "HF", "Repeated measures ANOVA with HG correction" + sphericity == "none", "Fisher's rmANOVA", + sphericity == "GG", "Greenhouse-Geisser's rmANOVA", + sphericity == "HF", "Huynh-Feldt's rmANOVA" ) } @@ -116,8 +115,8 @@ aov_r <- function(data, df.error = `den Df`, p.value = `Pr(>F)`, method = data.table::fcase(rn %chin% between, "Fisher's ANOVA", - rn %chin% within, within_method, - utils::combn(within, 1, grepl, T, rn), within_method), + rn %chin% within, within_method, + utils::combn(within, 1, grepl, T, rn), within_method), estimate = efs[[2L]], conf.level = efs$CI, conf.low = efs$CI_low, @@ -126,7 +125,7 @@ aov_r <- function(data, n_obs = n_obs )] - if (lbl) { + if (!missing(markdown) && isTRUE(markdown)) { model <- model[j = lablr(.SD, markdown = markdown), by = "x"] } diff --git a/R/autest.R b/R/autest.R deleted file mode 100644 index 09c8b42..0000000 --- a/R/autest.R +++ /dev/null @@ -1,134 +0,0 @@ -#' @title Hypothesis testing for group differences with assumption checking for test selection -#' @name autest -#' @description A list containing results from a multi-sample test. -#' -#' @param data Data frame from which `x` and `y` (and possibly `rowid` if provided) will be searched. -#' @param x Character for the grouping factor. Must be present in data -#' @param y Character for the response variable. Must be present in data. -#' @param rowid Character for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present. -#' @param type Set `"auto"` (default) for checking the normality and homogeneity of variances for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. -#' @param paired Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is `FALSE.` -#' @param var.equal Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used. -#' @param posthoc Logical indicating if post-hoc tests should be returned additionally to regular output -#' @param sphericity Character. Which sphericity correction of the degrees of freedom should be reported for the within-subject factors. Possible values are "GG" corresponding to the Greenhouse-Geisser correction, "HF" (i.e., Hyunh-Feldt correction), and "none" (i.e., no correction). -#' @param test.value A number indicating the true value of the mean (Default: 0) to be tested. Only for one sample test. -#' @param trim Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L). -#' @param effsize.type Options are `"unbiased"` or `"omega"` for partial omega squared (k-samples) or `"g"` for Hedges g (two-samples) and `"biased"` or `"eta"` for partial eta squared (k-samples) or `"d"` for Cohen's d as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs in `k_sample()`, whereas rank-biserial correlation is used in `two_sample()`. -#' @param alternative A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". -#' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param ss_type Type of sum of squares for repeated measures ANOVA (defaults to 3). Possible values are "II", "III", 2, or 3. -#' @param p.adjust.method Adjustment method for p-values for multiple comparisons. Possible methods are: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" (default). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. -#' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). -#' @param ... Currently ignored. -#' @export - -autest <- function(data, x, y = NULL, - rowid = NULL, - type = "auto", - paired = FALSE, - var.equal = FALSE, - posthoc = FALSE, - sphericity = "GG", - test.value = 0, - trim = 0.2, - nboot = 100L, - effsize.type = "unbiased", - alternative = "two.sided", - conf.level = 0.95, - ss_type = 3, - p.adjust.method = "none", - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, - ...) { - - if (!is.null(y)) { - # Data cleaning - data <- clean_data(data, x = x, y = y, rowid = rowid, paired = paired, wide = FALSE) - } else { - # One sample test - test <- one_sample( - data = data, - y = x, - type = type, - test.value = test.value, - trim = trim, - nboot = nboot, - effsize.type = effsize.type, - alternative = alternative, - conf.level = conf.level, - lbl = lbl, - markdown = markdown, - ) - - return(test) - } - # K-samples test - x_lvl <- levels(data[[x]]) - if (length(x_lvl) > 2) { - test <- k_sample( - data = data, - x = x, - y = y, - rowid = rowid, - type = type, - paired = paired, - var.equal = var.equal, - sphericity = sphericity, - trim = trim, nboot = nboot, - effsize.type = effsize.type, - alternative = alternative, - conf.level = conf.level, - ss_type = ss_type, - lbl = lbl, - markdown = markdown, - internal = TRUE - ) - - # With post-hoc if required - if (posthoc) { - posthoc <- pairwise_test( - data = data, - x = x, - y = y, - rowid = rowid, - type = type, - paired = paired, - var.equal = var.equal, - trim = trim, - nboot = nboot, - p.adjust.method = p.adjust.method, - alternative = alternative, - conf.level = conf.level, - internal = TRUE - ) - - output <- list(test = test, pwc = posthoc) - return(output) - } else { - return(test) - } - } else { - # Two sample test - test <- two_sample( - data = data, - x = x, - y = y, - rowid = rowid, - type = type, - paired = paired, - var.equal = var.equal, - trim = trim, - nboot = nboot, - effsize.type = effsize.type, - alternative = alternative, - conf.level = conf.level, - lbl = lbl, - markdown = markdown, - internal = TRUE - ) - - return(test) - } -} diff --git a/R/cent_disp.R b/R/cent_disp.R index 9c7ed28..96fe170 100644 --- a/R/cent_disp.R +++ b/R/cent_disp.R @@ -8,8 +8,6 @@ #' @param markdown Whether you want the output formated for inline R markdown (TRUE) or as plain text (FALSE). #' @keywords cent_disp #' @return A character vector of length one. -#' @importFrom stats sd median IQR -#' @importFrom glue glue #' @export cent_disp <- function(x, @@ -19,17 +17,15 @@ cent_disp <- function(x, markdown = TRUE) { if (!is.numeric(x)) stop(deparse(substitute(x)), " is not numeric.") + x <- x[!is.na(x)] if (type != "custom") { if (type == "auto") { type <- if (is_normal(x)) "p" else "np" } - if (type == "p") { - .f <- list(cent = mean, disp = sd, m = "M", i = "SD") - } - if (type == "np") { - .f <- list(cent = median, disp = IQR, m = "Mdn", i = "IQR") - } + if (type == "p") .f <- list(cent = base::mean, disp = stats::sd, m = "M", i = "SD") + if (type == "np") .f <- list(cent = stats::median, disp = stats::IQR, m = "Mdn", i = "IQR") + m <- round(.f$cent(x), k) i <- round(.f$disp(x), k) if (markdown) { @@ -38,35 +34,17 @@ cent_disp <- function(x, paste0(.f$m, " = ", m, ", ", .f$i, " = ", i) } } else { - str.b <- gsub( - pattern = "\\{|\\}", - replacement = "", - x = regmatches( - x = str.a, - m = gregexpr( - pattern = "\\{(.*?)\\}", - text = str.a - ) - )[[1]] - ) - res <- data.frame( - rbind( - sapply( - X = str.b, - FUN = function(i) { - .f <- eval( - expr = as.name( - x = i - ) - ) - round( - x = .f(x), - digits = k - ) - } - ) - ) - ) - glue(str.a, .envir = res) + m <- gregexpr(pattern = "\\{(.*?)\\}", text = str.a) + m <- regmatches(str.a, m)[[1]] + + str.b <- gsub("\\{|\\}", "", m) + str_eval <- function(i) { + .f <- eval(expr = as.name(x = i)) + round(x = .f(x), digits = k) + } + + res <- vapply(str.b, str_eval, FUN.VALUE = NA_real_) + + glue::glue(str.a, .envir = as.list(res)) } } diff --git a/R/clean_data.R b/R/clean_data.R index c8995bb..ead15fc 100644 --- a/R/clean_data.R +++ b/R/clean_data.R @@ -1,131 +1,67 @@ -#' @title Data table without NA's and with rowid (paired designs only). -#' @name clean_data +#' Remove NA's from long to wide/long data.table #' -#' @param data Data frame from which `x` and `y` (and possibly `rowid` if provided) will be searched. -#' @param x Unquoted name for the grouping factor. Must be present in data -#' @param y Unquoted name for the response variable. Must be present in data. -#' @param rowid Unquoted name for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present. -#' @param paired Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is `FALSE.` -#' @param wide Logical to whether return a data.frame in wide format (`TRUE`, i.e. one columns per group/time) or in long format (`FALSE`). +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' This function allows removing NA's from long format data into wide (or long) format +#' data, even suporting repeated measures designs (i.e., with more than one subject per +#' factor level). +#' +#' @param data Data from which `x` and `y` (and possibly `rowid` if provided) will +#' be searched. +#' @param x Name for the grouping factor. Must be present in data +#' @param y Name for the response variable. Must be present in data. +#' @param rowid Name for the subject-id column. If null, then is assumed that +#' data is sorted for paired designs, creating one. So if your data is not sorted and you +#' leave this argument unspecified, the results can be inaccurate when there are more than +#' two levels in x and there are NAs present. Ignored if `paired` is `FALSE`. +#' @param paired Logical that decides whether the experimental design is repeated +#' measures/within-subjects or between-subjects. The default is `FALSE.` +#' @param wide Logical to whether return a data.frame in wide format (`TRUE`, i.e. one +#' columns per group/time) or in long format (`FALSE`). +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. #' @param ... Currently ignored. -#' @importFrom stats na.omit as.formula -#' @importFrom data.table rowidv setnames data.table dcast := melt +#' +#' #' @export clean_data <- function(data, x, y, rowid = NULL, paired = FALSE, wide = FALSE, + character.only = FALSE, ...) { + if (missing(data)) stop("`data` can't be null", call. = FALSE) + if (missing(x) || missing(y)) stop("`x` and `y` can't be null", call. = FALSE) - arg <- match.call() - - # Check if data is null - is.null(arg$data) && stop("'data' can't be null", call. = F) - - # Get names - n_dt <- names(data) - - # Check if x or y is null - is.null(arg$x) || is.null(arg$y) && stop("'x' and 'y' can't be null", call. = F) + if (!"data.table" %in% class(data)) + data <- data.table::as.data.table(data) - # For latter use - x_var <- data[[x]] + y_label <- deparser(y, character.only); y <- as.name(y_label) + x_label <- deparser(x, character.only); x <- as.name(x_label) + rowid <- deparser(rowid, character.only) - # Otherwise, check if is in data - all(x != n_dt) || all(y != n_dt) && stop("'x' and 'y' must be within your data. Check your arguments", call. = F) - - # Check if rowid is present in the data if is paired and not null - if(paired) { - if(is.null(rowid)) { - rowid_var <- data.table::rowidv(x_var) - } else { - all(rowid != n_dt) && stop(rowid, " not found in data", call. = F) - rowid_var <- data[[rowid]] - } + if (is.null(rowid) || length(rowid) == 0 || !paired) { + expr_j <- substitute(list("rowid" = factor(seq_len(.N)), y)); + expr_by <- substitute(list(x <- factor(x))) } else { - rowid_var <- NULL + rowid <- as.name(x = rowid) + expr_j <- substitute(y); + expr_by <- substitute(list(x <- factor(x), "rowid" = factor(rowid))) } - # Add rowid if paired is TRUE and drop unused levels - dt <- droplevels( - x = stats::na.omit( - object = data.table::setnames( - x = data.table::data.table( - x_col = x_var - , y_col = data[[y]] - , rowid = rowid_var - ) - , old = c("x_col", "y_col") - , new = c(x, y) - ) - ) - ) - - # Get levels for input data and output data - data_x_levels <- levels(data[[arg$x]]) - dt_x_levels <- levels(dt[[arg$x]]) + data <- data[, eval(expr_j), eval(expr_by)] - # Check for empty levels - if(length(data_x_levels) > length(dt_x_levels)) { - drlv <- data_x_levels[!data_x_levels %in% dt_x_levels] - # Display a warning if any - warning( - "Unused levels (i.e. ", - paste0(drlv, collapse = ", "), - ") were dropped", - call. = FALSE - ) + if (paired || wide) { + expr <- stats::as.formula(paste("rowid ~", x_label)) + data <- data.table::dcast(data = data, formula = expr, value.var = y_label, drop = c(F, T)) + data <- data[, .SD, .SDcols = function(i) !all(is.na(i))] } - if (paired) { - # Check if they're more than 1 observation per subject on each factor level - if (any(table(dt[["rowid"]], dt[[x]]) > 1)) { - stop("More than 1 observation per subject for at least one factor level, check your data with table()", call. = F) - } + if (paired) data <- stats::na.omit(data)[, droplevels(.SD)] - # Wide format - one row per subject and remove missing values - dt <- stats::na.omit( - object = data.table::dcast( - data = dt, - formula = stats::as.formula( - object = paste("rowid ~", x) - ), - value.var = y - ) - )[j = rowid := factor(x = rowid)][] - - # Si el output solicitado es formato ancho devuelve el resultado intermedio - if (wide) { - # Formato ancho - return(dt) - } else { - # De lo contrario en formato largo - dt <- data.table::melt( - data = dt, - id.vars = "rowid", - variable.name = x, - value.name = y - ) - return(dt) - } - } + if (!wide && paired) data <- data.table::melt(data, id.vars = 1, variable.name = x_label, value.name = y_label) - if (wide) { - p <- function(i, lng) { - i[!is.na(i)][seq_len(lng)] - } - lng <- max(table(dt[[x]])) - # Formato ancho y no pareado - dt <- `attr<-`( - x = tapply(dt[[y]], dt[[x]], p, lng, simplify = FALSE), - which = "class", - value = c("data.table", "data.frame") - ) - - return(dt) - } else { - # Formato largo y no pareado - return(dt) - } + return(data) } diff --git a/R/contingency.R b/R/contingency.R index 676726a..e2dfa2b 100644 --- a/R/contingency.R +++ b/R/contingency.R @@ -5,81 +5,87 @@ #' @param x Factor variable, quoted or unquoted. #' @param y Factor. If `NULL`, a goodness-of-fit is carried, otherwise a two-way analysis is performed. #' @param paired Logical. If `TRUE` McNemar's Chi-squared test is carried on. -#' @param exact Logical. If `TRUE` then Fisher's Exact Test is carried on, but only when `paired = FALSE` (default). If is a 2 x 2 design, Odds Ratio (OR) is returned as effect size, otherwise it will only return the formated p-value. +#' @param ratio A vector of probabilities of the same length of x. An error is given if any entry of p is negative. #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. -#' @param markdown Whether you want the output formated for inline R Markdown or as plain text. +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. #' @param ... Currently not used. #' @keywords contingency -#' @importFrom stats chisq.test fisher.test mcnemar.test -#' @importFrom effectsize cramers_v oddsratio cohens_g pearsons_c +#' #' @export contingency <- function(data , x , y = NULL , paired = FALSE - , exact = FALSE + , ratio = NULL , conf.level = 0.95 - , lbl = if (is.null(markdown)) FALSE else TRUE - , markdown = NULL + , character.only = FALSE , ...) { + x <- deparser(x, character.only) + y <- deparser(y, character.only) + x_var <- data[[x]] + if (is.null(y)) { - tab <- table(x_var) - } else { - y_var <- data[[y]] - tab <- table(x_var, y_var) + x_tab <- table(x_var) + if (is.null(ratio)) ratio <- rep(1 / length(x_tab), length(x_tab)) + .f <- stats::chisq.test + .f.es <- effectsize::pearsons_c + .f.arg <- alist(x = x_tab, p = ratio, correct = FALSE) + } else { + xy_tab <- table(x_var, data[[y]]) + if (paired) { + .f <- stats::mcnemar.test + .f.es <- effectsize::cohens_g + } else { + .f <- stats::chisq.test + .f.es <- effectsize::cramers_v + } + .f.arg <- alist(x = xy_tab, correct = FALSE) } - if (paired) { - # Mcnemar test - .f <- stats::mcnemar.test(tab) - .es <- effectsize::cohens_g(tab, ci = conf.level) - } else if (exact) { - # Exact test - .f <- stats::fisher.test(tab) - .es <- try(expr = effectsize::oddsratio(tab, ci = conf.level), silent = TRUE) - if ("try-error" %chin% class(.es)) { - .es <- rep(NA_real_, 4) - .es <- `names<-`(.es, rep(NA, 4)) - .f$method <- paste(.f$method, "without OR") - } + .f <- do.call(.f, .f.arg) + + .f.arg <- append(.f.arg, list(adjust = TRUE, ci = conf.level)) + .f.es <- do.call(.f.es, .f.arg) + + res <- get_contingency_expr(.f, .f.es, x, y) + + return(res) +} +get_contingency_expr <- function(.f, .f.es, x, y) { + if (is.null(y)) { + y <- NA_character_ + effectsize <- "Pearson's C" } else { - # Chi-square - .f <- stats::chisq.test(tab) - if (is.null(y)) { - .es <- effectsize::pearsons_c(tab, ci = conf.level) - } else { - .es <- effectsize::cramers_v(tab, ci = conf.level) - } + es_name <- tolower(x = names(.f.es)[[1L]]) + if (grepl("cramer", es_name)) effectsize <- "Cramer's V" + if (grepl("cohen", es_name)) effectsize <- "Cohen's g" } + if (is.null(.f$statistic)) .f$statistic <- NA_real_ + if (is.null(.f$parameter)) .f$parameter <- NA_real_ + res <- list( - y = if (is.null(y)) NA_character_ else y, + y = y, x = x, - statistic = if (is.null(.f$statistic)) NA_real_ else .f$statistic, - df = if (is.null(.f$parameter)) NA_real_ else .f$parameter, + statistic = .f$statistic, + df = .f$parameter, df.error = NA_real_, p.value = .f$p.value, method = .f$method, alternative = NA_character_, - estimate = as.numeric(.es[[1L]]), - conf.level = as.numeric(.es[[2L]]), - conf.low = as.numeric(.es[[3L]]), - conf.high = as.numeric(.es[[4L]]), - effectsize = names(.es)[[1L]], - n_obs = sum(tab) + estimate = .f.es[[1L]], + conf.level = .f.es[[2L]], + conf.low = .f.es[[3L]], + conf.high = .f.es[[4L]], + effectsize = effectsize, + n_obs = NA_integer_ ) - if (lbl) { - res <- lablr(res, markdown) - } - - class(res) <- c("writR", "list") - + class(res) <- c("writR", "writR.contingency", class(res)) return(res) } - diff --git a/R/deparser.R b/R/deparser.R new file mode 100644 index 0000000..5fbb940 --- /dev/null +++ b/R/deparser.R @@ -0,0 +1,57 @@ +#' Always returns the outermost expression +#' +#' @param x An expression from which to capture the outermost expression. +#' @param env The last environment in which to search `x`. Default is missing. +#' @param ... Currently not used. +#' +#' @source + +subs <- function(x, env, ...) { + .call <- quote(substitute(x)) + .name <- eval(.call) + + .envs <- rev(x = sys.frames()) + if (!missing(env) && is.environment(env)) { + to_enclos <- vapply(.envs, identical, env, FUN.VALUE = NA) + to_enclos <- 1:which(to_enclos) + .envs <- .envs[to_enclos] + } + + for (i in .envs) { + .call[[2]] <- .name + .name <- eval(.call, i) + } + + return(.name) +} + +#' Get the deparsed value of the outermost expression +#' +#' @param x An expression from which to capture the outermost expression. +#' @param character.only whether to treat `x` as a character. Default is FALSE. +#' @param ... Currently not used. + +deparser <- function(x, character.only = FALSE, ...) { + if (missing(x)) return(NULL) + + if (isTRUE(character.only)) { + x <- as.character(x) + return(x) + } + + x <- subs(x, env = parent.frame()) + + .f <- function(i) { + if (is.null(i)) return(NULL) + if (!is.character(i)) i <- deparse(i) + return(i) + } + + if (length(x) > 1) { + x <- as.list(x)[-1L] + x <- lapply(x, .f) + unlist(x) + } else { + .f(x) + } +} diff --git a/R/k_sample.R b/R/k_sample.R index 8140347..b602117 100644 --- a/R/k_sample.R +++ b/R/k_sample.R @@ -6,49 +6,36 @@ #' @param x Character for the grouping factor. Must be present in data #' @param y Character for the response variable. Must be present in data. #' @param rowid Character for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present. -#' @param type Set `"auto"` (default) for checking the normality and homogeneity of variances for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. +#' @param type Missing (default) or `NULL` for checking the normality and homogeneity of variances for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. #' @param paired Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is `FALSE.` #' @param var.equal Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used. -#' @param sphericity Character. Which sphericity correction of the degrees of freedom should be reported for the within-subject factors. Possible values are "GG" corresponding to the Greenhouse-Geisser correction, "HF" (i.e., Hyunh-Feldt correction), and "none" (i.e., no correction). -#' @param trim Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L). +#' @param is_spherical Logical. checks whether to assume that the sphericity assumptions holds or not, if `NULL` (the default) it will be tested using mauchly's test with a threshold of 0.05. +#' @param adjust Character. correction for sphericity to be applied, it can be any character of length one starting with 'g' (indicating Greenhouse–Geisser correction) or 'h' (indicating Huynh–Feldt correction). #' @param effsize.type Options are `"unbiased"` or `"omega"` for partial omega squared and `"biased"` or `"eta"` for partial eta squared as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs. -#' @param alternative A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param ss_type Type of sum of squares for repeated measures ANOVA (defaults to 3). Possible values are "II", "III", 2, or 3. -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. -#' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). -#' @param internal Logical to whether this function is being used inside of internal functions. +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. #' @param ... Currently ignored. -#' @importFrom data.table dcast %chin% -#' @importFrom effectsize eta_squared omega_squared kendalls_w rank_epsilon_squared -#' @importFrom afex aov_ez -#' @importFrom stats anova oneway.test as.formula friedman.test kruskal.test -#' @importFrom WRS2 rmanova wmcpAKP t1way +#' #' @export k_sample <- function(data, x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - sphericity = "GG", - trim = 0.2, - nboot = 100L, + is_spherical = NULL, + adjust = NULL, effsize.type = "unbiased", - alternative = "two.sided", conf.level = 0.95, - ss_type = 3, - lbl = if(is.null(markdown)) FALSE else TRUE, - markdown = NULL, - internal = FALSE, + character.only = FALSE, ...) { - # Avoid unnecessary computation - if (!internal) { - # data cleaning - data <- clean_data(data, x = x, y = y, rowid = rowid, paired = paired, wide = FALSE) - } + x <- deparser(x, character.only) + y <- deparser(y, character.only) + rowid <- deparser(rowid, character.only) + + data <- clean_data(data, x, y, rowid, paired, character.only = TRUE) # Create vectors of variables y_var <- data[[y]] @@ -58,278 +45,93 @@ k_sample <- function(data, x, y, x_lvl <- levels(x_var) # Check normality - if (type == "auto") { - normal <- vapply(x_lvl, function(i) { - is_normal(y_var[x_var == i]) - }, NA, USE.NAMES = FALSE) + if (missing(type) || is.null(type)) { + normal <- vapply(x_lvl, function(i) is_normal(y_var[x_var == i]), NA) type <- if (all(normal)) "check" else "np" } + n_obs <- if (paired) length(y_var) / length(x_lvl) else length(y_var) + # Parametric statistics if (type %chin% c("p", "check")) { # Assign an effect size - .f <- if (effsize.type %chin% c("biased", "eta")) { - effectsize::eta_squared - } else if (effsize.type %chin% c("unbiased", "omega")) { - effectsize::omega_squared - } - - # Check model validity for repeated measures design - if (paired) { - model <- afex::aov_ez( - id = "rowid", - dv = y, - data = data, - within = x, - include_aov = TRUE, - type = ss_type) - if (model$Anova$singular) { - sphericity <- "none" - type <- "p" - } - } - - # Check assumptions - if (type == "check") { - # Sphericity - if (paired) { - sphericity <- sphericity_check(model) - # Homogeneity of variances - } else { - var.equal <- is_var.equal(y_var, x_var) - } - } + if (effsize.type %in% c("biased", "eta")) .f.es <- effectsize::F_to_eta2 + if (effsize.type %in% c("unbiased", "omega")) .f.es <- effectsize::F_to_omega2 - # Parametric statistics if (paired) { - test <- stats::anova(model, correction = sphericity) - - es <- suppressWarnings({ - .f(model = model, ci = conf.level, verbose = FALSE) - }) - - test <- list( - "y" = y, - "x" = x, - statistic = test$F, - df = as.double(test[["num Df"]]), - df.error = as.double(test[["den Df"]]), - p.value = test[["Pr(>F)"]], - method = if (sphericity == "none") { - "Fisher's repeated measures ANOVA" - } else { - paste("Repeated measures ANOVA with", sphericity, "correction") - }, - alternative = NA_character_, - estimate = es[[2L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[2L], - n_obs = length(y_var) / length(x_lvl) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + .f <- rm_anova(data, x, y, is_spherical, adjust, character.only = TRUE) + .f.es <- NULL + method <- .f$method } else { - - test <- stats::oneway.test( - formula = y_var ~ x_var, - var.equal = var.equal - ) - - es <- .f(test, ci = conf.level, verbose = FALSE) - - test <- list( - "y" = y, - "x" = x, - statistic = test$statistic, - df = test$parameter[["num df"]], - df.error = test$parameter[["denom df"]], - p.value = test$p.value, - method = if (var.equal) "Fisher's ANOVA" else "Welch's ANOVA", - alternative = NA_character_, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + if (type == "check") var.equal <- is_var.equal(y_var, x_var) + .f <- stats::oneway.test(formula = y_var ~ x_var, var.equal = var.equal) + .f.es <- .f.es(f = .f$statistic, df = .f$parameter[[1L]], df_error = .f$parameter[[2L]], ci = conf.level) + method <- if (var.equal) "Fisher's ANOVA" else "Welch's ANOVA" } } # Non-parametric statistics if (type == "np") { # Friedman rank-sum test for dependent samples if (paired) { - test <- stats::friedman.test( - y = y_var, - groups = x_var, - blocks = data$rowid - ) - - es <- effectsize::kendalls_w( - x = y_var, - groups = x_var, - blocks = data$rowid, - ci = conf.level, - verbose = FALSE - ) - - test <- list( - "y" = y, - "x" = x, - statistic = test$statistic, - df = as.double(test$parameter), - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = NA_character_, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = length(y_var) / length(x_lvl) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") + .f <- stats::friedman.test(y = y_var, groups = x_var, blocks = data$rowid) + .f.es <- effectsize::kendalls_w(x = y_var, groups = x_var, blocks = data$rowid, ci = conf.level, verbose = FALSE) + method <- "Friedman Rank Sum Test" - return(test) # Kruskal-Wallis rank-sum test for independent samples } else { - test <- stats::kruskal.test( - x = y_var, - g = x_var - ) - - es <- effectsize::rank_epsilon_squared( - x = y_var, - groups = x_var, - ci = conf.level - ) - - test <- list( - "y" = y, - "x" = x, - statistic = test$statistic, - df = as.double(test$parameter), - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = NA_character_, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + .f <- stats::kruskal.test(x = y_var, g = x_var) + .f.es <- effectsize::rank_epsilon_squared(x = y_var, groups = x_var, ci = conf.level) + method <- "Kruskal-Wallis Rank Sum Test" } } - # Robust statistics - if (type == "r") { - # one-way repeated measures ANOVA for trimmed means - if (paired) { - test <- WRS2::rmanova( - y = y_var, - groups = x_var, - blocks = data$rowid, - tr = trim - ) - - formula <- stats::as.formula( - object = paste0("rowid ~", x) - ) - es <- WRS2::wmcpAKP( - x = data.table::dcast(data, formula, value.var = y)[,-1L], - tr = trim, - nboot = nboot - ) + res <- get_k_expr(.f, .f.es, type, paired, x, y, n_obs, effsize.type, method) - test <- list( - "y" = y, - "x" = x, - statistic = test$test, - df = as.double(test$df1), - df.error = as.double(test$df2), - p.value = test$p.value, - method = "one-way repeated measures ANOVA for trimmed means", - alternative = NA_character_, - estimate = es[[1L]], - conf.level = 0.95, - conf.low = es[[2L]], - conf.high = es[[3L]], - effectsize = "Algina-Keselman-Penfield robust standardized difference average", - n_obs = length(y_var) / length(x_lvl) - ) + return(res) +} - if(lbl) { - test <- lablr(test, markdown) - } +get_k_expr <- function(.f, .f.es, type, paired, x, y, n_obs, effsize.type, method) { + if (type %in% c("p", "check")) { + p.value <- .f$p.value + statistic <- .f$statistic - class(test) <- c("writR", "list") + if (effsize.type %in% c("biased", "eta")) effectsize <- "Eta-squared (partial)" + if (effsize.type %in% c("unbiased", "omega")) effectsize <- "Omega-squared (partial)" - return(test) - # one-way ANOVA for trimmed means + if (paired) { + .f.es <- list(.f$estimate, CI = .f$conf.level, CI_low = .f$conf.low, CI_high = .f$conf.high) + df <- .f$df_num + df.error <- .f$df_den } else { - test <- WRS2::t1way( - formula = y_var ~ x_var, - tr = trim, - nboot = nboot - ) - - test <- list( - "y" = y, - "x" = x, - statistic = test$test, - df = as.double(test$df1), - df.error = as.double(test$df2), - p.value = test$p.value, - method = "one-way ANOVA for trimmed means", - alternative = NA_character_, - estimate = test$effsize, - conf.level = 0.95, - conf.low = test$effsize_ci[[1L]], - conf.high = test$effsize_ci[[2L]], - effectsize = "Explanatory measure of effect size", - n_obs = length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + df <- .f$parameter[[1L]] + df.error <- .f$parameter[[2L]] } + } else { + effectsize <- if (paired) "Kendall's W" else "rank-biserial correlation" + statistic <- .f$statistic + df <- .f$parameter + df.error <- NA_real_ + p.value <- .f$p.value } + + res <- list( + "y" = y, + "x" = x, + statistic = statistic, + df = df, + df.error = df.error, + p.value = p.value, + method = method, + alternative = NA_character_, + estimate = .f.es[[1L]], + conf.level = .f.es[["CI"]], + conf.low = .f.es[["CI_low"]], + conf.high = .f.es[["CI_high"]], + effectsize = effectsize, + n_obs = n_obs + ) + + class(res) <- c("writR", "writR.k", class(res)) + return(res) } diff --git a/R/lablr.R b/R/lablr.R index 24e3361..85a7714 100644 --- a/R/lablr.R +++ b/R/lablr.R @@ -2,7 +2,7 @@ #' @name lablr #' @description A list containing stats, p value, effectsize, confidence/credible interval and a concatenated string named 'full'. #' -#' @param t Output from any of the functions autest, k_sample, two_sample or one_sample. +#' @param t Output from any of the functions, k_sample, two_sample or one_sample. #' @param markdown Logical (default FALSE), indicating if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). #' @importFrom data.table fcase rbindlist #' @export @@ -14,7 +14,7 @@ lablr <- function(t, markdown = FALSE) { t$conf.level <- t$conf.level*100 # Set some common labels - if(markdown) { + if (markdown) { ci. <- paste0("CI~",t$conf.level,"~[") p. <- "*p* " } else { @@ -23,21 +23,21 @@ lablr <- function(t, markdown = FALSE) { } # Check specific effectsizes for ANOVA... - if(grepl("ANOVA", method)) { - if(grepl("Eta", t$effectsize)) { + if (grepl("ANOVA", method)) { + if (grepl("Eta", t$effectsize)) { es <- "eta" } - if(grepl("Omega", t$effectsize)) { + if (grepl("Omega", t$effectsize)) { es <- "omega" } } # Or t-test's - if(grepl("t-test", method)) { - if(grepl("Cohens", t$effectsize)) { + if (grepl("t-test", method)) { + if (grepl("Cohens", t$effectsize)) { es.a <- "d" es.b <- "Cohen" } - if(grepl("Hedges", t$effectsize)) { + if (grepl("Hedges", t$effectsize)) { es.a <- "g" es.b <- "Hedges" } @@ -46,21 +46,18 @@ lablr <- function(t, markdown = FALSE) { out <- switch( EXPR = method # k_sample - paired samples - , "Fisher's repeated measures ANOVA" = list(d1 = "F", d2 = "$F_{~Fisher}$ ", + , "Fisher's rmANOVA" = list(d1 = "F", d2 = "$F_{~Fisher}$ ", df = paste0("(",t$df, ", ", t$df.error,") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = paste0(es, "2 = "), es2 = paste0("$\\widehat{\\",es,"}_p^2$ = ")) - , "Repeated measures ANOVA with GG correction" = list(d1 = "F", d2 = "$F_{~GG}$ ", + , "Greenhouse-Geisser's rmANOVA" = list(d1 = "F", d2 = "$F_{~GG}$ ", df = paste0("(", format(round(t$df, 1), nsmall = 1), ", ", format(round(t$df.error, 1), nsmall = 1),") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = paste0(es, "2 = "), es2 = paste0("$\\widehat{\\",es,"}_p^2$ = ")) - , "Repeated measures ANOVA with HF correction" = list(d1 = "F", d2 = "$F_{~HF}$ ", + , "Huynh-Feldt's rmANOVA" = list(d1 = "F", d2 = "$F_{~HF}$ ", df = paste0("(", format(round(t$df, 1), nsmall = 1), ", ", format(round(t$df.error, 1), nsmall = 1),") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = paste0(es, "2 = "), es2 = paste0("$\\widehat{\\",es,"}_p^2$ = ")) , "Friedman rank sum test" = list(d1 = "X2", d2 = "$\\chi^2_{~Friedman}$ ", df = paste0("(",t$df,") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = "W = ", es2 = "$W_{~Kendall}$ = ") - , "one-way repeated measures ANOVA for trimmed means" = list(d1 = "F", d2 = "$F_{~trimmed-means}$ ", - df = paste0("(",format(round(t$df, 1), nsmall = 1), ", ", format(round(t$df.error, 1), nsmall = 1),") = ", format(round(t$statistic, 2), nsmall = 2)), - es1 = "xi = ", es2 = "$\\widehat{\\xi}$ = ") # k_sample - independent samples , "Fisher's ANOVA" = list(d1 = "F", d2 = "$F_{~Fisher}$ ", df = paste0("(",t$df, ", ", t$df.error,") = ", format(round(t$statistic, 2), nsmall = 2)), @@ -71,9 +68,6 @@ lablr <- function(t, markdown = FALSE) { , "Kruskal-Wallis rank sum test" = list(d1 = "X2", d2 = "$\\chi^2_{~Kruskal-Wallis}$ ", df = paste0("(",t$df,") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = "epsilon2 = ", es2 = "$\\widehat{\\epsilon}^2$ = ") - , "one-way ANOVA for trimmed means" = list(d1 = "F", d2 = "$F_{~trimmed-means}$ ", - df = paste0("(",format(round(t$df, 1), nsmall = 1), ", ", format(round(t$df.error, 1), nsmall = 1),") = ", format(round(t$statistic, 2), nsmall = 2)), - es1 = "xi = ", es2 = "$\\widehat{\\xi}$ = ") # two_sample - independent sample , " Two Sample t-test" = list(d1 = "t", d2 = "$t_{~Student}$ ", df = paste0("(",t$df,") = ", format(round(t$statistic, 2), nsmall = 2)), @@ -85,9 +79,6 @@ lablr <- function(t, markdown = FALSE) { , "Wilcoxon rank sum test with continuity correction" = list(d1 = "ln(W)", d2 = "$\\ln(W)$ ", df = paste0("= ", format(round(t$statistic, 2), nsmall = 2)), es1 = "r = ", es2 = "$\\widehat{r}_{biserial}$ = ") - , "Two sample Yuen's test on trimmed means" = list(d1 = "t", d2 = "$t_{~Yuen}$ ", - df = paste0("(",format(round(t$df, 1), nsmall = 1), ") = ", format(round(t$statistic, 2), nsmall = 2)), - es1 = "xi = ", es2 = "$\\widehat{\\xi}$ = ") # two_sample - paired samples , "Paired t-test" = list(d1 = "t", d2 = "$t_{~Student}$ ", df = paste0("(",t$df,") = ", format(round(t$statistic, 2), nsmall = 2)), @@ -96,16 +87,10 @@ lablr <- function(t, markdown = FALSE) { , "Wilcoxon signed rank test with continuity correction" = list(d1 = "ln(V)", d2 = "$\\ln(V)$ ", df = paste0("= ", format(round(t$statistic, 2), nsmall = 2)), es1 = "r = ", es2 = "$\\widehat{r}_{biserial}$ = ") - , "Paired Yuen's test on trimmed means" = list(d1 = "t", d2 = "$t_{~Yuen}$ ", - df = paste0("(",format(round(t$df, 1), nsmall = 1), ") = ", format(round(t$statistic, 2), nsmall = 2)), - es1 = "xi = ", es2 = "$\\widehat{\\xi}$ = ") # one_sample , "One Sample t-test" = list(d1 = "t", d2 = "$t_{~Student}$ ", df = paste0("(",t$df,") = ", format(round(t$statistic, 2), nsmall = 2)), es1 = paste0(es.a, " = "), es2 = paste0("$",es.a,"_{~",es.b,"}$ = ")) - , "Bootstrap-t method for one-sample test" = list(d1 = "t ", d2 = "$t_{~Bootstrap}$ ", - df = paste0("= ", format(round(t$statistic, 2), nsmall = 2)), - es1 = "M = ", es2 = "$\\widehat{\\mu}_{~Trimmed}$ = ") # contingency , "McNemar's Chi-squared test" = , "McNemar's Chi-squared test with continuity correction" = list(d1 = "X2", d2 = "$\\chi^2_{~McNemar}$ ", @@ -121,7 +106,6 @@ lablr <- function(t, markdown = FALSE) { es2 = ifelse(t$effectsize == "pearsons_c", "$C_{~Pearson}$ = ", "$V_{~Cramer}$ = ")) # This two has their own printing methods , "Fisher's Exact Test for Count Data" = list(es = if (markdown) "$OR$ = " else "OR = ") - , "Fisher's Exact Test for Count Data without OR" = list() , stop("Method not found") ) @@ -138,7 +122,7 @@ lablr <- function(t, markdown = FALSE) { return(res) } - if(method == "Fisher's Exact Test for Count Data without OR") { + if (method == "Fisher's Exact Test for Count Data without OR") { res <- list( stats = NA_character_, p = (.p <- paste0(p., style.p(t$p.value))), @@ -153,7 +137,7 @@ lablr <- function(t, markdown = FALSE) { } # Select and set tags for later use - if(markdown) { + if (markdown) { d. <- out$d2 es. <- out$es2 } else { diff --git a/R/one_sample.R b/R/one_sample.R index e9fb0b6..cde8fd8 100644 --- a/R/one_sample.R +++ b/R/one_sample.R @@ -6,162 +6,102 @@ #' @param y Character for the response variable Must be present in data #' @param type set `"auto"` (default) for checking the normality for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. #' @param test.value A number indicating the true value of the mean (Default: 0) to be tested. -#' @param trim Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L). #' @param effsize.type options are `"unbiased"` or `"g"` for Hedges g and `"biased"` or `"d"` for Cohen's d as a measure of effect size. rank-biserial correlation is used for non-parametric analysis. #' @param alternative A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. +#' @param character.only whether to treat `x` as a character. Default is FALSE. #' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). #' @param ... Currently ignored. -#' @importFrom effectsize cohens_d hedges_g rank_biserial -#' @importFrom stats t.test wilcox.test #' @export one_sample <- function(data, y, - type = "auto", + type, test.value = 0, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if(is.null(markdown)) FALSE else TRUE, - markdown = NULL, + character.only = FALSE, + markdown, ...) { + y <- deparser(y, character.only) - # Create vectors of variables y_var <- data[[y]] y_var <- y_var[!is.na(y_var)] - # Check normality - if(type == "auto") { - type <- if(is_normal(y_var)) "p" else "np" + if (missing(type)) { + type <- if (is_normal(y_var)) "p" else "np" } - # Parametric statistics - if(type == "p") { - - # Assign an effect size - .f <- if (effsize.type %chin% c("biased", "d")) { - effectsize::cohens_d - } else if (effsize.type %chin% c("unbiased", "g")) { - effectsize::hedges_g - } - - # Student t test - test <- stats::t.test( - x = y_var, - mu = test.value, - alternative = alternative - ) - - es <- .f( - x = y_var, - mu = test.value, - ci = conf.level, - verbose = FALSE - ) - - test <- list( - "y" = y, - "x" = NA_character_, - statistic = test$statistic, - df = as.numeric(test$parameter), - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = alternative, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") + if (type == "p") { + .f <- stats::t.test + if (effsize.type %chin% c("biased", "d")) .f.es <- effectsize::cohens_d + if (effsize.type %chin% c("unbiased", "g")) .f.es <- effectsize::hedges_g + } + if (type == "np") { + .f <- stats::wilcox.test + .f.es <- effectsize::rank_biserial + } - return(test) + arg.f <- alist( + x = y_var, + mu = test.value, + alternative = alternative, + conf.level = conf.level + ) + arg.f.es <- alist( + x = y_var, + mu = test.value, + ci = conf.level, + verbose = FALSE + ) + + .f <- do.call(.f, arg.f) + .f.es <- do.call(.f.es, arg.f.es) + + test <- get_one_expr(.f, .f.es, type, n_obs = length(y_var), y, alternative) + + if (!missing(markdown) && isTRUE(markdown)) { + test <- lablr(test, markdown) } - # Non-parametric statistics - if(type == "np") { - test <- stats::wilcox.test( - x = y_var, - mu = test.value, - alternative = alternative - ) - es <- effectsize::rank_biserial( - x = y_var, - mu = test.value, - ci = conf.level, - verbose = FALSE - ) + class(test) <- c("writR", "list") - test <- list( - "y" = y, - "x" = NA_character_, - statistic = log(test$statistic), - df = NA_real_, - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = alternative, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = length(y_var) - ) + return(test) +} - if(lbl) { - test <- lablr(test, markdown) - } - class(test) <- c("writR", "list") +get_one_expr <- function(.f, .f.es, type, n_obs, ylab, alternative) { - return(test) + if (type == "p") { + statistic <- .f$statistic + df <- as.numeric(.f$parameter) + method <- .f$method + effectsize <- names(.f.es)[1L] + if (effectsize %like% "Hedges") effectsize <- "Hedges' g" + if (effectsize %like% "Cohen") effectsize <- "Cohen's d" } - # Robust statistics - if(type == "r") { - - test <- trimcibt( - x = y_var, - nv = test.value, - tr = trim, - nboot = nboot, - ci = conf.level - ) - - test <- list( - "y" = y, - "x" = NA_character_, - statistic = test$statistic, - df = NA_real_, - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = alternative, - estimate = test$estimate, - conf.level = test[["conf.level"]], - conf.low = test$conf.low, - conf.high = test$conf.high, - effectsize = test$effectsize, - n_obs = length(y_var) - ) - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + if (type == "np") { + statistic <- log(.f$statistic) + df <- NA_real_ + method <- trimws(.f$method) + effectsize <- "rank-biserial correlation" } + + list( + "y" = ylab, + "x" = NA_character_, + statistic = statistic, + df = df, + df.error = NA_real_, + p.value = .f$p.value, + method = method, + alternative = alternative, + estimate = .f.es[[1L]], + conf.level = .f.es[["CI"]], + conf.low = .f.es[["CI_low"]], + conf.high = .f.es[["CI_high"]], + effectsize = effectsize, + n_obs = n_obs + ) } diff --git a/R/pairs_two_sample.R b/R/pairs_two_sample.R index 24d43ee..c304248 100644 --- a/R/pairs_two_sample.R +++ b/R/pairs_two_sample.R @@ -9,13 +9,12 @@ #' @param type Set `"auto"` (default) for checking the normality and homogeneity of variances for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. #' @param paired Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is `FALSE.` #' @param var.equal Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used. -#' @param trim Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L). #' @param effsize.type Options are `"unbiased"` or `"g"` for Hedges g and `"biased"` or `"d"` for Cohen's d as a measure of effect size for parametric test. The rank-biserial correlation is used for non-parametric analysis. #' @param alternative A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. #' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. #' @param ... Currently ignored. #' @importFrom data.table rbindlist #' @importFrom utils combn @@ -23,44 +22,41 @@ pairs_two_sample <- function(data, x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if(is.null(markdown)) FALSE else TRUE, - markdown = NULL, + markdown, + character.only = FALSE, ...) { # Data cleaning - data <- clean_data(data, x = x, y = y, rowid = rowid, paired = paired, wide = FALSE) - rowid <- if(is.null(rowid)) NULL else "rowid" + x <- deparser(x, character.only) + y <- deparser(y, character.only) + rowid <- deparser(rowid, character.only) + + data <- clean_data(data, x, y, rowid, paired, character.only = TRUE) # Levels of 'x' x_var <- data[[x]] x_lvl <- levels(x_var) # Checking assumptions - if (type == "auto") { + if (missing(type) || is.null(type)) { # Create vectors of variables y_var <- data[[y]] # Check normality - normal <- vapply(x_lvl, function(i) { - is_normal(y_var[x_var == i]) - }, NA, USE.NAMES = FALSE) + normal <- vapply(x_lvl, function(i) is_normal(y_var[x_var == i]), NA) type <- if (all(normal)) "check" else "np" # Check homogeneity ov variances - if (!paired && type == "check") { - var.equal <- is_var.equal(y_var, x_var) - } + if (!paired && type == "check") var.equal <- is_var.equal(y_var, x_var) } - .lab <- if(lbl) function(i) lablr(i, markdown = markdown) else `(` + .lab <- if (!missing(markdown) && isTRUE(markdown)) function(i) lablr(i, markdown = markdown) else `(` # Rowbind list-wise every item of... test <- data.table::rbindlist( # lists of ... @@ -83,16 +79,14 @@ pairs_two_sample <- function(data, x, y, data = data[x_var %in% i], x = x, y = y, - rowid = rowid, + rowid = "rowid", type = type, paired = paired, var.equal = var.equal, - trim = trim, - nboot = nboot, effsize.type = effsize.type, alternative = alternative, conf.level = conf.level, - internal = FALSE + character.only = TRUE ) ) ) diff --git a/R/rm_anova.R b/R/rm_anova.R new file mode 100644 index 0000000..e11c8c7 --- /dev/null +++ b/R/rm_anova.R @@ -0,0 +1,150 @@ +#' Repeated measures ANOVA +#' +#' @description +#' `r lifecycle::badge("experimental")` +#' +#' This functions allows you to perform a one way repeated measures ANOVA, +#' making adjustments for violations of sphericity, with no dependencies +#' related to `{afex}` or `{car}` packages. +#' +#' @param data Data frame from which `x` and `y` (and possibly `rowid` if provided) will be searched. +#' @param x Character name for the grouping factor. Must be present in data +#' @param y Character name for the response variable. Must be present in data. +#' @param is_spherical Logical. checks whether to assume that the sphericity assumptions holds or not, if missing (the default) it will be tested using mauchly test with a threshold of 0.05. +#' @param adjust Character. correction for sphericity to be applied, it can be any character of length one starting with 'g' (indicating Greenhouse–Geisser correction) or 'h' (indicating Huynh–Feldt correction). +#' @param effsize.type Options are `"unbiased"` or `"omega"` for partial omega squared and `"biased"` or `"eta"` for partial eta squared as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs. +#' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). +#' @param character.only Logical. checks whether to use the unevaluated expression or its +#' content (when TRUE), asumming is a character vector. Defaults to `FALSE`. +#' @param ... Currently not used. +#' +#' @export + +rm_anova <- function(data, x, y, + is_spherical, + adjust, + effsize.type = "omega", + conf.level = 0.95, + character.only = FALSE, + ...) { + + # data = clean_data(swimmers, "period", "bmi", "subject", paired = TRUE) + # x = "period" + # y = "bmi" + # effsize.type = "omega" + # conf.level = 0.95 + # character.only = TRUE + +# Escape variables ---------------------------------------------------------------------------- + + x <- deparser(x, character.only) + y <- deparser(y, character.only) + +# Clean and transform data -------------------------------------------------------------------- + + wide <- do.call( + what = data.frame, + args = tapply(data[[y]], data[[x]], c) + ) + + wide <- as.matrix(wide) + +# Run and specify model ----------------------------------------------------------------------- + + mlm <- stats::lm(wide ~ 1) + .SSD <- stats::SSD(mlm) + + x_lvl <- unique(x = data[[x]]) + idata <- `names<-`(as.data.frame(x_lvl), x) + + # ANOVA computation + n <- nrow(wide) + k <- ncol(wide) + df1 <- k - 1 + df2 <- df1 * (n - 1) + y_t <- mean(wide) + # Systematic variance due to conditions + y_j <- colMeans(wide) + ss_condition <- n * sum((y_j - y_t)^2) + ms_condition <- ss_condition / df1 + # Subject variance + y_s <- rowMeans(wide) + ss_subjects <- k * sum((y_s - y_t)^2) + ms_subjects <- ss_subjects / (n - 1) + # Unsystematic within-groups variance + s_s <- apply(wide, 2, function(i) i - mean(i)) + ss_within <- sum(s_s ^ 2) + ms_within <- ss_within / (k * (n - 1)) + # Unsystematic variance for the repeated measures + ss_resid <- ss_within - ss_subjects + ms_resid <- ss_resid / df2 + # F-ratio calculation + f_ratio <- ms_condition / ms_resid + + # Effect size calculation + if (effsize.type %in% c("biased", "eta")) { + es <- ss_condition / (ss_resid + ss_condition) + effectsize <- "Eta squared (partial)" + } else if (effsize.type %in% c("unbiased", "omega")) { + es <- (ss_condition - df1 * ms_resid) / (ss_condition + ss_resid + ss_subjects + ms_subjects) + effectsize <- "Omega squared (partial)" + } + # Confidence interval + es <- pmax(0, es) + f <- (es / df1) / ((1 - es) / df2) + ci <- effectsize::F_to_eta2(f, df1, df2, ci = conf.level)[-1] + +# Corrections and diagnostics ----------------------------------------------------------------- + + if (is.nan(f_ratio)) { + warning(y, " is essentially constant", call. = FALSE) + is_spherical <- TRUE + } + + if (missing(is_spherical) || is.null(is_spherical)) { + mch <- stats::mauchly.test(.SSD, idata = idata, X = ~ 1) + is_spherical <- mch$p.value > 0.05 + is_spherical <- is.na(is_spherical) || is_spherical + } + + if (!is_spherical) { + correction <- sphericity(.SSD, idata = idata, X = ~ 1)[1:2] + names(correction) <- c("Greenhouse-Geisser", "Huynh-Feldt") + correction[which(correction > 1)] <- 1 + + if (missing(adjust) || is.null(adjust)) { + method <- if (all(correction < 0.75)) "Greenhouse-Geisser" else "Huynh-Feldt" + } else { + adjust <- tolower(adjust) + if (grepl("^g", adjust)) method <- "Greenhouse-Geisser" + if (grepl("^h", adjust)) method <- "Huynh-Feldt" + } + + # Correction for degrees of freedom in demanded + df1 <- df1 * correction[[method]] + df2 <- df2 * correction[[method]] + } else { + method <- "Fisher" + } + + # p-value calculation + p <- stats::pf(f_ratio, df1, df2, lower.tail = FALSE) + + +# Pretty printing ----------------------------------------------------------------------------- + + res <- list( + statistic = f_ratio, + p.value = p, + method = paste0(method, "'s rmANOVA"), + df_num = df1, + df_den = df2, + estimate = es, + effectsize = effectsize, + conf.level = conf.level, + conf.low = ci[[2L]], + conf.high = ci[[3L]] + ) + + return(res) +} diff --git a/R/testing_bootstrap.R b/R/testing_bootstrap.R index f5674cb..e69de29 100644 --- a/R/testing_bootstrap.R +++ b/R/testing_bootstrap.R @@ -1,253 +0,0 @@ -# # library(data.table) -# -# # file <- "/Users/matiascastilloaguilar/Documents/Proyectos/Clientes/01-Gallardo-Asencio/data/datos.RDS" -# # data <- readRDS(file) -# -# # data <- writR::swimmers -# -# bootr <- function(data, -# y, -# x = NULL, -# conf.int = 0.95, -# conf.type = "bca", # "bca" o "perc" -# nboot = 5000, -# paired = FALSE, -# plot_dist = TRUE, -# p_value = FALSE, -# seed = 12345) { -# -# arg <- match.call() -# -# # Check assumptions on arguments --------------------------- -# -# if(!"data.table" %chin% class(data)) { -# data <- as.data.table(data) -# } -# if(!is.character(arg$y)) { -# y <- deparse(substitute(y)) -# } -# if(!is.null(arg$x) && !is.character(arg$x)) { -# x <- deparse(substitute(x)) -# } -# -# # Custom functions ----------------------------------------- -# -# # Vectorized mean -# .mn <- \(i) { -# j <- i[!is.na(i)] -# sum(j) / length(j) -# } -# # Drop missing values -# .na_drop <- \(i) { -# i[!is.na(i)] -# } -# # Confidence interval -# bootci <- \(conf.type, k, conf.int, nboot) { -# # Bias-corrected and accelerated confidence interval (BCA) -# .bca <- \(theta, conf.int) { -# low <- (1 - conf.int)/2 -# high <- 1 - low -# sims <- length(theta) -# z.inv <- length(theta[theta < .mn(theta)])/sims -# z <- qnorm(z.inv) -# U <- (sims - 1) * (.mn(theta) - theta) -# top <- sum(U^3) -# under <- 6 * (sum(U^2))^{ -# 3/2 -# } -# a <- top/under -# lower.inv <- pnorm(z + (z + qnorm(low))/(1 - a * (z + qnorm(low)))) -# lower <- quantile(theta, lower.inv, names = FALSE) -# upper.inv <- pnorm(z + (z + qnorm(high))/(1 - a * (z + qnorm(high)))) -# upper <- quantile(theta, upper.inv, names = FALSE) -# return(c(lower, upper)) -# } -# -# # Choose confidence interval -# if(isTRUE(conf.type == "bca")) { -# .bca(theta = k, conf.int = conf.int) -# } else if(isTRUE(conf.type == "perc")) { -# quantile(x = k, c((1 - conf.int)/2, (1 + conf.int)/2)) -# } else stop("conf.type must be one of 'bca' or 'perc'") -# } -# # Plot of Bootstrap distribution -# .plot_boot <- \(k, ci, mu, y) { -# oldparams <- par() -# par(mfrow=c(1,2), -# mai = c(0.8, 0.2, 0.3, 0.2), -# oma = c(1, 3, 2, 1)) -# -# # Left plot -# h <- hist(k, plot = F, breaks = 50) -# plot(h, main = "Bootstrap histogram + CI", -# col = fifelse(h$breaks %between% ci, "grey", "grey50"), -# border = FALSE, freq = T, xlab = y) -# lines(x = c(mu, mu), -# y = c(0, h$density[which.max(h$density)])) -# abline(v = 0) -# -# # Right plot -# d <- density(k) -# plot(d, col = "white", xlab = y, ylab = NA, -# main = "Bootstrap kernel density + CI", frame.plot = FALSE) -# polygon(x = d$x, y = d$y, col = "grey30", border = NA) -# polygon(x = c(ci[[1L]], d$x[d$x %between% ci], ci[[2L]]), -# y = c(0, d$y[d$x %between% ci],0), -# col = "grey70", border = NA) -# lines(x = c(mu, mu), y = c(0, d$y[which.min(abs(d$x - mu))])) -# abline(v = 0) -# # Reset params -# suppressWarnings(par(oldparams)) -# } -# -# # Body of the function ------------------------------------- -# -# if(is.null(arg$x)) { -# -# # One sample Bootstrap mean ------------------------------ -# -# # Variable -# y_var = .na_drop(data[[y]]) -# n <- length(y_var) -# -# # Set seed for reproducibility -# set.seed(seed) -# -# # Bootstrap -# k <- vapply(seq_len(nboot), \(i) { -# .mn(y_var[sample.int(n = n, replace = TRUE)]) -# }, 1) -# -# # Global mean -# mu <- .mn(k) -# -# # Confidence interval -# ci <- bootci(conf.type = conf.type, k = k, conf.int = conf.int) -# -# # Plot bootstrap mean -# if(plot_dist) { -# .plot_boot(k, ci, mu, y) -# } -# -# # Output list -# out <- c(mean = mu, lower = ci[[1]], upper = ci[[2]]) -# return(out) -# -# } else { -# -# # Mean difference with Bootstrap resampling -------------- -# -# # Check is same length if paired -# if(isTRUE(paired)) { -# data[j = .SD, by = x, .SDcols = y][j = .N, by = x][i = 1:2, j = if(diff(N) != 0) { -# stop("grups in ", x, " dont have the same length") -# }] -# } -# -# # Theoretical stat -# k_obs <- diff(data[j = lapply(.SD, .mn), by = x, .SDcols = y][i = 1:2][[y]]) -# -# # Get variable -# y_var <- data[[y]] -# x_var <- data[[x]] -# -# # Complete cases -# ind.complete <- complete.cases(y_var, x_var) -# -# # Variables to work with -# y_var <- y_var[ind.complete] -# x_var <- x_var[ind.complete] -# -# # Generate group indices -# lvl <- levels(x = factor(x_var)) -# if(length(lvl) > 2) { -# warning("Variable ", x, " has more than two levels.\nUsing this the first two: ", paste(lvl[1:2], collapse = " & ")) -# } -# ind_1 <- x_var == lvl[[1L]] -# ind_2 <- x_var == lvl[[2L]] -# -# # Generate variables by group -# y_var_a <- y_var[ind_1] -# y_var_b <- y_var[ind_2] -# -# # Set seed for reproducibility -# set.seed(seed) -# -# # Bootstrap parameters -# n_a <- length(y_var_a) -# n_b <- length(y_var_b) -# -# # Bootstrap within matrix (is FASTER) -# -# # Group A -# y_boot_a <- matrix( -# data = y_var_a[sample.int(n = n_a, size = n_a*nboot, replace = TRUE)], -# nrow = n_a, ncol = nboot) -# -# # Group B -# y_boot_b <- matrix( -# data = y_var_b[sample.int(n = n_b, size = n_b*nboot, replace = TRUE)], -# nrow = n_b, ncol = nboot) -# -# # Calculate statistic per each bootstrap sample -# k <- if(isTRUE(paired)) { -# vapply(seq_len(nboot), \(i) { -# .mn(y_boot_a[,i] - y_boot_b[,i]) -# }, 1) -# } else if (isFALSE(paired)) { -# vapply(seq_len(nboot), \(i) { -# .mn(y_boot_a[,i]) - .mn(y_boot_b[,i]) -# }, 1) -# } else stop("Paired must be one of TRUE or FALSE") -# -# # Global mean -# mu <- .mn(k) -# -# # Confidence interval -# ci <- bootci(conf.type = conf.type, k = k, conf.int = conf.int) -# -# # Plot bootstrap mean -# if(plot_dist) { -# .plot_boot(k, ci, mu, y) -# } -# -# # P-value in favour of null -# if(isTRUE(p_value)) { -# n <- length(y_var) -# -# # Resample from original sample assuming the null to be true -# # without assuming any grouping structure -# y_boot <- matrix( -# data = y_var[sample.int(n = n, size = n*nboot, replace = TRUE)], -# nrow = n, -# ncol = nboot) -# -# # Calculate statistic fitting original grouping structure on -# # bootstrap samples. -# # -# # This will show us how likely the differences are assuming -# # the null distribution to be true. -# k_boot = vapply(seq_len(nboot), \(i) { -# .mn(y_boot[ind_2,i]) - .mn(y_boot[ind_1,i]) -# }, 1) -# -# # Calculate the probability associated to the null distribution -# p <- .mn(abs(k_boot) >= abs(k_obs)) -# } else if(isFALSE(p_value)) { -# p <- NULL -# } else stop("p_value needs to be one of TRUE or FALSE") -# -# # Output list -# out <- c(mean_diff = mu, lower = ci[[1L]], upper = ci[[2L]], p_value = p) -# return(out) -# } -# } -# -# # bootr(data, PS, nboot = 1e4) -# # -# # bootr(data, PS, Sexo, p_value = T, nboot = 1e5) -# # -# # writR::clean_data(data, Periodo, PS, rowid = ID, paired = TRUE) |> -# # bootr(PS, Periodo, p_value = T, paired = TRUE, , nboot = 1e5) -# # -# # bootr(data[Periodo == "POST1"], FFMI, cat_Edad, p_value = T) diff --git a/R/two_sample.R b/R/two_sample.R index 07bc993..ac9c721 100644 --- a/R/two_sample.R +++ b/R/two_sample.R @@ -9,40 +9,29 @@ #' @param type Set `"auto"` (default) for checking the normality and homogeneity of variances for test selection. Other options are `"p"` for parametric, `"np"` for non-parametric and `"r"` for robust tests. #' @param paired Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is `FALSE.` #' @param var.equal Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used. -#' @param trim Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L). #' @param effsize.type Options are `"unbiased"` or `"g"` for Hedges g and `"biased"` or `"d"` for Cohen's d as a measure of effect size. rank-biserial correlation is used for non-parametric analysis. #' @param alternative A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less". #' @param conf.level Confidence/Credible Interval (CI) level. Default to 0.95 (95%). -#' @param lbl Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors. -#' @param markdown Logical (default FALSE). If `lbl` is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default). -#' @param internal Logical to whether this function is being used inside of internal functions. +#' @param character.only whether to treat `x` as a character. Default is FALSE. #' @param ... Currently ignored. -#' @importFrom data.table %chin% -#' @importFrom effectsize cohens_d hedges_g rank_biserial -#' @importFrom stats t.test na.omit wilcox.test -#' @importFrom WRS2 yuend dep.effect yuen yuen.effect.ci #' @export two_sample <- function(data, x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if(is.null(markdown)) FALSE else TRUE, - markdown = NULL, - internal = FALSE, + character.only = FALSE, ...) { - # Avoid unnecesary computation if called internally inside another function - if (!internal) { - data <- clean_data(data, x = x, y = y, rowid = rowid, paired = paired, wide = FALSE) - } + x <- deparser(x, character.only) + y <- deparser(y, character.only) + rowid <- deparser(rowid, character.only) + + data <- clean_data(data, x, y, rowid, paired, character.only = TRUE) # Create vectors of variables y_var <- data[[y]] @@ -52,200 +41,98 @@ two_sample <- function(data, x, y, x_lvl <- levels(x_var) # Check if x has more than two levels - !internal && length(x_lvl) > 2 && stop(x, " has more than two levels. Try witk k_sample()", call. = F) + if (length(x_lvl) > 2) stop(x, " has more than two levels. Try witk k_sample()", call. = F) # Create vectors of 'y' for each group y_var_1 <- y_var[x_var == x_lvl[[1L]]] y_var_2 <- y_var[x_var == x_lvl[[2L]]] + # Number of observations + n_obs <- if (paired) length(y_var_1) else length(y_var) + # Check normality - if (type == "auto") { - type <- if (is_normal(y_var_1) && is_normal(y_var_2)) "check" else "np" + if (missing(type)) { + normal <- is_normal(y_var_1) && is_normal(y_var_2) + type <- if (normal) "check" else "np" } # Parametric statistics if (type %chin% c("p", "check")) { - - # Assign an effect size - .f <- if (effsize.type %chin% c("biased", "d")) { - effectsize::cohens_d - } else if (effsize.type %chin% c("unbiased", "g")) { - effectsize::hedges_g - } - - # Check homogeneity of variances - if (!paired && type == "check") { - var.equal <- is_var.equal(y_var, x_var) - } - - # Student t-test - test <- stats::t.test( - x = y_var_1, - y = y_var_2, - var.equal = var.equal, - paired = paired, - alternative = alternative, - conf.level = conf.level - ) - - es <- .f( - x = y_var_1, - y = y_var_2, - pooled_sd = var.equal, - paired = paired, - ci = conf.level, - verbose = FALSE - ) - - test <- list( - "y" = y, - "x" = x, - statistic = test$statistic, - df = as.numeric(test$parameter), - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = alternative, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = names(es)[1L], - n_obs = if(paired) length(y_var_1) else length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + .f <- stats::t.test + if (effsize.type %chin% c("biased", "d")) .f.es <- effectsize::cohens_d + if (effsize.type %chin% c("unbiased", "g")) .f.es <- effectsize::hedges_g + if (!paired && type == "check") var.equal <- is_var.equal(y_var, x_var) } - # non-parametric statistics + + # Non-parametric statistics if (type == "np") { + .f <- stats::wilcox.test + .f.es <- effectsize::rank_biserial + } - # Wilcoxon test - test <- stats::wilcox.test( - x = y_var_1, - y = y_var_2, - alternative = alternative, - paired = paired - ) - - es <- effectsize::rank_biserial( - x = y_var_1, - y = y_var_2, - ci = conf.level, - paired = paired, - verbose = FALSE - ) - - test <- list( - "y" = y, - "x" = x, - statistic = log(test$statistic), - df = NA_real_, - df.error = NA_real_, - p.value = test$p.value, - method = test$method, - alternative = if(is.null(test$alternative)) NA_character_ else test$alternative, - estimate = es[[1L]], - conf.level = es[["CI"]], - conf.low = es[["CI_low"]], - conf.high = es[["CI_high"]], - effectsize = "rank-biserial correlation", - n_obs = if(paired) length(y_var_1) else length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) + arg.f <- alist( + x = y_var_1, + y = y_var_2, + var.equal = var.equal, + paired = paired, + alternative = alternative, + conf.level = conf.level + ) + + arg.f.es <- alist( + x = y_var_1, + y = y_var_2, + pooled_sd = var.equal, + paired = paired, + ci = conf.level, + verbose = FALSE + ) + + .f <- do.call(.f, arg.f) + .f.es <- do.call(.f.es, arg.f.es) + + res <- get_two_expr(.f, .f.es, type, n_obs, x, y) + + return(res) +} + +get_two_expr <- function(.f, .f.es, type, n_obs, xlab, ylab) { + + if (is.null(.f$alternative)) .f$alternative <- NA_character_ + + if (type %in% c("p", "check")) { + statistic <- .f$statistic + df <- as.numeric(.f$parameter) + method <- .f$method + effectsize <- names(.f.es)[1L] + if (effectsize %like% "Hedges") effectsize <- "Hedges' g" + if (effectsize %like% "Cohen") effectsize <- "Cohen's d" } - # Robust statistics - if (type == "r") { - if (paired) { - - # Yuen test - paired samples - test <- WRS2::yuend( - x = y_var_1, - y = y_var_2, - tr = trim - ) - - es <- WRS2::dep.effect( - x = y_var_1, - y = y_var_2, - nboot = nboot, - tr = trim - )["AKP", ] - - test <- list( - "y" = y, - "x" = x, - statistic = test$test, - df = as.numeric(test$df), - df.error = NA_real_, - p.value = test$p.value, - method = "Paired Yuen's test on trimmed means", - alternative = "two.sided", - estimate = es[["Est"]], - conf.level = 0.95, - conf.low = es[["ci.low"]], - conf.high = es[["ci.up"]], - effectsize = "Algina-Keselman-Penfield robust standardized difference", - n_obs = length(y_var_1) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) - } else { - - # Yuen test - independent samples - test <- WRS2::yuen( - formula = y_var ~ x_var, - tr = trim - ) - - es <- WRS2::yuen.effect.ci( - formula = y_var ~ x_var, - tr = trim, - nboot = nboot - ) - - test <- list( - "y" = y, - "x" = x, - statistic = test$test, - df = as.numeric(test$df), - df.error = NA_real_, - p.value = test$p.value, - method = "Two sample Yuen's test on trimmed means", - alternative = "two.sided", - estimate = es[["effsize"]], - conf.level = 0.95, - conf.low = es[["CI"]][[1L]], - conf.high = es[["CI"]][[2L]], - effectsize = "Algina-Keselman-Penfield robust standardized difference", - n_obs = length(y_var) - ) - - if(lbl) { - test <- lablr(test, markdown) - } - - class(test) <- c("writR", "list") - - return(test) - } + + if (type == "np") { + statistic <- log(.f$statistic) + df <- NA_real_ + method <- trimws(.f$method) + effectsize <- "rank-biserial correlation" } + + res <- list( + "y" = ylab, + "x" = xlab, + statistic = statistic, + df = df, + df.error = NA_real_, + p.value = .f$p.value, + method = method, + alternative = .f$alternative, + estimate = .f.es[[1L]], + conf.level = .f.es[["CI"]], + conf.low = .f.es[["CI_low"]], + conf.high = .f.es[["CI_high"]], + effectsize = effectsize, + n_obs = n_obs + ) + + class(res) <- c("writR", "writR.two", class(res)) + return(res) } diff --git a/R/miscellaneous.R b/R/utils.R similarity index 56% rename from R/miscellaneous.R rename to R/utils.R index 31271b8..01e3590 100644 --- a/R/miscellaneous.R +++ b/R/utils.R @@ -1,60 +1,16 @@ -#' @title one-sample t-test on trimmed means -#' @description A wrapper around statsExpressions original function trimcibt -#' @name trimcibt -#' -#' @param x Numeric vector with or without NA's. -#' @param nv Test value, default to 0. -#' @param tr Trim for the mean, default to 0.2. -#' @param nboot Number of bootstrap samples for computing confidence interval for the effect size (default: 100L). -#' @param ci Confidence/Credible Interval (CI) level, default to 0.95 (95%). -#' @param ... Currently ignored. -#' @importFrom WRS2 trimse - -trimcibt <- function(x, - nv = 0, - tr = 0.2, - nboot = 100L, - ci = 0.95, - ...) { - - x <- x[!is.na(x)] - mu <- mean(x, tr) - sigma <- WRS2::trimse(x, tr) - test <- (mu - nv) / sigma - data <- matrix(data = sample(x, size = length(x) * nboot, replace = TRUE), nrow = nboot) - mu - tval <- sort( - x = abs( - x = apply(data, 1, mean, tr) / apply(data, 1, WRS2::trimse, tr) - ) - ) - icrit <- round(ci * nboot) - list( - statistic = test, - p.value = sum(abs(test) <= tval)/nboot, - method = "Bootstrap-t method for one-sample test", - estimate = mu, - conf.low = mu - tval[icrit] * sigma, - conf.high = mu + tval[icrit] * sigma, - conf.level = ci, - effectsize = "Trimmed mean" - ) -} - #' @title Style p value #' @name style.p #' @description Style p value for pretty printing #' #' @param p Vector of p value(s) #' @param k Number of decimal places -#' @return Character vector of length one. -#' @importFrom data.table fcase +#' +#' @return Character vector of the same length of the input. +#' #' @export style.p <- function(p, k = 3) { - fcase( - p < 0.001, "< 0.001", - p >= 0.001, paste0("= ", round(p, digits = k)) - ) + p <- format.default(p, digits = 1, nsmall = 3, drop0trailing = FALSE, trim = TRUE) } #' @title Normality check @@ -63,37 +19,40 @@ style.p <- function(p, k = 3) { #' @param x Numeric vector with or without NA's. #' @param alpha Threshold for rejection of the null hipotesis (of normality). #' @param test A function that returns a single numeric value (p.value) to be tested. -#' @importFrom nortest lillie.test -#' @importFrom stats shapiro.test +#' #' @export -is_normal <- function(x, alpha = 0.05, test = NULL) { - if (is.null(test)) { - if (length(x) <= 50) { - test <- function(i) stats::shapiro.test(i)$p.value - } else { - test <- function(i) nortest::lillie.test(i)$p.value - } +is_normal <- function(x, alpha = 0.05, test) { + if (missing(test)) { + test <- function(i) stats::shapiro.test(i)$p.value } x <- x[!is.na(x)] test(x) > alpha } -#' @title Levene's Test +#' @title Test for Homogeneity of Variances #' @name is_var.equal #' #' @param y Numeric vector (response). #' @param x Grouping factor. -#' @param alpha Threshold for null hipotesis (of normality) rejection. -#' @param center A function to compute the center of each group; mean gives the original Levene's test; the default, median, provides a more robust test. -#' @importFrom stats complete.cases anova lm median +#' @param alpha Threshold for null hipotesis (of normality) rejection. Defaults to 0.05 +#' @param test A character indicating whether to use Levene's test ("levene", the default) or Fligner's test ("fligner"). The first letter will be used. +#' @param center A function to compute the center of each group (valid only for Levene's Test). The mean gives the original Levene's test; the default, median, provides a more robust test. +#' +#' @return A logical of length one, indicating if the p.value of the test was greater than the alpha (which defaults to 0.05). +#' #' @export -is_var.equal <- function(y, x, alpha = 0.05, center = stats::median) { - valid <- stats::complete.cases(y, x) - meds <- tapply(y[valid], x[valid], center) - resp <- abs(y - meds[x]) - stats::anova(stats::lm(resp ~ x))[["Pr(>F)"]][[1]] > alpha +is_var.equal <- function(y, x, alpha = 0.05, test = "levene", center = stats::median) { + test <- tolower(test) + if (grepl("^l", test)) { + valid <- stats::complete.cases(y, x) + meds <- tapply(y[valid], x[valid], center) + resp <- abs(y - meds[x]) + stats::anova(stats::lm(resp ~ x))[["Pr(>F)"]][[1]] > alpha + } else if (grepl("^f", test)) { + stats::fligner.test(y, x)$p.value > alpha + } } #' @title Mauchly's Test of Sphericity @@ -101,7 +60,6 @@ is_var.equal <- function(y, x, alpha = 0.05, center = stats::median) { #' @description Internal function inside `sphericity_check`. Return a p-value from Mauchly's Test for within-subjects factors. #' #' @param model A repeated measures ANOVA model using afex. -#' @importFrom stats pchisq mauchly <- function(model) { .mauch <- function(SSPE, P, error.df) { @@ -121,11 +79,12 @@ mauchly <- function(model) { w2 <- (pp + 2) * (pp - 1) * (pp - 2) * (2 * pp^3 + 6 * pp^2 + 3 * p + 2)/(288 * (n * pp * rho)^2) z <- -n * rho * logW f <- pp * (pp + 1)/2 - 1 - Pr1 <- pchisq(z, f, lower.tail = FALSE) - Pr2 <- pchisq(z, f + 4, lower.tail = FALSE) + Pr1 <- stats::pchisq(z, f, lower.tail = FALSE) + Pr2 <- stats::pchisq(z, f + 4, lower.tail = FALSE) pval <- Pr1 + w2 * (Pr2 - Pr1) return(pval) } + .m <- model$Anova pval <- vapply(.m$terms, function(i) { .mauch( @@ -191,8 +150,9 @@ HF <- function(model, gg = NULL) { return(eps) } -#' @title Suggested sphericity correction for repeated measures ANOVA +#' Suggested sphericity correction for repeated measures ANOVA #' @name sphericity_check +#' #' @description Internal function inside `k_sample`. Return the Spherecity correction suggested based on Mauchly test in one-way repeated measures designs #' #' @param model A repeated measures ANOVA model using Afex. @@ -200,18 +160,28 @@ HF <- function(model, gg = NULL) { sphericity_check <- function(model) { .m <- model$Anova - if (.m$singular || all(mauchly(model) > 0.05)) "none" else { + if (any(.m$singular) || all(mauchly(model) > 0.05)) "none" else { is_hf <- all((gg.eps <- GG(model)) > 0.75, na.rm = TRUE) is_hf_too <- all(HF(model, gg.eps) > 0.75, na.rm = TRUE) if (is_hf || is_hf_too) "HF" else "GG" } } -#' @title Print method for writR objects -#' @name print.writR -#' @param x A writR object from any of one_sample, two_sample, k_sample, autest or contingency. -#' @param ... Currently ignored -#' @importFrom data.table as.data.table +#' Mauchly's Test of Sphericity +#' +#' Low-level function behind `mauchly.test()` +#' +#' @param object object of class SSD or mlm +#' @param Sigma matrix to be proportional to. +#' @param T transformation matrix. By default computed from M and X. +#' @param M formula or matrix describing the outer projection. +#' @param X formula or matrix describing the inner projection. +#' @param idata data frame describing intra-block design. +#' +#' @export + +sphericity <- utils::getFromNamespace("sphericity", "stats") + #' @export print.writR <- function(x, ...) { @@ -221,12 +191,6 @@ print.writR <- function(x, ...) { print(x) } -#' @title as.data.frame method for writR objects -#' @name as.data.frame.writR -#' @param x A writR object from any of one_sample, two_sample, k_sample, autest or contingency. -#' @param row.names Exported from other methods. -#' @param optional Exported from other methods. -#' @param ... Currently ignored #' @export as.data.frame.writR <- function(x, row.names = NULL, optional = FALSE, ...) { diff --git a/R/writR-package.R b/R/writR-package.R index 425b3c1..c5e5460 100644 --- a/R/writR-package.R +++ b/R/writR-package.R @@ -3,5 +3,6 @@ ## usethis namespace: start #' @importFrom lifecycle deprecated +#' @importFrom data.table as.data.table dcast := melt .N .SD %chin% %like% ## usethis namespace: end NULL diff --git a/README.Rmd b/README.Rmd index f90eccf..0f3e9d8 100644 --- a/README.Rmd +++ b/README.Rmd @@ -21,10 +21,10 @@ knitr::opts_chunk$set( ================================================================== -[![R-CMD-check](https://github.com/matcasti/writR/workflows/R-CMD-check/badge.svg)](https://github.com/matcasti/writR/actions) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4641761.svg)](https://doi.org/10.5281/zenodo.4603838) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/writR)](https://CRAN.R-project.org/package=writR) +[![R-CMD-check](https://github.com/matcasti/writR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/matcasti/writR/actions/workflows/R-CMD-check.yaml) An R package for automated inferential testing (for group differences) and reporting based on parametric assumptions, which are tested automatically for test selection. @@ -93,7 +93,7 @@ devtools::install_github("matcasti/writR") # Automated testing -By default, `autest()`, checks automatically the assumptions of the data based on the parameters supplied for test selection. +By default, `k_sample()`, checks automatically the assumptions of the data based on the parameters supplied for test selection. ```{r echo=TRUE, message=FALSE, warning=FALSE, error=FALSE} library(writR) # Load the writR package @@ -104,11 +104,11 @@ diets <- data.frame( , rnorm(n = 100/2, mean = 66, sd = 7) ) # Control , diet = gl(n = 2, k = 100/2, labels = c('Treatment', 'Control') ) ) -result <- autest( +result <- k_sample( data = diets, x = "diet", # independent variable y = "weight", # dependent variable - type = 'auto', # default + type = NULL, # default, checks assumptions then choose appropiate test ) print(result) # Detailed statistical results @@ -116,7 +116,7 @@ print(result) # Detailed statistical results ## Inline results in APA style -The core function: `autest()` by default return a list of length 14 with detailed statistics, if inline results are desired, the `lablr()` function can be used. +The core function: `k_sample()` by default return a list of length 13 with detailed statistics, if inline results are desired, the `lablr()` function can be used. An example using same data as before: @@ -128,7 +128,7 @@ inline <- lablr(result) translates into this: -> The analysis of the effects of the treatment, shows that experimental group had greater weight than control, t(98) = 2.51, p = 0.014, g = 0.50, CI95 [0.10, 0.89]. +> The analysis of the effects of the treatment, shows that experimental group had greater weight than control, F(1, 98) = 6.29, p 0.014, omega2 = 0.05, CI95 [0.00, 1.00]. It also let you perform centrality and dispersion statistics for inline results by using the `cent_disp()` function. The next example illustrates its usage: @@ -136,11 +136,10 @@ It also let you perform centrality and dispersion statistics for inline results data <- datasets::ToothGrowth result <- with(data, tapply( - X = len, - INDEX = list(supp, dose), - FUN = function(var) { - cent_disp(x = var, markdown = TRUE) - })) + len, ## Variable to describe + list(supp, dose), ## Variables to aggregate on + cent_disp ## cent_disp() function + )) as.data.frame(result) ``` @@ -182,10 +181,10 @@ For paired designs you need to set `paired = TRUE`, and then, based on the numbe When `type = 'auto'` the next assumptions will be checked for \> 2 paired samples: -| Assumption checked | How is tested | If met | If not | -|--------------------|--------------------------------------------------------------------------|-------------------------------------------|-------------------------------------------------------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Sphericity check. | Friedman rank sum test | -| Sphericity | `sphericity_check(model)` | One-way repeated measures ANOVA (rmANOVA) | Greenhouse-Geisser (GG) or Huynh-Feldt (HF) correction is applied | +| Assumption checked | How is tested | If met | If not | +|--------------------|----------------------------|-------------------------------------------|-------------------------------------------------------------------| +| Normality | `stats::shapiro.test` | Sphericity check. | Friedman rank sum test | +| Sphericity | `sphericity_check(model)` | One-way repeated measures ANOVA (rmANOVA) | Greenhouse-Geisser (GG) or Huynh-Feldt (HF) correction is applied | ```{r message=FALSE, warning=FALSE} n <- 40 @@ -197,20 +196,19 @@ cancer <- data.frame( , rnorm(n = n, mean = 96, sd = 5) )) # Time-2 , period = gl(n = 3, k = n, labels = c('Basal', 'Time-1', 'Time-2') ) ) -result <- autest( +result <- k_sample( data = cancer , x = "period" , y = "cells" , rowid = "id" , paired = TRUE - , posthoc = TRUE # set to TRUE for pairwise comparisons ) # Access the whole results print(result) # For inline resutls or statistical reports -lablr(result$test) +lablr(result) ``` However, you can specify your own parameters for the selection of the test: @@ -227,14 +225,14 @@ However, you can specify your own parameters for the selection of the test: Similar as before, if `type = 'auto'` assumptions will be checked for 2 paired samples: -| Assumption checked | How is tested | If met | If not | -|--------------------|--------------------------------------------------------------------------|------------------|---------------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Student's t-test | Wilcoxon signed-rank test | +| Assumption checked | How is tested | If met | If not | +|--------------------|-----------------------|------------------|---------------------------| +| Normality | `stats::shapiro.test` | Student's t-test | Wilcoxon signed-rank test | ```{r message=FALSE, warning=FALSE, paged.print=FALSE} cancer_two <- cancer[cancer$period %in% c('Time-1','Time-2'),] -result <- autest( +result <- k_sample( data = cancer_two , x = "period" , y = "cells" @@ -264,10 +262,10 @@ For independent samples you need to set `paired = FALSE`, and then, based on the When `type = 'auto'` the next assumptions will be checked for \> 2 independent samples: -| Assumption checked | How is tested | If met | If not | -|--------------------------|--------------------------------------------------------------------------|---------------------------------|----------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Homogeneity of variances check. | Kruskal-Wallis ANOVA | -| Homogeneity of variances | Levene's test on medians with `is_var.equal()` | Fisher's ANOVA | Welch's ANOVA | +| Assumption checked | How is tested | If met | If not | +|--------------------------|------------------------------------------------|---------------------------------|----------------------| +| Normality | `stats::shapiro.test` | Homogeneity of variances check. | Kruskal-Wallis ANOVA | +| Homogeneity of variances | Levene's test on medians with `is_var.equal()` | Fisher's ANOVA | Welch's ANOVA | ```{r message=FALSE, warning=FALSE, paged.print=FALSE} set.seed(123) @@ -277,7 +275,7 @@ cancer_unpaired <- data.frame( , rnorm(n = n, mean = 90, sd = 15) )) # Drug B , group = gl(n = 3, k = n, labels = c('Control', 'Drug A', 'Drug B') ) ) -result <- autest( +result <- k_sample( data = cancer_unpaired , x = "group" , y = "cells" @@ -289,7 +287,7 @@ result <- autest( print(result) # For inline results -lablr(result$test) +lablr(result) ``` However, you can specify your own parameters for the selection of the test: @@ -305,13 +303,13 @@ However, you can specify your own parameters for the selection of the test: Just like above, if `type = 'auto'` assumptions will be checked for 2 independent samples: -| Assumption checked | How is tested | If met | If not | -|--------------------------|--------------------------------------------------------------------------|---------------------------------|-----------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Homogeneity of variances check. | Mann-Whitney *U* test | -| Homogeneity of variances | Levene's test on medians with `is_var.equal()` | Student's t-test | Welch's t-test | +| Assumption checked | How is tested | If met | If not | +|--------------------------|------------------------------------------------|---------------------------------|-----------------------| +| Normality | `stats::shapiro.test` | Homogeneity of variances check. | Mann-Whitney *U* test | +| Homogeneity of variances | Levene's test on medians with `is_var.equal()` | Student's t-test | Welch's t-test | ```{r message=FALSE, warning=FALSE} -result <- autest( +result <- k_sample( data = cancer_unpaired[cancer_unpaired$group %in% c('Drug A','Drug B'),] , x = "group" , y = "cells" @@ -378,7 +376,7 @@ print(result) # And inline results for reporting purposes inline <- result[j = lablr(.SD), keyby = x] -print(inline) +print(inline[,c("x", "full")]) ``` For inline results with previous data we would do something like this: @@ -387,7 +385,7 @@ For inline results with previous data we would do something like this: Which will translate into this after evaluation in R Markdown: -> In order to analyze the effect of gender on subjects' scores in each of the evaluation periods, we performed an analysis of variance (ANOVA) with between- and within-subjects factors. From the analyses, we find that gender has a large effect ( omega2 = 0.65, CI95 [0.51, 0.74] ) on scores when adjusting for each of the time periods, F(1, 68) = 130.74, p < 0.001. In a similar way we find a significant interaction between evaluative time and gender ( F(2, 136) = 42.88, p < 0.001 ), indicating unequal responses between males and females over time, omega2 = 0.29, CI95 [0.17, 0.40], however, time alone is not able to explain statistically significantly the variance in scores, F(2, 136) = 0.24, p = 0.79, omega2 = -0.01, CI95 [0.00, 0.00]. +> In order to analyze the effect of gender on subjects' scores in each of the evaluation periods, we performed an analysis of variance (ANOVA) with between- and within-subjects factors. From the analyses, we find that gender has a large effect (omega2 = 0.65, CI95 [0.54, 1.00]) on scores when adjusting for each of the time periods, F(1, 68) = 130.74, p < 0.001. In a similar way we find a significant interaction between evaluative time and gender ( F(2, 136) = 42.88, p < 0.001 ), indicating unequal responses between males and females over time, omega2 = 0.29, CI95 [0.17, 0.40], however, time alone is not able to explain statistically significantly the variance in scores, F(2, 136) = 0.24, p = 0.79, omega2 = -0.01, CI95 [0.00, 0.00]. When you have more than 1 factor (between or within subjects) you have to specify them as a character vector: `between = c('factor1', 'factor2' ...)`, and the same for `within = c('factor1', 'factor2' ...)`. diff --git a/README.md b/README.md index 67c80ff..ec9d7fa 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,12 @@ -[![R-CMD-check](https://github.com/matcasti/writR/workflows/R-CMD-check/badge.svg)](https://github.com/matcasti/writR/actions) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4641761.svg)](https://doi.org/10.5281/zenodo.4603838) [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) [![CRAN status](https://www.r-pkg.org/badges/version/writR)](https://CRAN.R-project.org/package=writR) +[![R-CMD-check](https://github.com/matcasti/writR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/matcasti/writR/actions/workflows/R-CMD-check.yaml) An R package for automated inferential testing (for group differences) @@ -82,8 +82,8 @@ devtools::install_github("matcasti/writR") # Automated testing -By default, `autest()`, checks automatically the assumptions of the data -based on the parameters supplied for test selection. +By default, `k_sample()`, checks automatically the assumptions of the +data based on the parameters supplied for test selection. ``` r library(writR) # Load the writR package @@ -94,24 +94,26 @@ diets <- data.frame( , rnorm(n = 100/2, mean = 66, sd = 7) ) # Control , diet = gl(n = 2, k = 100/2, labels = c('Treatment', 'Control') ) ) -result <- autest( +result <- k_sample( data = diets, x = "diet", # independent variable y = "weight", # dependent variable - type = 'auto', # default + type = NULL, # default, checks assumptions then choose appropiate test ) print(result) # Detailed statistical results -#> y x statistic df p.value method alternative estimate -#> 1: weight diet 2.508551 98 0.01376398 Two Sample t-test two.sided 0.4978591 -#> conf.level conf.low conf.high effectsize n_obs -#> 1: 0.95 0.101465 0.8917915 Hedges_g 100 +#> y x statistic df df.error p.value method estimate +#> +#> 1: weight diet 6.292829 1 98 0.01376398 Fisher's ANOVA 0.05026771 +#> conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.95 0.003432758 1 Omega-squared (partial) 100 ``` ## Inline results in APA style -The core function: `autest()` by default return a list of length 14 with -detailed statistics, if inline results are desired, the `lablr()` +The core function: `k_sample()` by default return a list of length 13 +with detailed statistics, if inline results are desired, the `lablr()` function can be used. An example using same data as before: @@ -122,8 +124,8 @@ An example using same data as before: translates into this: > The analysis of the effects of the treatment, shows that experimental -> group had greater weight than control, t(98) = 2.51, p = 0.014, g = -> 0.50, CI95 \[0.10, 0.89\]. +> group had greater weight than control, F(1, 98) = 6.29, p 0.014, +> omega2 = 0.05, CI95 \[0.00, 1.00\]. It also let you perform centrality and dispersion statistics for inline results by using the `cent_disp()` function. The next example @@ -133,11 +135,10 @@ illustrates its usage: data <- datasets::ToothGrowth result <- with(data, tapply( - X = len, - INDEX = list(supp, dose), - FUN = function(var) { - cent_disp(x = var, markdown = TRUE) - })) + len, ## Variable to describe + list(supp, dose), ## Variables to aggregate on + cent_disp ## cent_disp() function + )) as.data.frame(result) #> 0.5 1 2 @@ -199,10 +200,10 @@ will run depending on the parameters stablished. When `type = 'auto'` the next assumptions will be checked for \> 2 paired samples: -| Assumption checked | How is tested | If met | If not | -|--------------------|--------------------------------------------------------------------------|-------------------------------------------|-------------------------------------------------------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Sphericity check. | Friedman rank sum test | -| Sphericity | `sphericity_check(model)` | One-way repeated measures ANOVA (rmANOVA) | Greenhouse-Geisser (GG) or Huynh-Feldt (HF) correction is applied | +| Assumption checked | How is tested | If met | If not | +|--------------------|---------------------------|-------------------------------------------|-------------------------------------------------------------------| +| Normality | `stats::shapiro.test` | Sphericity check. | Friedman rank sum test | +| Sphericity | `sphericity_check(model)` | One-way repeated measures ANOVA (rmANOVA) | Greenhouse-Geisser (GG) or Huynh-Feldt (HF) correction is applied | ``` r n <- 40 @@ -214,37 +215,31 @@ cancer <- data.frame( , rnorm(n = n, mean = 96, sd = 5) )) # Time-2 , period = gl(n = 3, k = n, labels = c('Basal', 'Time-1', 'Time-2') ) ) -result <- autest( +result <- k_sample( data = cancer , x = "period" , y = "cells" , rowid = "id" , paired = TRUE - , posthoc = TRUE # set to TRUE for pairwise comparisons ) # Access the whole results print(result) -#> $test -#> y x statistic df df.error p.value -#> 1: cells period 2.231395 1.77965 69.40635 0.1206689 -#> method estimate conf.level conf.low -#> 1: Repeated measures ANOVA with HF correction 0.01998957 0.95 0 -#> conf.high effectsize n_obs -#> 1: 1 Omega2_partial 40 -#> -#> $pwc -#> group1 group2 statistic p method p.adjust.method -#> 1: Time-1 Basal -1.466883 0.5560343 Games-Howell Test none -#> 2: Time-2 Basal -2.933847 0.1061919 Games-Howell Test none -#> 3: Time-2 Time-1 -1.616262 0.4922794 Games-Howell Test none +#> y x statistic df df.error p.value method +#> +#> 1: cells period 2.231395 1.77965 69.40635 0.1206689 Huynh-Feldt's rmANOVA +#> estimate conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.01998957 0.95 0 1 Omega-squared (partial) 40 # For inline resutls or statistical reports -lablr(result$test) -#> stats p es ci -#> 1: F(1.8, 69.4) = 2.23 p = 0.121 omega2 = 0.02 CI95 [0.00, 1.00] -#> full -#> 1: F(1.8, 69.4) = 2.23, p = 0.121, omega2 = 0.02, CI95 [0.00, 1.00] +lablr(result) +#> stats p es ci +#> +#> 1: F(1.8, 69.4) = 2.23 p 0.121 omega2 = 0.02 CI95 [0.00, 1.00] +#> full +#> +#> 1: F(1.8, 69.4) = 2.23, p 0.121, omega2 = 0.02, CI95 [0.00, 1.00] ``` However, you can specify your own parameters for the selection of the @@ -263,14 +258,14 @@ test: Similar as before, if `type = 'auto'` assumptions will be checked for 2 paired samples: -| Assumption checked | How is tested | If met | If not | -|--------------------|--------------------------------------------------------------------------|------------------|---------------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Student’s t-test | Wilcoxon signed-rank test | +| Assumption checked | How is tested | If met | If not | +|--------------------|-----------------------|------------------|---------------------------| +| Normality | `stats::shapiro.test` | Student’s t-test | Wilcoxon signed-rank test | ``` r cancer_two <- cancer[cancer$period %in% c('Time-1','Time-2'),] -result <- autest( +result <- k_sample( data = cancer_two , x = "period" , y = "cells" @@ -279,17 +274,21 @@ result <- autest( # Access the whole results print(result) -#> y x statistic df p.value method alternative estimate -#> 1: cells period 1.093978 39 0.2806758 Paired t-test two.sided 0.1696215 -#> conf.level conf.low conf.high effectsize n_obs -#> 1: 0.95 -0.1394006 0.480802 Hedges_g 40 +#> y x statistic df df.error p.value method estimate +#> +#> 1: cells period 1.196787 1 39 0.2806758 Fisher's rmANOVA 0.00267743 +#> conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.95 0 1 Omega-squared (partial) 40 # For inline results lablr(result) -#> stats p es ci -#> 1: t(39) = 1.09 p = 0.281 g = 0.17 CI95 [-0.14, 0.48] -#> full -#> 1: t(39) = 1.09, p = 0.281, g = 0.17, CI95 [-0.14, 0.48] +#> stats p es ci +#> +#> 1: F(1, 39) = 1.20 p 0.281 omega2 = 0.00 CI95 [0.00, 1.00] +#> full +#> +#> 1: F(1, 39) = 1.20, p 0.281, omega2 = 0.00, CI95 [0.00, 1.00] ``` Same as above, you can specify your own parameters for the selection of @@ -312,10 +311,10 @@ the parameters stablished. When `type = 'auto'` the next assumptions will be checked for \> 2 independent samples: -| Assumption checked | How is tested | If met | If not | -|--------------------------|--------------------------------------------------------------------------|---------------------------------|----------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Homogeneity of variances check. | Kruskal-Wallis ANOVA | -| Homogeneity of variances | Levene’s test on medians with `is_var.equal()` | Fisher’s ANOVA | Welch’s ANOVA | +| Assumption checked | How is tested | If met | If not | +|--------------------------|------------------------------------------------|---------------------------------|----------------------| +| Normality | `stats::shapiro.test` | Homogeneity of variances check. | Kruskal-Wallis ANOVA | +| Homogeneity of variances | Levene’s test on medians with `is_var.equal()` | Fisher’s ANOVA | Welch’s ANOVA | ``` r set.seed(123) @@ -325,7 +324,7 @@ cancer_unpaired <- data.frame( , rnorm(n = n, mean = 90, sd = 15) )) # Drug B , group = gl(n = 3, k = n, labels = c('Control', 'Drug A', 'Drug B') ) ) -result <- autest( +result <- k_sample( data = cancer_unpaired , x = "group" , y = "cells" @@ -335,24 +334,21 @@ result <- autest( # Check results print(result) -#> $test -#> y x statistic df df.error p.value method estimate -#> 1: cells group 4.861757 2 75.91708 0.01030964 Welch's ANOVA 0.08914428 -#> conf.level conf.low conf.high effectsize n_obs -#> 1: 0.95 0.005281224 1 Omega2 120 -#> -#> $pwc -#> group1 group2 statistic p method p.adjust.method -#> 1: Drug A Control -2.516185 0.184417805 Games-Howell Test none -#> 2: Drug B Control -4.376442 0.007873682 Games-Howell Test none -#> 3: Drug B Drug A -2.482696 0.191506063 Games-Howell Test none +#> y x statistic df df.error p.value method estimate +#> +#> 1: cells group 4.861757 2 75.91708 0.01030964 Welch's ANOVA 0.08914428 +#> conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.95 0.005281224 1 Omega-squared (partial) 120 # For inline results -lablr(result$test) -#> stats p es ci -#> 1: F(2.0, 75.9) = 4.86 p = 0.01 omega2 = 0.09 CI95 [0.01, 1.00] -#> full -#> 1: F(2.0, 75.9) = 4.86, p = 0.01, omega2 = 0.09, CI95 [0.01, 1.00] +lablr(result) +#> stats p es ci +#> +#> 1: F(2.0, 75.9) = 4.86 p 0.010 omega2 = 0.09 CI95 [0.01, 1.00] +#> full +#> +#> 1: F(2.0, 75.9) = 4.86, p 0.010, omega2 = 0.09, CI95 [0.01, 1.00] ``` However, you can specify your own parameters for the selection of the @@ -370,13 +366,13 @@ test: Just like above, if `type = 'auto'` assumptions will be checked for 2 independent samples: -| Assumption checked | How is tested | If met | If not | -|--------------------------|--------------------------------------------------------------------------|---------------------------------|-----------------------| -| Normality | `stats::shapiro.test` for n \< 50 or `nortest::lillie.test` for n \>= 50 | Homogeneity of variances check. | Mann-Whitney *U* test | -| Homogeneity of variances | Levene’s test on medians with `is_var.equal()` | Student’s t-test | Welch’s t-test | +| Assumption checked | How is tested | If met | If not | +|--------------------------|------------------------------------------------|---------------------------------|-----------------------| +| Normality | `stats::shapiro.test` | Homogeneity of variances check. | Mann-Whitney *U* test | +| Homogeneity of variances | Levene’s test on medians with `is_var.equal()` | Student’s t-test | Welch’s t-test | ``` r -result <- autest( +result <- k_sample( data = cancer_unpaired[cancer_unpaired$group %in% c('Drug A','Drug B'),] , x = "group" , y = "cells" @@ -385,17 +381,21 @@ result <- autest( # For tabular results print(result) -#> y x statistic df p.value method alternative estimate -#> 1: cells group 1.755531 78 0.08309445 Two Sample t-test two.sided 0.3887601 -#> conf.level conf.low conf.high effectsize n_obs -#> 1: 0.95 -0.05074507 0.8258231 Hedges_g 80 +#> y x statistic df df.error p.value method estimate +#> +#> 1: cells group 3.08189 1 78 0.08309445 Fisher's ANOVA 0.02536358 +#> conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.95 0 1 Omega-squared (partial) 80 # For inline results (e.g. manuscript) lablr(result) -#> stats p es ci -#> 1: t(78) = 1.76 p = 0.083 g = 0.39 CI95 [-0.05, 0.83] -#> full -#> 1: t(78) = 1.76, p = 0.083, g = 0.39, CI95 [-0.05, 0.83] +#> stats p es ci +#> +#> 1: F(1, 78) = 3.08 p 0.083 omega2 = 0.03 CI95 [0.00, 1.00] +#> full +#> +#> 1: F(1, 78) = 3.08, p 0.083, omega2 = 0.03, CI95 [0.00, 1.00] ``` You can specify your own parameters for the selection of the test as @@ -450,31 +450,27 @@ result <- aov_r( # Check results print(result) -#> y x statistic df df.error p.value -#> 1: score gender 130.7357382 1 68 1.720992e-17 -#> 2: score time 0.2367333 2 136 7.895263e-01 -#> 3: score gender:time 42.8799011 2 136 3.635914e-15 -#> method estimate conf.level conf.low conf.high -#> 1: Fisher's ANOVA 0.64953693 0.95 0.5389314 1 -#> 2: Fisher's repeated measures ANOVA -0.00747718 0.95 0.0000000 1 -#> 3: Fisher's repeated measures ANOVA 0.28938040 0.95 0.1844234 1 -#> effectsize n_obs -#> 1: Omega2_partial 70 -#> 2: Omega2_partial 70 -#> 3: Omega2_partial 70 +#> y x statistic df df.error p.value method +#> +#> 1: score gender 130.7357382 1 68 1.720992e-17 Fisher's ANOVA +#> 2: score time 0.2367333 2 136 7.895263e-01 Fisher's rmANOVA +#> 3: score gender:time 42.8799011 2 136 3.635914e-15 Fisher's rmANOVA +#> estimate conf.level conf.low conf.high effectsize n_obs +#> +#> 1: 0.6495369 0.95 0.5389314 1 Omega2_partial 70 +#> 2: 0.0000000 0.95 0.0000000 1 Omega2_partial 70 +#> 3: 0.2893804 0.95 0.1844234 1 Omega2_partial 70 # And inline results for reporting purposes inline <- result[j = lablr(.SD), keyby = x] -print(inline) -#> x stats p es ci -#> 1: gender F(1, 68) = 130.74 p < 0.001 omega2 = 0.65 CI95 [0.54, 1.00] -#> 2: gender:time F(2, 136) = 42.88 p < 0.001 omega2 = 0.29 CI95 [0.18, 1.00] -#> 3: time F(2, 136) = 0.24 p = 0.79 omega2 = -0.01 CI95 [0.00, 1.00] -#> full -#> 1: F(1, 68) = 130.74, p < 0.001, omega2 = 0.65, CI95 [0.54, 1.00] -#> 2: F(2, 136) = 42.88, p < 0.001, omega2 = 0.29, CI95 [0.18, 1.00] -#> 3: F(2, 136) = 0.24, p = 0.79, omega2 = -0.01, CI95 [0.00, 1.00] +print(inline[,c("x", "full")]) +#> Key: +#> x full +#> +#> 1: gender F(1, 68) = 130.74, p 2e-17, omega2 = 0.65, CI95 [0.54, 1.00] +#> 2: gender:time F(2, 136) = 42.88, p 4e-15, omega2 = 0.29, CI95 [0.18, 1.00] +#> 3: time F(2, 136) = 0.24, p 0.790, omega2 = 0.00, CI95 [0.00, 1.00] ``` For inline results with previous data we would do something like this: @@ -498,8 +494,8 @@ Which will translate into this after evaluation in R Markdown: > In order to analyze the effect of gender on subjects’ scores in each > of the evaluation periods, we performed an analysis of variance > (ANOVA) with between- and within-subjects factors. From the analyses, -> we find that gender has a large effect ( omega2 = 0.65, CI95 \[0.51, -> 0.74\] ) on scores when adjusting for each of the time periods, F(1, +> we find that gender has a large effect (omega2 = 0.65, CI95 \[0.54, +> 1.00\]) on scores when adjusting for each of the time periods, F(1, > 68) = 130.74, p \< 0.001. In a similar way we find a significant > interaction between evaluative time and gender ( F(2, 136) = 42.88, p > \< 0.001 ), indicating unequal responses between males and females @@ -530,10 +526,12 @@ result <- contingency( # Tabular format dropping empty columns print(result) -#> x statistic df p.value method -#> 1: group 1.818182 2 0.4028903 Chi-squared test for given probabilities -#> estimate conf.level conf.low conf.high effectsize n_obs -#> 1: 0.1275153 0.95 0 1 pearsons_c 110 +#> x statistic df p.value method +#> +#> 1: group 1.818182 2 0.4028903 Chi-squared test for given probabilities +#> estimate conf.level conf.low conf.high effectsize +#> +#> 1: 0.1275153 0.95 0 1 Pearson's C # For inline results inline <- lablr(result, markdown = T) @@ -564,17 +562,21 @@ result <- contingency( # Statistics in tabular format print(result) -#> y x statistic df p.value method estimate -#> 1: gear cyl 18.03636 4 0.001214066 Pearson's Chi-squared test 0.5308655 -#> conf.level conf.low conf.high effectsize n_obs -#> 1: 0.95 0.2629954 1 Cramers_v 32 +#> y x statistic df p.value method +#> +#> 1: gear cyl 18.03636 4 0.001214066 Pearson's Chi-squared test +#> estimate conf.level conf.low conf.high effectsize +#> +#> 1: 0.4819631 0.95 0.07050663 1 Cramer's V # Inline results format lablr(result) -#> stats p es ci -#> 1: X2(4) = 18.04 p = 0.001 V = 0.53 CI95 [0.26, 1.00] -#> full -#> 1: X2(4) = 18.04, p = 0.001, V = 0.53, CI95 [0.26, 1.00] +#> stats p es ci +#> +#> 1: X2(4) = 18.04 p 0.001 V = 0.48 CI95 [0.07, 1.00] +#> full +#> +#> 1: X2(4) = 18.04, p 0.001, V = 0.48, CI95 [0.07, 1.00] ``` ### Fisher’s exact test @@ -592,13 +594,21 @@ result <- contingency( # Statistics in tabular format print(result) -#> y x p.value method n_obs -#> 1: gear cyl 8.259716e-05 Fisher's Exact Test for Count Data without OR 32 +#> y x statistic df p.value method +#> +#> 1: gear cyl 18.03636 4 0.001214066 Pearson's Chi-squared test +#> estimate conf.level conf.low conf.high effectsize +#> +#> 1: 0.4819631 0.95 0.07050663 1 Cramer's V # Inline results format lablr(result) -#> p full -#> 1: p < 0.001 p < 0.001 +#> stats p es ci +#> +#> 1: X2(4) = 18.04 p 0.001 V = 0.48 CI95 [0.07, 1.00] +#> full +#> +#> 1: X2(4) = 18.04, p 0.001, V = 0.48, CI95 [0.07, 1.00] ``` ### McNemar’s Chi-squared Test @@ -626,19 +636,24 @@ result <- contingency( # Statistics in tabular format print(result) -#> y x statistic df p.value -#> 1: 2nd survey 1st survey 34.17161 1 5.045976e-09 -#> method estimate conf.level -#> 1: McNemar's Chi-squared test with continuity correction 0.1355932 0.95 -#> conf.low conf.high effectsize n_obs -#> 1: 0.09124332 0.1777538 Cohens_g 3200 +#> y x statistic df p.value +#> +#> 1: 2nd survey 1st survey 34.71186 1 3.822946e-09 +#> method estimate conf.level conf.low conf.high +#> +#> 1: McNemar's Chi-squared test 0.1355932 0.95 0.09124332 0.1777538 +#> effectsize +#> +#> 1: Cohen's g # Inline results lablr(result) -#> stats p es ci -#> 1: X2(1) = 34.17 p < 0.001 g = 0.14 CI95 [0.09, 0.18] -#> full -#> 1: X2(1) = 34.17, p < 0.001, g = 0.14, CI95 [0.09, 0.18] +#> stats p es ci +#> +#> 1: X2(1) = 34.71 p 4e-09 g = 0.14 CI95 [0.09, 0.18] +#> full +#> +#> 1: X2(1) = 34.71, p 4e-09, g = 0.14, CI95 [0.09, 0.18] ``` ## Dependencies @@ -666,12 +681,11 @@ To cite package ‘writR’ in publications run the following code in your ``` r citation('writR') -#> #> To cite package 'writR' in publications use: #> -#> Matías Castillo Aguilar (2021). writR: Inferential statistics and -#> reporting in APA style. https://doi.org/10.5281/zenodo.4603838, -#> https://matcasti.github.io/writR. +#> Castillo Aguilar M (2021). _writR: Inferential statistics and +#> reporting in APA style_. R package version 1.0.1, +#> . #> #> A BibTeX entry for LaTeX users is #> @@ -679,7 +693,7 @@ citation('writR') #> title = {writR: Inferential statistics and reporting in APA style}, #> author = {Matías {Castillo Aguilar}}, #> year = {2021}, -#> note = {https://doi.org/10.5281/zenodo.4603838, -#> https://matcasti.github.io/writR}, +#> note = {R package version 1.0.1}, +#> url = {https://github.com/matcasti/writR}, #> } ``` diff --git a/man/GG.Rd b/man/GG.Rd index 8ad1eac..3de9007 100644 --- a/man/GG.Rd +++ b/man/GG.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{GG} \alias{GG} \title{Greenhouse-Geisser epsilon} diff --git a/man/HF.Rd b/man/HF.Rd index 1f6c448..8b26ef5 100644 --- a/man/HF.Rd +++ b/man/HF.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{HF} \alias{HF} \title{Hyunh-Feldt epsilon} diff --git a/man/aov_r.Rd b/man/aov_r.Rd index 9131f73..fe74a17 100644 --- a/man/aov_r.Rd +++ b/man/aov_r.Rd @@ -6,7 +6,7 @@ \usage{ aov_r( data, - response = NULL, + response, between = NULL, within = NULL, rowid = NULL, @@ -14,8 +14,8 @@ aov_r( effsize.type = "unbiased", sphericity = "auto", conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL + character.only = FALSE, + markdown ) } \arguments{ @@ -37,7 +37,8 @@ aov_r( \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} \item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} } diff --git a/man/as.data.frame.writR.Rd b/man/as.data.frame.writR.Rd deleted file mode 100644 index d9adc35..0000000 --- a/man/as.data.frame.writR.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R -\name{as.data.frame.writR} -\alias{as.data.frame.writR} -\title{as.data.frame method for writR objects} -\usage{ -\method{as.data.frame}{writR}(x, row.names = NULL, optional = FALSE, ...) -} -\arguments{ -\item{x}{A writR object from any of one_sample, two_sample, k_sample, autest or contingency.} - -\item{row.names}{Exported from other methods.} - -\item{optional}{Exported from other methods.} - -\item{...}{Currently ignored} -} -\description{ -as.data.frame method for writR objects -} diff --git a/man/autest.Rd b/man/autest.Rd deleted file mode 100644 index 2cfb7c4..0000000 --- a/man/autest.Rd +++ /dev/null @@ -1,73 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/autest.R -\name{autest} -\alias{autest} -\title{Hypothesis testing for group differences with assumption checking for test selection} -\usage{ -autest( - data, - x, - y = NULL, - rowid = NULL, - type = "auto", - paired = FALSE, - var.equal = FALSE, - posthoc = FALSE, - sphericity = "GG", - test.value = 0, - trim = 0.2, - nboot = 100L, - effsize.type = "unbiased", - alternative = "two.sided", - conf.level = 0.95, - ss_type = 3, - p.adjust.method = "none", - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, - ... -) -} -\arguments{ -\item{data}{Data frame from which \code{x} and \code{y} (and possibly \code{rowid} if provided) will be searched.} - -\item{x}{Character for the grouping factor. Must be present in data} - -\item{y}{Character for the response variable. Must be present in data.} - -\item{rowid}{Character for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present.} - -\item{type}{Set \code{"auto"} (default) for checking the normality and homogeneity of variances for test selection. Other options are \code{"p"} for parametric, \code{"np"} for non-parametric and \code{"r"} for robust tests.} - -\item{paired}{Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is \code{FALSE.}} - -\item{var.equal}{Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used.} - -\item{posthoc}{Logical indicating if post-hoc tests should be returned additionally to regular output} - -\item{sphericity}{Character. Which sphericity correction of the degrees of freedom should be reported for the within-subject factors. Possible values are "GG" corresponding to the Greenhouse-Geisser correction, "HF" (i.e., Hyunh-Feldt correction), and "none" (i.e., no correction).} - -\item{test.value}{A number indicating the true value of the mean (Default: 0) to be tested. Only for one sample test.} - -\item{trim}{Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L).} - -\item{effsize.type}{Options are \code{"unbiased"} or \code{"omega"} for partial omega squared (k-samples) or \code{"g"} for Hedges g (two-samples) and \code{"biased"} or \code{"eta"} for partial eta squared (k-samples) or \code{"d"} for Cohen's d as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs in \code{k_sample()}, whereas rank-biserial correlation is used in \code{two_sample()}.} - -\item{alternative}{A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".} - -\item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} - -\item{ss_type}{Type of sum of squares for repeated measures ANOVA (defaults to 3). Possible values are "II", "III", 2, or 3.} - -\item{p.adjust.method}{Adjustment method for p-values for multiple comparisons. Possible methods are: "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", "none" (default).} - -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} - -\item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} - -\item{...}{Currently ignored.} -} -\description{ -A list containing results from a multi-sample test. -} diff --git a/man/clean_data.Rd b/man/clean_data.Rd index 3d89eb3..d3d779e 100644 --- a/man/clean_data.Rd +++ b/man/clean_data.Rd @@ -2,25 +2,47 @@ % Please edit documentation in R/clean_data.R \name{clean_data} \alias{clean_data} -\title{Data table without NA's and with rowid (paired designs only).} +\title{Remove NA's from long to wide/long data.table} \usage{ -clean_data(data, x, y, rowid = NULL, paired = FALSE, wide = FALSE, ...) +clean_data( + data, + x, + y, + rowid = NULL, + paired = FALSE, + wide = FALSE, + character.only = FALSE, + ... +) } \arguments{ -\item{data}{Data frame from which \code{x} and \code{y} (and possibly \code{rowid} if provided) will be searched.} +\item{data}{Data from which \code{x} and \code{y} (and possibly \code{rowid} if provided) will +be searched.} -\item{x}{Unquoted name for the grouping factor. Must be present in data} +\item{x}{Name for the grouping factor. Must be present in data} -\item{y}{Unquoted name for the response variable. Must be present in data.} +\item{y}{Name for the response variable. Must be present in data.} -\item{rowid}{Unquoted name for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present.} +\item{rowid}{Name for the subject-id column. If null, then is assumed that +data is sorted for paired designs, creating one. So if your data is not sorted and you +leave this argument unspecified, the results can be inaccurate when there are more than +two levels in x and there are NAs present. Ignored if \code{paired} is \code{FALSE}.} -\item{paired}{Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is \code{FALSE.}} +\item{paired}{Logical that decides whether the experimental design is repeated +measures/within-subjects or between-subjects. The default is \code{FALSE.}} -\item{wide}{Logical to whether return a data.frame in wide format (\code{TRUE}, i.e. one columns per group/time) or in long format (\code{FALSE}).} +\item{wide}{Logical to whether return a data.frame in wide format (\code{TRUE}, i.e. one +columns per group/time) or in long format (\code{FALSE}).} + +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} \item{...}{Currently ignored.} } \description{ -Data table without NA's and with rowid (paired designs only). +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +This function allows removing NA's from long format data into wide (or long) format +data, even suporting repeated measures designs (i.e., with more than one subject per +factor level). } diff --git a/man/contingency.Rd b/man/contingency.Rd index 0384ad0..795971a 100644 --- a/man/contingency.Rd +++ b/man/contingency.Rd @@ -9,10 +9,9 @@ contingency( x, y = NULL, paired = FALSE, - exact = FALSE, + ratio = NULL, conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, + character.only = FALSE, ... ) } @@ -25,13 +24,12 @@ contingency( \item{paired}{Logical. If \code{TRUE} McNemar's Chi-squared test is carried on.} -\item{exact}{Logical. If \code{TRUE} then Fisher's Exact Test is carried on, but only when \code{paired = FALSE} (default). If is a 2 x 2 design, Odds Ratio (OR) is returned as effect size, otherwise it will only return the formated p-value.} +\item{ratio}{A vector of probabilities of the same length of x. An error is given if any entry of p is negative.} \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} - -\item{markdown}{Whether you want the output formated for inline R Markdown or as plain text.} +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} \item{...}{Currently not used.} } diff --git a/man/deparser.Rd b/man/deparser.Rd new file mode 100644 index 0000000..ee521cb --- /dev/null +++ b/man/deparser.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deparser.R +\name{deparser} +\alias{deparser} +\title{Get the deparsed value of the outermost expression} +\usage{ +deparser(x, character.only = FALSE, ...) +} +\arguments{ +\item{x}{An expression from which to capture the outermost expression.} + +\item{character.only}{whether to treat \code{x} as a character. Default is FALSE.} + +\item{...}{Currently not used.} +} +\description{ +Get the deparsed value of the outermost expression +} diff --git a/man/figures/README-unnamed-chunk-15-1.png b/man/figures/README-unnamed-chunk-15-1.png index 4c90fb1..cbd6229 100644 Binary files a/man/figures/README-unnamed-chunk-15-1.png and b/man/figures/README-unnamed-chunk-15-1.png differ diff --git a/man/is_normal.Rd b/man/is_normal.Rd index 3002165..fb86ad4 100644 --- a/man/is_normal.Rd +++ b/man/is_normal.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{is_normal} \alias{is_normal} \title{Normality check} \usage{ -is_normal(x, alpha = 0.05, test = NULL) +is_normal(x, alpha = 0.05, test) } \arguments{ \item{x}{Numeric vector with or without NA's.} diff --git a/man/is_var.equal.Rd b/man/is_var.equal.Rd index 9fe1f78..44d9633 100644 --- a/man/is_var.equal.Rd +++ b/man/is_var.equal.Rd @@ -1,20 +1,25 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{is_var.equal} \alias{is_var.equal} -\title{Levene's Test} +\title{Test for Homogeneity of Variances} \usage{ -is_var.equal(y, x, alpha = 0.05, center = stats::median) +is_var.equal(y, x, alpha = 0.05, test = "levene", center = stats::median) } \arguments{ \item{y}{Numeric vector (response).} \item{x}{Grouping factor.} -\item{alpha}{Threshold for null hipotesis (of normality) rejection.} +\item{alpha}{Threshold for null hipotesis (of normality) rejection. Defaults to 0.05} -\item{center}{A function to compute the center of each group; mean gives the original Levene's test; the default, median, provides a more robust test.} +\item{test}{A character indicating whether to use Levene's test ("levene", the default) or Fligner's test ("fligner"). The first letter will be used.} + +\item{center}{A function to compute the center of each group (valid only for Levene's Test). The mean gives the original Levene's test; the default, median, provides a more robust test.} +} +\value{ +A logical of length one, indicating if the p.value of the test was greater than the alpha (which defaults to 0.05). } \description{ -Levene's Test +Test for Homogeneity of Variances } diff --git a/man/k_sample.Rd b/man/k_sample.Rd index 4b76648..be69bc3 100644 --- a/man/k_sample.Rd +++ b/man/k_sample.Rd @@ -9,19 +9,14 @@ k_sample( x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - sphericity = "GG", - trim = 0.2, - nboot = 100L, + is_spherical = NULL, + adjust = NULL, effsize.type = "unbiased", - alternative = "two.sided", conf.level = 0.95, - ss_type = 3, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, - internal = FALSE, + character.only = FALSE, ... ) } @@ -34,31 +29,22 @@ k_sample( \item{rowid}{Character for the subject-id column. If null, then is assumed that data is sorted for paired designs, creating one. So if your data is not sorted and you leave this argument unspecified, the results can be inaccurate when there are more than two levels in x and there are NAs present.} -\item{type}{Set \code{"auto"} (default) for checking the normality and homogeneity of variances for test selection. Other options are \code{"p"} for parametric, \code{"np"} for non-parametric and \code{"r"} for robust tests.} +\item{type}{Missing (default) or \code{NULL} for checking the normality and homogeneity of variances for test selection. Other options are \code{"p"} for parametric, \code{"np"} for non-parametric and \code{"r"} for robust tests.} \item{paired}{Logical that decides whether the experimental design is repeated measures/within-subjects or between-subjects. The default is \code{FALSE.}} \item{var.equal}{Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used.} -\item{sphericity}{Character. Which sphericity correction of the degrees of freedom should be reported for the within-subject factors. Possible values are "GG" corresponding to the Greenhouse-Geisser correction, "HF" (i.e., Hyunh-Feldt correction), and "none" (i.e., no correction).} +\item{is_spherical}{Logical. checks whether to assume that the sphericity assumptions holds or not, if \code{NULL} (the default) it will be tested using mauchly's test with a threshold of 0.05.} -\item{trim}{Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L).} +\item{adjust}{Character. correction for sphericity to be applied, it can be any character of length one starting with 'g' (indicating Greenhouse–Geisser correction) or 'h' (indicating Huynh–Feldt correction).} \item{effsize.type}{Options are \code{"unbiased"} or \code{"omega"} for partial omega squared and \code{"biased"} or \code{"eta"} for partial eta squared as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs.} -\item{alternative}{A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".} - \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{ss_type}{Type of sum of squares for repeated measures ANOVA (defaults to 3). Possible values are "II", "III", 2, or 3.} - -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} - -\item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} - -\item{internal}{Logical to whether this function is being used inside of internal functions.} +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} \item{...}{Currently ignored.} } diff --git a/man/lablr.Rd b/man/lablr.Rd index 9bdd4d6..020ad62 100644 --- a/man/lablr.Rd +++ b/man/lablr.Rd @@ -7,7 +7,7 @@ lablr(t, markdown = FALSE) } \arguments{ -\item{t}{Output from any of the functions autest, k_sample, two_sample or one_sample.} +\item{t}{Output from any of the functions, k_sample, two_sample or one_sample.} \item{markdown}{Logical (default FALSE), indicating if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} } diff --git a/man/mauchly.Rd b/man/mauchly.Rd index 07b4c7a..9677f0c 100644 --- a/man/mauchly.Rd +++ b/man/mauchly.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{mauchly} \alias{mauchly} \title{Mauchly's Test of Sphericity} diff --git a/man/one_sample.Rd b/man/one_sample.Rd index 3fe1cbe..eec5ac8 100644 --- a/man/one_sample.Rd +++ b/man/one_sample.Rd @@ -7,15 +7,13 @@ one_sample( data, y, - type = "auto", + type, test.value = 0, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, + character.only = FALSE, + markdown, ... ) } @@ -28,17 +26,13 @@ one_sample( \item{test.value}{A number indicating the true value of the mean (Default: 0) to be tested.} -\item{trim}{Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L).} - \item{effsize.type}{options are \code{"unbiased"} or \code{"g"} for Hedges g and \code{"biased"} or \code{"d"} for Cohen's d as a measure of effect size. rank-biserial correlation is used for non-parametric analysis.} \item{alternative}{A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".} \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} +\item{character.only}{whether to treat \code{x} as a character. Default is FALSE.} \item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} diff --git a/man/pairs_two_sample.Rd b/man/pairs_two_sample.Rd index a191333..9dc5aed 100644 --- a/man/pairs_two_sample.Rd +++ b/man/pairs_two_sample.Rd @@ -9,16 +9,14 @@ pairs_two_sample( x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, + markdown, + character.only = FALSE, ... ) } @@ -37,20 +35,17 @@ pairs_two_sample( \item{var.equal}{Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used.} -\item{trim}{Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L).} - \item{effsize.type}{Options are \code{"unbiased"} or \code{"g"} for Hedges g and \code{"biased"} or \code{"d"} for Cohen's d as a measure of effect size for parametric test. The rank-biserial correlation is used for non-parametric analysis.} \item{alternative}{A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".} \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} - \item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} + \item{...}{Currently ignored.} } \description{ diff --git a/man/print.writR.Rd b/man/print.writR.Rd deleted file mode 100644 index cfe3db8..0000000 --- a/man/print.writR.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R -\name{print.writR} -\alias{print.writR} -\title{Print method for writR objects} -\usage{ -\method{print}{writR}(x, ...) -} -\arguments{ -\item{x}{A writR object from any of one_sample, two_sample, k_sample, autest or contingency.} - -\item{...}{Currently ignored} -} -\description{ -Print method for writR objects -} diff --git a/man/rm_anova.Rd b/man/rm_anova.Rd new file mode 100644 index 0000000..fd78b65 --- /dev/null +++ b/man/rm_anova.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rm_anova.R +\name{rm_anova} +\alias{rm_anova} +\title{Repeated measures ANOVA} +\usage{ +rm_anova( + data, + x, + y, + is_spherical, + adjust, + effsize.type = "omega", + conf.level = 0.95, + character.only = FALSE, + ... +) +} +\arguments{ +\item{data}{Data frame from which \code{x} and \code{y} (and possibly \code{rowid} if provided) will be searched.} + +\item{x}{Character name for the grouping factor. Must be present in data} + +\item{y}{Character name for the response variable. Must be present in data.} + +\item{is_spherical}{Logical. checks whether to assume that the sphericity assumptions holds or not, if missing (the default) it will be tested using mauchly test with a threshold of 0.05.} + +\item{adjust}{Character. correction for sphericity to be applied, it can be any character of length one starting with 'g' (indicating Greenhouse–Geisser correction) or 'h' (indicating Huynh–Feldt correction).} + +\item{effsize.type}{Options are \code{"unbiased"} or \code{"omega"} for partial omega squared and \code{"biased"} or \code{"eta"} for partial eta squared as a measure of effect size. For non-parametric analysis, Kendalls' W is used for paired designs, where rank epsilon squared is used for independent groups designs.} + +\item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} + +\item{character.only}{Logical. checks whether to use the unevaluated expression or its +content (when TRUE), asumming is a character vector. Defaults to \code{FALSE}.} + +\item{...}{Currently not used.} +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} + +This functions allows you to perform a one way repeated measures ANOVA, +making adjustments for violations of sphericity, with no dependencies +related to \code{{afex}} or \code{{car}} packages. +} diff --git a/man/sphericity.Rd b/man/sphericity.Rd new file mode 100644 index 0000000..00faa8b --- /dev/null +++ b/man/sphericity.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utils.R +\name{sphericity} +\alias{sphericity} +\title{Mauchly's Test of Sphericity} +\usage{ +sphericity( + object, + Sigma = diag(nrow = p), + T = Thin.row(Proj(M) - Proj(X)), + M = diag(nrow = p), + X = ~0, + idata = data.frame(index = seq_len(p)) +) +} +\arguments{ +\item{object}{object of class SSD or mlm} + +\item{Sigma}{matrix to be proportional to.} + +\item{T}{transformation matrix. By default computed from M and X.} + +\item{M}{formula or matrix describing the outer projection.} + +\item{X}{formula or matrix describing the inner projection.} + +\item{idata}{data frame describing intra-block design.} +} +\description{ +Low-level function behind \code{mauchly.test()} +} diff --git a/man/sphericity_check.Rd b/man/sphericity_check.Rd index 3cf0fad..a706a35 100644 --- a/man/sphericity_check.Rd +++ b/man/sphericity_check.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{sphericity_check} \alias{sphericity_check} \title{Suggested sphericity correction for repeated measures ANOVA} diff --git a/man/style.p.Rd b/man/style.p.Rd index fefcd38..ef0d116 100644 --- a/man/style.p.Rd +++ b/man/style.p.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R +% Please edit documentation in R/utils.R \name{style.p} \alias{style.p} \title{Style p value} @@ -12,7 +12,7 @@ style.p(p, k = 3) \item{k}{Number of decimal places} } \value{ -Character vector of length one. +Character vector of the same length of the input. } \description{ Style p value for pretty printing diff --git a/man/subs.Rd b/man/subs.Rd new file mode 100644 index 0000000..6920a6c --- /dev/null +++ b/man/subs.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/deparser.R +\name{subs} +\alias{subs} +\title{Always returns the outermost expression} +\source{ +\url{https://stackoverflow.com/a/26558733} +} +\usage{ +subs(x, env, ...) +} +\arguments{ +\item{x}{An expression from which to capture the outermost expression.} + +\item{env}{The last environment in which to search \code{x}. Default is missing.} + +\item{...}{Currently not used.} +} +\description{ +Always returns the outermost expression +} diff --git a/man/trimcibt.Rd b/man/trimcibt.Rd deleted file mode 100644 index b05cdd2..0000000 --- a/man/trimcibt.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/miscellaneous.R -\name{trimcibt} -\alias{trimcibt} -\title{one-sample t-test on trimmed means} -\usage{ -trimcibt(x, nv = 0, tr = 0.2, nboot = 100L, ci = 0.95, ...) -} -\arguments{ -\item{x}{Numeric vector with or without NA's.} - -\item{nv}{Test value, default to 0.} - -\item{tr}{Trim for the mean, default to 0.2.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (default: 100L).} - -\item{ci}{Confidence/Credible Interval (CI) level, default to 0.95 (95\%).} - -\item{...}{Currently ignored.} -} -\description{ -A wrapper around statsExpressions original function trimcibt -} diff --git a/man/two_sample.Rd b/man/two_sample.Rd index 15b7080..066d5fa 100644 --- a/man/two_sample.Rd +++ b/man/two_sample.Rd @@ -9,17 +9,13 @@ two_sample( x, y, rowid = NULL, - type = "auto", + type, paired = FALSE, var.equal = FALSE, - trim = 0.2, - nboot = 100L, effsize.type = "unbiased", alternative = "two.sided", conf.level = 0.95, - lbl = if (is.null(markdown)) FALSE else TRUE, - markdown = NULL, - internal = FALSE, + character.only = FALSE, ... ) } @@ -38,21 +34,13 @@ two_sample( \item{var.equal}{Logical variable indicating whether to treat the two variances as being equal. If TRUE then the pooled variance is used to estimate the variance otherwise the Welch (or Satterthwaite) approximation to the degrees of freedom is used.} -\item{trim}{Trim level for the mean when carrying out robust tests. In case of an error, try reducing the value of tr, which is by default set to 0.2. Lowering the value might help.} - -\item{nboot}{Number of bootstrap samples for computing confidence interval for the effect size (Default: 100L).} - \item{effsize.type}{Options are \code{"unbiased"} or \code{"g"} for Hedges g and \code{"biased"} or \code{"d"} for Cohen's d as a measure of effect size. rank-biserial correlation is used for non-parametric analysis.} \item{alternative}{A character string specifying the alternative hypothesis, must be one of "two.sided" (default), "greater" or "less".} \item{conf.level}{Confidence/Credible Interval (CI) level. Default to 0.95 (95\%).} -\item{lbl}{Logical (default FALSE) indicating if a report ready output is desired. This will change the output to a list with characters rather than numeric vectors.} - -\item{markdown}{Logical (default FALSE). If \code{lbl} is TRUE, then this argument specify if the report-ready labels should be formated for inline code for R markdown (using mathjax and markdown syntax), or if the output should be in plain text (the default).} - -\item{internal}{Logical to whether this function is being used inside of internal functions.} +\item{character.only}{whether to treat \code{x} as a character. Default is FALSE.} \item{...}{Currently ignored.} } diff --git a/man/writR-package.Rd b/man/writR-package.Rd index 1cd2c8a..1e76467 100644 --- a/man/writR-package.Rd +++ b/man/writR-package.Rd @@ -6,13 +6,14 @@ \alias{writR-package} \title{writR: Inferential statistics and reporting in APA style} \description{ +\if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} + Perform and report basic inferential tests for group differences in APA style for articles writings in Markdown or plain text format. } \seealso{ Useful links: \itemize{ \item \url{https://github.com/matcasti/writR} - \item \url{https://doi.org/10.5281/zenodo.4603838} \item Report bugs at \url{https://github.com/matcasti/writR/issues} } diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..7b478c6 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(writR) + +test_check("writR") diff --git a/tests/testthat/test-aov_r.R b/tests/testthat/test-aov_r.R new file mode 100644 index 0000000..889d706 --- /dev/null +++ b/tests/testthat/test-aov_r.R @@ -0,0 +1,13 @@ +test_that("aov_r works", { + test_data <- datasets::ChickWeight + + a <- aov_r(test_data, weight, Diet, Time, rowid = Chick) + b <- aov_r(test_data, "weight", "Diet", "Time", rowid = "Chick") + + response <- "weight"; between <- "Diet"; within <- "Time"; rowid <- "Chick" + c <- aov_r(test_data, response, between, within, rowid, character.only = TRUE) + + expect_equal(a, b) + expect_equal(a, c) + expect_equal(b, c) +}) diff --git a/tests/testthat/test-cent_disp.R b/tests/testthat/test-cent_disp.R new file mode 100644 index 0000000..244acf1 --- /dev/null +++ b/tests/testthat/test-cent_disp.R @@ -0,0 +1,12 @@ +test_that("cent_disp works", { + weight <- datasets::ChickWeight$weight + is_normal(weight) + expect_true(cent_disp(x = weight) == "*Mdn* = 103, *IQR* = 100.8") + + set.seed(123) + weight <- rnorm(n = 578, mean = 103, sd = 101) + is_normal(weight) + expect_true(cent_disp(x = weight) == "*M* = 106.4, *SD* = 97") + + expect_true(cent_disp(weight, str.a = "{mean} ± {sd}") == "106.4 ± 97") +})