diff --git a/DESCRIPTION b/DESCRIPTION index 5aad36b..c3fb716 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: inferr Type: Package Title: Inferential Statistics -Version: 0.3.0.9000 +Version: 0.3.1 Authors@R: person("Aravind", "Hebbali", email = "hebbali.aravind@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-9220-9669")) Description: Select set of parametric and non-parametric statistical tests. 'inferr' builds upon the solid set of @@ -13,13 +13,11 @@ License: MIT + file LICENSE URL: https://rsquaredacademy.github.io/inferr/, https://github.com/rsquaredacademy/inferr BugReports: https://github.com/rsquaredacademy/inferr/issues Imports: - dplyr, - magrittr, - purrr, - Rcpp, - rlang, - tibble, - tidyr + data.table, + magrittr, + Rcpp, + stats, + utils Suggests: covr, knitr, diff --git a/NAMESPACE b/NAMESPACE index 79ee36a..d39961e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -30,12 +30,6 @@ S3method(print,infer_ts_ind_ttest) S3method(print,infer_ts_paired_ttest) S3method(print,infer_ts_prop_test) S3method(print,infer_ts_var_test) -export(binom_calc) -export(binom_test) -export(chisq_gof) -export(chisq_test) -export(cochran_test) -export(ind_ttest) export(infer_binom_calc) export(infer_binom_test) export(infer_chisq_assoc_test) @@ -52,51 +46,22 @@ export(infer_runs_test) export(infer_ts_ind_ttest) export(infer_ts_paired_ttest) export(infer_ts_prop_calc) -export(infer_ts_prop_grp) +export(infer_ts_prop_group) export(infer_ts_prop_test) export(infer_ts_var_test) -export(levene_test) -export(mcnemar_test) -export(os_vartest) -export(owanova) -export(paired_ttest) -export(prop_test) -export(runs_test) -export(ts_prop_calc) -export(ts_prop_grp) -export(ts_prop_test) -export(ttest) -export(var_test) +import(magrittr) importFrom(Rcpp,sourceCpp) -importFrom(dplyr,"%>%") -importFrom(dplyr,funs) -importFrom(dplyr,group_by) -importFrom(dplyr,mutate) -importFrom(dplyr,pull) -importFrom(dplyr,select) -importFrom(dplyr,summarise_all) +importFrom(data.table,":=") +importFrom(data.table,data.table) +importFrom(data.table,setDF) importFrom(magrittr,"%>%") -importFrom(magrittr,subtract) -importFrom(magrittr,use_series) -importFrom(purrr,map) -importFrom(purrr,map_dbl) -importFrom(purrr,map_df) -importFrom(purrr,map_int) -importFrom(rlang,"!!!") -importFrom(rlang,"!!") -importFrom(rlang,enquo) -importFrom(rlang,quo_is_null) -importFrom(rlang,quos) -importFrom(rlang,sym) importFrom(stats,anova) importFrom(stats,as.formula) importFrom(stats,complete.cases) importFrom(stats,cor) importFrom(stats,dbinom) -importFrom(stats,formula) importFrom(stats,lm) importFrom(stats,median) -importFrom(stats,model.frame) importFrom(stats,pbinom) importFrom(stats,pchisq) importFrom(stats,pf) @@ -107,7 +72,7 @@ importFrom(stats,qnorm) importFrom(stats,qt) importFrom(stats,sd) importFrom(stats,var) -importFrom(tibble,as_data_frame) -importFrom(tibble,tibble) -importFrom(tidyr,gather) +importFrom(utils,install.packages) +importFrom(utils,menu) +importFrom(utils,packageVersion) useDynLib(inferr) diff --git a/NEWS.md b/NEWS.md index 71d3959..edf5ac4 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,26 +1,6 @@ -# inferr 0.3.0.9000 - -The following functions were deprecated in `0.3.0` and have been removed: - -- `binom_calc` -- `binom_test` -- `chisq_gof` -- `chisq_test` -- `cochran_test` -- `ind_ttest` -- `launch_inferr` -- `levene_test` -- `mcnemar_test` -- `os_vartest` -- `owanova` -- `paired_ttest` -- `prop_test` -- `runs_test` -- `ts_prop_calc` -- `ts_prop_grp` -- `ts_prop_test` -- `ttest` -- `var_test` +# inferr 0.3.0.1 + +This is a patch release to fix error on R-devel on Debian and Fedora. # inferr 0.3.0 diff --git a/R/ifr-anova.R b/R/ifr-anova.R new file mode 100644 index 0000000..eac0b26 --- /dev/null +++ b/R/ifr-anova.R @@ -0,0 +1,140 @@ +#' @title One Way ANOVA +#' @description One way analysis of variance +#' @param data a \code{data.frame} or a \code{tibble} +#' @param x numeric; column in \code{data} +#' @param y factor; column in \code{data} +#' @param ... additional arguments passed to or from other methods +#' @return \code{infer_oneway_anova} returns an object of class \code{"infer_oneway_anova"}. +#' An object of class \code{"infer_oneway_anova"} is a list containing the +#' following components: +#' +#' \item{adjusted_r2}{adjusted r squared value} +#' \item{df_btw}{between groups degress of freedom} +#' \item{df_within}{within groups degress of freedom} +#' \item{df_total}{total degress of freedom} +#' \item{fstat}{f value} +#' \item{group_stats}{group statistics} +#' \item{ms_btw}{between groups mean square} +#' \item{ms_within}{within groups mean square} +#' \item{obs}{number of observations} +#' \item{pval}{p value} +#' \item{r2}{r squared value} +#' \item{rmse}{root mean squared error} +#' \item{ss_between}{between group sum of squares} +#' \item{ss_within}{within group sum of squares} +#' \item{ss_total}{total sum of squares} +#' +#' @references Kutner, M. H., Nachtsheim, C., Neter, J., & Li, W. (2005). +#' Applied linear statistical models. Boston: McGraw-Hill Irwin. +#' +#' @seealso \code{\link[stats]{anova}} +#' @examples +#' infer_oneway_anova(mtcars, mpg, cyl) +#' infer_oneway_anova(hsb, write, prog) +#' @export +#' +infer_oneway_anova <- function(data, x, y, ...) UseMethod("infer_oneway_anova") + +#' @export +infer_oneway_anova.default <- function(data, x, y, ...) { + + x1 <- deparse(substitute(x)) + y1 <- deparse(substitute(y)) + + fdata <- data[c(x1, y1)] + sample_mean <- anova_avg(fdata, x1) + sample_stats <- anova_split(fdata, x1, y1, sample_mean) + k <- anova_calc(fdata, sample_stats, x1, y1) + + result <- + list( + adjusted_r2 = round(k$reg$adj.r.squared, 4), + df_btw = k$df_sstr, + df_total = k$df_sst, + df_within = k$df_sse, + fstat = k$f, + group_stats = sample_stats[, c(1, 2, 3, 5)], + ms_btw = k$mstr, + ms_within = k$mse, + obs = k$obs, + pval = k$sig, + r2 = round(k$reg$r.squared, 4), + rmse = round(k$reg$sigma, 4), + ss_between = k$sstr, + ss_total = k$total, + ss_within = k$ssee) + + class(result) <- "infer_oneway_anova" + return(result) +} + +#' @export +print.infer_oneway_anova <- function(x, ...) { + print_owanova(x) +} + +#' @import magrittr +#' @importFrom stats var sd +#' @importFrom data.table data.table := setDF +anova_split <- function(data, x, y, sample_mean) { + + dat <- data[c(y, x)] + dm <- data.table(dat) + + by_factor <- dm[, .(length = length(get(x)), + mean = mean(get(x)), + var = var(get(x)), + sd = sd(get(x))), + by = y] + + by_factor[, ':='(sst = length * ((mean - sample_mean) ^ 2), + sse = (length - 1) * var)] + + setDF(by_factor) + by_factor <- by_factor[order(by_factor[, 1]),] + + return(by_factor) +} + +anova_avg <- function(data, y) { + + mean(data[[y]]) + +} + +#' @importFrom stats pf as.formula lm +anova_calc <- function(data, sample_stats, x, y) { + + var_names <- names(data[c(x, y)]) + + sample_stats %>% + use_series(sst) %>% + sum() %>% + round(3) -> sstr + + sample_stats %>% + use_series(sse) %>% + sum() %>% + round(3) -> ssee + + total <- round(sstr + ssee, 3) + df_sstr <- nrow(sample_stats) - 1 + df_sse <- nrow(data) - nrow(sample_stats) + df_sst <- nrow(data) - 1 + mstr <- round(sstr / df_sstr, 3) + mse <- round(ssee / df_sse, 3) + f <- round(mstr / mse, 3) + sig <- round(1 - pf(f, df_sstr, df_sse), 3) + obs <- nrow(data) + regs <- paste(var_names[1], "~ as.factor(", var_names[2], ")") + model <- lm(as.formula(regs), data = data) + reg <- summary(model) + + out <- list( + sstr = sstr, ssee = ssee, total = total, df_sstr = df_sstr, + df_sse = df_sse, df_sst = df_sst, mstr = mstr, mse = mse, f = f, + sig = sig, obs = obs, model = model, reg = reg + ) + + return(out) +} diff --git a/R/infer-binom-test.R b/R/ifr-binom-test.R similarity index 55% rename from R/infer-binom-test.R rename to R/ifr-binom-test.R index 53e0deb..5dc3d7f 100644 --- a/R/infer-binom-test.R +++ b/R/ifr-binom-test.R @@ -1,4 +1,3 @@ -#' @importFrom stats pbinom dbinom #' @title Binomial Test #' @description Test whether the proportion of successes on a two-level #' categorical dependent variable significantly differs from a hypothesized value. @@ -8,17 +7,18 @@ #' @param data a \code{data.frame} or a \code{tibble} #' @param variable factor; column in \code{data} #' @param ... additional arguments passed to or from other methods -#' @return \code{binom_test} returns an object of class \code{"binom_test"}. -#' An object of class \code{"binom_test"} is a list containing the +#' +#' @return \code{infer_binom_test} returns an object of class \code{"infer_binom_test"}. +#' An object of class \code{"infer_binom_test"} is a list containing the #' following components: #' -#' \item{n}{number of observations} -#' \item{k}{number of successes} #' \item{exp_k}{expected number of successes} -#' \item{obs_p}{assumed probability of success} #' \item{exp_p}{expected probability of success} -#' \item{lower}{lower one sided p value} -#' \item{upper}{upper one sided p value} +#' \item{k}{number of successes} +#' \item{n}{number of observations} +#' \item{obs_p}{assumed probability of success} +#' \item{pval_lower}{lower one sided p value} +#' \item{pval_upper}{upper one sided p value} #' @section Deprecated Functions: #' \code{binom_calc()} and \code{binom_test()} have been deprecated. Instead use #' \code{infer_binom_cal()} and \code{infer_binom_test()}. @@ -38,42 +38,40 @@ infer_binom_calc <- function(n, success, prob = 0.5, ...) UseMethod("infer_binom #' @export infer_binom_calc.default <- function(n, success, prob = 0.5, ...) { + if (!is.numeric(n)) { - stop("n must be an integer") + stop("n must be an integer", call. = FALSE) } if (!is.numeric(success)) { - stop("success must be an integer") + stop("success must be an integer", call. = FALSE) } if (!is.numeric(prob)) { - stop("prob must be numeric") + stop("prob must be numeric", call. = FALSE) } if ((prob < 0) | (prob > 1)) { - stop("prob must be between 0 and 1") + stop("prob must be between 0 and 1", call. = FALSE) } k <- binom_comp(n, success, prob) - out <- list( - n = n, k = k$k, exp_k = k$exp_k, obs_p = k$obs_p, - exp_p = k$exp_p, lower = k$lower, upper = k$upper + out <- + list( + exp_k = k$exp_k, + exp_p = k$exp_p, + k = k$k, + n = n, + obs_p = k$obs_p, + pval_lower = k$lower, + pval_upper = k$upper ) class(out) <- "infer_binom_calc" return(out) } -#' @export -#' @rdname infer_binom_calc -#' @usage NULL -#' -binom_calc <- function(n, success, prob = 0.5, ...) { - .Deprecated("infer_binom_calc()") - infer_binom_calc(n, success, prob = 0.5, ...) -} - #' @export print.infer_binom_calc <- function(x, ...) { print_binom(x) @@ -82,11 +80,9 @@ print.infer_binom_calc <- function(x, ...) { #' @export #' @rdname infer_binom_calc infer_binom_test <- function(data, variable, prob = 0.5) { - varyable <- enquo(variable) - fdata <- - data %>% - pull(!! varyable) + varyable <- deparse(substitute(variable)) + fdata <- data[[varyable]] if (!is.factor(fdata)) { stop("variable must be of type factor", call. = FALSE) @@ -109,10 +105,52 @@ infer_binom_test <- function(data, variable, prob = 0.5) { infer_binom_calc.default(n, k, prob) } -#' @export -#' @rdname infer_binom_calc -#' @usage NULL -#' -binom_test <- function(data, prob = 0.5) { - .Deprecated("infer_binom_test()") -} \ No newline at end of file +#' @importFrom stats pbinom dbinom +binom_comp <- function(n, success, prob) { + + n <- n + k <- success + obs_p <- k / n + exp_k <- round(n * prob) + lt <- pbinom(k, n, prob, lower.tail = T) + ut <- pbinom(k - 1, n, prob, lower.tail = F) + p_opp <- round(dbinom(k, n, prob), 9) + i_p <- dbinom(exp_k, n, prob) + i_k <- exp_k + + if (k < exp_k) { + while (i_p > p_opp) { + i_k <- i_k + 1 + i_p <- round(dbinom(i_k, n, prob), 9) + if (round(i_p) == p_opp) { + break + } + } + + ttf <- pbinom(k, n, prob, lower.tail = T) + pbinom(i_k - 1, n, prob, lower.tail = F) + } else { + while (p_opp <= i_p) { + i_k <- i_k - 1 + i_p <- dbinom(i_k, n, prob) + if (round(i_p) == p_opp) { + break + } + } + + i_k <- i_k + tt <- pbinom(i_k, n, prob, lower.tail = T) + pbinom(k - 1, n, prob, lower.tail = F) + ttf <- ifelse(tt <= 1, tt, 1) + } + + list(exp_k = exp_k, + exp_p = prob, + ik = i_k, + k = k, + lower = round(lt, 6), + n = n, + obs_p = obs_p, + two_tail = round(ttf, 6), + upper = round(ut, 6) + ) + +} diff --git a/R/ifr-chisq-assoc-test.R b/R/ifr-chisq-assoc-test.R new file mode 100644 index 0000000..4591e58 --- /dev/null +++ b/R/ifr-chisq-assoc-test.R @@ -0,0 +1,203 @@ +#' @title Chi Square Test of Association +#' @description Chi Square test of association to examine if there is a +#' relationship between two categorical variables. +#' @param data a \code{data.frame} or \code{tibble} +#' @param x factor; column in \code{data} +#' @param y factor; column in \code{data} +#' @return \code{infer_chisq_assoc_test} returns an object of class +#' \code{"infer_chisq_assoc_test"}. An object of class +#' \code{"infer_chisq_assoc_test"} is a list containing the +#' following components: +#' +#' \item{chisquare}{chi square} +#' \item{chisquare_lr}{likelihood ratio chi square} +#' \item{chisquare_mantel_haenszel}{mantel haenszel chi square} +#' \item{chisquare_adjusted}{continuity adjusted chi square} +#' \item{contingency_coefficient}{contingency coefficient} +#' \item{cramers_v}{cramer's v} +#' \item{df}{degrees of freedom} +#' \item{ds}{product of dimensions of the table of \code{x} and \code{y}} +#' \item{phi_coefficient}{phi coefficient} +#' \item{pval_chisquare}{p-value of chi square} +#' \item{pval_chisquare_adjusted}{p-value of continuity adjusted chi square} +#' \item{pval_chisquare_lr}{p-value of likelihood ratio chi square} +#' \item{pval_chisquare_mantel_haenszel}{p-value of mantel haenszel chi square} +#' +#' @section Deprecated Function: +#' \code{chisq_test()} has been deprecated. Instead use +#' \code{infer_chisq_assoc_test()}. +#' +#' @seealso \code{\link[stats]{chisq.test}} +#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric +#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. +#' @examples +#' infer_chisq_assoc_test(hsb, female, schtyp) +#' +#' infer_chisq_assoc_test(hsb, female, ses) +#' @export +#' +infer_chisq_assoc_test <- function(data, x, y) UseMethod("infer_chisq_assoc_test") + +#' @export +infer_chisq_assoc_test.default <- function(data, x, y) { + + x1 <- deparse(substitute(x)) + y1 <- deparse(substitute(y)) + + xone <- data[[x1]] + yone <- data[[y1]] + + if (!is.factor(xone)) { + stop("x must be a categorical variable", call. = FALSE) + } + + if (!is.factor(yone)) { + stop("y must be a categorical variable", call. = FALSE) + } + + # dimensions + k <- table(xone, yone) + dk <- dim(k) + ds <- prod(dk) + nr <- dk[1] + nc <- dk[2] + + + if (ds == 4) { + twoway <- matrix(table(xone, yone), nrow = 2) + df <- df_chi(twoway) + ef <- efmat(twoway) + k <- pear_chsq(twoway, df, ef) + m <- lr_chsq(twoway, df, ef) + n <- yates_chsq(twoway) + p <- mh_chsq(twoway, n$total, n$prod_totals) + } else { + twoway <- matrix(table(xone, yone), nrow = dk[1]) + ef <- efm(twoway, dk) + df <- df_chi(twoway) + k <- pear_chi(twoway, df, ef) + m <- lr_chsq2(twoway, df, ef, ds) + } + + j <- chigf(xone, yone, k$chi) + + result <- if (ds == 4) { + list( + chisquare = k$chi, + chisquare_adjusted = n$chi_y, + chisquare_lr = m$chilr, + chisquare_mantel_haenszel = p$chimh, + contingency_coefficient = j$cc, + cramers_v = j$cv, + df = df, + ds = ds, + phi_coefficient = j$phi, + pval_chisquare = k$sig, + pval_chisquare_adjusted = n$sig_y, + pval_chisquare_lr = m$sig_lr, + pval_chisquare_mantel_haenszel = p$sig_mh + ) + } else { + list( + chisquare = k$chi, + chisquare_lr = m$chilr, + contingency_coefficient = j$cc, + cramers_v = j$cv, + df = df, + ds = ds, + phi_coefficient = j$phi, + pval_chisquare = k$sig, + pval_chisquare_lr = m$sig_lr + ) + } + + class(result) <- "infer_chisq_assoc_test" + return(result) +} + +#' @export +print.infer_chisq_assoc_test <- function(x, ...) { + print_chisq_test(x) +} + +# chi square association +df_chi <- function(twoway) { + (nrow(twoway) - 1) * (ncol(twoway) - 1) +} + +efmat <- function(twoway) { + mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = 2) + mat2 <- matrix(colSums(twoway), nrow = 1) + + mat1 %*% mat2 +} + +#' @importFrom stats pchisq +pear_chsq <- function(twoway, df, ef) { + chi <- round(sum(((twoway - ef) ^ 2) / ef), 4) + sig <- round(pchisq(chi, df, lower.tail = F), 4) + + list(chi = chi, sig = sig) +} + +lr_chsq <- function(twoway, df, ef) { + chilr <- round(2 * sum(matrix(log(twoway / ef), nrow = 1) %*% matrix(twoway, nrow = 4)), 4) + sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4) + + list(chilr = chilr, sig_lr = sig_lr) +} + +lr_chsq2 <- function(twoway, df, ef, ds) { + chilr <- round(2 * sum(matrix(twoway, ncol = ds) %*% matrix(log(twoway / ef), nrow = ds)), 4) + sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4) + + list(chilr = chilr, sig_lr = sig_lr) +} + +yates_chsq <- function(twoway) { + way2 <- twoway[, c(2, 1)] + total <- sum(twoway) + prods <- prod(diag(twoway)) - prod(diag(way2)) + prod_totals <- prod(rowSums(twoway)) * prod(colSums(twoway)) + chi_y <- round((total * (abs(prods) - (total / 2)) ^ 2) / prod_totals, 4) + sig_y <- round(pchisq(chi_y, 1, lower.tail = F), 4) + + list(chi_y = chi_y, sig_y = sig_y, total = total, prod_totals = prod_totals) +} + +mh_chsq <- function(twoway, total, prod_totals) { + num <- twoway[1] - ((rowSums(twoway)[1] * colSums(twoway)[1]) / total) + den <- prod_totals / ((total ^ 3) - (total ^ 2)) + chimh <- round((num ^ 2) / den, 4) + sig_mh <- round(pchisq(chimh, 1, lower.tail = F), 4) + + list(chimh = chimh, sig_mh = sig_mh) +} + +efm <- function(twoway, dk) { + mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = dk[1]) + mat2 <- matrix(colSums(twoway), ncol = dk[2]) + + mat1 %*% mat2 +} + +pear_chi <- function(twoway, df, ef) { + chi <- round(sum(((twoway - ef) ^ 2) / ef), 4) + sig <- round(pchisq(chi, df, lower.tail = F), 4) + + list(chi = chi, sig = sig) +} + +chigf <- function(x, y, chi) { + twoway <- matrix(table(x, y), + nrow = nlevels(as.factor(x)), + ncol = nlevels(as.factor(y)) + ) + total <- sum(twoway) + phi <- round(sqrt(chi / total), 4) + cc <- round(sqrt(chi / (chi + total)), 4) + q <- min(nrow(twoway), ncol(twoway)) + cv <- round(sqrt(chi / (total * (q - 1))), 4) + + list(phi = phi, cc = cc, cv = cv) +} diff --git a/R/infer-chisq-gof-test.R b/R/ifr-chisq-gof-test.R similarity index 54% rename from R/infer-chisq-gof-test.R rename to R/ifr-chisq-gof-test.R index c7d3199..8e9d643 100644 --- a/R/infer-chisq-gof-test.R +++ b/R/ifr-chisq-gof-test.R @@ -9,17 +9,18 @@ #' \code{"infer_chisq_gof_test"}. An object of class \code{"infer_chisq_gof_test"} #' is a list containing the following components: #' +#' \item{categories}{levels of \code{x}} #' \item{chisquare}{chi square statistic} -#' \item{pvalue}{p-value} -#' \item{df}{chi square degrees of freedom} -#' \item{ssize}{number of observations} -#' \item{names}{levels of \code{x}} -#' \item{level}{number of levels of \code{x}} -#' \item{obs}{observed frequency/proportion} -#' \item{exp}{expected frequency/proportion} #' \item{deviation}{deviation of observed from frequency} -#' \item{std}{standardized residuals} +#' \item{degrees_of_freedom}{chi square degrees of freedom} +#' \item{expected_frequency}{expected frequency/proportion} +#' \item{n_levels}{number of levels of \code{x}} +#' \item{observed_frequency}{observed frequency/proportion} +#' \item{pvalue}{p-value} +#' \item{sample_size}{number of observations} +#' \item{std_residuals}{standardized residuals} #' \item{varname}{name of categorical variable} +#' #' @section Deprecated Function: #' \code{chisq_gof()} has been deprecated. Instead use #' \code{infer_chisq_gof_test()} @@ -38,49 +39,32 @@ infer_chisq_gof_test <- function(data, x, y, correct = FALSE) UseMethod("infer_c #' @export infer_chisq_gof_test.default <- function(data, x, y, correct = FALSE) { - x1 <- enquo(x) - xcheck <- - data %>% - pull(!! x1) - - xlen <- - data %>% - pull(!! x1) %>% - length() - - xone <- - data %>% - pull(!! x1) %>% - table() %>% - as.vector() + x1 <- deparse(substitute(x)) + xcheck <- data[[x1]] + xlen <- length(data[[x1]]) + xone <- as.vector(table(data[[x1]])) if (!is.factor(xcheck)) { - stop("x must be an object of class factor") + stop("x must be an object of class factor", call. = FALSE) } if (!is.numeric(y)) { - stop("y must be numeric") + stop("y must be numeric", call. = FALSE) } if (!is.logical(correct)) { - stop("correct must be either TRUE or FALSE") + stop("correct must be either TRUE or FALSE", call. = FALSE) } - - varname <- - data %>% - select(!! x1) %>% - names() - - n <- length(xone) + varname <- names(data[x1]) + n <- length(xone) + df <- n - 1 if (length(y) != n) { - stop("Length of y must be equal to the number of categories in x") + stop("Length of y must be equal to the number of categories in x", call. = FALSE) } - df <- n - 1 - if (sum(y) == 1) { y <- xlen * y } @@ -93,26 +77,49 @@ infer_chisq_gof_test.default <- function(data, x, y, correct = FALSE) { sig <- round(pchisq(k$chi, df, lower.tail = FALSE), 4) - result <- list( - chisquare = k$chi, pvalue = sig, df = df, ssize = length(xcheck), - names = levels(xcheck), level = nlevels(xcheck), obs = xone, exp = y, - deviation = format(k$dev, nsmall = 2), std = format(k$std, nsmall = 2), - varname = varname + result <- + list( + categories = levels(xcheck), + chisquare = k$chi, + deviation = format(k$dev, nsmall = 2), + degrees_of_freedom = df, + expected_frequency = y, + n_levels = nlevels(xcheck), + observed_frequency = xone, + pvalue = sig, + sample_size = length(xcheck), + std_residuals = format(k$std, nsmall = 2), + varname = varname ) class(result) <- "infer_chisq_gof_test" return(result) } -#' @export -#' @rdname infer_chisq_gof_test -#' @usage NULL -#' -chisq_gof <- function(x, y, correct = FALSE) { - .Deprecated("infer_chisq_gof_test()") -} - #' @export print.infer_chisq_gof_test <- function(x, ...) { print_chisq_gof(x) -} \ No newline at end of file +} + +chi_cort <- function(x, y) { + + diff <- x - y - 0.5 + dif <- abs(x - y) - 0.5 + dif2 <- dif ^ 2 + dev <- round((diff / y) * 100, 2) + std <- round(diff / sqrt(y), 2) + chi <- round(sum(dif2 / y), 4) + + list(dev = dev, std = std, chi = chi) +} + +chigof <- function(x, y) { + + dif <- x - y + dif2 <- dif ^ 2 + dev <- round((dif / y) * 100, 2) + std <- round(dif / sqrt(y), 2) + chi <- round(sum(dif2 / y), 4) + + list(dev = dev, std = std, chi = chi) +} diff --git a/R/ifr-cochran-q-test.R b/R/ifr-cochran-q-test.R new file mode 100644 index 0000000..9e4d19f --- /dev/null +++ b/R/ifr-cochran-q-test.R @@ -0,0 +1,120 @@ +#' @title Cochran Q Test +#' @description Test if the proportions of 3 or more dichotomous variables are +#' equal in the same population. +#' @param data a \code{data.frame} or \code{tibble} +#' @param ... columns in \code{data} +#' @return \code{infer_cochran_qtest} returns an object of class +#' \code{"infer_cochran_qtest"}. An object of class \code{"infer_cochran_qtest"} +#' is a list containing the following components: +#' +#' \item{df}{degrees of freedom} +#' \item{n}{number of observations} +#' \item{pvalue}{p value} +#' \item{q}{cochran's q statistic} +#' +#' @section Deprecated Function: +#' \code{cochran_test()} has been deprecated. Instead use +#' \code{infer_cochran_qtest()}. +#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric +#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. +#' +#' @examples +#' infer_cochran_qtest(exam, exam1, exam2, exam3) +#' @export +#' +infer_cochran_qtest <- function(data, ...) UseMethod("infer_cochran_qtest") + +#' @export +infer_cochran_qtest.default <- function(data, ...) { + + vars <- vapply(substitute(...()), deparse, NA_character_) + fdata <- data[vars] + + if (ncol(fdata) < 3) { + stop("Please specify at least 3 variables.", call. = FALSE) + } + + if (any(sapply(lapply(fdata, as.factor), nlevels) > 2)) { + stop("Please specify dichotomous/binary variables only.", call. = FALSE) + } + + k <- cochran_comp(fdata) + + result <- + list( + df = k$df, + n = k$n, + pvalue = k$pvalue, + q = k$q) + + class(result) <- "infer_cochran_qtest" + return(result) +} + +#' @export +#' +print.infer_cochran_qtest <- function(x, ...) { + print_cochran_test(x) +} + +coch_data <- function(x, ...) { + + if (is.data.frame(x)) { + data <- x %>% + lapply(as.numeric) %>% + as.data.frame() %>% + `-`(1) + } else { + data <- cbind(x, ...) %>% + apply(2, as.numeric) %>% + `-`(1) %>% + as.data.frame() + } + + return(data) +} + +cochran_comp <- function(data) { + + n <- nrow(data) + k <- ncol(data) + df <- k - 1 + + cs <- + data %>% + lapply(as.numeric) %>% + as.data.frame() %>% + subtract(1) %>% + sums() + + q <- coch(k, cs$cls_sum, cs$cl, cs$g, cs$gs_sum) + + pvalue <- 1 - pchisq(q, df) + + list( + df = df, + n = n, + pvalue = round(pvalue, 4), + q = q) + +} + +sums <- function(data) { + + cl <- colSums(data) + cls_sum <- sum(cl ^ 2) + g <- rowSums(data) + gs_sum <- sum(g ^ 2) + + list( + cl = cl, + cls_sum = cls_sum, + g = g, + gs_sum = gs_sum) + +} + +coch <- function(k, cls_sum, cl, g, gs_sum) { + ((k - 1) * ((k * cls_sum) - (sum(cl) ^ 2))) / ((k * sum(g)) - gs_sum) +} + diff --git a/R/infer-data-exam.R b/R/ifr-data-exam.R similarity index 81% rename from R/infer-data-exam.R rename to R/ifr-data-exam.R index f2a91fd..b6d9aaf 100644 --- a/R/infer-data-exam.R +++ b/R/ifr-data-exam.R @@ -9,5 +9,5 @@ #' \item{exam3}{result of exam3} #' } #' -#' @source \url{http://www.spss-tutorials.com/spss-cochran-q-test/} +#' @source \url{https://www.spss-tutorials.com/spss-cochran-q-test/} "exam" diff --git a/R/infer-data-hsb.R b/R/ifr-data-hsb.R similarity index 90% rename from R/infer-data-hsb.R rename to R/ifr-data-hsb.R index 3c6b30c..3784dab 100644 --- a/R/infer-data-hsb.R +++ b/R/ifr-data-hsb.R @@ -18,5 +18,5 @@ #' \item{socst}{scores from test of social studies} #' } #' -#' @source \url{http://www.ats.ucla.edu/stat/spss/whatstat/whatstat.htm} +#' @source \url{https://nces.ed.gov/surveys/hsb/} "hsb" diff --git a/R/infer-data-treatment.R b/R/ifr-data-treatment.R similarity index 100% rename from R/infer-data-treatment.R rename to R/ifr-data-treatment.R diff --git a/R/infer-data-treatment2.R b/R/ifr-data-treatment2.R similarity index 100% rename from R/infer-data-treatment2.R rename to R/ifr-data-treatment2.R diff --git a/R/infer-launch-shiny-app.R b/R/ifr-launch-shiny-app.R similarity index 99% rename from R/infer-launch-shiny-app.R rename to R/ifr-launch-shiny-app.R index 71a0938..aa2ae92 100644 --- a/R/infer-launch-shiny-app.R +++ b/R/ifr-launch-shiny-app.R @@ -26,4 +26,3 @@ infer_launch_shiny_app <- function() { xplorerr::app_inference() } - \ No newline at end of file diff --git a/R/infer-levene-test.R b/R/ifr-levene-test.R similarity index 60% rename from R/infer-levene-test.R rename to R/ifr-levene-test.R index c1b411a..5a2ef04 100644 --- a/R/infer-levene-test.R +++ b/R/ifr-levene-test.R @@ -1,6 +1,3 @@ -#' @importFrom stats anova model.frame formula -#' @importFrom purrr map_int -#' @importFrom rlang quo_is_null #' @title Levene's test for equality of variances #' @description \code{infer_levene_test} reports Levene's robust test statistic #' for the equality of variances and the @@ -53,39 +50,29 @@ infer_levene_test <- function(data, ...) UseMethod("infer_levene_test") #' @export #' @rdname infer_levene_test -infer_levene_test.default <- function(data, ..., group_var = NULL, - trim_mean = 0.1) { - groupvar <- enquo(group_var) +infer_levene_test.default <- function(data, ..., group_var = NULL, trim_mean = 0.1) { - varyables <- quos(...) + groupvar <- deparse(substitute(group_var)) + varyables <- vapply(substitute(...()), deparse, NA_character_) + fdata <- data[varyables] - fdata <- - data %>% - select(!!! varyables) - - if (quo_is_null(groupvar)) { - z <- as.list(fdata) - ln <- z %>% map_int(length) + if (groupvar == "NULL") { + z <- as.list(fdata) + ln <- unlist(lapply(z, length)) ly <- seq_len(length(z)) if (length(z) < 2) { stop("Please specify at least two variables.", call. = FALSE) } - out <- gvar(ln, ly) - fdata <- unlist(z) - groupvars <- - out %>% - unlist() %>% - as.factor() + out <- gvar(ln, ly) + fdata <- unlist(z) + groupvars <- as.factor(unlist(out)) + } else { - fdata <- - fdata %>% - pull(1) - groupvars <- - data %>% - pull(!! groupvar) + fdata <- fdata[[1]] + groupvars <- data[[groupvar]] if (length(fdata) != length(groupvars)) { stop("Length of variable and group_var do not match.", call. = FALSE) @@ -94,28 +81,78 @@ infer_levene_test.default <- function(data, ..., group_var = NULL, k <- lev_comp(fdata, groupvars, trim_mean) - out <- list( - bf = k$bf, p_bf = k$p_bf, lev = k$lev, p_lev = k$p_lev, - bft = k$bft, p_bft = k$p_bft, avgs = k$avgs, sds = k$sds, - avg = k$avg, sd = k$sd, n = k$n, levs = k$levs, n_df = k$n_df, - d_df = k$d_df, lens = k$lens - ) + out <- + list(avg = k$avg, + avgs = k$avgs, + bf = k$bf, + bft = k$bft, + d_df = k$d_df, + lens = k$lens, + lev = k$lev, + levs = k$levs, + n = k$n, + n_df = k$n_df, + p_bf = k$p_bf, + p_bft = k$p_bft, + p_lev = k$p_lev, + sd = k$sd, + sds = k$sds) class(out) <- "infer_levene_test" return(out) } -#' @export -#' @rdname infer_levene_test -#' @usage NULL -#' -levene_test <- function(variable, ..., group_var = NULL, - trim.mean = 0.1) { - .Deprecated("infer_levene_test()") -} #' @export #' print.infer_levene_test <- function(x, ...) { print_levene_test(x) -} \ No newline at end of file +} + +#' @importFrom stats anova +lev_metric <- function(cvar, gvar, loc, ...) { + + metric <- tapply(cvar, gvar, loc, ...) + y <- abs(cvar - metric[gvar]) + result <- anova(lm(y ~ gvar)) + + list( + fstat = result$`F value`[1], + p = result$`Pr(>F)`[1] + ) + +} + +#' @importFrom stats complete.cases median +lev_comp <- function(variable, group_var, trim.mean) { + + comp <- complete.cases(variable, group_var) + n <- length(comp) + k <- nlevels(group_var) + cvar <- variable[comp] + gvar <- group_var[comp] + lens <- tapply(cvar, gvar, length) + avgs <- tapply(cvar, gvar, mean) + sds <- tapply(cvar, gvar, sd) + bf <- lev_metric(cvar, gvar, mean) + lev <- lev_metric(cvar, gvar, median) + bft <- lev_metric(cvar, gvar, mean, trim = trim.mean) + + list( + avg = round(mean(cvar), 2), + avgs = round(avgs, 2), + bf = round(bf$fstat, 4), + bft = round(bft$fstat, 4), + d_df = (n - k), + lens = lens, + lev = round(lev$fstat, 4), + levs = levels(gvar), + n = n, + n_df = (k - 1), + p_bf = round(bf$p, 4), + p_bft = round(bft$p, 4), + p_lev = round(lev$p, 4), + sd = round(sd(cvar), 2), + sds = round(sds, 2)) + +} diff --git a/R/ifr-mcnemar-test.R b/R/ifr-mcnemar-test.R new file mode 100644 index 0000000..c8cbbf1 --- /dev/null +++ b/R/ifr-mcnemar-test.R @@ -0,0 +1,237 @@ +#' @importFrom magrittr %>% +#' @title McNemar Test +#' @description Test if the proportions of two dichotomous variables are +#' equal in the same population. +#' @param data a \code{data.frame} or \code{tibble} +#' @param x factor; column in \code{data} +#' @param y factor; column in \code{data} +#' @return \code{infer_mcnemar_test} returns an object of class \code{"infer_mcnemar_test"}. +#' An object of class \code{"infer_mcnemar_test"} is a list containing the +#' following components: +#' +#' \item{statistic}{chi square statistic} +#' \item{df}{degrees of freedom} +#' \item{pvalue}{p-value} +#' \item{exactp}{exact p-value} +#' \item{cstat}{continuity correction chi square statistic} +#' \item{cpvalue}{continuity correction p-value} +#' \item{kappa}{kappa coefficient; measure of interrater agreement} +#' \item{std_err}{asymptotic standard error} +#' \item{kappa_cil}{95\% kappa lower confidence limit} +#' \item{kappa_ciu}{95\% kappa upper confidence limit} +#' \item{cases}{cases} +#' \item{controls}{controls} +#' \item{ratio}{ratio of proportion with factor} +#' \item{odratio}{odds ratio} +#' \item{tbl}{two way table} +#' @section Deprecated Function: +#' \code{mcnermar_test()} has been deprecated. Instead use +#' \code{infer_mcnemar_test()}. +#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric +#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. +#' +#' @seealso \code{\link[stats]{mcnemar.test}} +#' @examples +#' # using variables from data +#' hb <- hsb +#' hb$himath <- ifelse(hsb$math > 60, 1, 0) +#' hb$hiread <- ifelse(hsb$read > 60, 1, 0) +#' infer_mcnemar_test(hb, himath, hiread) +#' +#' # test if the proportion of students in himath and hiread group is same +#' himath <- ifelse(hsb$math > 60, 1, 0) +#' hiread <- ifelse(hsb$read > 60, 1, 0) +#' infer_mcnemar_test(table(himath, hiread)) +#' +#' # using matrix +#' infer_mcnemar_test(matrix(c(135, 18, 21, 26), nrow = 2)) +#' @export +#' +infer_mcnemar_test <- function(data, x = NULL, y = NULL) UseMethod("infer_mcnemar_test") + +#' @export +#' +infer_mcnemar_test.default <- function(data, x = NULL, y = NULL) { + + if (is.matrix(data) | is.table(data)) { + dat <- mcdata(data) + } else { + + x1 <- deparse(substitute(x)) + y1 <- deparse(substitute(y)) + dat <- table(data[c(x1, y1)]) + + } + + k <- mccomp(dat) + + result <- + list(cases = k$cases, + controls = k$controls, + cpvalue = k$cpvalue, + cstat = k$cstat, + df = k$df, + exactp = k$exactp, + kappa = k$kappa, + kappa_cil = k$kappa_cil, + kappa_ciu = k$kappa_ciu, + odratio = k$odratio, + pvalue = k$pvalue, + ratio = k$ratio, + statistic = k$statistic, + std_err = k$std_err, + tbl = dat) + + class(result) <- "infer_mcnemar_test" + return(result) +} + +#' @export +#' +print.infer_mcnemar_test <- function(x, ...) { + print_mcnemar_test(x) +} + +mcdata <- function(x, y) { + if (!is.matrix(x)) { + stop("x must be either a table or a matrix", call. = FALSE) + } + + if (is.matrix(x)) { + if (length(x) != 4) { + stop("x must be a 2 x 2 matrix", call. = FALSE) + } + } + + dat <- x + return(dat) +} + + +mctestp <- function(dat) { + retrieve <- matrix(c(1, 2, 2, 1), nrow = 2) + dat[retrieve] +} + +tetat <- function(p) { + ((p[1] - p[2]) ^ 2) / sum(p) +} + +mcpval <- function(test_stat, df) { + 1 - pchisq(test_stat, df) +} + +mcpex <- function(dat) { + 2 * min(pbinom(dat[2], sum(dat[2], dat[3]), 0.5), pbinom(dat[3], sum(dat[2], dat[3]), 0.5)) +} + +mcstat <- function(p) { + ((abs(p[1] - p[2]) - 1) ^ 2) / sum(p) +} + +mccpval <- function(cstat, df) { + 1 - pchisq(cstat, df) +} + +mckappa <- function(dat) { + + agreement <- sum(diag(dat)) / sum(dat) + expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2) + (agreement - expected) / (1 - expected) + +} + +mcserr <- function(dat, kappa) { + expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2) + serr(dat, kappa, expected) +} + +#' @importFrom stats qnorm +mcconf <- function(std_err, kappa) { + + alpha <- 0.05 + interval <- stats::qnorm(1 - (alpha / 2)) * std_err + ci_lower <- kappa - interval + ci_upper <- kappa + interval + + list(ci_lower = ci_lower, ci_upper = ci_upper) + +} + +prop_fact <- function(dat, p) { + + dat_per <- dat / sum(dat) + row_sum <- rowSums(dat_per) + col_sum <- colSums(dat_per) + controls <- 1 - col_sum[2] + cases <- 1 - row_sum[2] + ratio <- cases / controls + odds_ratio <- p[1] / p[2] + + list(cases = cases, + controls = controls, + odds_ratio = odds_ratio, + ratio = ratio + + ) + +} + +serr <- function(dat, kappa, expected) { + + dat_per <- dat / sum(dat) + row_sum <- rowSums(dat_per) + row_sum[3] <- sum(row_sum) + col_sum <- colSums(dat_per) + dat_per <- rbind(dat_per, col_sum) + dat_per <- cbind(dat_per, row_sum) + d1 <- dim(dat_per) + + dat_per[d1[1], d1[2]] <- 1.0 + diagonal <- diag(dat_per) + + a <- diagonal[1] * (1 - (row_sum[1] + col_sum[1]) * (1 - kappa)) ^ 2 + + diagonal[2] * (1 - (row_sum[2] + col_sum[2]) * (1 - kappa)) ^ 2 + + x1 <- dat_per[lower.tri(dat_per)][1] + x2 <- dat_per[upper.tri(dat_per)][1] + + b <- ((1 - kappa) ^ 2) * ((x1 * (row_sum[1] + col_sum[2]) ^ 2) + + (x2 * (row_sum[2] + col_sum[1]) ^ 2)) + + c <- ((kappa) - expected * (1 - kappa)) ^ 2 + variance <- ((a + b - c) / ((1 - expected) ^ 2)) / sum(dat) + + sqrt(variance) +} + +mccomp <- function(dat) { + + p <- mctestp(dat) + test_stat <- tetat(p) + df <- nrow(dat) - 1 + pvalue <- mcpval(test_stat, df) + exactp <- mcpex(dat) + cstat <- mcstat(p) + cpvalue <- mccpval(cstat, df) + kappa <- mckappa(dat) + std_err <- mcserr(dat, kappa) + clu <- mcconf(std_err, kappa) + k <- prop_fact(dat, p) + + list(cases = round(k$cases, 4), + controls = round(k$controls, 4), + cpvalue = cpvalue, + cstat = cstat, + df = df, + exactp = round(exactp, 4), + kappa = round(kappa, 4), + kappa_cil = round(clu$ci_lower, 4), + kappa_ciu = round(clu$ci_upper, 4), + odratio = round(k$odds_ratio, 4), + pvalue = round(pvalue, 4), + ratio = round(k$ratio, 4), + statistic = round(test_stat, 4), + std_err = round(std_err, 4)) + +} diff --git a/R/infer-os-prop-test.R b/R/ifr-os-prop-test.R similarity index 58% rename from R/infer-os-prop-test.R rename to R/ifr-os-prop-test.R index 9fa123c..76225f3 100644 --- a/R/infer-os-prop-test.R +++ b/R/ifr-os-prop-test.R @@ -1,4 +1,3 @@ -#' @importFrom stats pnorm #' @title One Sample Test of Proportion #' @description \code{infer_os_prop_test} compares proportion in one group to a #' specified population proportion. @@ -46,60 +45,85 @@ infer_os_prop_test <- function(data, variable = NULL, prob = 0.5, phat = 0.5, infer_os_prop_test.default <- function(data, variable = NULL, prob = 0.5, phat = 0.5, alternative = c("both", "less", "greater", "all")) { if (is.numeric(data)) { - method <- match.arg(alternative) - k <- prop_comp( - data, prob = prob, phat = phat, - alternative = method - ) - } else { - varyables <- enquo(variable) - - fdata <- - data %>% - pull(!! varyables) - - n1 <- length(fdata) - - n2 <- - fdata %>% - table() %>% - `[[`(2) - - phat <- round(n2 / n1, 4) - - prob <- prob method <- match.arg(alternative) + k <- prop_comp(data, prob = prob, phat = phat, alternative = method) + + } else { - k <- prop_comp( - n1, prob = prob, phat = phat, - alternative = method - ) + varyables <- deparse(substitute(variable)) + fdata <- data[[varyables]] + n1 <- length(fdata) + n2 <- table(fdata)[[2]] + phat <- round(n2 / n1, 4) + prob <- prob + method <- match.arg(alternative) + k <- prop_comp(n1, prob = prob, phat = phat, alternative = method) } - result <- list( - n = k$n, phat = k$phat, p = k$p, z = k$z, sig = k$sig, - alt = k$alt, obs = k$obs, exp = k$exp, - deviation = k$deviation, - std = k$std - ) + result <- + list(alt = k$alt, + deviation = k$deviation, + exp = k$exp, + n = k$n, + obs = k$obs, + p = k$p, + phat = k$phat, + sig = k$sig, + std = k$std, + z = k$z) class(result) <- "infer_os_prop_test" return(result) } - -#' @export -#' @rdname infer_os_prop_test -#' @usage NULL -#' -prop_test <- function(n, prob = 0.5, - alternative = c("both", "less", "greater", "all"), phat, ...) { - .Deprecated("infer_os_prop_test()") -} - #' @export #' print.infer_os_prop_test <- function(x, ...) { print_prop_test(x) -} \ No newline at end of file +} + +#' @importFrom stats pnorm +prop_comp <- function(n, prob, alternative, phat) { + + n <- n + phat <- phat + p <- prob + q <- 1 - p + obs <- c(n * (1 - phat), n * phat) + exp <- n * c(q, p) + dif <- obs - exp + dev <- round((dif / exp) * 100, 2) + std <- round(dif / sqrt(exp), 2) + num <- phat - prob + den <- sqrt((p * q) / n) + z <- round(num / den, 4) + lt <- round(pnorm(z), 4) + ut <- round(1 - pnorm(z), 4) + tt <- round((1 - pnorm(abs(z))) * 2, 4) + alt <- alternative + + if (alt == "all") { + sig <- c("two-both" = tt, "less" = lt, "greater" = ut) + } else if (alt == "greater") { + sig <- ut + } else if (alt == "less") { + sig <- lt + } else { + sig <- tt + } + + out <- + list(alt = alt, + deviation = format(dev, nsmall = 2), + exp = exp, + n = n, + obs = obs, + p = prob, + phat = phat, + sig = sig, + std = format(std, nsmall = 2), + z = z) + + return(out) +} diff --git a/R/infer-os-t-test.R b/R/ifr-os-t-test.R similarity index 55% rename from R/infer-os-t-test.R rename to R/ifr-os-t-test.R index dbc8653..44aa943 100644 --- a/R/infer-os-t-test.R +++ b/R/ifr-os-t-test.R @@ -48,6 +48,7 @@ #' #' # all tails #' infer_os_t_test(hsb, write, mu = 50, alternative = 'all') +#' #' @export #' infer_os_t_test <- function(data, x, mu = 0, alpha = 0.05, @@ -57,55 +58,104 @@ infer_os_t_test <- function(data, x, mu = 0, alpha = 0.05, #' infer_os_t_test.default <- function(data, x, mu = 0, alpha = 0.05, alternative = c("both", "less", "greater", "all"), ...) { - x1 <- enquo(x) - xone <- - data %>% - pull(!! x1) + x1 <- deparse(substitute(x)) + xone <- data[[x1]] if (!is.numeric(xone)) { - stop("x must be numeric") + stop("x must be numeric", call. = FALSE) } if (!is.numeric(mu)) { - stop("mu must be numeric") + stop("mu must be numeric", call. = FALSE) } if (!is.numeric(alpha)) { - stop("alpha must be numeric") + stop("alpha must be numeric", call. = FALSE) } - type <- match.arg(alternative) - - var_name <- - data %>% - select(!! x1) %>% - names() - - k <- ttest_comp(xone, mu, alpha, type) + type <- match.arg(alternative) + var_name <- names(data[x1]) + k <- ttest_comp(xone, mu, alpha, type) - result <- list( - mu = k$mu, n = k$n, df = k$df, Mean = k$Mean, - stddev = k$stddev, std_err = k$std_err, - test_stat = k$test_stat, confint = k$confint, - mean_diff = k$mean_diff, mean_diff_l = k$mean_diff_l, - mean_diff_u = k$mean_diff_u, p_l = k$p_l, p_u = k$p_u, - p = k$p, conf = k$conf, type = type, var_name = var_name - ) + result <- + list(conf = k$conf, + confint = k$confint, + df = k$df, + Mean = k$Mean, + mean_diff = k$mean_diff, + mean_diff_l = k$mean_diff_l, + mean_diff_u = k$mean_diff_u, + mu = k$mu, + n = k$n, + p = k$p, + p_l = k$p_l, + p_u = k$p_u, + stddev = k$stddev, + std_err = k$std_err, + test_stat = k$test_stat, + type = type, + var_name = var_name) class(result) <- "infer_os_t_test" return(result) } -#' @export -#' @rdname infer_os_t_test -#' @usage NULL -#' -ttest <- function(x, mu = 0, alpha = 0.05, - type = c("both", "less", "greater", "all"), ...) { - .Deprecated("infer_os_t_test()") -} - #' @export #' print.infer_os_t_test <- function(x, ...) { print_ttest(x) -} \ No newline at end of file +} + +#' @importFrom stats qt pt +ttest_comp <- function(x, mu, alpha, type) { + + n <- length(x) + a <- (alpha / 2) + df <- n - 1 + conf <- 1 - alpha + Mean <- round(mean(x), 4) + stddev <- round(sd(x), 4) + std_err <- round(stddev / sqrt(n), 4) + test_stat <- round((Mean - mu) / std_err, 3) + + if (type == "less") { + cint <- c(-Inf, test_stat + qt(1 - alpha, df)) + } else if (type == "greater") { + cint <- c(test_stat - qt(1 - alpha, df), Inf) + } else { + cint <- qt(1 - a, df) + cint <- test_stat + c(-cint, cint) + } + + confint <- round(mu + cint * std_err, 4) + mean_diff <- round((Mean - mu), 4) + mean_diff_l <- confint[1] - mu + mean_diff_u <- confint[2] - mu + p_l <- pt(test_stat, df) + p_u <- pt(test_stat, df, lower.tail = FALSE) + + if (p_l < 0.5) { + p <- p_l * 2 + } else { + p <- p_u * 2 + } + + + out <- + list(conf = conf, + confint = confint, + df = df, + Mean = Mean, + mean_diff = mean_diff, + mean_diff_l = mean_diff_l, + mean_diff_u = mean_diff_u, + mu = mu, + n = n, + p = p, + p_l = p_l, + p_u = p_u, + stddev = stddev, + std_err = std_err, + test_stat = test_stat) + + return(out) +} diff --git a/R/infer-os-var-test.R b/R/ifr-os-var-test.R similarity index 63% rename from R/infer-os-var-test.R rename to R/ifr-os-var-test.R index 9fb2f67..a171b59 100644 --- a/R/infer-os-var-test.R +++ b/R/ifr-os-var-test.R @@ -1,4 +1,3 @@ -#' @importFrom stats qchisq #' @title One Sample Variance Comparison Test #' @description \code{infer_os_var_test} performs tests on the equality of standard #' deviations (variances).It tests that the standard deviation of a sample is @@ -56,56 +55,91 @@ infer_os_var_test <- function(data, x, sd, confint = 0.95, #' infer_os_var_test.default <- function(data, x, sd, confint = 0.95, alternative = c("both", "less", "greater", "all"), ...) { - x1 <- enquo(x) - xone <- - data %>% - pull(!! x1) + x1 <- deparse(substitute(x)) + xone <- data[[x1]] if (!is.numeric(xone)) { - stop("x must be numeric") + stop("x must be numeric", call. = FALSE) } if (!is.numeric(sd)) { - stop("sd must be numeric") + stop("sd must be numeric", call. = FALSE) } if (!is.numeric(confint)) { - stop("confint must be numeric") + stop("confint must be numeric", call. = FALSE) } - type <- match.arg(alternative) - - varname <- - data %>% - select(!! x1) %>% - names() - - k <- osvar_comp(xone, sd, confint) + type <- match.arg(alternative) + varname <- names(data[x1]) + k <- osvar_comp(xone, sd, confint) - result <- list( - n = k$n, sd = k$sd, sigma = round(k$sigma, 4), se = round(k$se, 4), - chi = round(k$chi, 4), - df = k$df, p_lower = k$p_lower, p_upper = k$p_upper, p_two = k$p_two, - xbar = round(k$xbar, 4), c_lwr = k$c_lwr, c_upr = k$c_upr, var_name = varname, - conf = k$conf, type = type - ) + result <- + list(chi = round(k$chi, 4), + c_lwr = k$c_lwr, + conf = k$conf, + c_upr = k$c_upr, + df = k$df, + n = k$n, + p_lower = k$p_lower, + p_two = k$p_two, + p_upper = k$p_upper, + sd = k$sd, + se = round(k$se, 4), + sigma = round(k$sigma, 4), + type = type, + var_name = varname, + xbar = round(k$xbar, 4)) class(result) <- "infer_os_var_test" return(result) } -#' @export -#' @rdname infer_os_var_test -#' @usage NULL -#' -os_vartest <- function(x, sd, confint = 0.95, - alternative = c("both", "less", "greater", "all"), ...) { - .Deprecated("infer_os_var_test()") -} - #' @export #' print.infer_os_var_test <- function(x, ...) { print_os_vartest(x) -} \ No newline at end of file +} + +#' @importFrom stats qchisq +osvar_comp <- function(x, sd, confint) { + + n <- length(x) + df <- n - 1 + xbar <- mean(x) + sigma <- sd(x) + se <- sigma / sqrt(n) + chi <- df * ((sigma / sd) ^ 2) + + p_lower <- pchisq(chi, df) + p_upper <- pchisq(chi, df, lower.tail = F) + + if (p_lower < 0.5) { + p_two <- pchisq(chi, df) * 2 + } else { + p_two <- pchisq(chi, df, lower.tail = F) * 2 + } + + conf <- confint + a <- (1 - conf) / 2 + al <- 1 - a + tv <- df * sigma + c_lwr <- round(tv / qchisq(al, df), 4) + c_upr <- round(tv / qchisq(a, df), 4) + + list(chi = chi, + c_lwr = c_lwr, + conf = conf, + c_upr = c_upr, + df = df, + n = n, + p_lower = p_lower, + p_two = p_two, + p_upper = p_upper, + sd = sd, + se = se, + sigma = sigma, + xbar = xbar) + +} diff --git a/R/infer-output.R b/R/ifr-output.R similarity index 74% rename from R/infer-output.R rename to R/ifr-output.R index 851c8e8..6aab7cb 100644 --- a/R/infer-output.R +++ b/R/ifr-output.R @@ -1,37 +1,35 @@ print_owanova <- function(data) { # width - w1 <- nchar("Between Groups") - w2 <- max(nchar("Squares"), nchar(data$between), nchar(data$within), nchar(data$total)) - w3 <- max(nchar("DF"), nchar(data$df_btw), nchar(data$df_btw), nchar(data$df_within), nchar(data$df_total)) - w4 <- max(nchar("Mean Square"), nchar(data$ms_btw), nchar(data$ms_within)) - w5 <- max(nchar("F"), nchar(data$f)) - w6 <- max(nchar("Sig."), nchar(format(data$sig, nsmall = 4))) - w <- sum(w1, w2, w3, w4, w5, w6, 21) - w7 <- nchar(data$sigma) - - dc <- as.vector(data$tab[, 1]) - - w8 <- max(nchar("Category"), max(nchar(dc))) - w9 <- max(nchar("N"), max(nchar(data$tab[[2]]))) - w10 <- max(nchar("Mean"), max(nchar(format(data$tab[[3]], nsmall = 3)))) - w11 <- max(nchar("Std. Dev."), max(nchar(format(data$tab[[4]], nsmall = 3)))) - wr <- sum(w8, w9, w10, w11, 13) - - - p <- format(data$p, nsmall = 4) - q <- nrow(data$tab) - s <- length(data$tab) - + w1 <- nchar("Between Groups") + w2 <- max(nchar("Squares"), nchar(data$ss_between), nchar(data$ss_within), nchar(data$ss_total)) + w3 <- max(nchar("DF"), nchar(data$df_btw), nchar(data$df_btw), nchar(data$df_within), nchar(data$df_total)) + w4 <- max(nchar("Mean Square"), nchar(data$ms_btw), nchar(data$ms_within)) + w5 <- max(nchar("F"), nchar(data$fstat)) + w6 <- max(nchar("Sig."), nchar(format(data$pval, nsmall = 4))) + w <- sum(w1, w2, w3, w4, w5, w6, 21) + w7 <- nchar(data$rmse) + + dc <- as.vector(data$group_stats[, 1]) + + w8 <- max(nchar("Category"), max(nchar(dc))) + w9 <- max(nchar("N"), max(nchar(data$group_stats[[2]]))) + w10 <- max(nchar("Mean"), max(nchar(format(data$group_stats[[3]], nsmall = 3)))) + w11 <- max(nchar("Std. Dev."), max(nchar(format(data$group_stats[[4]], nsmall = 3)))) + wr <- sum(w8, w9, w10, w11, 13) + + p <- format(data$pval, nsmall = 4) + q <- nrow(data$group_stats) + s <- length(data$group_stats) cat(fg("ANOVA", w), "\n") cat(rep("-", w), sep = "", "\n") cat(fg("", w1), fs(), fg("Sum of", w2), fs(), fg("", w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n") cat(fg("", w1), fs(), fg("Squares", w2), fs(), fg("DF", w3), fs(), fg("Mean Square", w4), fs(), fg("F", w5), fs(), fg("Sig.", w6), "\n") cat(rep("-", w), sep = "", "\n") - cat(fl("Between Groups", w1), fs(), fg(data$between, w2), fs(), fg(data$df_btw, w3), fs(), fg(data$ms_btw, w4), fs(), fg(data$f, w5), fs(), fg(p, w6), "\n") - cat(fl("Within Groups", w1), fs(), fg(data$within, w2), fs(), fg(data$df_within, w3), fs(), fg(data$ms_within, w4), fs(), fg("", w5), fs(), fg("", w6), "\n") - cat(fl("Total", w1), fs(), fg(data$total, w2), fs(), fg(data$df_total, w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n") + cat(fl("Between Groups", w1), fs(), fg(data$ss_between, w2), fs(), fg(data$df_btw, w3), fs(), fg(data$ms_btw, w4), fs(), fg(data$fstat, w5), fs(), fg(data$pval, w6), "\n") + cat(fl("Within Groups", w1), fs(), fg(data$ss_within, w2), fs(), fg(data$df_within, w3), fs(), fg(data$ms_within, w4), fs(), fg("", w5), fs(), fg("", w6), "\n") + cat(fl("Total", w1), fs(), fg(data$ss_total, w2), fs(), fg(data$df_total, w3), fs(), fg("", w4), fs(), fg("", w5), fs(), fg("", w6), "\n") cat(rep("-", w), sep = "", "\n\n") cat(fg("Report", wr), "\n") @@ -40,14 +38,14 @@ print_owanova <- function(data) { cat(rep("-", wr), sep = "", "\n") for (i in seq_len(q)) { cat( - fc(data$tab[[i, 1]], w8), fs(), fg(data$tab[[i, 2]], w9), fs(), fk(format(round(data$tab[[i, 3]], 3), nsmall = 3), w10), - fs(), fk(format(round(data$tab[[i, 4]], 3), nsmall = 3), w11), "\n" + fc(data$group_stats[[i, 1]], w8), fs(), fg(data$group_stats[[i, 2]], w9), fs(), fk(format(round(data$group_stats[[i, 3]], 3), nsmall = 3), w10), + fs(), fk(format(round(data$group_stats[[i, 4]], 3), nsmall = 3), w11), "\n" ) } cat(rep("-", wr), sep = "", "\n\n") cat(fl("Number of obs", 13), "=", fl(data$obs, w7), fs(), fl("R-squared", 13), "=", data$r2, "\n") - cat(fl("Root MSE", 13), "=", data$sigma, fs(), fl("Adj R-squared", 13), "=", data$ar2, "\n\n") + cat(fl("Root MSE", 13), "=", data$rmse, fs(), fl("Adj R-squared", 13), "=", data$adjusted_r2, "\n\n") } @@ -58,7 +56,7 @@ print_binom <- function(data) { w2 <- max(nchar("N"), nchar(data$n)) w3 <- max(nchar("Obs. Prop"), nchar(data$obs_p)) w4 <- max(nchar("Exp. Prop"), nchar(data$exp_p)) - w <- sum(w1, w2, w3, w4, 13) + w <- sum(w1, w2, w3, w4, 13) k0 <- data$n - data$k p0 <- 1 - data$obs_p @@ -88,16 +86,13 @@ print_binom <- function(data) { cat(" ", rep("-", w), sep = "", "\n") # test summary widths - w6 <- nchar("Lower") - w7 <- nchar(paste0("Pr(k <= ", data$k, " or k >= ", data$k, ")")) - w8 <- nchar(paste0("Pr(k <= ", data$k, " or k >= ", data$k, ")")) - w9 <- 8 + w6 <- nchar("Lower") + w7 <- nchar(paste0("Pr(k <= ", data$k, " or k >= ", data$k, ")")) + w8 <- nchar(paste0("Pr(k <= ", data$k, " or k >= ", data$k, ")")) + w9 <- 8 w10 <- sum(w6, w7, w9, 9) w11 <- sum(w6, w8, w9, 9) - - - if (data$k < data$exp_k) { cat("\n\n", format("Test Summary", width = w11, justify = "centre"), "\n") cat(" ", rep("-", w11), sep = "", "\n") @@ -108,19 +103,13 @@ print_binom <- function(data) { cat(" ", rep("-", w11), sep = "", "\n") cat( " ", format("Lower", width = w6, justify = "left"), fs(), format(paste0("Pr(k <= ", data$k, ")"), width = w8, justify = "centre"), fs(), - format(as.character(data$lower), width = w9, justify = "centre"), "\n" + format(as.character(data$pval_lower), width = w9, justify = "centre"), "\n" ) cat( " ", format("Upper", width = w6, justify = "left"), fs(), format(paste0("Pr(k >= ", data$k, ")"), width = w8, justify = "centre"), fs(), - format(as.character(data$upper), width = w9, justify = "centre"), "\n" - ) - # if (data$ik < 0) { - # cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(), - # format(data$two_tail, width = w9, justify = 'centre'),'\n') - # } else { - # cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$k, ' or k >= ', data$ik, ')'), width = w8, justify = 'left'), fs(), - # format(data$two_tail, width = w9, justify = 'centre'),'\n') - # } + format(as.character(data$pval_upper), width = w9, justify = "centre"), "\n" + ) + cat(" ", rep("-", w11), sep = "", "\n") } else { cat("\n\n", format("Test Summary", width = w10, justify = "centre"), "\n") @@ -132,72 +121,72 @@ print_binom <- function(data) { cat(" ", rep("-", w10), sep = "", "\n") cat( " ", format("Lower", width = w6, justify = "left"), fs(), format(paste0("Pr(k <= ", data$k, ")"), width = w7, justify = "centre"), fs(), - format(as.character(data$lower), width = w9, justify = "centre"), "\n" + format(as.character(data$pval_lower), width = w9, justify = "centre"), "\n" ) cat( " ", format("Upper", width = w6, justify = "left"), fs(), format(paste0("Pr(k >= ", data$k, ")"), width = w7, justify = "centre"), fs(), - format(as.character(data$upper), width = w9, justify = "centre"), "\n" - ) - # if (data$ik < 0) { - # cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k >= ', data$k, ')'), width = w7, justify = 'left'), fs(), - # format(data$two_tail, width = w9, justify = 'centre'),'\n') - # } else { - # cat(" ", format('Two', width = w6, justify = 'left'), fs(), format(paste0('Pr(k <= ', data$ik, ' or k >= ', data$k, ')'), width = w7, justify = 'left'), fs(), - # format(data$two_tail, width = w9, justify = 'centre'),'\n') - # } + format(as.character(data$pval_upper), width = w9, justify = "centre"), "\n" + ) + cat(" ", rep("-", w10), sep = "", "\n") } } print_ttest <- function(data) { - null_l <- paste0("Ho: mean(", data$var_name, ") >=", as.character(data$mu)) - alt_l <- paste0(" Ha: mean(", data$var_name, ") <", as.character(data$mu)) - null_u <- paste0("Ho: mean(", data$var_name, ") <=", as.character(data$mu)) - alt_u <- paste0("Ha: mean(", data$var_name, ") >", as.character(data$mu)) - null_t <- paste0("Ho: mean(", data$var_name, ") ~=", as.character(data$mu)) - alt_t <- paste0("Ha: mean(", data$var_name, ") !=", as.character(data$mu)) - all_l <- paste("Ha: mean <", as.character(data$mu)) - all_u <- paste("Ha: mean >", as.character(data$mu)) - all_t <- paste("Ha: mean ~=", as.character(data$mu)) - char_p_l <- format(data$p_l, digits = 0, nsmall = 4) - char_p_u <- format(data$p_u, digits = 0, nsmall = 4) - char_p <- format(data$p, digits = 0, nsmall = 4) - all_p_l <- paste("P < t =", char_p_l) - all_p_t <- paste("P > |t| =", char_p) - all_p_u <- paste("P > t =", char_p_u) + + null_l <- paste0("Ho: mean(", data$var_name, ") >=", as.character(data$mu)) + null_u <- paste0("Ho: mean(", data$var_name, ") <=", as.character(data$mu)) + null_t <- paste0("Ho: mean(", data$var_name, ") ~=", as.character(data$mu)) + + alt_l <- paste0(" Ha: mean(", data$var_name, ") <", as.character(data$mu)) + alt_u <- paste0("Ha: mean(", data$var_name, ") >", as.character(data$mu)) + alt_t <- paste0("Ha: mean(", data$var_name, ") !=", as.character(data$mu)) + all_l <- paste("Ha: mean <", as.character(data$mu)) + all_u <- paste("Ha: mean >", as.character(data$mu)) + all_t <- paste("Ha: mean ~=", as.character(data$mu)) + + char_p_l <- format(round(data$p_l, 5), nsmall = 5, scientific = FALSE) + char_p_u <- format(round(data$p_u, 5), nsmall = 5, scientific = FALSE) + char_p <- format(round(data$p, 5), nsmall = 5, scientific = FALSE) + + all_p_l <- paste("P < t =", char_p_l) + all_p_t <- paste("P > |t| =", char_p) + all_p_u <- paste("P > t =", char_p_u) all_tval <- paste0(" t = ", as.character(data$test_stat)) - # formatting output # compute the characters of each output and decide the overall width - var_width <- max(nchar("Variable"), nchar(data$var_name)) - obs_width <- max(nchar("Obs"), nchar(data$n)) - mean_width <- max(nchar("Mean"), nchar(data$Mean)) - se_width <- max(nchar("Std. Err."), nchar(data$std_err)) - sd_width <- max(nchar("Std. Dev."), nchar(data$stddev)) + var_width <- max(nchar("Variable"), nchar(data$var_name)) + obs_width <- max(nchar("Obs"), nchar(data$n)) + mean_width <- max(nchar("Mean"), nchar(data$Mean)) + se_width <- max(nchar("Std. Err."), nchar(data$std_err)) + sd_width <- max(nchar("Std. Dev."), nchar(data$stddev)) conf_length <- nchar(data$confint[1]) + nchar(data$confint[2]) - conf_str <- paste0("[", data$conf * 100, "% Conf. Interval]") + conf_str <- paste0("[", data$conf * 100, "% Conf. Interval]") confint_length <- nchar(conf_str) + if (conf_length > confint_length) { conf_width <- round(conf_length / 2) } else { conf_width <- round(confint_length / 2) } - t_width <- nchar(data$test_stat) - df_width <- max(nchar("DF"), nchar(data$df)) - p_width <- max(nchar("2 Tailed"), nchar(round(data$p, 5))) - md_width <- max(nchar("Difference"), nchar(data$mean_diff)) - md_length <- nchar(data$mean_diff_l) + nchar(data$mean_diff_u) + + t_width <- nchar(data$test_stat) + df_width <- max(nchar("DF"), nchar(data$df)) + p_width <- max(nchar("2 Tailed"), nchar(round(data$p, 5))) + md_width <- max(nchar("Difference"), nchar(data$mean_diff)) + md_length <- nchar(data$mean_diff_l) + nchar(data$mean_diff_u) + if (md_length > confint_length) { md_conf_width <- floor(md_length / 2) } else { md_conf_width <- floor(confint_length / 2) } - width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, ceiling(conf_width * 2), 26) - width_2 <- sum(var_width, t_width, df_width, p_width, md_width, ceiling(md_conf_width * 2), 26) - all_width <- round(width_1 / 3) + width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, ceiling(conf_width * 2), 26) + width_2 <- sum(var_width, t_width, df_width, p_width, md_width, ceiling(md_conf_width * 2), 26) + all_width <- round(width_1 / 3) cat( format("One-Sample Statistics", width = width_1, justify = "centre"), @@ -240,7 +229,7 @@ print_ttest <- function(data) { "\n", formatter_t(data$var_name, var_width), formats_t(), formatter_t(round(data$test_stat, 3), t_width), formats_t(), formatter_t(data$df, df_width), formats_t(), - formatter_t(round(data$p_l, 5), p_width), + formatter_t(format(round(data$p_l, 5), scientific = FALSE), p_width), formats_t(), formatter_t(data$mean_diff, md_width), formats_t(), format_cil(round(data$mean_diff_l, 4), md_conf_width), format_ciu(round(data$mean_diff_u, 4), md_conf_width), "\n" @@ -261,7 +250,7 @@ print_ttest <- function(data) { "\n", formatter_t(data$var_name, var_width), formats_t(), formatter_t(round(data$test_stat, 3), t_width), formats_t(), formatter_t(data$df, df_width), formats_t(), - formatter_t(round(data$p_l, 5), p_width), + formatter_t(format(round(data$p_u, 5), scientific = FALSE), p_width), formats_t(), formatter_t(data$mean_diff, md_width), formats_t(), format_cil(round(data$mean_diff_l, 4), md_conf_width), format_ciu(round(data$mean_diff_u, 4), md_conf_width), "\n" @@ -282,7 +271,7 @@ print_ttest <- function(data) { "\n", formatter_t(data$var_name, var_width), formats_t(), formatter_t(round(data$test_stat, 3), t_width), formats_t(), formatter_t(data$df, df_width), formats_t(), - formatter_t(round(data$p_l, 5), p_width), + formatter_t(format(round(data$p, 5), scientific = FALSE), p_width), formats_t(), formatter_t(data$mean_diff, md_width), formats_t(), format_cil(round(data$mean_diff_l, 4), md_conf_width), format_ciu(round(data$mean_diff_u, 4), md_conf_width), "\n" @@ -298,50 +287,52 @@ print_ttest <- function(data) { print_paired_ttest <- function(data) { - char_p_u <- format(data$p_upper, digits = 0, nsmall = 3) - char_p_l <- format(data$p_lower, digits = 0, nsmall = 3) - char_p <- format(data$p_two_tail, digits = 0, nsmall = 3) + + char_p_u <- format(round(data$p_upper, 3), nsmall = 3, scientific = FALSE) + char_p_l <- format(round(data$p_lower, 3), nsmall = 3, scientific = FALSE) + char_p <- format(round(data$p_two_tail, 3), nsmall = 3, scientific = FALSE) # hypothesis heading - hyp_null <- paste0("Ho: mean(", data$var_names[1], " - ", data$var_names[2], ") = ", "0") - hyp_lt <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") < ", "0") - hyp_ut <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") > ", "0") - hyp_2t <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") ~= ", "0") - conf <- data$confint * 100 - conf_char <- paste0("[", conf, "% Conf. Interval]") + hyp_null <- paste0("Ho: mean(", data$var_names[1], " - ", data$var_names[2], ") = ", "0") + hyp_lt <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") < ", "0") + hyp_ut <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") > ", "0") + hyp_2t <- paste0("Ha: mean(", data$var_names[1], " - ", data$var_names[2], ") ~= ", "0") + conf <- data$confint * 100 + conf_char <- paste0("[", conf, "% Conf. Interval]") # all tests combines - all_null <- paste0("Ho: mean(", data$var_names[1], " - ", data$var_names[2], ") = mean(diff) = ", "0") - all_p_l <- paste("P < t =", char_p_l) - all_p_t <- paste("P > |t| =", char_p) - all_p_u <- paste("P > t =", char_p_u) - all_tval <- paste0(" t = ", as.character(data$tstat)) + all_null <- paste0("Ho: mean(", data$var_names[1], " - ", data$var_names[2], ") = mean(diff) = ", "0") + all_p_l <- paste("P < t =", char_p_l) + all_p_t <- paste("P > |t| =", char_p) + all_p_u <- paste("P > t =", char_p_u) + all_tval <- paste0(" t = ", as.character(data$tstat)) # formatting output - var_width1 <- max(nchar("Variables"), nchar(data$var_names[1]), nchar(data$var_names[2]), nchar("diff")) - var_width <- max(nchar("Variables"), nchar(data$xy)) - obs_width <- max(nchar("Obs"), nchar(data$Obs)) - mean_width <- max(nchar("Mean"), nchar(format(max(data$b[["mean"]]), nsmall = 2))) - se_width <- max(nchar("Std. Err."), nchar(format(max(data$b[["se"]]), nsmall = 2))) - sd_width <- max(nchar("Std. Dev."), nchar(format(max(data$b[["sd"]]), nsmall = 2))) - corr_width <- nchar("Correlation") + var_width1 <- max(nchar("Variables"), nchar(data$var_names[1]), nchar(data$var_names[2]), nchar("diff")) + var_width <- max(nchar("Variables"), nchar(data$xy)) + obs_width <- max(nchar("Obs"), nchar(data$Obs)) + mean_width <- max(nchar("Mean"), nchar(format(max(data$b[["mean"]]), nsmall = 2))) + se_width <- max(nchar("Std. Err."), nchar(format(max(data$b[["se"]]), nsmall = 2))) + sd_width <- max(nchar("Std. Dev."), nchar(format(max(data$b[["sd"]]), nsmall = 2))) + corr_width <- nchar("Correlation") corsig_width <- max(nchar("Sig."), nchar(data$corsig)) - t_width <- nchar(data$tstat) - df_width <- max(nchar("DF"), nchar(data$df)) - p_width <- max(nchar("Sig."), nchar(format(data$corsig, nsmall = 3))) - conf_length <- max(sum(nchar(data$conf_int1)), sum(nchar(data$conf_int2))) + t_width <- nchar(data$tstat) + df_width <- max(nchar("DF"), nchar(data$df)) + p_width <- max(nchar("Sig."), nchar(format(data$corsig, nsmall = 3))) + conf_length <- max(sum(nchar(data$conf_int1)), sum(nchar(data$conf_int2))) + if (conf_length > 20) { - conf_width <- conf_length + conf_width <- conf_length conf_l_width <- ceiling(conf_width / 2) conf_u_width <- ceiling(conf_width / 2) } else { - conf_width <- 20 + conf_width <- 20 conf_l_width <- 10 conf_u_width <- 10 } - space1 <- 20 - space2 <- 13 - space3 <- 13 + space1 <- 20 + space2 <- 13 + space3 <- 13 width_1 <- sum(var_width1, obs_width, mean_width, se_width, sd_width, conf_width, space1) width_2 <- sum(var_width, obs_width, corr_width, corsig_width, space2) width_3 <- sum(var_width, t_width, df_width, p_width, space3) @@ -444,19 +435,19 @@ print_paired_ttest <- function(data) { print_two_ttest <- function(data) { - char_sig <- format(data$sig, digits = 0, nsmall = 4) - char_sig_l <- format(data$sig_l, digits = 0, nsmall = 4) - char_sig_u <- format(data$sig_u, digits = 0, nsmall = 4) - char_sig_pooled <- format(data$sig_pooled, digits = 0, nsmall = 4) - char_sig_pooled_l <- format(data$sig_pooled_l, digits = 0, nsmall = 4) - char_sig_pooled_u <- format(data$sig_pooled_u, digits = 0, nsmall = 4) + char_sig <- format(round(data$sig, 4), nsmall = 4, scientific = FALSE) + char_sig_l <- format(round(data$sig_l, 4), nsmall = 4, scientific = FALSE) + char_sig_u <- format(round(data$sig_u, 4), nsmall = 4, scientific = FALSE) + char_sig_pooled <- format(round(data$sig_pooled, 4), nsmall = 4, scientific = FALSE) + char_sig_pooled_l <- format(round(data$sig_pooled_l, 4), nsmall = 4, scientific = FALSE) + char_sig_pooled_u <- format(round(data$sig_pooled_u, 4), nsmall = 4, scientific = FALSE) # hypothesis heading - hyp_null <- paste0("Ho: mean(", data$levels[1], ") - mean(", data$levels[2], ") = diff = ", "0") - hyp_lt <- paste0("Ha: diff < ", "0") - hyp_2t <- paste0("Ha: diff ~= ", "0") - hyp_ut <- paste0("Ha: diff > ", "0") - conf <- data$confint * 100 + hyp_null <- paste0("Ho: mean(", data$levels[1], ") - mean(", data$levels[2], ") = diff = ", "0") + hyp_lt <- paste0("Ha: diff < ", "0") + hyp_2t <- paste0("Ha: diff ~= ", "0") + hyp_ut <- paste0("Ha: diff > ", "0") + conf <- data$confint * 100 conf_char <- paste0("[", conf, "% Conf. Interval]") # all tests combines @@ -466,34 +457,36 @@ print_two_ttest <- function(data) { all_s_l <- paste("P < t =", char_sig_l) all_s_t <- paste("P > |t| =", char_sig) all_s_u <- paste("P > t =", char_sig_u) - p_tval <- paste0(" t = ", as.character(data$t_pooled)) - s_tval <- paste0(" t = ", as.character(data$t_satterthwaite)) + p_tval <- paste0(" t = ", as.character(data$t_pooled)) + s_tval <- paste0(" t = ", as.character(data$t_satterthwaite)) # format output - grp_w <- max(nchar(data$levels[1]), nchar(data$levels[2]), nchar("Combined"), 10) - obs_w <- max(nchar("Obs"), nchar(data$obs[1]), nchar(data$obs[2]), nchar(data$n)) + grp_w <- max(nchar(data$levels[1]), nchar(data$levels[2]), nchar("Combined"), 10) + obs_w <- max(nchar("Obs"), nchar(data$obs[1]), nchar(data$obs[2]), nchar(data$n)) mean_w <- max(nchar("Mean"), nchar(data$mean[1]), nchar(data$mean[2]), nchar(data$mean_diff), nchar(data$combined[2])) - se_w <- max(nchar("Std. Err."), nchar(data$se[1]), nchar(data$se[2]), nchar(data$combined[4]), nchar(data$se_dif)) - sd_w <- max(nchar("Std. Dev."), nchar(data$sd[1]), nchar(data$sd[2]), nchar(data$combined[3]), nchar(data$sd_dif)) - df_w <- max(nchar("DF"), nchar(as.vector(data$df_pooled)), nchar(as.vector(data$df_satterthwaite))) - t_w <- max(nchar("t Value"), nchar(as.vector(data$t_pooled)), nchar(as.vector(data$t_satterthwaite))) - pt_w <- max( + se_w <- max(nchar("Std. Err."), nchar(data$se[1]), nchar(data$se[2]), nchar(data$combined[4]), nchar(data$se_dif)) + sd_w <- max(nchar("Std. Dev."), nchar(data$sd[1]), nchar(data$sd[2]), nchar(data$combined[3]), nchar(data$sd_dif)) + df_w <- max(nchar("DF"), nchar(as.vector(data$df_pooled)), nchar(as.vector(data$df_satterthwaite))) + t_w <- max(nchar("t Value"), nchar(as.vector(data$t_pooled)), nchar(as.vector(data$t_satterthwaite))) + pt_w <- max( nchar("P > |t|"), nchar(as.vector(char_sig)), nchar(as.vector(char_sig_l)), nchar(as.vector(char_sig_u)), nchar(as.vector(char_sig_pooled)), nchar(as.vector(char_sig_pooled_l)), nchar(as.vector(char_sig_u)) ) - numdf_w <- max(nchar("Num DF"), nchar(as.vector(data$num_df)), nchar(as.vector(data$den_df))) - f_w <- max(nchar("F Value"), nchar(as.vector(data$f))) - fp_w <- max(nchar("P > F"), nchar(as.vector(data$f_sig))) + numdf_w <- max(nchar("Num DF"), nchar(as.vector(data$num_df)), nchar(as.vector(data$den_df))) + f_w <- max(nchar("F Value"), nchar(as.vector(data$f))) + fp_w <- max(nchar("P > F"), nchar(as.vector(data$f_sig))) conf_length <- nchar(data$lower[1]) + nchar(data$upper[1]) + if (conf_length > 20) { - conf_width <- conf_length + conf_width <- conf_length conf_l_width <- ceiling(conf_width / 2) conf_u_width <- floor(conf_width / 2) } else { - conf_width <- 20 + conf_width <- 20 conf_l_width <- 10 conf_u_width <- 10 } + w1 <- sum(grp_w, obs_w, mean_w, se_w, sd_w, conf_width, 20) w2 <- sum(grp_w, 13, 9, df_w, t_w, pt_w, 20) w3 <- sum(grp_w, 8, numdf_w, numdf_w, f_w, fp_w, 20) @@ -643,16 +636,17 @@ print_two_ttest <- function(data) { print_prop_test <- function(data) { + cwidth <- max(nchar("z"), nchar("DF"), nchar("Pr(|Z| > |z|)"), nchar("Sample Size"), nchar("phat")) nwidth <- max(nchar(data$z), nchar(data$p0), nchar(data$sig[1]), nchar(data$n), nchar(data$phat)) - w1 <- sum(cwidth, nwidth, 6) - lw <- max(nchar("Variable"), nchar(data$varname)) - ow <- max(nchar("Observed"), nchar(data$n)) - ew <- max(nchar("Expected"), nchar(data$exp)) - dw <- max(nchar("% Deviation"), nchar(data$deviation)) - rw <- max(nchar("Std. Residuals"), nchar(data$std)) - w <- sum(lw, ow, ew, dw, rw, 16) - names <- c(0, 1) + w1 <- sum(cwidth, nwidth, 6) + lw <- max(nchar("Variable"), nchar(data$varname)) + ow <- max(nchar("Observed"), nchar(data$n)) + ew <- max(nchar("Expected"), nchar(data$exp)) + dw <- max(nchar("% Deviation"), nchar(data$deviation)) + rw <- max(nchar("Std. Residuals"), nchar(data$std)) + w <- sum(lw, ow, ew, dw, rw, 16) + names <- c(0, 1) if (data$alt == "less") { cat(format("Test Statistics", width = w1, justify = "centre"), "\n") @@ -703,10 +697,10 @@ print_prop_test <- function(data) { } print_ts_prop_test <- function(data) { + cwidth <- max(nchar("z"), nchar("Pr(|Z| > |z|)"), nchar("Total Observations")) nwidth <- max(nchar(data$z), nchar(data$sig[1]), nchar(data$n1), nchar(data$n2)) - w1 <- sum(cwidth, nwidth, 6) - + w1 <- sum(cwidth, nwidth, 6) totobs <- sum(data$n1, data$n2) if (data$alt == "less") { @@ -740,53 +734,59 @@ print_ts_prop_test <- function(data) { print_os_vartest <- function(data) { - null_l <- paste0("Ho: sd(", data$var_name, ") >= ", as.character(data$sd)) - alt_l <- paste0(" Ha: sd(", data$var_name, ") < ", as.character(data$sd)) - null_u <- paste0("Ho: sd(", data$var_name, ") <= ", as.character(data$sd)) - alt_u <- paste0("Ha: sd(", data$var_name, ") > ", as.character(data$sd)) - null_t <- paste0("Ho: sd(", data$var_name, ") = ", as.character(data$sd)) - alt_t <- paste0("Ha: sd(", data$var_name, ") != ", as.character(data$sd)) - all_l <- paste("Ha: sd <", as.character(data$sd)) - all_u <- paste("Ha: sd >", as.character(data$sd)) - all_t <- paste("Ha: sd !=", as.character(data$sd)) - char_p_l <- format(data$p_lower, digits = 0, nsmall = 4) - char_p_u <- format(data$p_upper, digits = 0, nsmall = 4) - char_p <- format(data$p_two, digits = 0, nsmall = 4) - all_p_l <- paste("Pr(C < c) =", char_p_l) + + null_l <- paste0("Ho: sd(", data$var_name, ") >= ", as.character(data$sd)) + null_u <- paste0("Ho: sd(", data$var_name, ") <= ", as.character(data$sd)) + null_t <- paste0("Ho: sd(", data$var_name, ") = ", as.character(data$sd)) + + alt_l <- paste0(" Ha: sd(", data$var_name, ") < ", as.character(data$sd)) + alt_u <- paste0("Ha: sd(", data$var_name, ") > ", as.character(data$sd)) + alt_t <- paste0("Ha: sd(", data$var_name, ") != ", as.character(data$sd)) + all_l <- paste("Ha: sd <", as.character(data$sd)) + all_u <- paste("Ha: sd >", as.character(data$sd)) + all_t <- paste("Ha: sd !=", as.character(data$sd)) + + char_p_l <- format(round(data$p_lower, 4), nsmall = 4, scientific = FALSE) + char_p_u <- format(round(data$p_upper, 4), nsmall = 4, scientific = FALSE) + char_p <- format(round(data$p_two, 4), nsmall = 4, scientific = FALSE) + if (data$p_lower < 0.5) { all_p_t <- paste("2 * Pr(C < c) =", char_p) } else { all_p_t <- paste("2 * Pr(C > c) =", char_p) } - all_p_u <- paste("Pr(C > c) =", char_p_u) + + all_p_l <- paste("Pr(C < c) =", char_p_l) + all_p_u <- paste("Pr(C > c) =", char_p_u) all_tval <- paste0(" c = ", as.character(data$chi)) - # formatting output # compute the characters of each output and decide the overall width - var_width <- max(nchar("Variable"), nchar(data$var_name)) - obs_width <- max(nchar("Obs"), nchar(data$n)) - mean_width <- max(nchar("Mean"), nchar(data$xbar)) - se_width <- max(nchar("Std. Err."), nchar(data$se)) - sd_width <- max(nchar("Std. Dev."), nchar(data$sigma)) + var_width <- max(nchar("Variable"), nchar(data$var_name)) + obs_width <- max(nchar("Obs"), nchar(data$n)) + mean_width <- max(nchar("Mean"), nchar(data$xbar)) + se_width <- max(nchar("Std. Err."), nchar(data$se)) + sd_width <- max(nchar("Std. Dev."), nchar(data$sigma)) conf_length <- nchar(data$c_lwr) + nchar(data$c_upr) - conf_str <- paste0("[", data$conf * 100, "% Conf. Interval]") + conf_str <- paste0("[", data$conf * 100, "% Conf. Interval]") confint_length <- nchar(conf_str) + if (conf_length > confint_length) { conf_width <- round(conf_length / 2) } else { conf_width <- round(confint_length / 2) } - c_width <- nchar(data$chi) - df_width <- max(nchar("DF"), nchar(data$df)) - p_width <- max(nchar("2 Tailed"), nchar(round(data$p_two, 5))) - md_width <- max(nchar("Difference"), nchar(data$mean_diff)) + + c_width <- nchar(data$chi) + df_width <- max(nchar("DF"), nchar(data$df)) + p_width <- max(nchar("2 Tailed"), nchar(round(data$p_two, 5))) + md_width <- max(nchar("Difference"), nchar(data$mean_diff)) md_length <- nchar(data$mean_diff_l) + nchar(data$mean_diff_u) - width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, ceiling(conf_width * 2), 21) - width_2 <- sum(var_width, c_width, df_width, p_width, 12) + width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, ceiling(conf_width * 2), 21) + width_2 <- sum(var_width, c_width, df_width, p_width, 12) all_width <- round(width_1 / 3) - width_3 <- all_width * 3 + width_3 <- all_width * 3 cat( format("One-Sample Statistics", width = width_1, justify = "centre"), @@ -881,12 +881,15 @@ print_os_vartest <- function(data) { print_chisq_test <- function(x) { + width1 <- nchar("Likelihood Ratio Chi-Square") width2 <- max(nchar(x$df)) + width3 <- max( - nchar(x$chi), nchar(x$chilr), nchar(x$chimh), nchar(x$chiy), nchar(x$phi), - nchar(x$cc), nchar(x$cv) + nchar(x$chisquare), nchar(x$chisquare_lr), nchar(x$chisquare_mantel_haenszel), nchar(x$chisquare_adjusted), nchar(x$phi_coefficient), + nchar(x$contingency_coefficient), nchar(x$cramers_v) ) + width4 <- 6 widthn <- sum(width1, width2, width3, width4, 12) @@ -899,31 +902,31 @@ print_chisq_test <- function(x) { cat(rep("-", widthn), sep = "", "\n") cat( format("Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(), - format(x$chi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sig, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Likelihood Ratio Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(), - format(x$chilr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$siglr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare_lr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_lr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Continuity Adj. Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "right"), formats(), - format(x$chiy, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sigy, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare_adjusted, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_adjusted, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Mantel-Haenszel Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "right"), formats(), - format(x$chimh, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sigmh, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare_mantel_haenszel, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_mantel_haenszel, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Phi Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$phi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$phi_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat( format("Contingency Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$cc, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$contingency_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat( format("Cramer's V", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$cv, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$cramers_v, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat(rep("-", widthn), sep = "", "\n") } else { @@ -935,23 +938,23 @@ print_chisq_test <- function(x) { cat(rep("-", widthn), sep = "", "\n") cat( format("Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(), - format(x$chi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$sig, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Likelihood Ratio Chi-Square", width = width1, justify = "left"), formats(), format(x$df, width = width2, justify = "centre"), formats(), - format(x$chilr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$siglr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" + format(x$chisquare_lr, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(x$pval_chisquare_lr, width = width4, justify = "right", nsmall = 4, scientific = F), "\n", sep = "" ) cat( format("Phi Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$phi, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$phi_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat( format("Contingency Coefficient", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$cc, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$contingency_coefficient, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat( format("Cramer's V", width = width1, justify = "left"), formats(), format(" ", width = width2, justify = "right"), formats(), - format(x$cv, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" + format(x$cramers_v, width = width3, justify = "centre", nsmall = 4, scientific = F), formats(), format(" ", width = width4, justify = "right"), "\n", sep = "" ) cat(rep("-", widthn), sep = "", "\n") } @@ -959,69 +962,38 @@ print_chisq_test <- function(x) { print_chisq_gof <- function(data) { + cwidth <- max(nchar("Chi-Square"), nchar("DF"), nchar("Pr > Chi Sq"), nchar("Sample Size")) - nwidth <- max(nchar(data$chisquare), nchar(data$df), nchar(data$pvalue), nchar(data$ssize)) - w1 <- sum(cwidth, nwidth, 6) - lw <- max(nchar("Variable"), nchar(data$names)) - ow <- max(nchar("Observed"), nchar(data$obs)) - ew <- max(nchar("Expected"), nchar(data$exp)) - dw <- max(nchar("% Deviation"), nchar(data$deviation)) - rw <- max(nchar("Std. Residuals"), nchar(data$std)) - w <- sum(lw, ow, ew, dw, rw, 16) - + nwidth <- max(nchar(data$chisquare), nchar(data$degrees_of_freedom), nchar(data$pvalue), + nchar(data$sample_size)) + w1 <- sum(cwidth, nwidth, 6) + lw <- max(nchar("Variable"), nchar(data$categories)) + ow <- max(nchar("Observed"), nchar(data$observed_frequency)) + ew <- max(nchar("Expected"), nchar(data$expected_frequency)) + dw <- max(nchar("% Deviation"), nchar(data$deviation)) + rw <- max(nchar("Std. Residuals"), nchar(data$std_residuals)) + w <- sum(lw, ow, ew, dw, rw, 16) cat(format("Test Statistics", width = w1, justify = "centre"), "\n") cat(rep("-", w1), sep = "", "\n") cat(format("Chi-Square", width = cwidth, justify = "left"), formats(), format(data$chisquare, width = nwidth, justify = "right"), "\n") - cat(format("DF", width = cwidth, justify = "left"), formats(), format(data$df, width = nwidth, justify = "right"), "\n") + cat(format("DF", width = cwidth, justify = "left"), formats(), format(data$degrees_of_freedom, width = nwidth, justify = "right"), "\n") cat(format("Pr > Chi Sq", width = cwidth, justify = "left"), formats(), format(data$pvalue, width = nwidth, justify = "right"), "\n") - cat(format("Sample Size", width = cwidth, justify = "left"), formats(), format(data$ssize, width = nwidth, justify = "right"), "\n\n") + cat(format("Sample Size", width = cwidth, justify = "left"), formats(), format(data$sample_size, width = nwidth, justify = "right"), "\n\n") cat(format(paste("Variable:", data$varname), width = w, justify = "centre"), "\n") cat(rep("-", w), sep = "", "\n") cat(fg("Category", lw), fs(), fg("Observed", ow), fs(), fg("Expected", ew), fs(), fg("% Deviation", dw), fs(), fg("Std. Residuals", rw), "\n") cat(rep("-", w), sep = "", "\n") - for (i in seq_len(data$level)) { + for (i in seq_len(data$n_levels)) { cat( - fg(data$names[i], lw), fs(), fg(data$obs[i], ow), fs(), fg(data$exp[i], ew), fs(), - fg(data$deviation[i], dw), fs(), fg(data$std[i], rw), "\n" + fg(data$categories[i], lw), fs(), fg(data$observed_frequency[i], ow), fs(), + fg(data$expected_frequency[i], ew), fs(), fg(data$deviation[i], dw), fs(), + fg(data$std_residuals[i], rw), "\n" ) } cat(rep("-", w), sep = "", "\n") } - -# print_os_chisqgof <- function(data) { - -# cwidth <- max(nchar('Chi-Square'), nchar('DF'), nchar('Pr > Chi Sq'), nchar('Sample Size')) -# nwidth <- max(nchar(data$chisquare), nchar(data$df), nchar(data$pvalue), nchar(data$ssize)) -# w1 <- sum(cwidth, nwidth, 6) -# lw <- max(nchar('Variable'), nchar(data$names)) -# ow <- max(nchar('Observed'), nchar(data$obs)) -# ew <- max(nchar('Expected'), nchar(data$exp)) -# dw <- max(nchar('% Deviation'), nchar(data$deviation)) -# rw <- max(nchar('Std. Residuals'), nchar(data$std)) -# w <- sum(lw, ow, ew, dw, rw, 16) - - -# cat(format("Test Statistics", width = w1, justify = "centre"), "\n") -# cat(rep("-", w1), sep = "", '\n') -# cat(format('Chi-Square', width = cwidth, justify = 'left'), formats(), format(data$chisquare, width = nwidth, justify = 'right'), '\n') -# cat(format('DF', width = cwidth, justify = 'left'), formats(), format(data$df, width = nwidth, justify = 'right'), '\n') -# cat(format('Pr > Chi Sq', width = cwidth, justify = 'left'), formats(), format(data$pvalue, width = nwidth, justify = 'right'), '\n') -# cat(format('Sample Size', width = cwidth, justify = 'left'), formats(), format(data$ssize, width = nwidth, justify = 'right'), '\n\n') -# cat(format(paste('Variable:', data$varname), width = w, justify = 'centre'), '\n') -# cat(rep("-", w), sep = "", '\n') -# cat(fg('Category', lw), fs(), fg('Observed', ow), fs(), fg('Expected', ew), fs(), fg('% Deviation', dw), fs(), fg('Std. Residuals', rw), '\n') -# cat(rep("-", w), sep = "", '\n') -# for (i in seq_len(data$level)) { -# cat(fg(data$names[i], lw), fs(), fg(data$obs[i], ow), fs(), fg(data$exp[i], ew), fs(), -# fg(data$deviation[i], dw), fs(), fg(data$std[i], rw), '\n') -# } -# cat(rep("-", w), sep = "", '\n') - -# } - - print_runs_test <- function(x) { cat( "Runs Test\n", @@ -1039,9 +1011,10 @@ print_runs_test <- function(x) { print_cochran_test <- function(data) { + cwidth <- max(nchar("N"), nchar("Cochran's Q"), nchar("df"), nchar("p value")) nwidth <- max(nchar(data$n), nchar(data$q), nchar(data$df), nchar(data$pvalue)) - w1 <- sum(cwidth, nwidth, 6) + w1 <- sum(cwidth, nwidth, 6) cat(format("Test Statistics", width = w1, justify = "centre"), "\n") cat(rep("-", w1), sep = "", "\n") @@ -1054,43 +1027,26 @@ print_cochran_test <- function(data) { print_mcnemar_test <- function(data) { - cwidth1 <- max( - nchar("McNemar's chi2"), nchar("DF"), nchar("Pr > chi2"), - nchar("Exact Pr >= chi2") - ) - nwidth1 <- max( - nchar(data$tatistic), nchar(data$df), nchar(data$pvalue), - nchar(data$exactp) - ) - w1 <- sum(cwidth1, nwidth1, 6) + + cwidth1 <- max(nchar("McNemar's chi2"), nchar("DF"), nchar("Pr > chi2"), nchar("Exact Pr >= chi2")) + nwidth1 <- max(nchar(data$tatistic), nchar(data$df), nchar(data$pvalue), nchar(data$exactp)) + w1 <- sum(cwidth1, nwidth1, 6) - cwidth2 <- max( - nchar("Kappa"), nchar("ASE"), nchar("95% Lower Conf Limit"), - nchar("95% Upper Conf Limit") - ) - nwidth2 <- max( - nchar(data$kappa), nchar(data$std_err), nchar(data$kappa_cil), - nchar(data$kappa_ciu) - ) - w2 <- sum(cwidth2, nwidth2, 6) + cwidth2 <- max(nchar("Kappa"), nchar("ASE"), nchar("95% Lower Conf Limit"), nchar("95% Upper Conf Limit")) + nwidth2 <- max(nchar(data$kappa), nchar(data$std_err), nchar(data$kappa_cil), nchar(data$kappa_ciu)) + w2 <- sum(cwidth2, nwidth2, 6) - cwidth3 <- max( - nchar("Cases"), nchar("Controls"), nchar("Ratio"), - nchar("Odds Ratio") - ) - nwidth3 <- max( - nchar(data$cases), nchar(data$controls), nchar(data$ratio), - nchar(data$odratio) - ) - w3 <- sum(cwidth3, nwidth3, 6) + cwidth3 <- max(nchar("Cases"), nchar("Controls"), nchar("Ratio"), nchar("Odds Ratio")) + nwidth3 <- max(nchar(data$cases), nchar(data$controls), nchar(data$ratio), nchar(data$odratio)) + w3 <- sum(cwidth3, nwidth3, 6) - tcs <- colSums(data$tbl) - trs <- rowSums(data$tbl) + tcs <- colSums(data$tbl) + trs <- rowSums(data$tbl) twidth1 <- 5 twidth2 <- max(nchar(data$tbl[, 1]), nchar(tcs[1])) twidth3 <- max(nchar(data$tbl[, 2]), nchar(tcs[2])) twidth4 <- max(5, nchar(sum(trs))) - w4 <- sum(twidth1, twidth2, twidth3, twidth4, 18) + w4 <- sum(twidth1, twidth2, twidth3, twidth4, 18) twidth5 <- sum(twidth2, twidth3) cat( @@ -1154,21 +1110,23 @@ print_mcnemar_test <- function(data) { print_levene_test <- function(data) { - lw <- max(nchar("Levels"), nchar(data$levs), nchar("Total")) - ow <- max(nchar("Frequency"), nchar(data$lens), nchar(data$n)) - ew <- max(nchar("Mean"), nchar(data$avgs), nchar(data$avg)) - dw <- max(nchar("Std. Dev."), nchar(data$sds), nchar(data$sd)) - w <- sum(lw, ow, ew, dw, 12) + + lw <- max(nchar("Levels"), nchar(data$levs), nchar("Total")) + ow <- max(nchar("Frequency"), nchar(data$lens), nchar(data$n)) + ew <- max(nchar("Mean"), nchar(data$avgs), nchar(data$avg)) + dw <- max(nchar("Std. Dev."), nchar(data$sds), nchar(data$sd)) + w <- sum(lw, ow, ew, dw, 12) cwidth <- max( nchar("Statistic"), nchar("Brown and Forsythe"), nchar("Levene"), nchar("Brown and Forsythe (Trimmed Mean)") ) + nwidth <- max(nchar("Num DF"), nchar(data$n_df)) dwidth <- max(nchar("Den DF"), nchar(data$d_df)) ewidth <- max(nchar("F"), nchar(data$bf), nchar(data$lev), nchar(data$bft)) fwidth <- max(nchar("Pr > F"), nchar(data$p_bf), nchar(data$p_lev), nchar(data$p_bft)) - w1 <- sum(cwidth, nwidth, dwidth, ewidth, fwidth, 16) + w1 <- sum(cwidth, nwidth, dwidth, ewidth, fwidth, 16) cat(format("Summary Statistics", width = w, justify = "centre"), "\n") cat( @@ -1217,30 +1175,32 @@ print_levene_test <- function(data) { print_var_test <- function(data) { - var_width <- max(nchar("combined"), nchar(data$lev)) - obs_width <- max(nchar("Obs"), nchar(data$lens), nchar(data$len)) + + var_width <- max(nchar("combined"), nchar(data$lev)) + obs_width <- max(nchar("Obs"), nchar(data$lens), nchar(data$len)) mean_width <- max(nchar("Mean"), nchar(data$avgs), nchar(data$avg)) - se_width <- max(nchar("Std. Err."), nchar(data$ses), nchar(data$se)) - sd_width <- max(nchar("Std. Dev."), nchar(data$sds), nchar(data$sd)) - width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, 16) - - rto <- paste0("ratio = sd(", data$lev[1], ") / (", data$lev[2], ")") - nhyp <- "Ho: ratio = 1" - lhyp <- "Ha: ratio < 1" - uhyp <- "Ha: ratio > 1" - char_p_l <- format(data$lower, digits = 0, nsmall = 4) - char_p_u <- format(data$upper, digits = 0, nsmall = 4) - all_p_l <- paste("Pr(F < f) =", char_p_l) - all_p_u <- paste("Pr(F > f) =", char_p_u) - - - f_width <- nchar(data$f) - df1_width <- max(nchar("Num DF"), nchar(data$n1)) - df2_width <- max(nchar("Den DF"), nchar(data$n2)) - p_width <- max(nchar("p"), nchar(char_p_l)) - width_2 <- sum(f_width, df1_width, df2_width, p_width, 12) - width_3 <- sum(f_width, df1_width, df2_width, 8) - all_width <- sum(nchar(all_p_l), nchar(all_p_u), 4) + se_width <- max(nchar("Std. Err."), nchar(data$ses), nchar(data$se)) + sd_width <- max(nchar("Std. Dev."), nchar(data$sds), nchar(data$sd)) + width_1 <- sum(var_width, obs_width, mean_width, se_width, sd_width, 16) + + rto <- paste0("ratio = sd(", data$lev[1], ") / (", data$lev[2], ")") + nhyp <- "Ho: ratio = 1" + lhyp <- "Ha: ratio < 1" + uhyp <- "Ha: ratio > 1" + + char_p_l <- format(round(data$lower, 4), nsmall = 4, scientific = FALSE) + char_p_u <- format(round(data$upper, 4), nsmall = 4, scientific = FALSE) + + all_p_l <- paste("Pr(F < f) =", char_p_l) + all_p_u <- paste("Pr(F > f) =", char_p_u) + + f_width <- nchar(data$f) + df1_width <- max(nchar("Num DF"), nchar(data$n1)) + df2_width <- max(nchar("Den DF"), nchar(data$n2)) + p_width <- max(nchar("p"), nchar(char_p_l)) + width_2 <- sum(f_width, df1_width, df2_width, p_width, 12) + width_3 <- sum(f_width, df1_width, df2_width, 8) + all_width <- sum(nchar(all_p_l), nchar(all_p_u), 4) cat( format("Variance Ratio Test", width = width_1, justify = "centre"), @@ -1344,4 +1304,4 @@ print_var_test <- function(data) { ) cat("\n", rep("-", all_width), sep = "") } -} \ No newline at end of file +} diff --git a/R/infer-runs-test.R b/R/ifr-runs-test.R similarity index 77% rename from R/infer-runs-test.R rename to R/ifr-runs-test.R index 4850c06..a7acbe0 100644 --- a/R/infer-runs-test.R +++ b/R/ifr-runs-test.R @@ -1,7 +1,5 @@ #' @useDynLib inferr #' @importFrom Rcpp sourceCpp -#' @importFrom stats median -#' @importFrom purrr map #' @title Test for Random Order #' @description runtest tests whether the observations of \code{x} are serially #' independent i.e. whether they occur in a random order, by counting @@ -49,6 +47,9 @@ #' infer_runs_test(hsb, read, mean = TRUE) #' #' infer_runs_test(hsb, read, threshold = 0) +#' +#' @importFrom stats pnorm +#' #' @export #' infer_runs_test <- function(data, x, drop = FALSE, split = FALSE, mean = FALSE, @@ -59,83 +60,87 @@ infer_runs_test <- function(data, x, drop = FALSE, split = FALSE, mean = FALSE, infer_runs_test.default <- function(data, x, drop = FALSE, split = FALSE, mean = FALSE, threshold = NA) { - x1 <- enquo(x) - - xone <- - data %>% - pull(!! x1) - n <- length(xone) - - # if (!(is.numeric(x) || is.integer(x))) - # stop("x must be numeric or integer") + x1 <- deparse(substitute(x)) + xone <- data[[x1]] + n <- length(xone) if (is.na(threshold)) { y <- unique(xone) if (sum(y) == 1) { - stop("Use 0 as threshold if the data is coded as a binary.") + stop("Use 0 as threshold if the data is coded as a binary.", call. = FALSE) } } - # compute threshold if (!(is.na(threshold))) { thresh <- threshold - } else if (mean == TRUE) { + } else if (mean) { thresh <- mean(xone) } else { thresh <- median(xone, na.rm = TRUE) } - # drop values equal to threshold if drop == TRUE - if (drop == TRUE) { + if (drop) { xone <- xone[xone != thresh] } - # binary coding the data based on the threshold - if (split == TRUE) { + if (split) { x_binary <- ifelse(xone > thresh, 1, 0) } else { + x_binary <- xone %>% - map(nruns2, thresh) %>% + lapply(nruns2, thresh) %>% unlist(use.names = FALSE) } - # compute the number of runs - n_runs <- nsignC(x_binary) - n1 <- sum(x_binary) - n0 <- length(x_binary) - n1 - - # compute expected runs and sd of runs + n_runs <- nsignC(x_binary) + n1 <- sum(x_binary) + n0 <- length(x_binary) - n1 exp_runs <- expruns(n0, n1) - sd_runs <- sdruns(n0, n1) + sd_runs <- sdruns(n0, n1) - # compute the test statistic test_stat <- (n_runs - exp_runs) / (sd_runs ^ 0.5) sig <- 2 * (1 - pnorm(abs(test_stat), lower.tail = TRUE)) - # result - result <- list( - n = n, threshold = thresh, n_below = n0, n_above = n1, - mean = exp_runs, var = sd_runs, n_runs = n_runs, z = test_stat, - p = sig - ) + result <- + list(mean = exp_runs, + n = n, + n_above = n1, + n_below = n0, + n_runs = n_runs, + p = sig, + threshold = thresh, + var = sd_runs, + z = test_stat) class(result) <- "infer_runs_test" return(result) } -#' @export -#' @rdname infer_runs_test -#' @usage NULL -#' -runs_test <- function(x, drop = FALSE, split = FALSE, mean = FALSE, - threshold = NA) { - .Deprecated("infer_runs_test()") -} - #' @export #' print.infer_runs_test <- function(x, ...) { print_runs_test(x) -} \ No newline at end of file +} + +# expected runs +expruns <- function(n0, n1) { + N <- n0 + n1 + return(((2 * n0 * n1) / N) + 1) +} + +# standard deviation of runs +sdruns <- function(n0, n1) { + N <- n0 + n1 + n <- 2 * n0 * n1 + return(((n * (n - N)) / ((N ^ 2) * (N - 1)))) +} + +nruns2 <- function(data, value) { + if (data <= value) { + return(0) + } else { + return(1) + } +} diff --git a/R/ifr-ts-ind-ttest.R b/R/ifr-ts-ind-ttest.R new file mode 100644 index 0000000..5b57381 --- /dev/null +++ b/R/ifr-ts-ind-ttest.R @@ -0,0 +1,352 @@ +#' @title Two Independent Sample t Test +#' @description \code{infer_ts_ind_ttest} compares the means of two independent groups in order to determine whether +#' there is statistical evidence that the associated population means are significantly different. +#' @param data a data frame +#' @param x factor; a column in \code{data} +#' @param y numeric; a column in \code{data} +#' @param confint confidence level +#' @param alternative a character string specifying the alternative hypothesis, +#' must be one of "both" (default), "greater", "less" or "all". You can specify +#' just the initial letter +#' @param ... additional arguments passed to or from other methods +#' @return \code{infer_ts_ind_ttest} returns an object of class \code{"infer_ts_ind_ttest"}. +#' An object of class \code{"infer_ts_ind_ttest"} is a list containing the +#' following components: +#' +#' \item{levels}{levels of \code{x}} +#' \item{obs}{number of observations of \code{y} for each level of \code{x}} +#' \item{n}{total number of observations} +#' \item{mean}{mean of \code{y} for each level of \code{x}} +#' \item{sd}{standard deviation of \code{y} for each level of \code{x}} +#' \item{se}{estimate of standard error of \code{y} for each level of \code{x}} +#' \item{lower}{lower limit for the mean of \code{y} for each level of \code{x}} +#' \item{upper}{upper limit for the mean of \code{y} for each level of \code{x}} +#' \item{combined}{a data frame; mean, standard deviation, standard error and +#' confidence limit of mean of \code{y}} +#' \item{mean_diff}{difference in mean of \code{y} for the two groups of \code{x}} +#' \item{se_dif}{estimate of the standard error for difference in mean of +#' \code{y} for the two groups of \code{x}} \item{sd_dif}{degrees of freedom} +#' \item{conf_diff}{confidence interval for \code{mean_diff}} +#' \item{df_pooled}{degrees of freedom for the pooled method} +#' \item{df_satterthwaite}{degrees of freedom for the Satterthwaite method} +#' \item{t_pooled}{t statistic for the pooled method} +#' \item{t_satterthwaite}{t statistic for the Satterthwaite method} +#' \item{sig_pooled}{two-sided p-value for the pooled method} +#' \item{sig_pooled_l}{lower one-sided p-value for the pooled method} +#' \item{sig_pooled_u}{upper one-sided p-value for the pooled method} +#' \item{sig}{two-sided p-value for the Satterthwaite method} +#' \item{sig_l}{lower one-sided p-value for the Satterthwaite method} +#' \item{sig_u}{upper one-sided p-value for the Satterthwaite method} +#' \item{num_df}{numerator degrees of freedom for folded f test} +#' \item{den_df}{denominator degrees of freedom for folded f test} +#' \item{f}{f value for the equality of variances test} +#' \item{f_sig}{p-value for the folded f test} +#' \item{var_y}{name of \code{y}} +#' \item{confint}{confidence level} +#' \item{alternative}{alternative hypothesis} +#' @section Deprecated Function: +#' \code{ind_ttest()} has been deprecated. Instead use \code{infer_ts_ind_ttest()}. +#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric +#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. +#' @seealso \code{\link[stats]{t.test}} +#' @examples +#' # lower tail +#' infer_ts_ind_ttest(hsb, female, write, alternative = 'less') +#' +#' # upper tail +#' infer_ts_ind_ttest(hsb, female, write, alternative = 'greater') +#' +#' # both tails +#' infer_ts_ind_ttest(hsb, female, write, alternative = 'both') +#' +#' # all tails +#' infer_ts_ind_ttest(hsb, female, write, alternative = 'all') +#' @export +#' +infer_ts_ind_ttest <- function(data, x, y, confint = 0.95, + alternative = c("both", "less", "greater", "all"), ...) UseMethod("infer_ts_ind_ttest") + +#' @export +#' +infer_ts_ind_ttest.default <- function(data, x, y, confint = 0.95, + alternative = c("both", "less", "greater", "all"), ...) { + + x1 <- deparse(substitute(x)) + y1 <- deparse(substitute(y)) + yone <- names(data[y1]) + + if (check_x(data, x1)) { + stop("x must be a binary factor variable", call. = FALSE) + } + + if (check_level(data, x1) > 2) { + stop("x must be a binary factor variable", call. = FALSE) + } + + method <- match.arg(alternative) + var_y <- yone + alpha <- 1 - confint + a <- alpha / 2 + h <- indth(data, x1, y1, a) + grp_stat <- h + g_stat <- as.matrix(h) + comb <- indcomb(data, y1, a) + k <- indcomp(grp_stat, alpha) + j <- indsig(k$n1, k$n2, k$s1, k$s2, k$mean_diff) + m <- indpool(k$n1, k$n2, k$mean_diff, k$se_dif) + + result <- list(alternative = method, + combined = comb, + confint = confint, + conf_diff = round(k$conf_diff, 5), + den_df = k$n2 - 1, + df_pooled = m$df_pooled, + df_satterthwaite = j$d_f, + f = round(k$s1 / k$s2, 4), + f_sig = fsig(k$s1, k$s2, k$n1, k$n2), + levels = g_stat[, 1], + lower = g_stat[, 8], + mean = g_stat[, 3], + mean_diff = round(k$mean_diff, 3), + n = k$n, + num_df = k$n1 - 1, + obs = g_stat[, 2], + sd = g_stat[, 4], + sd_dif = round(k$sd_dif, 3), + se = g_stat[, 5], + se_dif = round(k$se_dif, 3), + sig = j$sig, + sig_l = j$sig_l, + sig_pooled_l = m$sig_pooled_l, + sig_pooled_u = m$sig_pooled_u, + sig_pooled = m$sig_pooled, + sig_u = j$sig_u, + t_pooled = round(m$t_pooled, 4), + t_satterthwaite = round(j$t, 4), + upper = g_stat[, 9], + var_y = var_y) + + class(result) <- "infer_ts_ind_ttest" + return(result) + +} + +#' @export +#' +print.infer_ts_ind_ttest <- function(x, ...) { + print_two_ttest(x) +} + +indth <- function(data, x, y, a) { + + h <- data_split(data, x, y) + h$df <- h$length - 1 + h$error <- qt(a, h$df) * -1 + h$lower <- h$mean_t - (h$error * h$std_err) + h$upper <- h$mean_t + (h$error * h$std_err) + + return(h) +} + +data_split <- function(data, x, y) { + + dat <- data.table(data[c(x, y)]) + out <- dat[, .(length = length(get(y)), + mean_t = mean_t(get(y)), + sd_t = sd_t(get(y)), + std_err = std_err(get(y))), + by = x] + + setDF(out) + +} + +indcomb <- function(data, y, a) { + + comb <- da(data, y) + comb$df <- comb$length - 1 + comb$error <- qt(a, comb$df) * -1 + comb$lower <- round(comb$mean_t - (comb$error * comb$std_err), 5) + comb$upper <- round(comb$mean_t + (comb$error * comb$std_err), 5) + names(comb) <- NULL + + return(comb) + +} + +da <- function(data, y) { + + dat <- data[[y]] + data.frame(length = length(dat), + mean_t = mean_t(dat), + sd_t = sd_t(dat), + std_err = std_err(dat)) + +} + +mean_t <- function(x) { + round(mean(x), 3) +} + +sd_t <- function(x) { + round(sd(x), 3) +} + +std_err <- function(x) { + + x %>% + sd() %>% + divide_by(x %>% + length() %>% + sqrt()) %>% + round(3) + +} + +indcomp <- function(grp_stat, alpha) { + + n1 <- grp_stat[1, 2] + n2 <- grp_stat[2, 2] + n <- n1 + n2 + means <- grp_stat[, 3] + mean_diff <- means[1] - means[2] + sd1 <- grp_stat[1, 4] + sd2 <- grp_stat[2, 4] + s1 <- grp_stat[1, 4] ^ 2 + s2 <- grp_stat[2, 4] ^ 2 + sd_dif <- sd_diff(n1, n2, s1, s2) + se_dif <- se_diff(n1, n2, s1, s2) + conf_diff <- conf_int_p(mean_diff, se_dif, alpha = alpha) + + list(conf_diff = conf_diff, + mean_diff = mean_diff, + n = n, + n1 = n1, + n2 = n2, + s1 = s1, + s2 = s2, + sd1 = sd1, + sd2 = sd2, + sd_dif = sd_dif, + se_dif = se_dif) + +} + +sd_diff <- function(n1, n2, s1, s2) { + + n1 <- n1 - 1 + n2 <- n2 - 1 + n <- (n1 + n2) - 2 + + (n1 * s1) %>% + add(n2 * s2) %>% + divide_by(n) %>% + raise_to_power(0.5) + +} + +se_diff <- function(n1, n2, s1, s2) { + + df <- n1 + n2 - 2 + n_1 <- n1 - 1 + n_2 <- n2 - 1 + + (n_1 * s1) %>% + add(n_2 * s2) %>% + divide_by(df) -> v + + (1 / n1) %>% + add(1 / n2) %>% + multiply_by(v) %>% + sqrt() + +} + +conf_int_p <- function(u, se, alpha = 0.05) { + + a <- alpha / 2 + error <- round(qnorm(a), 3) * -1 + lower <- u - (error * se) + upper <- u + (error * se) + c(lower, upper) + +} + +indsig <- function(n1, n2, s1, s2, mean_diff) { + + d_f <- as.vector(df(n1, n2, s1, s2)) + t <- mean_diff / (((s1 / n1) + (s2 / n2)) ^ 0.5) + sig_l <- round(pt(t, d_f), 4) + sig_u <- round(pt(t, d_f, lower.tail = FALSE), 4) + + if (sig_l < 0.5) { + sig <- round(pt(t, d_f) * 2, 4) + } else { + sig <- round(pt(t, d_f, lower.tail = FALSE) * 2, 4) + } + + list(d_f = d_f, + sig_l = sig_l, + sig_u = sig_u, + sig = sig, + t = t) + +} + +df <- function(n1, n2, s1, s2) { + + sn1 <- s1 / n1 + sn2 <- s2 / n2 + m1 <- 1 / (n1 - 1) + m2 <- 1 / (n2 - 1) + num <- (sn1 + sn2) ^ 2 + den <- (m1 * (sn1 ^ 2)) + (m2 * (sn2 ^ 2)) + + round(num / den) + +} + +fsig <- function(s1, s2, n1, n2) { + + round(min( + pf((s1 / s2), (n1 - 1), (n2 - 1)), + pf((s1 / s2), (n1 - 1), (n2 - 1), + lower.tail = FALSE + ) + ) * 2, 4) + +} + + +indpool <- function(n1, n2, mean_diff, se_dif) { + + df_pooled <- (n1 + n2) - 2 + t_pooled <- mean_diff / se_dif + sig_pooled_l <- round(pt(t_pooled, df_pooled), 4) + sig_pooled_u <- round(pt(t_pooled, df_pooled, lower.tail = FALSE), 4) + + if (sig_pooled_l < 0.5) { + sig_pooled <- round(pt(t_pooled, df_pooled) * 2, 4) + } else { + sig_pooled <- round(pt(t_pooled, df_pooled, lower.tail = FALSE) * 2, 4) + } + + list(df_pooled = df_pooled, + sig_pooled_l = sig_pooled_l, + sig_pooled_u = sig_pooled_u, + sig_pooled = sig_pooled, + t_pooled = t_pooled) + +} + +check_x <- function(data, x) { + + !is.factor(data[[x]]) + +} + +check_level <- function(data, x) { + + nlevels(data[[x]]) + +} diff --git a/R/infer-ts-paired-ttest.R b/R/ifr-ts-paired-ttest.R similarity index 59% rename from R/infer-ts-paired-ttest.R rename to R/ifr-ts-paired-ttest.R index 700a65e..70b2476 100644 --- a/R/infer-ts-paired-ttest.R +++ b/R/ifr-ts-paired-ttest.R @@ -1,4 +1,3 @@ -#' @importFrom stats cor #' @title Paired t test #' @description \code{infer_ts_paired_ttest} tests that two samples have the #' same mean, assuming paired data. @@ -58,23 +57,14 @@ infer_ts_paired_ttest <- function(data, x, y, confint = 0.95, #' infer_ts_paired_ttest.default <- function(data, x, y, confint = 0.95, alternative = c("both", "less", "greater", "all")) { - x1 <- enquo(x) - y1 <- enquo(y) - - method <- match.arg(alternative) - - var_names <- - data %>% - select(!! x1, !! y1) %>% - names() - xone <- - data %>% - pull(!! x1) + x1 <- deparse(substitute(x)) + y1 <- deparse(substitute(y)) + xone <- data[[x1]] + yone <- data[[y1]] - yone <- - data %>% - pull(!! y1) + method <- match.arg(alternative) + var_names <- names(data[c(x1, y1)]) k <- paired_comp(xone, yone, confint, var_names) @@ -90,18 +80,89 @@ infer_ts_paired_ttest.default <- function(data, x, y, confint = 0.95, return(result) } -#' @export -#' @rdname infer_ts_paired_ttest -#' @usage NULL -#' -paired_ttest <- function(x, y, confint = 0.95, - alternative = c("both", "less", "greater", "all")) { - .Deprecated("infer_ts_paired_ttest()") - infer_ts_paired_ttest(x, y, confint, alternative) -} - #' @export #' print.infer_ts_paired_ttest <- function(x, ...) { print_paired_ttest(x) +} + +#' @importFrom stats cor +paired_comp <- function(x, y, confint, var_names) { + + n <- length(x) + df <- (n - 1) + xy <- paste(var_names[1], "-", var_names[2]) + + data_prep <- paired_data(x, y) + b <- paired_stats(data_prep, "key", "value") + corr <- round(cor(x, y), 4) + corsig <- cor_sig(corr, n) + + alpha <- 1 - confint + + confint1 <- conf_int_t(b[[1, 1]], b[[1, 2]], n, alpha = alpha) %>% round(2) + confint2 <- conf_int_t(b[[2, 1]], b[[2, 2]], n, alpha = alpha) %>% round(2) + confint3 <- conf_int_t(b[[3, 1]], b[[3, 2]], n, alpha = alpha) %>% round(2) + + t <- round(b[[3, 1]] / b[[3, 3]], 4) + + p_l <- pt(t, df) + p_u <- pt(t, df, lower.tail = FALSE) + p <- pt(abs(t), df, lower.tail = FALSE) * 2 + + list( + Obs = n, b = b, conf_int1 = confint1, conf_int2 = confint2, + conf_int_diff = confint3, corr = round(corr, 2), corsig = round(corsig, 2), + tstat = t, p_lower = p_l, p_upper = p_u, p_two_tail = p, xy = xy, df = df + ) + +} + +paired_data <- function(x, y) { + + j <- data.frame(x = x, y = y) + j$z <- j$x - j$y + val <- data.frame(value = c(j$x, j$y, j$z)) + key <- rep(c("x", "y", "z"), each = nrow(j)) + cbind(key = key, value = val) + +} + +paired_stats <- function(data, key, value) { + + dat <- data.table(data[c("value", "key")]) + + out <- dat[, .(length = length(value), + mean = mean(value), + sd = sd(value)), + by = key] + + out[, ':='(se = sd / sqrt(length))] + setDF(out) + out[, c(-1, -2)] + +} + +cor_sig <- function(corr, n) { + + t <- corr / ((1 - (corr ^ 2)) / (n - 2)) ^ 0.5 + df <- n - 2 + sig <- (1 - pt(t, df)) * 2 + round(sig, 4) + +} + +conf_int_t <- function(u, s, n, alpha = 0.05) { + + a <- alpha / 2 + df <- n - 1 + error <- round(qt(a, df), 3) * -1 + lower <- u - (error * samp_err(s, n)) + upper <- u + (error * samp_err(s, n)) + c(lower, upper) + +} + +samp_err <- function(sigma, n) { + sigma / (n ^ 0.5) } \ No newline at end of file diff --git a/R/infer-ts-prop-test.R b/R/ifr-ts-prop-test.R similarity index 53% rename from R/infer-ts-prop-test.R rename to R/ifr-ts-prop-test.R index ab6b46b..64e70e6 100644 --- a/R/infer-ts-prop-test.R +++ b/R/ifr-ts-prop-test.R @@ -27,9 +27,9 @@ #' \item{sig}{p-value for z statistic} #' \item{alt}{alternative hypothesis} #' @section Deprecated Functions: -#' \code{ts_prop_test()}, \code{ts_prop_grp()} and \code{ts_prop_calc()} have +#' \code{infer_ts_prop_test()}, \code{infer_ts_prop_grp()} and \code{infer_ts_prop_calc()} have #' been deprecated. Instead use \code{infer_ts_prop_test()}, -#' \code{infer_ts_prop_grp()} and \code{infer_ts_prop_calc()}. +#' \code{infer_ts_prop_group()} and \code{infer_ts_prop_calc()}. #' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric #' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. #' @seealso \code{\link[stats]{prop.test}} @@ -41,7 +41,7 @@ #' #' # using groups #' # lower tail -#' infer_ts_prop_grp(treatment2, outcome, female, +#' infer_ts_prop_group(treatment2, outcome, female, #' alternative = 'less') #' #' # using sample size and proportions @@ -58,24 +58,23 @@ infer_ts_prop_test <- function(data, var1, var2, #' infer_ts_prop_test.default <- function(data, var1, var2, alternative = c("both", "less", "greater", "all"), ...) { - var_1 <- enquo(var1) - var_2 <- enquo(var2) - varone <- - data %>% - pull(!! var_1) + var_1 <- deparse(substitute(var1)) + var_2 <- deparse(substitute(var2)) + varone <- data[[var_1]] + vartwo <- data[[var_2]] - vartwo <- - data %>% - pull(!! var_2) + alt <- match.arg(alternative) + k <- prop_comp2(varone, vartwo, alt) - alt <- match.arg(alternative) - k <- prop_comp2(varone, vartwo, alt) - - result <- list( - n1 = k$n1, n2 = k$n2, phat1 = k$phat1, phat2 = k$phat2, - z = k$z, sig = k$sig, alt = alt - ) + result <- + list(alt = alt, + n1 = k$n1, + n2 = k$n2, + phat1 = k$phat1, + phat2 = k$phat2, + sig = k$sig, + z = k$z) class(result) <- "infer_ts_prop_test" return(result) @@ -85,9 +84,9 @@ infer_ts_prop_test.default <- function(data, var1, var2, #' @rdname infer_ts_prop_test #' @usage NULL #' -ts_prop_test <- function(var1, var2, - alternative = c("both", "less", "greater", "all"), ...) { - .Deprecated("infer_ts_prop_test()") +infer_ts_prop_calc <- function(n1, n2, p1, p2, + alternative = c("both", "less", "greater", "all"), ...) { + .Deprecated("infer_ts_prop_calc()") } #' @export @@ -100,44 +99,39 @@ print.infer_ts_prop_test <- function(x, ...) { #' @export #' @rdname infer_ts_prop_test #' -infer_ts_prop_grp <- function(data, var, group, +infer_ts_prop_group <- function(data, var, group, alternative = c("both", "less", "greater", "all")) { - var1 <- enquo(var) - group1 <- enquo(group) - - varone <- - data %>% - pull(!! var1) - groupone <- - data %>% - pull(!! group1) + var1 <- deparse(substitute(var)) + group1 <- deparse(substitute(group)) + varone <- data[[var1]] + groupone <- data[[group1]] if (nlevels(groupone) > 2) { stop("Grouping variable must be a binary factor variables.", call. = FALSE) } - n <- tapply(varone, groupone, length) - n1 <- n[[1]] - n2 <- n[[2]] - y <- tapply(varone, groupone, table) - y1 <- y[[1]][[2]] - y2 <- y[[2]][[2]] + n <- tapply(varone, groupone, length) + n1 <- n[[1]] + n2 <- n[[2]] + y <- tapply(varone, groupone, table) + y1 <- y[[1]][[2]] + y2 <- y[[2]][[2]] phat1 <- y1 / n1 phat2 <- y2 / n2 - phat <- sum(y1, y2) / sum(n1, n2) - num <- (phat1 - phat2) - den1 <- phat * (1 - phat) - den2 <- (1 / n1) + (1 / n2) - den <- sqrt(den1 * den2) - z <- num / den + phat <- sum(y1, y2) / sum(n1, n2) + num <- (phat1 - phat2) + den1 <- phat * (1 - phat) + den2 <- (1 / n1) + (1 / n2) + den <- sqrt(den1 * den2) + z <- num / den - lt <- pnorm(z) - ut <- round(pnorm(z, lower.tail = FALSE), 4) - tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) + lt <- pnorm(z) + ut <- round(pnorm(z, lower.tail = FALSE), 4) + tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) - alt <- match.arg(alternative) + alt <- match.arg(alternative) if (alt == "all") { sig <- c("both" = tt, "less" = lt, "greater" = ut) @@ -149,51 +143,40 @@ infer_ts_prop_grp <- function(data, var, group, sig <- tt } - out <- list( - n1 = n1, - n2 = n2, - phat1 = phat1, - phat2 = phat2, - z = round(z, 3), - sig = round(sig, 3), - alt = alt - ) + out <- + list(alt = alt, + n1 = n1, + n2 = n2, + phat1 = phat1, + phat2 = phat2, + sig = round(sig, 3), + z = round(z, 3)) class(out) <- "infer_ts_prop_test" return(out) } -#' @export -#' @rdname infer_ts_prop_test -#' @usage NULL -#' -ts_prop_grp <- function(var, group, - alternative = c("both", "less", "greater", "all")) { - .Deprecated("infer_ts_prop_grp()") -} - - #' @export #' @rdname infer_ts_prop_test #' infer_ts_prop_calc <- function(n1, n2, p1, p2, alternative = c("both", "less", "greater", "all"), ...) { - n1 <- n1 - n2 <- n2 + n1 <- n1 + n2 <- n2 phat1 <- p1 phat2 <- p2 - phat <- sum(n1 * p1, n2 * p2) / sum(n1, n2) - num <- (phat1 - phat2) - den1 <- phat * (1 - phat) - den2 <- (1 / n1) + (1 / n2) - den <- sqrt(den1 * den2) - z <- num / den + phat <- sum(n1 * p1, n2 * p2) / sum(n1, n2) + num <- (phat1 - phat2) + den1 <- phat * (1 - phat) + den2 <- (1 / n1) + (1 / n2) + den <- sqrt(den1 * den2) + z <- num / den - lt <- pnorm(z) - ut <- round(pnorm(z, lower.tail = FALSE), 4) - tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) + lt <- pnorm(z) + ut <- round(pnorm(z, lower.tail = FALSE), 4) + tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) - alt <- match.arg(alternative) + alt <- match.arg(alternative) if (alt == "all") { sig <- c("both" = tt, "less" = lt, "greater" = ut) @@ -205,26 +188,55 @@ infer_ts_prop_calc <- function(n1, n2, p1, p2, sig <- tt } - out <- list( - n1 = n1, - n2 = n2, - phat1 = round(phat1, 3), - phat2 = round(phat2, 3), - z = round(z, 3), - sig = round(sig, 3), - alt = alt - ) + out <- + list(alt = alt, + n1 = n1, + n2 = n2, + phat1 = round(phat1, 3), + phat2 = round(phat2, 3), + sig = round(sig, 3), + z = round(z, 3)) class(out) <- "infer_ts_prop_test" return(out) } -#' @export -#' @rdname infer_ts_prop_test -#' @usage NULL -#' -ts_prop_calc <- function(n1, n2, p1, p2, - alternative = c("both", "less", "greater", "all"), ...) { - .Deprecated("infer_ts_prop_calc()") - infer_ts_prop_calc(n1, n2, p1, p2, alternative, ...) -} \ No newline at end of file +prop_comp2 <- function(var1, var2, alt) { + + n1 <- length(var1) + n2 <- length(var2) + y1 <- table(var1)[[2]] + y2 <- table(var2)[[2]] + + phat1 <- round(y1 / n1, 4) + phat2 <- round(y2 / n2, 4) + phat <- sum(y1, y2) / sum(n1, n2) + + num <- (phat1 - phat2) + den1 <- phat * (1 - phat) + den2 <- (1 / n1) + (1 / n2) + den <- sqrt(den1 * den2) + z <- round(num / den, 4) + + lt <- round(pnorm(z), 4) + ut <- round(pnorm(z, lower.tail = FALSE), 4) + tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) + + if (alt == "all") { + sig <- c("two-tail" = tt, "lower-tail" = lt, "upper-tail" = ut) + } else if (alt == "greater") { + sig <- ut + } else if (alt == "less") { + sig <- lt + } else { + sig <- tt + } + + list(n1 = n1, + n2 = n2, + phat1 = phat1, + phat2 = phat2, + sig = round(sig, 3), + z = round(z, 3)) + +} diff --git a/R/infer-ts-var-test.R b/R/ifr-ts-var-test.R similarity index 54% rename from R/infer-ts-var-test.R rename to R/ifr-ts-var-test.R index 536d1d2..e4eec4a 100644 --- a/R/infer-ts-var-test.R +++ b/R/ifr-ts-var-test.R @@ -1,5 +1,3 @@ -#' @importFrom stats complete.cases -#' @importFrom purrr map_dbl #' @title Two Sample Variance Comparison Test #' @description \code{infer_ts_var_test} performs tests on the equality of standard #' deviations (variances). @@ -51,46 +49,30 @@ infer_ts_var_test <- function(data, ..., group_var = NULL, #' infer_ts_var_test.default <- function(data, ..., group_var = NULL, alternative = c("less", "greater", "all")) { - groupvar <- enquo(group_var) - varyables <- quos(...) + groupvar <- deparse(substitute(group_var)) + varyables <- vapply(substitute(...()), deparse, NA_character_) + fdata <- data[varyables] - fdata <- - data %>% - select(!!! varyables) - - if (quo_is_null(groupvar)) { - z <- as.list(fdata) - ln <- z %>% map_int(length) + if (groupvar == "NULL") { + z <- as.list(fdata) + ln <- unlist(lapply(z, length)) ly <- seq_len(length(z)) if (length(z) < 2) { stop("Please specify at least two variables.", call. = FALSE) } - out <- gvar(ln, ly) - - fdata <- unlist(z) - - groupvars <- - out %>% - unlist() %>% - as.factor() + out <- gvar(ln, ly) + fdata <- unlist(z) + groupvars <- as.factor(unlist(out)) + lev <- names(data[varyables]) - lev <- - data %>% - select(!!! varyables) %>% - names() } else { - fdata <- - fdata %>% - pull(1) - groupvars <- - data %>% - pull(!! groupvar) - - lev <- levels(groupvars) + fdata <- fdata[[1]] + groupvars <- data[[groupvar]] + lev <- levels(groupvars) if (length(fdata) != length(groupvars)) { stop("Length of variable and group_var do not match.", call. = FALSE) @@ -99,30 +81,88 @@ infer_ts_var_test.default <- function(data, ..., group_var = NULL, type <- match.arg(alternative) - k <- var_comp(fdata, groupvars) - - out <- list( - f = k$f, lower = k$lower, upper = k$upper, vars = k$vars, - avgs = k$avgs, sds = k$sds, ses = k$ses, avg = k$avg, sd = k$sd, - se = k$se, n1 = k$n1, n2 = k$n2, lens = k$lens, len = k$len, - lev = lev, type = type - ) + k <- var_comp(fdata, groupvars) + + out <- list(avg = k$avg, + avgs = k$avgs, + f = k$f, + len = k$len, + lens = k$lens, + lev = lev, + lower = k$lower, + n1 = k$n1, + n2 = k$n2, + sd = k$sd, + sds = k$sds, + se = k$se, + ses = k$ses, + type = type, + upper = k$upper, + vars = k$vars) class(out) <- "infer_ts_var_test" return(out) } -#' @export -#' @rdname infer_ts_var_test -#' @usage NULL -#' -var_test <- function(variable, ..., group_var = NA, - alternative = c("less", "greater", "all")) { - .Deprecated("infer_ts_var_test()") -} - #' @export #' print.infer_ts_var_test <- function(x, ...) { print_var_test(x) -} \ No newline at end of file +} + +var_comp <- function(variable, group_var) { + + comp <- complete.cases(variable, group_var) + cvar <- variable[comp] + gvar <- group_var[comp] + d <- data.frame(cvar, gvar) + vals <- tibble_stats(d, "cvar", "gvar") + lass <- tbl_stats(d, "cvar") + lens <- vals[[2]] + vars <- vals[[4]] + f <- vars[1] / vars[2] + n1 <- lens[1] - 1 + n2 <- lens[2] - 1 + lower <- pf(f, n1, n2) + upper <- pf(f, n1, n2, lower.tail = FALSE) + + list(avg = round(lass[2], 2), + avgs = round(vals[[3]], 2), + f = round(f, 4), + len = lass[1], + lens = lens, + lower = round(lower, 4), + n1 = n1, + n2 = n2, + sd = round(lass[3], 2), + sds = round(vals[[5]], 2), + se = round(lass[4], 2), + ses = round(vals[[6]], 2), + upper = round(upper, 4), + vars = round(vars, 2)) + +} + +tibble_stats <- function(data, x, y) { + + dat <- data.table(data[c(x, y)]) + + out <- dat[, .(length = length(get(x)), + mean = mean(get(x)), + var = var(get(x)), + sd = sd(get(x))), + by = y] + + out[, ':='(ses = sd / sqrt(length))] + setDF(out) + out <- out[order(out[, 1]),] + return(out) + +} + +tbl_stats <- function(data, y) { + + dat <- data[[y]] + c(length(dat), mean(dat), sd(dat), (sd(dat) / sqrt(length(dat)))) + +} diff --git a/R/ifr-utils.R b/R/ifr-utils.R new file mode 100644 index 0000000..43cf952 --- /dev/null +++ b/R/ifr-utils.R @@ -0,0 +1,94 @@ +fg <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +fk <- function(x, w) { + format(x, width = w, justify = "centre", nsmall = 3) +} + + +fs <- function() { + rep(" ") +} + +fl <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "left") +} + +fc <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +formatter_t <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +format_cil <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +format_ciu <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +formats_t <- function() { + rep(" ") +} + +formatter_pair <- function(x, w) { + x1 <- format(x, nsmall = 2) + x2 <- as.character(x1) + ret <- format(x2, width = w, justify = "centre") + return(ret) +} + +fw <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +fn <- function(x, w) { + x %>% + as.character() %>% + format(width = w, justify = "centre") +} + +formats <- function() { + rep(" ") +} + +#' @importFrom utils packageVersion menu install.packages +check_suggests <- function(pkg) { + + pkg_flag <- tryCatch(utils::packageVersion(pkg), error = function(e) NA) + + if (is.na(pkg_flag)) { + + msg <- message(paste0('\n', pkg, ' must be installed for this functionality.')) + + if (interactive()) { + message(msg, "\nWould you like to install it?") + if (utils::menu(c("Yes", "No")) == 1) { + utils::install.packages(pkg) + } else { + stop(msg, call. = FALSE) + } + } else { + stop(msg, call. = FALSE) + } + } + +} \ No newline at end of file diff --git a/R/zzz.R b/R/ifr-zzz.R similarity index 100% rename from R/zzz.R rename to R/ifr-zzz.R diff --git a/R/infer-anova.R b/R/infer-anova.R deleted file mode 100644 index e1622dd..0000000 --- a/R/infer-anova.R +++ /dev/null @@ -1,81 +0,0 @@ -#' @importFrom stats as.formula lm pf -#' @importFrom rlang enquo !! -#' @title One Way ANOVA -#' @description One way analysis of variance -#' @param data a \code{data.frame} or a \code{tibble} -#' @param x numeric; column in \code{data} -#' @param y factor; column in \code{data} -#' @param ... additional arguments passed to or from other methods -#' @return \code{owanova} returns an object of class \code{"owanova"}. -#' An object of class \code{"owanova"} is a list containing the -#' following components: -#' -#' \item{between}{between group sum of squares} -#' \item{within}{within group sum of squares} -#' \item{total}{total sum of squares} -#' \item{df_btw}{between groups degress of freedom} -#' \item{df_within}{within groups degress of freedom} -#' \item{df_total}{total degress of freedom} -#' \item{ms_btw}{between groups mean square} -#' \item{ms_within}{within groups mean square} -#' \item{f}{f value} -#' \item{p}{p value} -#' \item{r2}{r squared value} -#' \item{ar2}{adjusted r squared value} -#' \item{sigma}{root mean squared error} -#' \item{obs}{number of observations} -#' \item{tab}{group statistics} -#' @section Deprecated Functions: -#' \code{owanova()} has been deprecated. Instead use \code{infer_oneway_anova()}. -#' @references Kutner, M. H., Nachtsheim, C., Neter, J., & Li, W. (2005). -#' Applied linear statistical models. Boston: McGraw-Hill Irwin. -#' -#' @seealso \code{\link[stats]{anova}} -#' @examples -#' infer_oneway_anova(mtcars, mpg, cyl) -#' infer_oneway_anova(hsb, write, prog) -#' @export -#' -infer_oneway_anova <- function(data, x, y, ...) UseMethod("infer_oneway_anova") - -#' @export -infer_oneway_anova.default <- function(data, x, y, ...) { - x1 <- enquo(x) - y1 <- enquo(y) - - fdata <- - data %>% - select(!! x1, !! y1) - - sample_mean <- anova_avg(fdata, !! x1) - sample_stats <- anova_split(fdata, !! x1, !! y1, sample_mean) - k <- anova_calc(fdata, sample_stats, !! x1, !! y1) - - - result <- list( - between = k$sstr, within = k$ssee, total = k$total, - df_btw = k$df_sstr, df_within = k$df_sse, - df_total = k$df_sst, ms_btw = k$mstr, ms_within = k$mse, - f = k$f, p = k$sig, r2 = round(k$reg$r.squared, 4), - ar2 = round(k$reg$adj.r.squared, 4), - sigma = round(k$reg$sigma, 4), obs = k$obs, - tab = sample_stats[, c(1, 2, 3, 5)] - ) - - class(result) <- "infer_oneway_anova" - return(result) -} - -#' @export -#' @rdname infer_oneway_anova -#' @usage NULL -#' -owanova <- function(data, x, y, ...) { - .Deprecated("infer_oneway_anova()") - infer_oneway_anova(data, x, y, ...) -} - -#' @export -print.infer_oneway_anova <- function(x, ...) { - print_owanova(x) -} \ No newline at end of file diff --git a/R/infer-chisq-assoc-test.R b/R/infer-chisq-assoc-test.R deleted file mode 100644 index dad68de..0000000 --- a/R/infer-chisq-assoc-test.R +++ /dev/null @@ -1,117 +0,0 @@ -#' @importFrom stats pchisq -#' @importFrom dplyr pull -#' @title Chi Square Test of Association -#' @description Chi Square test of association to examine if there is a -#' relationship between two categorical variables. -#' @param data a \code{data.frame} or \code{tibble} -#' @param x factor; column in \code{data} -#' @param y factor; column in \code{data} -#' @return \code{infer_chisq_assoc_test} returns an object of class -#' \code{"infer_chisq_assoc_test"}. An object of class -#' \code{"infer_chisq_assoc_test"} is a list containing the -#' following components: -#' -#' \item{chi}{chi square} -#' \item{chilr}{likelihood ratio chi square} -#' \item{chimh}{mantel haenszel chi square} -#' \item{chiy}{continuity adjusted chi square} -#' \item{sig}{p-value of chi square} -#' \item{siglr}{p-value of likelihood ratio chi square} -#' \item{sigmh}{p-value of mantel haenszel chi square} -#' \item{sigy}{p-value of continuity adjusted chi square} -#' \item{phi}{phi coefficient} -#' \item{cc}{contingency coefficient} -#' \item{cv}{cramer's v} -#' \item{ds}{product of dimensions of the table of \code{x} and \code{y}} -#' \item{df}{degrees of freedom} -#' @section Deprecated Function: -#' \code{chisq_test()} has been deprecated. Instead use -#' \code{infer_chisq_assoc_test()}. -#' -#' @seealso \code{\link[stats]{chisq.test}} -#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric -#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. -#' @examples -#' infer_chisq_assoc_test(hsb, female, schtyp) -#' -#' infer_chisq_assoc_test(hsb, female, ses) -#' @export -#' -infer_chisq_assoc_test <- function(data, x, y) UseMethod("infer_chisq_assoc_test") - -#' @export -infer_chisq_assoc_test.default <- function(data, x, y) { - x1 <- enquo(x) - y1 <- enquo(y) - - xone <- - data %>% - pull(!! x1) - - yone <- - data %>% - pull(!! y1) - - if (!is.factor(xone)) { - stop("x must be a categorical variable") - } - - if (!is.factor(yone)) { - stop("y must be a categorical variable") - } - - # dimensions - k <- table(xone, yone) - dk <- dim(k) - ds <- prod(dk) - nr <- dk[1] - nc <- dk[2] - - - if (ds == 4) { - twoway <- matrix(table(xone, yone), nrow = 2) - df <- df_chi(twoway) - ef <- efmat(twoway) - k <- pear_chsq(twoway, df, ef) - m <- lr_chsq(twoway, df, ef) - n <- yates_chsq(twoway) - p <- mh_chsq(twoway, n$total, n$prod_totals) - } else { - twoway <- matrix(table(xone, yone), nrow = dk[1]) - ef <- efm(twoway, dk) - df <- df_chi(twoway) - k <- pear_chi(twoway, df, ef) - m <- lr_chsq2(twoway, df, ef, ds) - } - - j <- chigf(xone, yone, k$chi) - - result <- if (ds == 4) { - list( - chi = k$chi, chilr = m$chilr, chimh = p$chimh, chiy = n$chi_y, - sig = k$sig, siglr = m$sig_lr, sigy = n$sig_y, sigmh = p$sig_mh, - phi = j$phi, cc = j$cc, cv = j$cv, ds = ds, df = df - ) - } else { - list( - df = df, chi = k$chi, chilr = m$chilr, sig = k$sig, siglr = m$sig_lr, - phi = j$phi, cc = j$cc, cv = j$cv, ds = ds - ) - } - - class(result) <- "infer_chisq_assoc_test" - return(result) -} - -#' @export -#' @rdname infer_chisq_assoc_test -#' @usage NULL -#' -chisq_test <- function(x, y) { - .Deprecated("infer_chisq_assoc_test()") -} - -#' @export -print.infer_chisq_assoc_test <- function(x, ...) { - print_chisq_test(x) -} \ No newline at end of file diff --git a/R/infer-cochran-q-test.R b/R/infer-cochran-q-test.R deleted file mode 100644 index 174c756..0000000 --- a/R/infer-cochran-q-test.R +++ /dev/null @@ -1,60 +0,0 @@ -#' @importFrom rlang quos !!! -#' @title Cochran Q Test -#' @description Test if the proportions of 3 or more dichotomous variables are -#' equal in the same population. -#' @param data a \code{data.frame} or \code{tibble} -#' @param ... columns in \code{data} -#' @return \code{infer_cochran_qtest} returns an object of class -#' \code{"infer_cochran_qtest"}. An object of class \code{"infer_cochran_qtest"} -#' is a list containing the following components: -#' -#' \item{n}{number of observations} -#' \item{df}{degrees of freedom} -#' \item{q}{cochran's q statistic} -#' \item{pvalue}{p-value} -#' @section Deprecated Function: -#' \code{cochran_test()} has been deprecated. Instead use -#' \code{infer_cochran_qtest()}. -#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric -#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. -#' -#' @examples -#' infer_cochran_qtest(exam, exam1, exam2, exam3) -#' @export -#' -infer_cochran_qtest <- function(data, ...) UseMethod("infer_cochran_qtest") - -#' @export -infer_cochran_qtest.default <- function(data, ...) { - vars <- quos(...) - - fdata <- data %>% - select(!!! vars) - - if (ncol(fdata) < 3) { - stop("Please specify at least 3 variables.") - } - - if (any(sapply(lapply(fdata, as.factor), nlevels) > 2)) { - stop("Please specify dichotomous/binary variables only.") - } - - k <- cochran_comp(fdata) - result <- list(n = k$n, df = k$df, q = k$q, pvalue = k$pvalue) - class(result) <- "infer_cochran_qtest" - return(result) -} - -#' @export -#' @rdname infer_cochran_qtest -#' @usage NULL -#' -cochran_test <- function(x, ...) { - .Deprecated("infer_cochran_qtest()") -} - -#' @export -#' -print.infer_cochran_qtest <- function(x, ...) { - print_cochran_test(x) -} \ No newline at end of file diff --git a/R/infer-mcnemar-test.R b/R/infer-mcnemar-test.R deleted file mode 100644 index 26961bd..0000000 --- a/R/infer-mcnemar-test.R +++ /dev/null @@ -1,96 +0,0 @@ -#' @importFrom stats qnorm -#' @importFrom magrittr %>% -#' @title McNemar Test -#' @description Test if the proportions of two dichotomous variables are -#' equal in the same population. -#' @param data a \code{data.frame} or \code{tibble} -#' @param x factor; column in \code{data} -#' @param y factor; column in \code{data} -#' @return \code{infer_mcnemar_test} returns an object of class \code{"infer_mcnemar_test"}. -#' An object of class \code{"infer_mcnemar_test"} is a list containing the -#' following components: -#' -#' \item{statistic}{chi square statistic} -#' \item{df}{degrees of freedom} -#' \item{pvalue}{p-value} -#' \item{exactp}{exact p-value} -#' \item{cstat}{continuity correction chi square statistic} -#' \item{cpvalue}{continuity correction p-value} -#' \item{kappa}{kappa coefficient; measure of interrater agreement} -#' \item{std_err}{asymptotic standard error} -#' \item{kappa_cil}{95\% kappa lower confidence limit} -#' \item{kappa_ciu}{95\% kappa upper confidence limit} -#' \item{cases}{cases} -#' \item{controls}{controls} -#' \item{ratio}{ratio of proportion with factor} -#' \item{odratio}{odds ratio} -#' \item{tbl}{two way table} -#' @section Deprecated Function: -#' \code{mcnermar_test()} has been deprecated. Instead use -#' \code{infer_mcnemar_test()}. -#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric -#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. -#' -#' @seealso \code{\link[stats]{mcnemar.test}} -#' @examples -#' # using variables from data -#' library(dplyr) -#' hb <- mutate(hsb, -#' himath = if_else(math > 60, 1, 0), -#' hiread = if_else(read > 60, 1, 0) -#' ) -#' infer_mcnemar_test(hb, himath, hiread) -#' -#' # test if the proportion of students in himath and hiread group is same -#' himath <- ifelse(hsb$math > 60, 1, 0) -#' hiread <- ifelse(hsb$read > 60, 1, 0) -#' infer_mcnemar_test(table(himath, hiread)) -#' -#' # using matrix -#' infer_mcnemar_test(matrix(c(135, 18, 21, 26), nrow = 2)) -#' @export -#' -infer_mcnemar_test <- function(data, x = NULL, y = NULL) UseMethod("infer_mcnemar_test") - -#' @export -#' -infer_mcnemar_test.default <- function(data, x = NULL, y = NULL) { - if (is.matrix(data) | is.table(data)) { - dat <- mcdata(data) - } else { - x1 <- enquo(x) - y1 <- enquo(y) - - dat <- - data %>% - select(!! x1, !! y1) %>% - table() - } - - k <- mccomp(dat) - - result <- list( - statistic = k$statistic, df = k$df, pvalue = k$pvalue, - exactp = k$exactp, cstat = k$cstat, cpvalue = k$cpvalue, kappa = k$kappa, - std_err = k$std_err, kappa_cil = k$kappa_cil, kappa_ciu = k$kappa_ciu, - cases = k$cases, controls = k$controls, ratio = k$ratio, - odratio = k$odratio, tbl = dat - ) - - class(result) <- "infer_mcnemar_test" - return(result) -} - -#' @export -#' @rdname infer_mcnemar_test -#' @usage NULL -#' -mcnemar_test <- function(x, y = NULL) { - .Deprecated("infer_mcnemar_test()") -} - -#' @export -#' -print.infer_mcnemar_test <- function(x, ...) { - print_mcnemar_test(x) -} \ No newline at end of file diff --git a/R/infer-ts-ind-ttest.R b/R/infer-ts-ind-ttest.R deleted file mode 100644 index 38698c2..0000000 --- a/R/infer-ts-ind-ttest.R +++ /dev/null @@ -1,136 +0,0 @@ -#' @importFrom stats qt pt pf -#' @title Two Independent Sample t Test -#' @description \code{infer_ts_ind_ttest} compares the means of two independent groups in order to determine whether -#' there is statistical evidence that the associated population means are significantly different. -#' @param data a data frame -#' @param x factor; a column in \code{data} -#' @param y numeric; a column in \code{data} -#' @param confint confidence level -#' @param alternative a character string specifying the alternative hypothesis, -#' must be one of "both" (default), "greater", "less" or "all". You can specify -#' just the initial letter -#' @param ... additional arguments passed to or from other methods -#' @return \code{infer_ts_ind_ttest} returns an object of class \code{"infer_ts_ind_ttest"}. -#' An object of class \code{"infer_ts_ind_ttest"} is a list containing the -#' following components: -#' -#' \item{levels}{levels of \code{x}} -#' \item{obs}{number of observations of \code{y} for each level of \code{x}} -#' \item{n}{total number of observations} -#' \item{mean}{mean of \code{y} for each level of \code{x}} -#' \item{sd}{standard deviation of \code{y} for each level of \code{x}} -#' \item{se}{estimate of standard error of \code{y} for each level of \code{x}} -#' \item{lower}{lower limit for the mean of \code{y} for each level of \code{x}} -#' \item{upper}{upper limit for the mean of \code{y} for each level of \code{x}} -#' \item{combined}{a data frame; mean, standard deviation, standard error and -#' confidence limit of mean of \code{y}} -#' \item{mean_diff}{difference in mean of \code{y} for the two groups of \code{x}} -#' \item{se_dif}{estimate of the standard error for difference in mean of -#' \code{y} for the two groups of \code{x}} \item{sd_dif}{degrees of freedom} -#' \item{conf_diff}{confidence interval for \code{mean_diff}} -#' \item{df_pooled}{degrees of freedom for the pooled method} -#' \item{df_satterthwaite}{degrees of freedom for the Satterthwaite method} -#' \item{t_pooled}{t statistic for the pooled method} -#' \item{t_satterthwaite}{t statistic for the Satterthwaite method} -#' \item{sig_pooled}{two-sided p-value for the pooled method} -#' \item{sig_pooled_l}{lower one-sided p-value for the pooled method} -#' \item{sig_pooled_u}{upper one-sided p-value for the pooled method} -#' \item{sig}{two-sided p-value for the Satterthwaite method} -#' \item{sig_l}{lower one-sided p-value for the Satterthwaite method} -#' \item{sig_u}{upper one-sided p-value for the Satterthwaite method} -#' \item{num_df}{numerator degrees of freedom for folded f test} -#' \item{den_df}{denominator degrees of freedom for folded f test} -#' \item{f}{f value for the equality of variances test} -#' \item{f_sig}{p-value for the folded f test} -#' \item{var_y}{name of \code{y}} -#' \item{confint}{confidence level} -#' \item{alternative}{alternative hypothesis} -#' @section Deprecated Function: -#' \code{ind_ttest()} has been deprecated. Instead use \code{infer_ts_ind_ttest()}. -#' @references Sheskin, D. J. 2007. Handbook of Parametric and Nonparametric -#' Statistical Procedures, 4th edition. : Chapman & Hall/CRC. -#' @seealso \code{\link[stats]{t.test}} -#' @examples -#' # lower tail -#' infer_ts_ind_ttest(hsb, female, write, alternative = 'less') -#' -#' # upper tail -#' infer_ts_ind_ttest(hsb, female, write, alternative = 'greater') -#' -#' # both tails -#' infer_ts_ind_ttest(hsb, female, write, alternative = 'both') -#' -#' # all tails -#' infer_ts_ind_ttest(hsb, female, write, alternative = 'all') -#' @export -#' -infer_ts_ind_ttest <- function(data, x, y, confint = 0.95, - alternative = c("both", "less", "greater", "all"), ...) UseMethod("infer_ts_ind_ttest") - -#' @export -#' -infer_ts_ind_ttest.default <- function(data, x, y, confint = 0.95, - alternative = c("both", "less", "greater", "all"), ...) { - x1 <- enquo(x) - y1 <- enquo(y) - - yone <- - data %>% - select(!! y1) %>% - names() - - if (check_x(data, !! x1)) { - stop("x must be a binary factor variable", call. = FALSE) - } - - if (check_level(data, !! x1) > 2) { - stop("x must be a binary factor variable", call. = FALSE) - } - - method <- match.arg(alternative) - var_y <- yone - alpha <- 1 - confint - a <- alpha / 2 - - h <- indth(data, !! x1, !! y1, a) - grp_stat <- h - g_stat <- as.matrix(h) - comb <- indcomb(data, !! y1, a) - k <- indcomp(grp_stat, alpha) - j <- indsig(k$n1, k$n2, k$s1, k$s2, k$mean_diff) - m <- indpool(k$n1, k$n2, k$mean_diff, k$se_dif) - - result <- list( - levels = g_stat[, 1], obs = g_stat[, 2], n = k$n, - mean = g_stat[, 3], sd = g_stat[, 4], se = g_stat[, 5], - lower = g_stat[, 8], upper = g_stat[, 9], combined = comb, - mean_diff = round(k$mean_diff, 3), sd_dif = round(k$sd_dif, 3), - se_dif = round(k$se_dif, 3), - conf_diff = round(k$conf_diff, 5), df_pooled = m$df_pooled, - df_satterthwaite = j$d_f, t_pooled = round(m$t_pooled, 4), - t_satterthwaite = round(j$t, 4), - sig_pooled_l = m$sig_pooled_l, sig_pooled_u = m$sig_pooled_u, - sig_pooled = m$sig_pooled, sig = j$sig, sig_l = j$sig_l, sig_u = j$sig_u, - num_df = k$n1 - 1, den_df = k$n2 - 1, f = round(k$s1 / k$s2, 4), - f_sig = fsig(k$s1, k$s2, k$n1, k$n2), var_y = var_y, confint = confint, - alternative = method - ) - - class(result) <- "infer_ts_ind_ttest" - return(result) -} - -#' @export -#' @rdname infer_ts_ind_ttest -#' @usage NULL -#' -ind_ttest <- function(data, x, y, confint = 0.95, - alternative = c("both", "less", "greater", "all"), ...) { - .Deprecated("infer_ts_ind_ttest()") -} - -#' @export -#' -print.infer_ts_ind_ttest <- function(x, ...) { - print_two_ttest(x) -} \ No newline at end of file diff --git a/R/infer-utils.R b/R/infer-utils.R deleted file mode 100644 index 40d773f..0000000 --- a/R/infer-utils.R +++ /dev/null @@ -1,1057 +0,0 @@ -#' @importFrom dplyr group_by summarise_all funs mutate -#' @importFrom magrittr %>% use_series -#' @importFrom stats var sd -#' @importFrom tibble tibble as_data_frame -anova_split <- function(data, x, y, sample_mean) { - x1 <- enquo(x) - y1 <- enquo(y) - - by_factor <- - data %>% - group_by(!! y1) %>% - select(!! y1, !! x1) %>% - summarise_all(funs(length, mean, var, sd)) %>% - as_data_frame() %>% - mutate( - sst = length * ((mean - sample_mean) ^ 2), - sse = (length - 1) * var - ) - - return(by_factor) -} - -anova_avg <- function(data, y) { - y1 <- enquo(y) - - avg <- - data %>% - select(!! y1) %>% - summarise_all(funs(mean)) - - return(unlist(avg, use.names = FALSE)) -} - -anova_calc <- function(data, sample_stats, x, y) { - x1 <- enquo(x) - y1 <- enquo(y) - - var_names <- - data %>% - select(!! x1, !! y1) %>% - names() - - sstr <- - sample_stats %>% - use_series(sst) %>% - sum() %>% - round(3) - - ssee <- - sample_stats %>% - use_series(sse) %>% - sum() %>% - round(3) - - total <- round(sstr + ssee, 3) - df_sstr <- nrow(sample_stats) - 1 - df_sse <- nrow(data) - nrow(sample_stats) - df_sst <- nrow(data) - 1 - mstr <- round(sstr / df_sstr, 3) - mse <- round(ssee / df_sse, 3) - f <- round(mstr / mse, 3) - sig <- round(1 - pf(f, df_sstr, df_sse), 3) - obs <- nrow(data) - regs <- paste(var_names[1], "~ as.factor(", var_names[2], ")") - model <- lm(as.formula(regs), data = data) - reg <- summary(model) - out <- list( - sstr = sstr, ssee = ssee, total = total, df_sstr = df_sstr, - df_sse = df_sse, df_sst = df_sst, mstr = mstr, mse = mse, f = f, - sig = sig, obs = obs, model = model, reg = reg - ) - return(out) -} - -binom_comp <- function(n, success, prob) { - n <- n - k <- success - obs_p <- k / n - exp_k <- round(n * prob) - lt <- pbinom(k, n, prob, lower.tail = T) - ut <- pbinom(k - 1, n, prob, lower.tail = F) - p_opp <- round(dbinom(k, n, prob), 9) - i_p <- dbinom(exp_k, n, prob) - i_k <- exp_k - - if (k < exp_k) { - while (i_p > p_opp) { - i_k <- i_k + 1 - i_p <- round(dbinom(i_k, n, prob), 9) - if (round(i_p) == p_opp) { - break - } - } - - ttf <- pbinom(k, n, prob, lower.tail = T) + - pbinom(i_k - 1, n, prob, lower.tail = F) - } else { - while (p_opp <= i_p) { - i_k <- i_k - 1 - i_p <- dbinom(i_k, n, prob) - if (round(i_p) == p_opp) { - break - } - } - - i_k <- i_k - - tt <- pbinom(i_k, n, prob, lower.tail = T) + - pbinom(k - 1, n, prob, lower.tail = F) - - ttf <- ifelse(tt <= 1, tt, 1) - } - out <- list( - n = n, k = k, exp_k = exp_k, obs_p = obs_p, exp_p = prob, ik = i_k, - lower = round(lt, 6), upper = round(ut, 6), two_tail = round(ttf, 6) - ) - return(out) -} - -# chi square association -df_chi <- function(twoway) { - (nrow(twoway) - 1) * (ncol(twoway) - 1) -} - -efmat <- function(twoway) { - mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = 2) - mat2 <- matrix(colSums(twoway), nrow = 1) - ef <- mat1 %*% mat2 - return(ef) -} - -pear_chsq <- function(twoway, df, ef) { - chi <- round(sum(((twoway - ef) ^ 2) / ef), 4) - sig <- round(pchisq(chi, df, lower.tail = F), 4) - out <- list(chi = chi, sig = sig) - return(out) -} - -lr_chsq <- function(twoway, df, ef) { - chilr <- round(2 * sum(matrix(log(twoway / ef), nrow = 1) %*% matrix(twoway, nrow = 4)), 4) - sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4) - out <- list(chilr = chilr, sig_lr = sig_lr) - return(out) -} - -lr_chsq2 <- function(twoway, df, ef, ds) { - chilr <- round(2 * sum(matrix(twoway, ncol = ds) %*% matrix(log(twoway / ef), nrow = ds)), 4) - sig_lr <- round(pchisq(chilr, df, lower.tail = F), 4) - out <- list(chilr = chilr, sig_lr = sig_lr) - return(out) -} - -yates_chsq <- function(twoway) { - way2 <- twoway[, c(2, 1)] - total <- sum(twoway) - prods <- prod(diag(twoway)) - prod(diag(way2)) - prod_totals <- prod(rowSums(twoway)) * prod(colSums(twoway)) - chi_y <- round((total * (abs(prods) - (total / 2)) ^ 2) / prod_totals, 4) - sig_y <- round(pchisq(chi_y, 1, lower.tail = F), 4) - out <- list(chi_y = chi_y, sig_y = sig_y, total = total, prod_totals = prod_totals) - return(out) -} - -mh_chsq <- function(twoway, total, prod_totals) { - num <- twoway[1] - ((rowSums(twoway)[1] * colSums(twoway)[1]) / total) - den <- prod_totals / ((total ^ 3) - (total ^ 2)) - chimh <- round((num ^ 2) / den, 4) - sig_mh <- round(pchisq(chimh, 1, lower.tail = F), 4) - out <- list(chimh = chimh, sig_mh = sig_mh) - return(out) -} - -efm <- function(twoway, dk) { - mat1 <- matrix(rowSums(twoway) / sum(twoway), nrow = dk[1]) - mat2 <- matrix(colSums(twoway), ncol = dk[2]) - ef <- mat1 %*% mat2 - return(ef) -} - -pear_chi <- function(twoway, df, ef) { - chi <- round(sum(((twoway - ef) ^ 2) / ef), 4) - sig <- round(pchisq(chi, df, lower.tail = F), 4) - out <- list(chi = chi, sig = sig) - return(out) -} - -chigf <- function(x, y, chi) { - twoway <- matrix( - table(x, y), nrow = nlevels(as.factor(x)), - ncol = nlevels(as.factor(y)) - ) - total <- sum(twoway) - phi <- round(sqrt(chi / total), 4) - cc <- round(sqrt(chi / (chi + total)), 4) - q <- min(nrow(twoway), ncol(twoway)) - cv <- round(sqrt(chi / (total * (q - 1))), 4) - out <- list(phi = phi, cc = cc, cv = cv) - return(out) -} - -# chi square goodness of fit -chi_cort <- function(x, y) { - diff <- x - y - 0.5 - dif <- abs(x - y) - 0.5 - dif2 <- dif ^ 2 - dev <- round((diff / y) * 100, 2) - std <- round(diff / sqrt(y), 2) - chi <- round(sum(dif2 / y), 4) - out <- list(dev = dev, std = std, chi = chi) - return(out) -} - -chigof <- function(x, y) { - dif <- x - y - dif2 <- dif ^ 2 - dev <- round((dif / y) * 100, 2) - std <- round(dif / sqrt(y), 2) - chi <- round(sum(dif2 / y), 4) - out <- list(dev = dev, std = std, chi = chi) - return(out) -} - -# cochran's q test -coch_data <- function(x, ...) { - if (is.data.frame(x)) { - data <- x %>% - lapply(as.numeric) %>% - as.data.frame() %>% - `-`(1) - } else { - data <- cbind(x, ...) %>% - apply(2, as.numeric) %>% - `-`(1) %>% - as.data.frame() - } - - return(data) -} - -#' @importFrom purrr map_df -#' @importFrom magrittr subtract -cochran_comp <- function(data) { - n <- nrow(data) - k <- ncol(data) - df <- k - 1 - - cs <- - data %>% - map_df(.f = as.numeric) %>% - subtract(1) %>% - sums() - - q <- coch(k, cs$cls_sum, cs$cl, cs$g, cs$gs_sum) - - pvalue <- 1 - pchisq(q, df) - - out <- list( - n = n, - df = df, - q = q, - pvalue = round(pvalue, 4) - ) - - return(out) -} - - -# levene test -lev_metric <- function(cvar, gvar, loc, ...) { - metric <- tapply(cvar, gvar, loc, ...) - y <- abs(cvar - metric[gvar]) - result <- anova(lm(y ~ gvar)) - out <- list( - fstat = result$`F value`[1], - p = result$`Pr(>F)`[1] - ) - return(out) -} - -lev_comp <- function(variable, group_var, trim.mean) { - comp <- complete.cases(variable, group_var) - n <- length(comp) - k <- nlevels(group_var) - cvar <- variable[comp] - gvar <- group_var[comp] - lens <- tapply(cvar, gvar, length) - avgs <- tapply(cvar, gvar, mean) - sds <- tapply(cvar, gvar, sd) - - bf <- lev_metric(cvar, gvar, mean) - lev <- lev_metric(cvar, gvar, median) - bft <- lev_metric(cvar, gvar, mean, trim = trim.mean) - out <- list( - bf = round(bf$fstat, 4), - p_bf = round(bf$p, 4), - lev = round(lev$fstat, 4), - p_lev = round(lev$p, 4), - bft = round(bft$fstat, 4), - p_bft = round(bft$p, 4), - avgs = round(avgs, 2), - sds = round(sds, 2), - avg = round(mean(cvar), 2), - sd = round(sd(cvar), 2), - n = n, - levs = levels(gvar), - n_df = (k - 1), - d_df = (n - k), - lens = lens - ) - return(out) -} - -# mcnemar test -mcdata <- function(x, y) { - if (!is.matrix(x)) { - stop("x must be either a table or a matrix") - } - - if (is.matrix(x)) { - if (length(x) != 4) { - stop("x must be a 2 x 2 matrix") - } - } - - dat <- x - return(dat) -} - - -mctestp <- function(dat) { - retrieve <- matrix(c(1, 2, 2, 1), nrow = 2) - p <- dat[retrieve] - return(p) -} - -tetat <- function(p) { - out <- ((p[1] - p[2]) ^ 2) / sum(p) - return(out) -} - -mcpval <- function(test_stat, df) { - out <- 1 - pchisq(test_stat, df) - return(out) -} - -mcpex <- function(dat) { - out <- 2 * min(pbinom(dat[2], sum(dat[2], dat[3]), 0.5), pbinom(dat[3], sum(dat[2], dat[3]), 0.5)) - return(out) -} - -mcstat <- function(p) { - out <- ((abs(p[1] - p[2]) - 1) ^ 2) / sum(p) - return(out) -} - -mccpval <- function(cstat, df) { - out <- 1 - pchisq(cstat, df) - return(out) -} - -mckappa <- function(dat) { - agreement <- sum(diag(dat)) / sum(dat) - expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2) - kappa <- (agreement - expected) / (1 - expected) - return(kappa) -} - -mcserr <- function(dat, kappa) { - expected <- sum(rowSums(dat) * colSums(dat)) / (sum(dat) ^ 2) - out <- serr(dat, kappa, expected) -} - -mcconf <- function(std_err, kappa) { - alpha <- 0.05 - interval <- qnorm(1 - (alpha / 2)) * std_err - ci_lower <- kappa - interval - ci_upper <- kappa + interval - out <- list(ci_lower = ci_lower, ci_upper = ci_upper) - return(out) -} - -prop_fact <- function(dat, p) { - dat_per <- dat / sum(dat) - row_sum <- rowSums(dat_per) - col_sum <- colSums(dat_per) - controls <- 1 - col_sum[2] - cases <- 1 - row_sum[2] - ratio <- cases / controls - odds_ratio <- p[1] / p[2] - out <- list( - cases = cases, controls = controls, ratio = ratio, - odds_ratio = odds_ratio - ) - return(out) -} - -serr <- function(dat, kappa, expected) { - dat_per <- dat / sum(dat) - row_sum <- rowSums(dat_per) - row_sum[3] <- sum(row_sum) - col_sum <- colSums(dat_per) - dat_per <- rbind(dat_per, col_sum) - dat_per <- cbind(dat_per, row_sum) - d1 <- dim(dat_per) - dat_per[d1[1], d1[2]] <- 1.0 - diagonal <- diag(dat_per) - a <- diagonal[1] * (1 - (row_sum[1] + col_sum[1]) * (1 - kappa)) ^ 2 + - diagonal[2] * (1 - (row_sum[2] + col_sum[2]) * (1 - kappa)) ^ 2 - - x1 <- dat_per[lower.tri(dat_per)][1] - x2 <- dat_per[upper.tri(dat_per)][1] - b <- ((1 - kappa) ^ 2) * ((x1 * (row_sum[1] + col_sum[2]) ^ 2) + - (x2 * (row_sum[2] + col_sum[1]) ^ 2)) - - c <- ((kappa) - expected * (1 - kappa)) ^ 2 - variance <- ((a + b - c) / ((1 - expected) ^ 2)) / sum(dat) - - return(sqrt(variance)) -} - -mccomp <- function(dat) { - p <- mctestp(dat) - test_stat <- tetat(p) - df <- nrow(dat) - 1 - pvalue <- mcpval(test_stat, df) - exactp <- mcpex(dat) - cstat <- mcstat(p) - cpvalue <- mccpval(cstat, df) - kappa <- mckappa(dat) - std_err <- mcserr(dat, kappa) - clu <- mcconf(std_err, kappa) - k <- prop_fact(dat, p) - - out <- list( - statistic = round(test_stat, 4), df = df, - pvalue = round(pvalue, 4), exactp = round(exactp, 4), - cstat = cstat, cpvalue = cpvalue, kappa = round(kappa, 4), - std_err = round(std_err, 4), kappa_cil = round(clu$ci_lower, 4), - kappa_ciu = round(clu$ci_upper, 4), cases = round(k$cases, 4), - controls = round(k$controls, 4), ratio = round(k$ratio, 4), - odratio = round(k$odds_ratio, 4) - ) - return(out) -} - -# one sample proportion test -prop_comp <- function(n, prob, alternative, phat) { - n <- n - phat <- phat - p <- prob - q <- 1 - p - obs <- c(n * (1 - phat), n * phat) - exp <- n * c(q, p) - dif <- obs - exp - dev <- round((dif / exp) * 100, 2) - std <- round(dif / sqrt(exp), 2) - num <- phat - prob - den <- sqrt((p * q) / n) - z <- round(num / den, 4) - lt <- round(pnorm(z), 4) - ut <- round(1 - pnorm(z), 4) - tt <- round((1 - pnorm(abs(z))) * 2, 4) - alt <- alternative - - if (alt == "all") { - sig <- c("two-both" = tt, "less" = lt, "greater" = ut) - } else if (alt == "greater") { - sig <- ut - } else if (alt == "less") { - sig <- lt - } else { - sig <- tt - } - - out <- list( - n = n, phat = phat, p = prob, z = z, sig = sig, alt = alt, - obs = obs, exp = exp, deviation = format(dev, nsmall = 2), - std = format(std, nsmall = 2) - ) - - return(out) -} - -# one sample variance test -osvar_comp <- function(x, sd, confint) { - n <- length(x) - df <- n - 1 - xbar <- mean(x) - sigma <- sd(x) - se <- sigma / sqrt(n) - chi <- df * ((sigma / sd) ^ 2) - - p_lower <- pchisq(chi, df) - p_upper <- pchisq(chi, df, lower.tail = F) - if (p_lower < 0.5) { - p_two <- pchisq(chi, df) * 2 - } else { - p_two <- pchisq(chi, df, lower.tail = F) * 2 - } - - - conf <- confint - a <- (1 - conf) / 2 - al <- 1 - a - tv <- df * sigma - c_lwr <- round(tv / qchisq(al, df), 4) - c_upr <- round(tv / qchisq(a, df), 4) - - out <- list( - n = n, sd = sd, sigma = sigma, se = se, chi = chi, df = df, - p_lower = p_lower, p_upper = p_upper, p_two = p_two, xbar = xbar, - c_lwr = c_lwr, c_upr = c_upr, conf = conf - ) - - return(out) -} - -# two sample variance test -var_comp <- function(variable, group_var) { - comp <- complete.cases(variable, group_var) - cvar <- variable[comp] - gvar <- group_var[comp] - - d <- tibble(cvar, gvar) - vals <- tibble_stats(d, "cvar", "gvar") - lass <- tbl_stats(d, "cvar") - - lens <- vals[[2]] %>% map_int(1) - vars <- vals[[4]] %>% map_dbl(1) - - f <- vars[1] / vars[2] - n1 <- lens[1] - 1 - n2 <- lens[2] - 1 - lower <- pf(f, n1, n2) - upper <- pf(f, n1, n2, lower.tail = FALSE) - - out <- list( - f = round(f, 4), lower = round(lower, 4), - upper = round(upper, 4), - vars = round(vars, 2), - avgs = round((vals[[3]] %>% map_dbl(1)), 2), - sds = round((vals[[5]] %>% map_dbl(1)), 2), - ses = round((vals[[6]] %>% map_dbl(1)), 2), - avg = round(lass[2], 2), - sd = round(lass[3], 2), - se = round(lass[4], 2), - n1 = n1, - n2 = n2, - lens = lens, - len = lass[1] - ) - - return(out) -} - -# two sample proportion test -prop_comp2 <- function(var1, var2, alt) { - n1 <- length(var1) - n2 <- length(var2) - y1 <- table(var1)[[2]] - y2 <- table(var2)[[2]] - phat1 <- round(y1 / n1, 4) - phat2 <- round(y2 / n2, 4) - phat <- sum(y1, y2) / sum(n1, n2) - - # test statistic - num <- (phat1 - phat2) - den1 <- phat * (1 - phat) - den2 <- (1 / n1) + (1 / n2) - den <- sqrt(den1 * den2) - z <- round(num / den, 4) - - lt <- round(pnorm(z), 4) - ut <- round(pnorm(z, lower.tail = FALSE), 4) - tt <- round(pnorm(abs(z), lower.tail = FALSE) * 2, 4) - - - - if (alt == "all") { - sig <- c("two-tail" = tt, "lower-tail" = lt, "upper-tail" = ut) - } else if (alt == "greater") { - sig <- ut - } else if (alt == "less") { - sig <- lt - } else { - sig <- tt - } - - # result - out <- list( - n1 = n1, n2 = n2, phat1 = phat1, phat2 = phat2, z = round(z, 3), - sig = round(sig, 3) - ) - - return(out) -} - -# one sample t test -ttest_comp <- function(x, mu, alpha, type) { - n <- length(x) - a <- (alpha / 2) - df <- n - 1 - conf <- 1 - alpha - Mean <- round(mean(x), 4) - stddev <- round(sd(x), 4) - std_err <- round(stddev / sqrt(n), 4) - test_stat <- round((Mean - mu) / std_err, 3) - - if (type == "less") { - cint <- c(-Inf, test_stat + qt(1 - alpha, df)) - } else if (type == "greater") { - cint <- c(test_stat - qt(1 - alpha, df), Inf) - } else { - cint <- qt(1 - a, df) - cint <- test_stat + c(-cint, cint) - } - - confint <- round(mu + cint * std_err, 4) - mean_diff <- round((Mean - mu), 4) - mean_diff_l <- confint[1] - mu - mean_diff_u <- confint[2] - mu - p_l <- pt(test_stat, df) - p_u <- pt(test_stat, df, lower.tail = FALSE) - - if (p_l < 0.5) { - p <- p_l * 2 - } else { - p <- p_u * 2 - } - - - out <- list( - mu = mu, n = n, df = df, Mean = Mean, stddev = stddev, std_err = std_err, - test_stat = test_stat, confint = confint, mean_diff = mean_diff, mean_diff_l = mean_diff_l, - mean_diff_u = mean_diff_u, p_l = p_l, p_u = p_u, p = p, conf = conf - ) - - return(out) -} - -# paired sample t test -paired_comp <- function(x, y, confint, var_names) { - n <- length(x) - df <- (n - 1) - xy <- paste(var_names[1], "-", var_names[2]) - data_prep <- paired_data(x, y) - b <- paired_stats(data_prep, "key", "value") - corr <- round(cor(x, y), 4) - corsig <- cor_sig(corr, n) - alpha <- 1 - confint - confint1 <- conf_int_t(b[[1, 1]], b[[1, 2]], n, alpha = alpha) %>% round(2) - confint2 <- conf_int_t(b[[2, 1]], b[[2, 2]], n, alpha = alpha) %>% round(2) - confint3 <- conf_int_t(b[[3, 1]], b[[3, 2]], n, alpha = alpha) %>% round(2) - t <- round(b[[3, 1]] / b[[3, 3]], 4) - p_l <- pt(t, df) - p_u <- pt(t, df, lower.tail = FALSE) - p <- pt(abs(t), df, lower.tail = FALSE) * 2 - - out <- list( - Obs = n, b = b, conf_int1 = confint1, conf_int2 = confint2, - conf_int_diff = confint3, corr = round(corr, 2), corsig = round(corsig, 2), - tstat = t, p_lower = p_l, p_upper = p_u, p_two_tail = p, xy = xy, df = df - ) - - return(out) -} - -# independent sample t test -indth <- function(data, x, y, a) { - x1 <- enquo(x) - y1 <- enquo(y) - - h <- data_split(data, !! x1, !! y1) - h$df <- h$length - 1 - h$error <- qt(a, h$df) * -1 - h$lower <- h$mean_t - (h$error * h$std_err) - h$upper <- h$mean_t + (h$error * h$std_err) - return(h) -} - -indcomb <- function(data, y, a) { - y1 <- enquo(y) - - comb <- da(data, !! y1) - comb$df <- comb$length - 1 - comb$error <- qt(a, comb$df) * -1 - comb$lower <- round(comb$mean_t - (comb$error * comb$std_err), 5) - comb$upper <- round(comb$mean_t + (comb$error * comb$std_err), 5) - names(comb) <- NULL - return(comb) -} - -indcomp <- function(grp_stat, alpha) { - n1 <- grp_stat[1, 2] - n2 <- grp_stat[2, 2] - n <- n1 + n2 - means <- grp_stat[, 3] - mean_diff <- means[1] - means[2] - sd1 <- grp_stat[1, 4] - sd2 <- grp_stat[2, 4] - s1 <- grp_stat[1, 4] ^ 2 - s2 <- grp_stat[2, 4] ^ 2 - sd_dif <- sd_diff(n1, n2, s1, s2) - se_dif <- se_diff(n1, n2, s1, s2) - conf_diff <- conf_int_p(mean_diff, se_dif, alpha = alpha) - out <- list( - n1 = n1, n2 = n2, n = n, mean_diff = mean_diff, sd1 = sd1, - sd2 = sd2, s1 = s1, s2 = s2, sd_dif = sd_dif, se_dif = se_dif, - conf_diff = conf_diff - ) - return(out) -} - -indsig <- function(n1, n2, s1, s2, mean_diff) { - d_f <- as.vector(df(n1, n2, s1, s2)) - t <- mean_diff / (((s1 / n1) + (s2 / n2)) ^ 0.5) - sig_l <- round(pt(t, d_f), 4) - sig_u <- round(pt(t, d_f, lower.tail = FALSE), 4) - if (sig_l < 0.5) { - sig <- round(pt(t, d_f) * 2, 4) - } else { - sig <- round(pt(t, d_f, lower.tail = FALSE) * 2, 4) - } - out <- list(d_f = d_f, t = t, sig_l = sig_l, sig_u = sig_u, sig = sig) - return(out) -} - -fsig <- function(s1, s2, n1, n2) { - out <- round(min( - pf((s1 / s2), (n1 - 1), (n2 - 1)), - pf((s1 / s2), (n1 - 1), (n2 - 1), - lower.tail = FALSE - ) - ) * 2, 4) - return(out) -} - - -indpool <- function(n1, n2, mean_diff, se_dif) { - df_pooled <- (n1 + n2) - 2 - t_pooled <- mean_diff / se_dif - sig_pooled_l <- round(pt(t_pooled, df_pooled), 4) - sig_pooled_u <- round(pt(t_pooled, df_pooled, lower.tail = FALSE), 4) - if (sig_pooled_l < 0.5) { - sig_pooled <- round(pt(t_pooled, df_pooled) * 2, 4) - } else { - sig_pooled <- round(pt(t_pooled, df_pooled, lower.tail = FALSE) * 2, 4) - } - out <- list( - df_pooled = df_pooled, t_pooled = t_pooled, - sig_pooled_l = sig_pooled_l, sig_pooled_u = sig_pooled_u, - sig_pooled = sig_pooled - ) - return(out) -} - -#' @importFrom rlang sym -tibble_stats <- function(data, x, y) { - - by_factor <- data %>% - group_by(!! sym(y)) %>% - select(!! sym(y), !! sym(x)) %>% - summarise_all(funs(length, mean, var, sd)) %>% - as_data_frame() %>% - mutate( - ses = sd / sqrt(length) - ) - - return(by_factor) - -} - -tbl_stats <- function(data, y) { - - avg <- data %>% - select(y) %>% - summarise_all(funs(length, mean, sd)) %>% - as_data_frame() %>% - mutate( - se = sd / sqrt(length) - ) - - return(unlist(avg, use.names = FALSE)) - -} - - -fg <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -fk <- function(x, w) { - x %>% - format(width = w, justify = "centre", nsmall = 3) -} - - -fs <- function() { - rep(" ") -} - -fl <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "left") -} - -fc <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -formatter_t <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -format_cil <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -format_ciu <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -formats_t <- function() { - rep(" ") -} - -l <- function(x) { - x <- as.character(x) - k <- grep("\\$", x) - if (length(k) == 1) { - temp <- strsplit(x, "\\$") - out <- temp[[1]][2] - } else { - out <- x - } - return(out) -} - -#' @importFrom tidyr gather -paired_data <- function(x, y) { - d <- tibble(x = x, y = y) %>% - mutate(z = x - y) %>% - gather() - return(d) -} - -#' @importFrom dplyr select -paired_stats <- function(data, key, value) { - - d <- data %>% - group_by(key) %>% - select(value, key) %>% - summarise_all(funs(length, mean, sd)) %>% - as_data_frame() %>% - mutate( - se = sd / sqrt(length) - ) %>% - select(-(key:length)) - - return(d) -} - - -cor_sig <- function(corr, n) { - t <- corr / ((1 - (corr ^ 2)) / (n - 2)) ^ 0.5 - df <- n - 2 - sig <- (1 - pt(t, df)) * 2 - return(round(sig, 4)) -} - -samp_err <- function(sigma, n) { - sigma / (n ^ 0.5) -} - -conf_int_t <- function(u, s, n, alpha = 0.05) { - a <- alpha / 2 - df <- n - 1 - error <- round(qt(a, df), 3) * -1 - lower <- u - (error * samp_err(s, n)) - upper <- u + (error * samp_err(s, n)) - result <- c(lower, upper) - return(result) -} - -formatter_pair <- function(x, w) { - x1 <- format(x, nsmall = 2) - x2 <- as.character(x1) - ret <- format(x2, width = w, justify = "centre") - return(ret) -} - -mean_t <- function(x) { - return(round(mean(x), 3)) -} - -sd_t <- function(x) { - s <- sd(x) - return(round(s, 3)) -} - -std_err <- function(x) { - se <- sd(x) / sqrt(length(x)) - return(round(se, 3)) -} - -data_split <- function(data, x, y) { - x1 <- enquo(x) - y1 <- enquo(y) - - by_gender <- - data %>% - group_by(!! x1) %>% - select(!! x1, !! y1) %>% - summarise_all(funs(length, mean_t, sd_t, std_err)) %>% - as.data.frame() - - return(by_gender) -} - -da <- function(data, y) { - y1 <- enquo(y) - - dat <- - data %>% - select(!! y1) %>% - summarise_all(funs(length, mean_t, sd_t, std_err)) %>% - as.data.frame() - - return(dat) -} - -sd_diff <- function(n1, n2, s1, s2) { - n1 <- n1 - 1 - n2 <- n2 - 1 - n <- (n1 + n2) - 2 - return(((n1 * s1 + n2 * s2) / n) ^ 0.5) -} - -se_diff <- function(n1, n2, s1, s2) { - df <- n1 + n2 - 2 - n_1 <- n1 - 1 - n_2 <- n2 - 1 - v <- (n_1 * s1 + n_2 * s2) / df - return(sqrt(v * (1 / n1 + 1 / n2))) -} - -se_sw <- function(s1, s2, n1, n2) { - return(((s1 / n1) + (s2 / n2)) ^ 0.5) -} - -df <- function(n1, n2, s1, s2) { - sn1 <- s1 / n1 - sn2 <- s2 / n2 - m1 <- 1 / (n1 - 1) - m2 <- 1 / (n2 - 1) - num <- (sn1 + sn2) ^ 2 - den <- (m1 * (sn1 ^ 2)) + (m2 * (sn2 ^ 2)) - return(round(num / den)) -} - -conf_int_p <- function(u, se, alpha = 0.05) { - a <- alpha / 2 - error <- round(qnorm(a), 3) * -1 - lower <- u - (error * se) - upper <- u + (error * se) - result <- c(lower, upper) - return(result) -} - -fw <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -fn <- function(x, w) { - x %>% - as.character() %>% - format(width = w, justify = "centre") -} - -formats <- function() { - rep(" ") -} - -sums <- function(data) { - cl <- colSums(data) - cls_sum <- sum(cl ^ 2) - g <- rowSums(data) - gs_sum <- sum(g ^ 2) - result <- list(cl = cl, cls_sum = cls_sum, g = g, gs_sum = gs_sum) -} - -coch <- function(k, cls_sum, cl, g, gs_sum) { - out <- ((k - 1) * ((k * cls_sum) - (sum(cl) ^ 2))) / ((k * sum(g)) - gs_sum) - return(out) -} - -# function for binary coding -nruns <- function(data, value) { - if (data > value) { - return(1) - } else if (data < value) { - return(0) - } else { - return(NA) - } -} - -nruns2 <- function(data, value) { - if (data <= value) { - return(0) - } else { - return(1) - } -} - -# expected runs -expruns <- function(n0, n1) { - N <- n0 + n1 - return(((2 * n0 * n1) / N) + 1) -} - -# standard deviation of runs -sdruns <- function(n0, n1) { - N <- n0 + n1 - n <- 2 * n0 * n1 - return(((n * (n - N)) / ((N ^ 2) * (N - 1)))) -} - -check_level <- function(data, x) { - x1 <- enquo(x) - - data %>% - pull(!! x1) %>% - nlevels() -} - -check_x <- function(data, x) { - x1 <- enquo(x) - - data %>% - pull(!! x1) %>% - (is.factor) %>% - `!`() -} \ No newline at end of file diff --git a/R/inferr.R b/R/inferr.R index 9016c11..ac5e6e0 100644 --- a/R/inferr.R +++ b/R/inferr.R @@ -7,8 +7,7 @@ #' #' @docType package #' @name inferr -#' @importFrom dplyr %>% NULL ## quiets concerns of R CMD check re: the .'s that appear in pipelines -if (getRversion() >= "2.15.1") utils::globalVariables(c(".", "sse", "sst")) \ No newline at end of file +if (getRversion() >= "2.15.1") utils::globalVariables(c(".", "sse", "sst", "var", "sd")) diff --git a/README.Rmd b/README.Rmd index 03b14cf..4c04d58 100644 --- a/README.Rmd +++ b/README.Rmd @@ -20,7 +20,7 @@ knitr::opts_chunk$set( [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/inferr)](https://cran.r-project.org/package=inferr) [![cran checks](https://cranchecks.info/badges/summary/inferr)](https://cran.r-project.org/web/checks/check_results_inferr.html) [![R build status](https://github.com/rsquaredacademy/inferr/workflows/R-CMD-check/badge.svg)](https://github.com/rsquaredacademy/inferr/actions) [![Coverage status](https://codecov.io/gh/rsquaredacademy/inferr/branch/master/graph/badge.svg)](https://codecov.io/github/rsquaredacademy/inferr?branch=master) -[![status](https://tinyverse.netlify.com/badge/inferr)](https://CRAN.R-project.org/package=inferr)![](https://img.shields.io/badge/lifecycle-maturing-blue.svg) [![](https://cranlogs.r-pkg.org/badges/grand-total/inferr)](https://cran.r-project.org/package=inferr) +[![status](https://tinyverse.netlify.com/badge/inferr)](https://CRAN.R-project.org/package=inferr) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://cranlogs.r-pkg.org/badges/grand-total/inferr)](https://cran.r-project.org/package=inferr) ## Overview @@ -109,8 +109,3 @@ infer_mcnemar_test(hb, himath, hiread) If you encounter a bug, please file a minimal reproducible example using [reprex](https://reprex.tidyverse.org/index.html) on github. For questions and clarifications, use [StackOverflow](https://stackoverflow.com/). - -## Code of Conduct - -Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). -By participating in this project you agree to abide by its terms. diff --git a/README.md b/README.md index 0ff21c6..8457a90 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,9 @@ checks](https://cranchecks.info/badges/summary/inferr)](https://cran.r-project.o status](https://github.com/rsquaredacademy/inferr/workflows/R-CMD-check/badge.svg)](https://github.com/rsquaredacademy/inferr/actions) [![Coverage status](https://codecov.io/gh/rsquaredacademy/inferr/branch/master/graph/badge.svg)](https://codecov.io/github/rsquaredacademy/inferr?branch=master) -[![status](https://tinyverse.netlify.com/badge/inferr)](https://CRAN.R-project.org/package=inferr)![](https://img.shields.io/badge/lifecycle-maturing-blue.svg) +[![status](https://tinyverse.netlify.com/badge/inferr)](https://CRAN.R-project.org/package=inferr) +[![Lifecycle: +stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![](https://cranlogs.r-pkg.org/badges/grand-total/inferr)](https://cran.r-project.org/package=inferr) @@ -25,21 +27,21 @@ additional and flexible input options and more detailed and structured test results. As of version 0.3, **inferr** includes a select set of parametric and non-parametric statistical tests which are listed below: - - One Sample t Test - - Paired Sample t Test - - Independent Sample t Test - - One Sample Proportion Test - - Two Sample Proportion Test - - One Sample Variance Test - - Two Sample Variance Test - - Binomial Test - - ANOVA - - Chi Square Goodness of Fit Test - - Chi Square Independence Test - - Levene’s Test - - Cochran’s Q Test - - McNemar Test - - Runs Test for Randomness +- One Sample t Test +- Paired Sample t Test +- Independent Sample t Test +- One Sample Proportion Test +- Two Sample Proportion Test +- One Sample Variance Test +- Two Sample Variance Test +- Binomial Test +- ANOVA +- Chi Square Goodness of Fit Test +- Chi Square Independence Test +- Levene’s Test +- Cochran’s Q Test +- McNemar Test +- Runs Test for Randomness ## Installation @@ -54,7 +56,7 @@ devtools::install_github("rsquaredacademy/inferr") ## Articles - - [Introduction to +- [Introduction to inferr](https://inferr.rsquaredacademy.com/articles/intro.html) ## Usage @@ -78,7 +80,7 @@ infer_os_t_test(hsb, write, mu = 50, type = 'all') #> -------------------------------------------------------------------------------- #> Variable t DF Sig Mean Diff. [95% Conf. Interval] #> -------------------------------------------------------------------------------- -#> write 4.141 199 0.99997 2.775 1.4537 4.0969 +#> write 4.141 199 0.00005 2.775 1.4537 4.0969 #> -------------------------------------------------------------------------------- ``` @@ -213,9 +215,3 @@ If you encounter a bug, please file a minimal reproducible example using [reprex](https://reprex.tidyverse.org/index.html) on github. For questions and clarifications, use [StackOverflow](https://stackoverflow.com/). - -## Code of Conduct - -Please note that this project is released with a [Contributor Code of -Conduct](CONDUCT.md). By participating in this project you agree to -abide by its terms. diff --git a/cran-comments.md b/cran-comments.md index 5a3d8d7..11d7290 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,14 +1,8 @@ ## Test environments -* local Windows 10, R 3.4.3 -* ubuntu 12.04 (on travis-ci), R 3.3.3, R 3.4.3, R-devel +* local Windows 10 install, R 4.1.0 +* ubuntu 14.04 (on GitHub Actions), R 4.1.0, R-devel * win-builder (devel and release) ## R CMD check results 0 errors | 0 warnings | 0 note - -## Reverse dependencies - -There are no reverse dependencies. - ---- diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..edae1f1 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,166 @@ + + + + + + + + +Page not found (404) • inferr + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + + + +
+ +
+
+ + +Content not found. Please use links in the navbar. + +
+ + + +
+ + + + +
+ + + + + + + + diff --git a/docs/CONDUCT.html b/docs/CONDUCT.html index 33bbf3a..7860797 100644 --- a/docs/CONDUCT.html +++ b/docs/CONDUCT.html @@ -71,7 +71,7 @@ inferr - 0.3.0.9000 + 0.3.1 @@ -111,7 +111,7 @@ - - -
-

-inferr 0.1.1 2017-05-02 +
+

+inferr 0.1.1 2017-05-02

Bug Fixes

  • -binom_test() accepts non-binary variables (#1).
  • +binom_test() accepts non-binary variables (#1).
  • -ind_ttest() should throw an error when the grouping variable has more than 2 levels (#2).
  • +ind_ttest() should throw an error when the grouping variable has more than 2 levels (#2).
  • -ts_prop_test() should accept only binary variables (#3).
  • +ts_prop_test() should accept only binary variables (#3).
  • -var_test() should accept only binary variables for the group_var input. The number of continuous variables must also not exceed two. (#4).
  • +var_test() should accept only binary variables for the group_var input. The number of continuous variables must also not exceed two. (#4).
-
-

-inferr 0.1.0 2017-02-14 +
+

+inferr 0.1.0 2017-02-14

  • First release
  • @@ -172,27 +215,22 @@

- + @@ -200,6 +238,8 @@

Contents

+ + diff --git a/docs/pkgdown.css b/docs/pkgdown.css index c5ab586..1273238 100644 --- a/docs/pkgdown.css +++ b/docs/pkgdown.css @@ -17,12 +17,14 @@ html, body { height: 100%; } +body { + position: relative; +} + body > .container { display: flex; height: 100%; flex-direction: column; - - padding-top: 60px; } body > .container .row { @@ -58,12 +60,21 @@ img { max-width: 100%; } +/* Fix bug in bootstrap (only seen in firefox) */ +summary { + display: list-item; +} + /* Typographic tweaking ---------------------------------*/ -.contents h1.page-header { +.contents .page-header { margin-top: calc(-60px + 1em); } +dd { + margin-left: 3em; +} + /* Section anchors ---------------------------------*/ a.anchor { @@ -97,37 +108,135 @@ a.anchor { margin-top: -40px; } -/* Static header placement on mobile devices */ -@media (max-width: 767px) { - .navbar-fixed-top { - position: absolute; - } - .navbar { - padding: 0; - } +/* Navbar submenu --------------------------*/ + +.dropdown-submenu { + position: relative; +} + +.dropdown-submenu>.dropdown-menu { + top: 0; + left: 100%; + margin-top: -6px; + margin-left: -1px; + border-radius: 0 6px 6px 6px; +} + +.dropdown-submenu:hover>.dropdown-menu { + display: block; +} + +.dropdown-submenu>a:after { + display: block; + content: " "; + float: right; + width: 0; + height: 0; + border-color: transparent; + border-style: solid; + border-width: 5px 0 5px 5px; + border-left-color: #cccccc; + margin-top: 5px; + margin-right: -10px; } +.dropdown-submenu:hover>a:after { + border-left-color: #ffffff; +} + +.dropdown-submenu.pull-left { + float: none; +} + +.dropdown-submenu.pull-left>.dropdown-menu { + left: -100%; + margin-left: 10px; + border-radius: 6px 0 6px 6px; +} /* Sidebar --------------------------*/ -#sidebar { +#pkgdown-sidebar { margin-top: 30px; + position: -webkit-sticky; + position: sticky; + top: 70px; } -#sidebar h2 { + +#pkgdown-sidebar h2 { font-size: 1.5em; margin-top: 1em; } -#sidebar h2:first-child { +#pkgdown-sidebar h2:first-child { margin-top: 0; } -#sidebar .list-unstyled li { +#pkgdown-sidebar .list-unstyled li { margin-bottom: 0.5em; } +/* bootstrap-toc tweaks ------------------------------------------------------*/ + +/* All levels of nav */ + +nav[data-toggle='toc'] .nav > li > a { + padding: 4px 20px 4px 6px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; +} + +nav[data-toggle='toc'] .nav > li > a:hover, +nav[data-toggle='toc'] .nav > li > a:focus { + padding-left: 5px; + color: inherit; + border-left: 1px solid #878787; +} + +nav[data-toggle='toc'] .nav > .active > a, +nav[data-toggle='toc'] .nav > .active:hover > a, +nav[data-toggle='toc'] .nav > .active:focus > a { + padding-left: 5px; + font-size: 1.5rem; + font-weight: 400; + color: inherit; + border-left: 2px solid #878787; +} + +/* Nav: second level (shown on .active) */ + +nav[data-toggle='toc'] .nav .nav { + display: none; /* Hide by default, but at >768px, show it */ + padding-bottom: 10px; +} + +nav[data-toggle='toc'] .nav .nav > li > a { + padding-left: 16px; + font-size: 1.35rem; +} + +nav[data-toggle='toc'] .nav .nav > li > a:hover, +nav[data-toggle='toc'] .nav .nav > li > a:focus { + padding-left: 15px; +} + +nav[data-toggle='toc'] .nav .nav > .active > a, +nav[data-toggle='toc'] .nav .nav > .active:hover > a, +nav[data-toggle='toc'] .nav .nav > .active:focus > a { + padding-left: 15px; + font-weight: 500; + font-size: 1.35rem; +} + +/* orcid ------------------------------------------------------------------- */ + .orcid { - height: 16px; + font-size: 16px; + color: #A6CE39; + /* margins are required by official ORCID trademark and display guidelines */ + margin-left:4px; + margin-right:4px; vertical-align: middle; } @@ -135,15 +244,14 @@ a.anchor { .ref-index th {font-weight: normal;} -.ref-index td {vertical-align: top;} -.ref-index .alias {width: 40%;} -.ref-index .title {width: 60%;} - +.ref-index td {vertical-align: top; min-width: 100px} +.ref-index .icon {width: 40px;} .ref-index .alias {width: 40%;} +.ref-index-icons .alias {width: calc(40% - 40px);} .ref-index .title {width: 60%;} .ref-arguments th {text-align: right; padding-right: 10px;} -.ref-arguments th, .ref-arguments td {vertical-align: top;} +.ref-arguments th, .ref-arguments td {vertical-align: top; min-width: 100px} .ref-arguments .name {width: 20%;} .ref-arguments .desc {width: 80%;} @@ -218,6 +326,19 @@ a.sourceLine:hover { visibility: visible; } +/* headroom.js ------------------------ */ + +.headroom { + will-change: transform; + transition: transform 200ms linear; +} +.headroom--pinned { + transform: translateY(0%); +} +.headroom--unpinned { + transform: translateY(-100%); +} + /* mark.js ----------------------------*/ mark { @@ -225,3 +346,22 @@ mark { border-bottom: 2px solid rgba(255, 153, 51, 0.3); padding: 1px; } + +/* vertical spacing after htmlwidgets */ +.html-widget { + margin-bottom: 10px; +} + +/* fontawesome ------------------------ */ + +.fab { + font-family: "Font Awesome 5 Brands" !important; +} + +/* don't display links in code chunks when printing */ +/* source: https://stackoverflow.com/a/10781533 */ +@media print { + code a:link:after, code a:visited:after { + content: ""; + } +} diff --git a/docs/pkgdown.js b/docs/pkgdown.js index 16d5750..7e7048f 100644 --- a/docs/pkgdown.js +++ b/docs/pkgdown.js @@ -1,174 +1,108 @@ -$(function() { - - $("#sidebar") - .stick_in_parent({offset_top: 40}) - .on('sticky_kit:bottom', function(e) { - $(this).parent().css('position', 'static'); - }) - .on('sticky_kit:unbottom', function(e) { - $(this).parent().css('position', 'relative'); - }); +/* http://gregfranko.com/blog/jquery-best-practices/ */ +(function($) { + $(function() { - $('body').scrollspy({ - target: '#sidebar', - offset: 60 - }); + $('.navbar-fixed-top').headroom(); - $('[data-toggle="tooltip"]').tooltip(); + $('body').css('padding-top', $('.navbar').height() + 10); + $(window).resize(function(){ + $('body').css('padding-top', $('.navbar').height() + 10); + }); - var cur_path = paths(location.pathname); - $("#navbar ul li a").each(function(index, value) { - if (value.text == "Home") - return; - if (value.getAttribute("href") === "#") - return; + $('[data-toggle="tooltip"]').tooltip(); + + var cur_path = paths(location.pathname); + var links = $("#navbar ul li a"); + var max_length = -1; + var pos = -1; + for (var i = 0; i < links.length; i++) { + if (links[i].getAttribute("href") === "#") + continue; + // Ignore external links + if (links[i].host !== location.host) + continue; + + var nav_path = paths(links[i].pathname); + + var length = prefix_length(nav_path, cur_path); + if (length > max_length) { + max_length = length; + pos = i; + } + } - var path = paths(value.pathname); - if (is_prefix(cur_path, path)) { - // Add class to parent
  • , and enclosing
  • if in dropdown - var menu_anchor = $(value); + // Add class to parent
  • , and enclosing
  • if in dropdown + if (pos >= 0) { + var menu_anchor = $(links[pos]); menu_anchor.parent().addClass("active"); menu_anchor.closest("li.dropdown").addClass("active"); } }); -}); -$(document).ready(function() { - // do keyword highlighting - /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ - var mark = function() { + function paths(pathname) { + var pieces = pathname.split("/"); + pieces.shift(); // always starts with / - var referrer = document.URL ; - var paramKey = "q" ; - - if (referrer.indexOf("?") !== -1) { - var qs = referrer.substr(referrer.indexOf('?') + 1); - var qs_noanchor = qs.split('#')[0]; - var qsa = qs_noanchor.split('&'); - var keyword = ""; + var end = pieces[pieces.length - 1]; + if (end === "index.html" || end === "") + pieces.pop(); + return(pieces); + } - for (var i = 0; i < qsa.length; i++) { - var currentParam = qsa[i].split('='); + // Returns -1 if not found + function prefix_length(needle, haystack) { + if (needle.length > haystack.length) + return(-1); - if (currentParam.length !== 2) { - continue; - } - - if (currentParam[0] == paramKey) { - keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); - } - } - - if (keyword !== "") { - $(".contents").unmark({ - done: function() { - $(".contents").mark(keyword); - } - }); - } + // Special case for length-0 haystack, since for loop won't run + if (haystack.length === 0) { + return(needle.length === 0 ? 0 : -1); } - }; - - mark(); -}); - -function paths(pathname) { - var pieces = pathname.split("/"); - pieces.shift(); // always starts with / - var end = pieces[pieces.length - 1]; - if (end === "index.html" || end === "") - pieces.pop(); - return(pieces); -} - -function is_prefix(needle, haystack) { - if (needle.length > haystack.lengh) - return(false); - - // Special case for length-0 haystack, since for loop won't run - if (haystack.length === 0) { - return(needle.length === 0); - } + for (var i = 0; i < haystack.length; i++) { + if (needle[i] != haystack[i]) + return(i); + } - for (var i = 0; i < haystack.length; i++) { - if (needle[i] != haystack[i]) - return(false); + return(haystack.length); } - return(true); -} + /* Clipboard --------------------------*/ -/* Clipboard --------------------------*/ + function changeTooltipMessage(element, msg) { + var tooltipOriginalTitle=element.getAttribute('data-original-title'); + element.setAttribute('data-original-title', msg); + $(element).tooltip('show'); + element.setAttribute('data-original-title', tooltipOriginalTitle); + } -function changeTooltipMessage(element, msg) { - var tooltipOriginalTitle=element.getAttribute('data-original-title'); - element.setAttribute('data-original-title', msg); - $(element).tooltip('show'); - element.setAttribute('data-original-title', tooltipOriginalTitle); -} + if(ClipboardJS.isSupported()) { + $(document).ready(function() { + var copyButton = ""; -if(Clipboard.isSupported()) { - $(document).ready(function() { - var copyButton = ""; + $(".examples, div.sourceCode").addClass("hasCopyButton"); - $(".examples").addClass("hasCopyButton"); + // Insert copy buttons: + $(copyButton).prependTo(".hasCopyButton"); - // Insert copy buttons: - $(copyButton).prependTo(".hasCopyButton"); + // Initialize tooltips: + $('.btn-copy-ex').tooltip({container: 'body'}); - // Initialize tooltips: - $('.btn-copy-ex').tooltip({container: 'body'}); + // Initialize clipboard: + var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { + text: function(trigger) { + return trigger.parentNode.textContent; + } + }); - // Initialize clipboard: - var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { - text: function(trigger) { - return trigger.parentNode.textContent; - } - }); + clipboardBtnCopies.on('success', function(e) { + changeTooltipMessage(e.trigger, 'Copied!'); + e.clearSelection(); + }); - clipboardBtnCopies.on('success', function(e) { - changeTooltipMessage(e.trigger, 'Copied!'); - e.clearSelection(); + clipboardBtnCopies.on('error', function() { + changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); + }); }); - - clipboardBtnCopies.on('error', function() { - changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); - }); - }); -} - -/* Search term highlighting ------------------------------*/ - -function matchedWords(hit) { - var words = []; - - var hierarchy = hit._highlightResult.hierarchy; - // loop to fetch from lvl0, lvl1, etc. - for (var idx in hierarchy) { - words = words.concat(hierarchy[idx].matchedWords); - } - - var content = hit._highlightResult.content; - if (content) { - words = words.concat(content.matchedWords); } - - // return unique words - var words_uniq = [...new Set(words)]; - return words_uniq; -} - -function updateHitURL(hit) { - - var words = matchedWords(hit); - var url = ""; - - if (hit.anchor) { - url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; - } else { - url = hit.url + '?q=' + escape(words.join(" ")); - } - - return url; -} +})(window.jQuery || window.$) diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 2defe6d..0e24873 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -1,8 +1,9 @@ -pandoc: 1.19.2.1 -pkgdown: 1.0.0 +pandoc: 2.11.4 +pkgdown: 1.6.1 pkgdown_sha: ~ articles: intro: intro.html +last_built: 2021-05-28T10:08Z urls: reference: https://inferr.rsquaredacademy.com/reference article: https://inferr.rsquaredacademy.com/articles diff --git a/docs/reference/Rplot001.png b/docs/reference/Rplot001.png new file mode 100644 index 0000000..17a3580 Binary files /dev/null and b/docs/reference/Rplot001.png differ diff --git a/docs/reference/exam.html b/docs/reference/exam.html index b078772..3a2a054 100644 --- a/docs/reference/exam.html +++ b/docs/reference/exam.html @@ -72,7 +72,7 @@ inferr - 0.3.0.9000 + 0.3.1
  • @@ -112,7 +112,7 @@