diff --git a/.DS_Store b/.DS_Store index 8e9ac423..f2290d73 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/DESCRIPTION b/DESCRIPTION index bb611795..b730d51a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,28 +32,30 @@ Imports: crayon, dplyr, ggplot2, - mice, + ggrepel, + lavaan, + lme4, + margins, + pbkrtest, purrr, rlang, + stats, tidyr, tibble, - lavaan + utils Suggests: - margins, - pbkrtest, devtools, forcats, knitr, - lme4, rmarkdown, roxygen2, testthat, - ggrepel, - covr + covr, + mice VignetteBuilder: knitr Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 URL: https://github.com/jrosen48/konfound BugReports: https://github.com/jrosen48/konfound/issues Depends: diff --git a/NAMESPACE b/NAMESPACE index 63e34ff2..03342dad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,9 +5,72 @@ export(konfound) export(launch_shiny) export(mkonfound) export(pkonfound) +export(test_cop) +export(test_sensitivity_ln) +export(tkonfound) export(tkonfound_fig) -import(dplyr) -import(rlang) -importFrom(rlang,.data) +import(ggplot2) +import(lavaan) +importFrom(broom,glance) +importFrom(broom,tidy) +importFrom(broom.mixed,tidy) +importFrom(crayon,bold) +importFrom(crayon,italic) +importFrom(crayon,underline) +importFrom(dplyr,bind_cols) +importFrom(dplyr,case_when) +importFrom(dplyr,filter) +importFrom(dplyr,mutate) +importFrom(dplyr,pull) +importFrom(dplyr,rename) +importFrom(dplyr,select) +importFrom(dplyr,tibble) +importFrom(ggplot2,aes) +importFrom(ggplot2,aes_string) +importFrom(ggplot2,annotate) +importFrom(ggplot2,element_blank) +importFrom(ggplot2,element_line) +importFrom(ggplot2,element_text) +importFrom(ggplot2,facet_grid) +importFrom(ggplot2,geom_col) +importFrom(ggplot2,geom_curve) +importFrom(ggplot2,geom_histogram) +importFrom(ggplot2,geom_hline) +importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_point) +importFrom(ggplot2,geom_segment) +importFrom(ggplot2,ggplot) +importFrom(ggplot2,ggtitle) 
+importFrom(ggplot2,scale_fill_manual) +importFrom(ggplot2,scale_shape_manual) +importFrom(ggplot2,scale_x_continuous) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,sec_axis) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_bw) +importFrom(ggplot2,theme_void) +importFrom(ggplot2,xlab) +importFrom(ggplot2,ylab) +importFrom(ggrepel,geom_label_repel) +importFrom(lavaan,parameterEstimates) +importFrom(lavaan,sem) +importFrom(lme4,fixef) +importFrom(lme4,lmer) +importFrom(margins,margins) +importFrom(pbkrtest,get_Lb_ddf) +importFrom(purrr,map) +importFrom(purrr,map2_dfr) +importFrom(purrr,map_dbl) +importFrom(purrr,modify_if) +importFrom(rlang,"!!") +importFrom(rlang,enquo) +importFrom(rlang,quo_name) importFrom(stats,chisq.test) +importFrom(stats,cor) importFrom(stats,fisher.test) +importFrom(stats,glm) +importFrom(stats,qt) +importFrom(tibble,tribble) +importFrom(tidyr,gather) +importFrom(utils,browseURL) +importFrom(utils,globalVariables) diff --git a/R/concord1.R b/R/concord1.R index a406e2f1..9346f9a0 100644 --- a/R/concord1.R +++ b/R/concord1.R @@ -4,6 +4,7 @@ #' #' @docType data #' @name concord1 -#' @references Hamilton, Lawrence C. 1983. Saving water: A causal model of household conservation. Sociological Perspectives 26(4):355-374. +#' @references Hamilton, Lawrence C. 1983. Saving water: +#' A causal model of household conservation. Sociological Perspectives 26(4):355-374. #' @format A data.frame with 496 rows and 10 variables. 
-NULL \ No newline at end of file +NULL diff --git a/R/cop_pse_auxiliary.R b/R/cop_pse_auxiliary.R index cc958b81..483c104e 100644 --- a/R/cop_pse_auxiliary.R +++ b/R/cop_pse_auxiliary.R @@ -1,3 +1,9 @@ +#' Calculate R2yz based on ryxGz and R2 +#' +#' @param ryxGz correlation coefficient between Y and X given Z +#' @param R2 coefficient of determination +#' @return R2yz value +#' @importFrom lavaan parameterEstimates cal_ryz <- function(ryxGz, R2){ R2yz = (ryxGz^2 - R2)/(ryxGz^2 - 1) if (R2yz >= 0) { @@ -8,6 +14,15 @@ cal_ryz <- function(ryxGz, R2){ return(ryz) } +#' Calculate R2xz based on variances and standard error +#' +#' @param var_x variance of X +#' @param var_y variance of Y +#' @param R2 coefficient of determination +#' @param df degrees of freedom +#' @param std_err standard error +#' @return R2xz value +#' @importFrom lavaan parameterEstimates cal_rxz <- function(var_x, var_y, R2, df, std_err){ R2xz = 1 - ((var_y * (1 - R2))/(var_x * df * std_err^2)) if (R2xz <= 0) {stop("Error! 
R2xz < 0!")} @@ -16,14 +31,43 @@ cal_rxz <- function(var_x, var_y, R2, df, std_err){ return(rxz) } +#' Calculate rxy based on ryxGz, rxz, and ryz +#' +#' @param ryxGz correlation coefficient between Y and X given Z +#' @param rxz correlation coefficient between X and Z +#' @param ryz correlation coefficient between Y and Z +#' @return rxy value +#' @importFrom lavaan parameterEstimates cal_rxy <- function(ryxGz, rxz, ryz){ rxy = ryxGz * sqrt((1 - rxz^2)*(1 - ryz^2)) + rxz * ryz return(rxy) } -cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y, est_uncond, rxz, n_obs){ +#' Calculate delta star for sensitivity analysis +#' +#' @param FR2max maximum R2 +#' @param R2 current R2 +#' @param R2_uncond unconditional R2 +#' @param est_eff estimated effect +#' @param eff_thr effect threshold +#' @param var_x variance of X +#' @param var_y variance of Y +#' @param est_uncond unconditional estimate +#' @param rxz correlation coefficient between X and Z +#' @param n_obs number of observations +#' @return delta star value +#' @importFrom lavaan parameterEstimates +cal_delta_star <- function(FR2max, + R2, R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + est_uncond, + rxz, + n_obs){ if (FR2max > .99) {FR2max = .99} - # if (FR2max < R2 + inci) {FR2max = R2 + inci} check with Ken what this means + # if (FR2max < R2 + inci) {FR2max = R2 + inci}check with Ken what this means if (FR2max > R2) {D = sqrt(FR2max - R2)} #elements for computing Oster's delta_star @@ -35,7 +79,8 @@ cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y t_x = var_x * (n_obs / (n_obs - 1)) * (1 - rxz^2) ## adjust df for var_x ## var_x is population variance, need sample variance from x - ## this adjustment is to get closer to what robomit generates as they run regression using the sample data + ## this adjustment is to get closer to what robomit generates as they + ## run regression using the sample data num1 = bt_m_b * rt_m_ro_t_syy * t_x num2 = 
bt_m_b * var_x * t_x * b0_m_b1^2 num3 = 2 * bt_m_b^2 * (t_x * b0_m_b1 * var_x) @@ -92,6 +137,7 @@ cal_delta_star <- function(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y # see test_cop for updated approach to calculate delta exact + verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, rzy, rcvy, rcvx, rcvz){ @@ -115,8 +161,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -130,18 +176,24 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[4,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + 
lavaan::parameterEstimates(fit)$label == 'beta2',]$se + betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } - + #get regression based on true delta in terms of standardized coefficent cor.matrix <- matrix(c(1,rxy, rzy, rcvy, rxy, 1, rxz, rcvx, @@ -153,8 +205,8 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, flag_cor <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cor = F @@ -169,15 +221,21 @@ verify_reg_Gzcv = function(n_obs, sdx, sdy, sdz, sdcv, # if model can be run, then run it if (class(flag_cor) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cor.matrix, - sample.nobs = n_obs) + sample.cov = cor.matrix, + sample.nobs = n_obs) std_R2 <- 1 - lavaan::parameterEstimates(fit)[4,]$est - std_betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - std_seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - std_betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - std_seZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - std_betaCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$est - std_seCV <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta3',]$se + std_betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + std_seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + std_betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + std_seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 
'beta2',]$se + std_betaCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$est + std_seCV <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta3',]$se } if (class(flag_cor) == "lavaan" && class(flag_cov) == "lavaan") { @@ -229,6 +287,18 @@ verify_manual <- function(rxy, rxz, rxcv, ryz, rycv, rzcv, sdy, sdx){ return(beta) } + +#' Verify regression model with control variable Z +#' +#' @param n_obs number of observations +#' @param sdx standard deviation of X +#' @param sdy standard deviation of Y +#' @param sdz standard deviation of Z +#' @param rxy correlation coefficient between X and Y +#' @param rxz correlation coefficient between X and Z +#' @param rzy correlation coefficient between Z and Y +#' @return list of model parameters +#' @importFrom lavaan sem parameterEstimates verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ model <- 'Y ~ beta1 * X + beta2 * Z' @@ -247,8 +317,8 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -262,16 +332,20 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[3,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se - betaZ <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$est - seZ <- 
lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta2',]$se - } - + betaX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$est + seZ <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta2',]$se + } + if (class(flag_cov) == "lavaan") { result = list(R2, betaX, seX, betaZ, seZ) return(result) @@ -280,6 +354,15 @@ verify_reg_Gz = function(n_obs, sdx, sdy, sdz, rxy, rxz, rzy){ } } + +#' Verify unconditional regression model +#' +#' @param n_obs number of observations +#' @param sdx standard deviation of X +#' @param sdy standard deviation of Y +#' @param rxy correlation coefficient between X and Y +#' @return list of model parameters +#' @importFrom lavaan sem parameterEstimates verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ model <- 'Y ~ beta1 * X' @@ -294,8 +377,8 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ flag_cov <- tryCatch( expr = { lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) }, error = function(e){ flag_cov = F @@ -309,12 +392,14 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ #if model can be run to verify true delta, then run it can save results if (class(flag_cov) == "lavaan") { fit <- lavaan::sem(model, - sample.cov = cov.matrix, - sample.nobs = n_obs) + sample.cov = cov.matrix, + sample.nobs = n_obs) ## the R2 extracted from summary is NOT right, do the calculation below R2 <- (sdy^2 - lavaan::parameterEstimates(fit)[2,]$est) / sdy^2 - betaX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$est - seX <- lavaan::parameterEstimates(fit)[lavaan::parameterEstimates(fit)$label == 'beta1',]$se + betaX <- lavaan::parameterEstimates(fit)[ + 
lavaan::parameterEstimates(fit)$label == 'beta1',]$est + seX <- lavaan::parameterEstimates(fit)[ + lavaan::parameterEstimates(fit)$label == 'beta1',]$se } if (class(flag_cov) == "lavaan") { @@ -324,5 +409,3 @@ verify_reg_uncond = function(n_obs, sdx, sdy, rxy){ stop("Error!") } } - - diff --git a/R/helper_output_dataframe.R b/R/helper_output_dataframe.R index 26518a56..deda4a7a 100644 --- a/R/helper_output_dataframe.R +++ b/R/helper_output_dataframe.R @@ -1,6 +1,30 @@ -# Function to output the data frame +#' Output data frame based on model estimates and thresholds +#' +#' @param est_eff estimated effect +#' @param beta_threshhold threshold for beta +#' @param unstd_beta unstandardized beta value +#' @param bias bias to change inference +#' @param sustain sustain to change inference +#' @param recase number of cases to replace null +#' @param obs_r observed correlation +#' @param critical_r critical correlation +#' @param r_con correlation for omitted variable +#' @param itcv inferential threshold for confounding variable +#' @param non_linear flag for non-linear models +#' @return data frame with model information +#' @importFrom dplyr tibble -output_df <- function(est_eff, beta_threshhold, unstd_beta, bias = NULL, sustain = NULL, recase, obs_r, critical_r, r_con, itcv, non_linear) { +# Function to output the data frame +output_df <- function(est_eff, + beta_threshhold, + unstd_beta, + bias = NULL, + sustain = NULL, + recase, obs_r, + critical_r, + r_con, + itcv, + non_linear) { if (abs(est_eff) > abs(beta_threshhold)) { df <- dplyr::tibble( action = "to_invalidate", diff --git a/R/helper_output_print.R b/R/helper_output_print.R index 09b32dcb..824348b8 100644 --- a/R/helper_output_print.R +++ b/R/helper_output_print.R @@ -1,24 +1,73 @@ +#' Output printed text with formatting +#' +#' This function outputs printed text for various indices such as RIR +#' (Robustness of Inference to Replacement) +#' and IT (Impact Threshold for a Confounding Variable) with specific 
formatting +#' like bold, underline, and italic +#' using functions from the crayon package. It handles different scenarios based +#' on the effect difference, +#' beta threshold, and other parameters, providing formatted +#' output for each case. +#' +#' @param eff_diff The difference in the effect size being evaluated. +#' @param beta_threshhold The threshold value of beta, used for +#' statistical significance determination. +#' @param bias The percentage of the estimate that could be due to bias (optional). +#' @param sustain The percentage of the estimate necessary to sustain an inference (optional). +#' @param nu The hypothesized effect size used in replacement analysis. +#' @param recase The number of cases that need to be replaced to change the inference. +#' @param obs_r The observed correlation coefficient in the data. +#' @param critical_r The critical correlation coefficient for statistical significance. +#' @param r_con The correlation coefficient of an omitted variable with both the outcome and the predictor. +#' @param itcv The impact threshold for a confounding variable. +#' @param alpha The level of statistical significance. +#' @param index A character string indicating the index for which the output is generated ('RIR' or 'IT'). +#' @importFrom crayon bold underline italic + # Function to output printed text -output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) { - if (index == "RIR"){ - cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) - if (abs(eff_diff) > abs(beta_threshhold)) { - cat("To invalidate an inference, ", round(bias, 3), "% of the estimate would have to be due to bias. 
") - cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") - cat("\n") - cat("To invalidate an inference, ", round(recase, 3), " observations would have to be replaced with cases") - cat("\n") - cat("for which the effect is ", nu, " (RIR = ", round(recase, 3), ").\n", sep = "") - cat("\n") +output_print <- function(eff_diff, + beta_threshhold, + bias = NULL, + sustain = NULL, + nu, + recase, + obs_r, + critical_r, + r_con, + itcv, + alpha, + index) { + if (index == "RIR") { + cat(crayon::bold("Robustness of Inference to Replacement (RIR):\n")) + if (abs(eff_diff) > abs(beta_threshhold)) { + cat("To invalidate an inference, ", round(bias, 3), + "% of the estimate would have to be due to bias. ") + cat("\n") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") + cat("\n") + cat("To invalidate an inference, ", round(recase, 3), + " observations would have to be replaced with cases") + cat("\n") + cat("for which the effect is ", nu, + " (RIR = ", round(recase, 3), ").\n", sep = "") + cat("\n") } else if (abs(eff_diff) < abs(beta_threshhold)) { - cat("To sustain an inference, ", round(sustain, 3), "% of the estimate would have to be due to bias. ") + cat("To sustain an inference, ", round(sustain, 3), + "% of the estimate would have to be due to bias. 
") cat("\n") - cat("This is based on a threshold of ", round(beta_threshhold, 3), " for statistical significance (alpha = ", alpha, ").\n", sep = "") + cat("This is based on a threshold of ", round(beta_threshhold, 3), + " for statistical significance (alpha = ", alpha, ").\n", + sep = "") cat("\n") - cat("To sustain an inference, ", round(recase, 3), " of the cases with ", nu, " effect would have to be replaced with cases at the threshold of inference"," (RIR = ", round(recase, 3), ").\n", sep = "") + cat("To sustain an inference, ", round(recase, 3), + " of the cases with ", nu, + " effect would have to be replaced with cases at the + threshold of inference", + " (RIR = ", round(recase, 3), ").\n", sep = "") } else if (eff_diff == beta_threshhold) { warning("The coefficient is exactly equal to the threshold.\n") @@ -26,65 +75,94 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("See Frank et al. (2013) for a description of the method.") cat("\n") cat("\n") - cat(crayon::underline("Citation:"), "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. (2013).") + cat(crayon::underline("Citation:"), + "Frank, K.A., Maroulis, S., Duong, M., and Kelcey, B. 
(2013).") cat("\n") cat("What would it take to change an inference?") cat("\n") - cat("Using Rubin's causal model to interpret the robustness of causal inferences.") + cat("Using Rubin's causal model to interpret the + robustness of causal inferences.") cat("\n") - cat(crayon::italic("Education, Evaluation and Policy Analysis, 35"), "437-460.") + cat(crayon::italic("Education, Evaluation and + Policy Analysis, 35"), "437-460.") cat("\n") } if (index == "IT") { cat(crayon::bold("Impact Threshold for a Confounding Variable:\n")) if (abs(obs_r) > abs(critical_r) & obs_r > 0) { - cat("The minimum impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The minimum impact of an omitted variable to invalidate an + inference for a null hypothesis of 0 effect is based on + a correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ") + " with the predictor of interest + (conditioning on observed covariates) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", - sep = "") + cat(round(critical_r, 3), " for statistical + significance (alpha = ", alpha, ").\n",sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to invalidate an inference for a null hypothesis of 0 effect. 
+ \n", sep = "") } else if (abs(obs_r) > abs(critical_r) & obs_r < 0) { - cat("The minimum (in absolute value) impact of an omitted variable to invalidate an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The minimum (in absolute value) impact of an omitted + variable to invalidate an inference for a null hypothesis of 0 + effect is based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ") + " with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ") cat("\n") - cat(round(critical_r, 3), " for statistical significance (alpha = ", alpha, ").\n", + cat(round(critical_r, 3), " for statistical significance + (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to invalidate an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to invalidate an inference for a null hypothesis + of 0 effect.\n", sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r > 0) { - cat("The maximum impact (in absolute value) of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", -r_con) + cat("The maximum impact (in absolute value) of an omitted variable + to sustain an inference for a null hypothesis of 0 effect is + based on a correlation of ", -r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates; signs are interchangable) based on a threshold of ", round(beta_threshhold, 3)) + " 
with the predictor of interest (conditioning on observed + covariates; signs are interchangable) based on a threshold of ", + round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(-r_con, " X ", r_con, " = ", -round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (abs(obs_r) < abs(critical_r) & obs_r < 0) { - cat("The maximum impact of an omitted variable to sustain an inference for a null hypothesis of 0 effect is based on a correlation of ", r_con) + cat("The maximum impact of an omitted variable to sustain an + inference for a null hypothesis of 0 effect is based on a + correlation of ", r_con) cat("\n") cat(" with the outcome and at ", r_con, - " with the predictor of interest (conditioning on observed covariates) based on a threshold of ", round(beta_threshhold, 3)) + " with the predictor of interest (conditioning on observed + covariates) based on a threshold of ", round(beta_threshhold, 3)) cat("\n") cat(" for statistical significance (alpha = ", alpha, ").\n", sep = "") cat("\n") - cat("Correspondingly the impact of an omitted variable (as defined in Frank 2000) must be ") + cat("Correspondingly the impact of an omitted variable + (as defined in Frank 2000) must be ") cat("\n") - cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), " to sustain an inference for a null hypothesis of 0 effect.\n", sep = "") + cat(r_con, " X ", r_con, " = ", round(r_con^2, 3), + " to sustain an inference for a null hypothesis of 0 effect.\n", + sep = "") } else if (obs_r == critical_r) { warning("The correlation is exactly equal 
to the threshold.\n") } @@ -95,7 +173,9 @@ output_print <- function(eff_diff, beta_threshhold, bias = NULL, sustain = NULL, cat("\n") cat("Frank, K. (2000). Impact of a confounding variable on the") cat("\n") - cat("inference of a regression coefficient.", crayon::italic("Sociological Methods and Research, 29"), "(2), 147-194") + cat("inference of a regression coefficient.", + crayon::italic("Sociological Methods and Research, 29"), + "(2), 147-194") cat("\n") } diff --git a/R/helper_output_table.R b/R/helper_output_table.R index c3211f5a..1f608da7 100644 --- a/R/helper_output_table.R +++ b/R/helper_output_table.R @@ -1,3 +1,21 @@ +#' Output a Tidy Table from a Model Object +#' +#' This function takes a model object and the tested variable, +#' tidies the model output using `broom::tidy`, +#' calculates the impact threshold for confounding variables (ITCV) and impact +#' for each covariate,and returns a rounded, tidy table of model outputs. +#' +#' @param model_object A model object from which to generate the output. +#' @param tested_variable The variable being tested in the model. +#' @return A tidy data frame containing model outputs, ITCV, +#' and impacts for covariates. +#' @importFrom broom tidy +#' @importFrom purrr modify_if +#' @importFrom stats cor +#' @importFrom dplyr select filter mutate +#' @importFrom rlang !! 
enquo + + # Function to output the data frame output_table <- function(model_object, tested_variable) { @@ -5,18 +23,34 @@ output_table <- function(model_object, tested_variable) { cat("Dependent variable is", p, "\n") model_output <- broom::tidy(model_object) # tidying output var_row <- model_output$term == tested_variable - model_output$itcv[var_row] <- abs(konfound(model_object, !!tested_variable, to_return = "raw_output")$itcv) + model_output$itcv[var_row] <- abs(konfound( + model_object, + !!tested_variable, to_return = "raw_output" + )$itcv) + + covariate_names <- model_output$term[!( + model_output$term %in% c("(Intercept)", tested_variable) +)] + + +for (i in seq(covariate_names)) { + cov_row <- model_output$term == covariate_names[i] + d <- model_object$model + cor_df <- as.data.frame(stats::cor(d)) + model_output$impact[cov_row] <- round( + abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), + 3 + ) # r_zy * r_zx +} - covariate_names <- model_output$term[!(model_output$term %in% c("(Intercept)", tested_variable))] - for (i in seq(covariate_names)) { - cov_row <- model_output$term == covariate_names[i] - d <- model_object$model - cor_df <- as.data.frame(stats::cor(d)) - model_output$impact[cov_row] <- round(abs(cor_df[cov_row, 1]) * abs(cor_df[cov_row, tested_variable]), 3) # r_zy * r_zx - } + model_output <- purrr::modify_if( + model_output, + is.numeric, + round, + digits = 3 +) - model_output <- purrr::modify_if(model_output, is.numeric, round, digits = 3) return(model_output) } diff --git a/R/helper_plot_correlation.R b/R/helper_plot_correlation.R index e622bc66..0bbce83d 100644 --- a/R/helper_plot_correlation.R +++ b/R/helper_plot_correlation.R @@ -1,3 +1,17 @@ +#' Plot Correlation Diagram +#' +#' This function creates a plot to illustrate the correlation between different +#' variables,specifically focusing on the confounding variable, predictor of +#' interest, and outcome.It uses ggplot2 for graphical representation. 
+#' +#' @param r_con Correlation coefficient related to the confounding variable. +#' @param obs_r Observed correlation coefficient. +#' @param critical_r Critical correlation coefficient for decision-making. +#' @return A ggplot object representing the correlation diagram. +#' @importFrom ggplot2 ggplot aes_string geom_segment geom_curve annotate +#' theme_void ggtitle + + # plot correlation plot_correlation <- function(r_con, obs_r, critical_r) { @@ -13,28 +27,111 @@ plot_correlation <- function(r_con, obs_r, critical_r) { p <- ggplot2::ggplot(d, ggplot2::aes_string(x = "x", y = "y")) + - # main arrows - ggplot2::geom_segment(ggplot2::aes(y = .1), xend = 0, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight up - ggplot2::geom_segment(ggplot2::aes(x = .1), xend = 1, yend = .9, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # hypotenuse - ggplot2::geom_segment(ggplot2::aes(x = .15, y = 1), xend = .9, yend = 1, arrow = ggplot2::arrow(), size = 2.5, color = "#A6CEE3") + # straight across +# main arrows +ggplot2::geom_segment( + ggplot2::aes(y = .1), + xend = 0, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight up +ggplot2::geom_segment( + ggplot2::aes(x = .1), + xend = 1, + yend = .9, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # hypotenuse +ggplot2::geom_segment( + ggplot2::aes(x = .15, y = 1), + xend = .9, + yend = 1, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#A6CEE3" +) + # straight across + + +# label annotations +ggplot2::annotate( + "text", + x = 0, + y = 0, + label = paste0("Confounding\nVariable"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 0, + y = 1, + label = paste0("Predictor of Interest"), + fontface = 3 +) + +ggplot2::annotate( + "text", + x = 1, + y = 1, + label = paste0("Outcome"), + fontface = 3 +) + + - # lagel annotations - ggplot2::annotate("text", x = 0, y = 0, label = paste0("Confounding\nVariable"), fontface = 3) + - 
ggplot2::annotate("text", x = 0, y = 1, label = paste0("Predictor of Interest"), fontface = 3) + - ggplot2::annotate("text", x = 1, y = 1, label = paste0("Outcome"), fontface = 3) + +# CV arrows +# ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, +#yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + +# straight across +# ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, +#yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + + # straight across +ggplot2::geom_curve( + ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), + curvature = -.35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_curve( + ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), + curvature = .35, + size = 2.5, + color = "#1F78B4", + arrow = ggplot2::arrow() +) + +ggplot2::geom_segment( + ggplot2::aes(x = .37, y = .81), + xend = .465, + yend = .925, + arrow = ggplot2::arrow(), + size = 2.5, + color = "#1F78B4" +) + # straight across - # CV arrows - # ggplot2::geom_segment(ggplot2::aes(x = .05, y = .25), xend = .275, yend = .65, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - # ggplot2::geom_segment(ggplot2::aes(x = .175, y = .15), xend = .3, yend = .625, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across - ggplot2::geom_curve(ggplot2::aes(x = .04, y = .325, xend = .35, yend = .825), curvature = -.35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - ggplot2::geom_curve(ggplot2::aes(x = .225, y = .23, xend = .4, yend = .8), curvature = .35, size = 2.5, color = "#1F78B4", arrow = ggplot2::arrow()) + - ggplot2::geom_segment(ggplot2::aes(x = .37, y = .81), xend = .465, yend = .925, arrow = ggplot2::arrow(), size = 2.5, color = "#1F78B4") + # straight across +# corr. 
annotations +ggplot2::annotate( + "text", + x = -.125, + y = .5, + label = paste0("Rx.cv | Z =\n ", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .575, + y = .35, + label = paste0("Ry.cv | Z =\n", r_con), + fontface = 1 +) + +ggplot2::annotate( + "text", + x = .25, + y = .525, + label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), + fontface = 1 +) + - # corr. annotations - ggplot2::annotate("text", x = -.125, y = .5, label = paste0("Rx.cv | Z =\n ", r_con), fontface = 1) + - ggplot2::annotate("text", x = .575, y = .35, label = paste0("Ry.cv | Z =\n", r_con), fontface = 1) + - ggplot2::annotate("text", x = .25, y = .525, label = paste0("Rx.cv | Z * Ry.cv | Z =\n", round(r_con^2, 3)), fontface = 1) + # plot modifications ggplot2::xlim(-.15, 1.1) + diff --git a/R/helper_plot_threshold.R b/R/helper_plot_threshold.R index aa388c5f..96506f38 100644 --- a/R/helper_plot_threshold.R +++ b/R/helper_plot_threshold.R @@ -1,3 +1,19 @@ +#' Plot Effect Threshold Diagram +#' +#' This function creates a plot to illustrate the threshold of an effect +#' estimate in relation to a specified beta threshold. It uses ggplot2 +#' for graphical representation. +#' +#' @param beta_threshold The threshold value for the effect. +#' @param est_eff The estimated effect size. +#' @return A ggplot object representing the effect threshold diagram. 
+#' @importFrom ggplot2 ggplot aes geom_col geom_hline annotate +#' scale_fill_manual theme_bw theme xlab ylab +#' @importFrom dplyr tibble mutate rename select filter pull +#' @importFrom tidyr gather + + + # Function to output the plot plot_threshold <- function(beta_threshold, est_eff) { @@ -27,7 +43,8 @@ plot_threshold <- function(beta_threshold, est_eff) { } else if (est_eff < beta_threshold) { # beta is below threshold dd <- dplyr::tibble(est_eff = est_eff, beta_threshold = beta_threshold) - dd <- dplyr::mutate(dd, `Above Estimated Effect, Below Threshold` = abs(est_eff - beta_threshold)) + dd <- dplyr::mutate( dd,`Above Estimated Effect, Below Threshold` + = abs(est_eff - beta_threshold)) dd <- dplyr::mutate(dd, `Below Threshold` = est_eff) dd <- dplyr::select(dd, -beta_threshold) @@ -43,22 +60,30 @@ plot_threshold <- function(beta_threshold, est_eff) { y_thresh_text <- y_thresh + sqrt(.005 * abs(y_thresh)) effect_text <- est_eff + sqrt(.025 * abs(est_eff)) # y-value of text - cols <- c("lightgray", "#1F78B4") # dark blue and green (green not used right now) + cols <- c("lightgray", "#1F78B4") + # dark blue and green (green not used right now) } p <- ggplot2::ggplot(dd, ggplot2::aes(x = inference, y = val, fill = key)) + ggplot2::geom_col(position = "stack") + ggplot2::geom_hline(yintercept = est_eff, color = "black") + - ggplot2::annotate("text", x = 1, y = effect_text, label = "Estimated Effect") + + ggplot2::annotate("text", x = 1, y = effect_text, + label = "Estimated Effect") + ggplot2::geom_hline(yintercept = y_thresh, color = "red") + - ggplot2::annotate("text", x = 1, y = y_thresh_text, label = "Threshold") + - # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + this is discussed here: https://github.com/jrosen48/konfound/issues/5 + ggplot2::annotate("text", x = 1, y = y_thresh_text, + label = "Threshold") + + # ggplot2::geom_text(aes(label = "Effect"), vjust = -.5) + + #this is discussed here: https://github.com/jrosen48/konfound/issues/5 
ggplot2::scale_fill_manual("", values = cols) + ggplot2::theme_bw() + - ggplot2::theme(axis.text.x = ggplot2::element_blank(), axis.ticks = ggplot2::element_blank()) + + ggplot2::theme( + axis.text.x = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank() +) + + ggplot2::xlab(NULL) + ggplot2::ylab("Effect (abs. value)") + ggplot2::theme(legend.position = "top") diff --git a/R/konfound-glm-dichotomous.R b/R/konfound-glm-dichotomous.R index 4aa1287b..6db9a419 100644 --- a/R/konfound-glm-dichotomous.R +++ b/R/konfound-glm-dichotomous.R @@ -1,7 +1,33 @@ +#' Konfound Analysis for Generalized Linear Models with Dichotomous Outcomes +#' +#' This function performs konfound analysis on a generalized linear model +#' object with a dichotomous outcome. It uses 'broom' to tidy model outputs +#' and calculates the sensitivity of inferences. +#' +#' @param model_object The model object produced by glm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param to_return The type of output to return. +#' @param n_treat Number of treatment cases. +#' @param switch_trm Term to switch for sensitivity analysis. +#' @param replace Boolean indicating whether to replace cases or not. +#' @return The results of the konfound analysis. 
+#' @importFrom broom tidy glance +#' @importFrom stats glm + + # konfound-glm -konfound_glm_dichotomous <- function(model_object, tested_variable_string, test_all, alpha, tails, - to_return, n_treat, switch_trm, replace) { +konfound_glm_dichotomous <- function(model_object, + tested_variable_string, + test_all, + alpha, tails, + to_return, + n_treat, + switch_trm, + replace) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) diff --git a/R/konfound-glm.R b/R/konfound-glm.R index aef0941c..fbdcaac3 100644 --- a/R/konfound-glm.R +++ b/R/konfound-glm.R @@ -1,6 +1,33 @@ +#' Konfound Analysis for Generalized Linear Models +#' +#' This function performs konfound analysis on a generalized linear model +#' object. It uses 'broom' to tidy model outputs and calculates the sensitivity +#' of inferences. It supports analysis for a single variable +#' or multiple variables. +#' +#' @param model_object The model object produced by glm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified variable(s). 
+#' @importFrom broom tidy glance +#' @importFrom dplyr select filter bind_cols +#' @importFrom stats glm +#' @importFrom margins margins + + # konfound-glm -konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, tails, index = "RIR", to_return) { +konfound_glm <- function(model_object, + tested_variable_string, + test_all, + alpha, + tails, + index = "RIR", + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -8,11 +35,18 @@ konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, coef_df <- tidy_output[tidy_output$term == tested_variable_string, ] } else { coef_df <- tidy_output[-1, ] - coef_df$est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + coef_df$est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins( + model_object))$AME) == tested_variable_string]) } # to remove intercept est_eff <- coef_df$estimate - est_eff <- suppressWarnings(summary(margins::margins(model_object))$AME[names(summary(margins::margins(model_object))$AME) == tested_variable_string]) + est_eff <- suppressWarnings(summary( + margins::margins(model_object))$AME[ + names(summary( + margins::margins(model_object))$AME) == tested_variable_string]) std_err <- coef_df$std.error n_obs <- glance_output$nobs n_covariates <- glance_output$df.null - glance_output$df.residual @@ -33,11 +67,14 @@ konfound_glm <- function(model_object, tested_variable_string, test_all, alpha, ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") - stop("Multiple variables cannot presently be tested for models fit using glm(); this will be added in the future.") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") + 
stop("Multiple variables cannot presently be tested + for models fit using glm(); this will be added in the future.") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } diff --git a/R/konfound-lm.R b/R/konfound-lm.R index ff2923b3..31a4884e 100644 --- a/R/konfound-lm.R +++ b/R/konfound-lm.R @@ -1,6 +1,29 @@ +#' Konfound Analysis for Linear Models +#' +#' This function performs konfound analysis on a linear model object +#' produced by lm. +#' It calculates the sensitivity of inferences for coefficients in the model. +#' It supports analysis for a single variable or multiple variables. +#' +#' @param model_object The linear model object produced by lm. +#' @param tested_variable_string The name of the variable being tested. +#' @param test_all Boolean indicating whether to test all variables or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified variable(s). 
+#' @importFrom broom tidy glance +#' @importFrom dplyr select filter bind_cols + # konfound-lm -konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { +konfound_lm <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom::tidy(model_object) # tidying output glance_output <- broom::glance(model_object) @@ -31,10 +54,12 @@ konfound_lm <- function(model_object, tested_variable_string, test_all, alpha, t ) return(out) } else { - message("Note that this output is calculated based on the correlation-based approach used in mkonfound()") + message("Note that this output is calculated based on + the correlation-based approach used in mkonfound()") d <- data.frame(t = est_eff / std_err, df = (n_obs - n_covariates - 1)) o <- mkonfound(d, .data$t, .data$df) - term_names <- dplyr::select(tidy_output, var_name = .data$term) # remove the first row for intercept + term_names <- dplyr::select(tidy_output, var_name = .data$term) + # remove the first row for intercept term_names <- dplyr::filter(term_names, .data$var_name != "(Intercept)") return(dplyr::bind_cols(term_names, o)) } diff --git a/R/konfound-lmer.R b/R/konfound-lmer.R index afb773d1..c1280281 100644 --- a/R/konfound-lmer.R +++ b/R/konfound-lmer.R @@ -1,14 +1,47 @@ + # # # konfound-lmer +#' Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model +#' +#' @param model_object The mixed-effects model object produced by lme4::lmer. +#' @return A vector containing degrees of freedom for the fixed effects in the model. 
+#' @importFrom lme4 fixef +#' @importFrom pbkrtest get_Lb_ddf +#' @importFrom purrr map_dbl + get_kr_df <- function(model_object) { L <- diag(rep(1, length(lme4::fixef(model_object)))) L <- as.data.frame(L) - out <- suppressWarnings(purrr::map_dbl(L, pbkrtest::get_Lb_ddf, object = model_object)) + out <- suppressWarnings(purrr::map_dbl( + L, pbkrtest::get_Lb_ddf, object = model_object)) names(out) <- names(lme4::fixef(model_object)) out } -konfound_lmer <- function(model_object, tested_variable_string, test_all, alpha, tails, index, to_return) { + +#' Konfound Analysis for Linear Mixed-Effects Models +#' +#' This function performs konfound analysis on a linear mixed-effects model +#' object produced by lme4::lmer. It calculates the sensitivity of inferences +#' for fixed effects in the model. It supports analysis for a single variable or multiple variables. +#' +#' @param model_object The mixed-effects model object produced by lme4::lmer. +#' @param tested_variable_string The name of the fixed effect being tested. +#' @param test_all Boolean indicating whether to test all fixed effects or not. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return The type of output to return. +#' @return The results of the konfound analysis for the specified fixed effect(s). 
+#' @importFrom broom.mixed tidy +#' @importFrom dplyr filter bind_cols + +konfound_lmer <- function(model_object, + tested_variable_string, + test_all, alpha, + tails, + index, + to_return) { tidy_output <- broom.mixed::tidy(model_object) # tidying output if (test_all == FALSE) { diff --git a/R/konfound.R b/R/konfound.R index d9b1c3c3..62fa74eb 100644 --- a/R/konfound.R +++ b/R/konfound.R @@ -1,13 +1,33 @@ -#' Perform sensitivity analysis on fitted models -#' @description For fitted models, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. Currently works for: models created with lm() (linear models). -#' @param model_object output from a model (currently works for: lm) -#' @param tested_variable Variable associated with the unstandardized beta coefficient to be tested -#' @inheritParams pkonfound -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param two_by_two whether or not the tested variable is a dichotomous variable in a GLM; if so, the 2X2 table approach is used; only works for single variables at present (so test_all = TRUE will return an error) -#' @param test_all whether to carry out the sensitivity test for all of the coefficients (defaults to FALSE) -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference -#' @importFrom rlang .data +#' Konfound Analysis for Various Model Types +#' +#' Performs sensitivity analysis on fitted models including +#' linear models (`lm`), generalized linear models (`glm`), +#' and linear mixed-effects models (`lmerMod`). +#' It calculates the amount of bias required to invalidate or +#' sustain an inference, and the impact of an omitted variable +#' necessary to affect the inference.
+#' +#' @param model_object A model object produced by `lm`, `glm`, or `lme4::lmer`. +#' @param tested_variable Variable associated with the coefficient to be tested. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param index Type of sensitivity analysis ('RIR' by default). +#' @param to_return Type of output to return ('print', 'raw_output', 'table'). +#' @param test_all Boolean; if `TRUE`, tests all coefficients. +#' @param two_by_two Boolean; if `TRUE`, uses a 2x2 table approach +#' for `glm` dichotomous variables. +#' @param n_treat Number of treatment cases +#' (used only if `two_by_two` is `TRUE`). +#' @param switch_trm Boolean; switch treatment and control in the analysis. +#' @param replace Replacement method for treatment cases ('control' by default). +#' @return Depending on `to_return`, prints the result, returns a raw output, +#' or a summary table. +#' @importFrom rlang enquo quo_name +#' @importFrom lme4 fixef lmer +#' @importFrom broom tidy glance +#' @importFrom dplyr filter select bind_cols +#' @importFrom purrr map_dbl +#' @importFrom pbkrtest get_Lb_ddf #' @examples #' # using lm() for linear models #' m1 <- lm(mpg ~ wt + hp, data = mtcars) @@ -49,97 +69,125 @@ konfound <- function(model_object, n_treat = NULL, switch_trm = TRUE, replace = "control") { - - # Stop messages - if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { - stop("konfound() is currently implemented for models estimated with lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") - } - - if ("table" %in% to_return & test_all == TRUE) stop("cannot return a table when test_all is set to TRUE") - - # Dealing with non-standard evaluation - tested_variable_enquo <- rlang::enquo(tested_variable) # dealing with non-standard evaluation (so unquoted names for tested_variable can be used) - tested_variable_string <- rlang::quo_name(tested_variable_enquo) - - # Dispatching based on class - if 
(class(model_object)[1] == "lm") { - output <- konfound_lm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) - if (is.null(output)) { - - } else { - return(output) + # Stop messages + if (!(class(model_object)[1] %in% c("lm", "glm", "lmerMod"))) { + stop("konfound() is currently implemented for models estimated with + lm(), glm(), and lme4::lmer(); consider using pkonfound() instead") } - } - - if (inherits(model_object, "glm") & two_by_two == FALSE) { - message("Note that for a non-linear model, impact threshold should not be interpreted.") - message("Note that this is only an approximation. For exact results in terms of the number of cases that must be switched from treatment success to treatment failure to invalidate the inference see: https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") - message("If a dichotomous independent variable is used, consider using the 2X2 table approach enabled with the argument two_by_two = TRUE") - output <- konfound_glm( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return - ) - return(output) - } - - if (inherits(model_object, "glm") & two_by_two == TRUE) { + if ("table" %in% to_return & test_all == TRUE){ + stop("cannot return a table when test_all is set to TRUE") + } + # Dealing with non-standard evaluation - if(is.null(n_treat)) stop("Please provide a value for n_treat to use this functionality with a dichotomous predictor") - if (test_all == TRUE) stop("test_all = TRUE is not supported when two_by_two is specified") + # dealing with non-standard evaluation + #(so unquoted names for tested_variable can be used) + tested_variable_enquo <- rlang::enquo(tested_variable) + tested_variable_string <- rlang::quo_name(tested_variable_enquo) - output <- 
konfound_glm_dichotomous( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - to_return = to_return, - n_treat = n_treat, - switch_trm = switch_trm, - replace = replace - ) + # Dispatching based on class + if (class(model_object)[1] == "lm") { + output <- konfound_lm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + if (is.null(output)) { + + } else { + return(output) + } + } - return(output) + if (inherits(model_object, "glm") & two_by_two == FALSE) { + message("Note that for a non-linear model, + impact threshold should not be interpreted.") + message("Note that this is only an approximation. For exact results + in terms of the number of cases that must be switched from treatment + success to treatment failure to invalidate the inference see: + https://msu.edu/~kenfrank/non%20linear%20replacement%20treatment.xlsm") + message("If a dichotomous independent variable is used, consider using + the 2X2 table approach enabled with the argument two_by_two = TRUE") + output <- konfound_glm( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return + ) + + return(output) + } - } - - if (inherits(model_object, "lmerMod")) { - output <- konfound_lmer( - model_object = model_object, - tested_variable_string = tested_variable_string, - test_all = test_all, - alpha = alpha, - tails = tails, - index = index, - to_return = to_return - ) + if (inherits(model_object, "glm") & two_by_two == TRUE) { + + if(is.null(n_treat)){ + stop("Please provide a value for + n_treat to use this functionality with a dichotomous predictor") + } + if (test_all == TRUE) { + stop("test_all = TRUE is not supported + when two_by_two is specified") + } + output <- 
konfound_glm_dichotomous( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + to_return = to_return, + n_treat = n_treat, + switch_trm = switch_trm, + replace = replace + ) + + return(output) + + } - message("Note that the Kenward-Roger approximation is used to estimate degrees of freedom for the predictor(s) of interest. We are presently working to add other methods for calculating the degrees of freedom for the predictor(s) of interest. If you wish to use other methods now, consider those detailed here: https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. You can then enter degrees of freedom obtained from another method along with the coefficient, number of observations, and number of covariates to the pkonfound() function to quantify the robustness of the inference.") + if (inherits(model_object, "lmerMod")) { + output <- konfound_lmer( + model_object = model_object, + tested_variable_string = tested_variable_string, + test_all = test_all, + alpha = alpha, + tails = tails, + index = index, + to_return = to_return + ) + + message("Note that the Kenward-Roger approximation is used to + estimate degrees of freedom for the predictor(s) of interest. + We are presently working to add other methods for calculating + the degrees of freedom for the predictor(s) of interest. + If you wish to use other methods now, consider those detailed here: + https://bbolker.github.io/mixedmodels-misc/glmmFAQ.html#why-doesnt-lme4-display-denominator-degrees-of-freedomp-values-what-other-options-do-i-have. 
+ You can then enter degrees of freedom obtained from another method + along with the coefficient, + number of observations, and number of covariates to the pkonfound() + function to quantify the robustness of the inference.") + + return(output) + } + + if (!("table" %in% to_return)) { + message("For more detailed output, + consider setting `to_return` to table") + } - return(output) - } - - if (!("table" %in% to_return)) { - message("For more detailed output, consider setting `to_return` to table") - } - - if (test_all == FALSE) { - message("To consider other predictors of interest, consider setting `test_all` to TRUE.") - } else { - message("Note that presently these predictors of interest are tested independently; future output may use the approach used in mkonfound.") - } + if (test_all == FALSE) { + message("To consider other predictors of interest, + consider setting `test_all` to TRUE.") + } else { + message("Note that presently these predictors of interest are tested + independently; future output may use the approach used + in mkonfound.") + } } diff --git a/R/mkonfound-data.R b/R/mkonfound-data.R index db063821..6e5dd3a1 100644 --- a/R/mkonfound-data.R +++ b/R/mkonfound-data.R @@ -1,7 +1,9 @@ #' Example data for the mkonfound function #' -#' A dataset containing t and df values from example studies from Educational Evaluation -#' and Policy Analysis (as detailed in Frank et al., 2013): https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view +#' A dataset containing t and df values from example studies +#' from Educational Evaluation +#' and Policy Analysis (as detailed in Frank et al., 2013): +#' https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view #' #' @format A data frame with 30 rows and 2 variables: #' \describe{ @@ -10,4 +12,4 @@ #' ... 
#' } #' @source \url{https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view} -"mkonfound_ex" \ No newline at end of file +"mkonfound_ex" diff --git a/R/mkonfound.R b/R/mkonfound.R index 5f926de3..08fa536c 100644 --- a/R/mkonfound.R +++ b/R/mkonfound.R @@ -1,21 +1,32 @@ -#' Perform meta-analyses including sensitivity analysis -#' @description For fitted models, this command carries out sensitivity analysis for a number of models, when their parameters stored in a data.frame. -#' @param d data.frame or tibble with the t-statistics and associated degrees of freedom -#' @param t t-statistic or vector of t-statistics -#' @param df degrees of freedom or vector of degrees of freedom associated with the t-statistics in the t argument -#' @param return_plot whether to return a plot of the percent bias; defaults to FALSE -#' @inheritParams pkonfound -#' @import rlang -#' @import dplyr -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference for each of the cases in the data.frame +#' Meta-Analysis and Sensitivity Analysis for Multiple Studies +#' +#' Performs sensitivity analysis for multiple models, where parameters +#' are stored in a data frame. It calculates the amount of bias required to +#' invalidate or sustain an inference for each case in the data frame. +#' +#' @param d A data frame or tibble containing t-statistics and associated +#' degrees of freedom. +#' @param t Column name or vector of t-statistics. +#' @param df Column name or vector of degrees of freedom associated +#' with t-statistics. +#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for the test (1 or 2). +#' @param return_plot Whether to return a plot of the percent bias +#' (default is `FALSE`). +#' @return Depending on `return_plot`, either returns a data frame with +#' analysis results or a plot. 
+#' @importFrom rlang enquo +#' @importFrom dplyr select pull case_when +#' @importFrom purrr map2_dfr +#' @importFrom ggplot2 ggplot aes_string geom_histogram scale_fill_manual +#' theme_bw ggtitle facet_grid scale_y_continuous theme ylab xlab +#' @importFrom stats qt #' @examples #' \dontrun{ -#' mkonfound_ex -#' str(d) -#' mkonfound(mkonfound_ex, t, df) +#' data <- data.frame(t = c(2.1, 2.3, 1.9), df = c(30, 40, 35)) +#' mkonfound(data, t, df) #' } #' @export -#' mkonfound <- function(d, t, df, alpha = .05, tails = 2, return_plot = FALSE) { t_enquo <- enquo(t) @@ -25,18 +36,23 @@ mkonfound <- function(d, t, df, alpha = .05, tails = 2, return_plot = FALSE) { df_vec <- pull(select(d, !!df_enquo)) if (length(t_vec) <= 1) { - stop("To carry out sensitivity analysis for a single study, use pkonfound()") + stop("To carry out sensitivity analysis + for a single study, use pkonfound()") } - results_df <- suppressWarnings(purrr::map2_dfr(.x = t_vec, .y = df_vec, .f = core_sensitivity_mkonfound)) - + results_df <- suppressWarnings( + purrr::map2_dfr( + .x = t_vec, .y = df_vec, .f = core_sensitivity_mkonfound) +) if (return_plot == TRUE) { results_df$action <- dplyr::case_when( results_df$action == "to_invalidate" ~ "To Invalidate", results_df$action == "to_sustain" ~ "To Sustain" ) - p <- ggplot2::ggplot(results_df, ggplot2::aes_string(x = "pct_bias_to_change_inference", fill = "action")) + + p <- ggplot2::ggplot(results_df, ggplot2::aes_string( + x = "pct_bias_to_change_inference", fill = "action" +)) + ggplot2::geom_histogram() + ggplot2::scale_fill_manual("", values = c("#1F78B4", "#A6CEE3")) + ggplot2::theme_bw() + @@ -86,8 +102,15 @@ core_sensitivity_mkonfound <- function(t, df, alpha = .05, tails = 2) { r_con <- round(sqrt(abs(itcv)), 3) out <- dplyr::data_frame(t, df, action, inference, pct_bias, itcv, r_con) - names(out) <- c("t", "df", "action", "inference", "pct_bias_to_change_inference", "itcv", "r_con") - out$pct_bias_to_change_inference <- 
round(out$pct_bias_to_change_inference, 3) + names(out) <- c("t", + "df", + "action", + "inference", + "pct_bias_to_change_inference", + "itcv", + "r_con") + out$pct_bias_to_change_inference <- round( + out$pct_bias_to_change_inference, 3) out$itcv <- round(out$itcv, 3) out$action <- as.character(out$action) out$inference <- as.character(out$inference) diff --git a/R/nonlinear_auxiliary.R b/R/nonlinear_auxiliary.R index 6b60f6b6..f57dde34 100644 --- a/R/nonlinear_auxiliary.R +++ b/R/nonlinear_auxiliary.R @@ -1,3 +1,6 @@ +#' @importFrom stats qt + + # to evaluate whether we are moving cases to invalidate or sustain the inference isinvalidate <- function(thr_t, ob_t) { if ((0 < thr_t && thr_t < ob_t) || (ob_t < thr_t && thr_t < 0)) { @@ -8,7 +11,7 @@ isinvalidate <- function(thr_t, ob_t) { return(x) } -# to evaluate what kind of switches we need - increase or decrease the odds ratio +# to evaluate what kind of switches we need - increase/decrease the odds ratio isdcroddsratio <- function(thr_t, ob_t) { if (thr_t < ob_t) { x <- T @@ -18,7 +21,8 @@ isdcroddsratio <- function(thr_t, ob_t) { return(x) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - first solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - first solution get_a1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a1 <- -(1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (2 * n_trm * (-1 + odds_ratio) * odds_ratio + n_trm^2 * odds_ratio * std_err^2 -
odds_ratio * std_err^2 + @@ -38,7 +43,8 @@ get_c1_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(c1) } -# get the a cell (control failure) for given odds_ratio, se and treatment cases - second solution +# get the a cell (control failure) for given odds_ratio, +# se and treatment cases - second solution get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { a2 <- (1 / (2 * (1 + odds_ratio^2 + odds_ratio * (-2 + n_trm * std_err^2)))) * (-2 * n_trm * (-1 + odds_ratio) * odds_ratio - n_trm^2 * odds_ratio * std_err^2 + @@ -48,7 +54,8 @@ get_a2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { return(a2) } -# get the c cell (treatment failure) for given odds_ratio, se and treatment cases - second solution +# get the c cell (treatment failure) for given odds_ratio, +# se and treatment cases - second solution get_c2_kfnl <- function(odds_ratio, std_err, n_obs, n_trm) { c2 <- (2 * n_trm - 2 * n_trm * odds_ratio + n_obs * n_trm * odds_ratio * std_err^2 - n_trm^2 * odds_ratio * std_err^2 + @@ -75,8 +82,10 @@ taylorexp <- function(a, b, c, d, q, thr) { d1square <- Num1 / Den1 - Num2 / Den2 # d1unsquare is the first derivative of the unsquared term d1unsquare <- d1square / (2 * log(a * (d - q) / (b * (c + q))) / sqrt(1 / a + 1 / b + 1 / (c + q) + 1 / (d - q))) - # x is the number of cases need to be replaced solved based on the taylor expansion - # this is the (linear approximation) of the original/unsquared term around the value of q + # x is the number of cases need to be replaced + # solved based on the taylor expansion + # this is the (linear approximation) of the + # original/unsquared term around the value of q x <- (thr - log(a * (d - q) / (b * (c + q))) / sqrt((1 / a + 1 / b + 1 / (c + q) + 1 / (d - q)))) / d1unsquare + q return(x) } @@ -100,6 +109,8 @@ get_abcd_kfnl <- function(a1, b1, c1, d1) { } # get the number of switches + + getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### calculate the est and se after rounding (before 
any switches) a <- table_bstart[1] @@ -129,7 +140,9 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + thr_t - tryall_t > 0 & tryall_est*est_eff_start > 0 + ) } if (t_start > thr_t) { # transfer cases from A to B or D to C to decrease odds ratio @@ -139,7 +152,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_t <- get_t_kfnl(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE(tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) + allnotenough <- isTRUE( + tryall_t - thr_t > 0 & tryall_est*est_eff_start > 0) } ### run following if transfering one row is enough @@ -165,20 +179,23 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -205,9 +222,11 @@ 
getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { t_loop <- t_taylor } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -263,7 +282,8 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { est_eff_final <- log(a_final * d_final / (b_final * c_final)) std_err_final <- sqrt(1 / a_final + 1 / b_final + 1 / c_final + 1 / d_final) t_final <- est_eff_final / std_err_final - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) if (switch_trm == allnotenough) { final <- abs(a - a_final) + as.numeric(allnotenough) * abs(c - c_final) } else { @@ -289,9 +309,12 @@ getswitch <- function(table_bstart, thr_t, switch_trm, n_obs) { ### return result result <- list( - final_switch = final, table_start = table_start, table_final = table_final, est_eff_start = est_eff_start, - est_eff_final = est_eff_final, std_err_start = std_err_start, std_err_final = std_err_final, - t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, perc_bias_pred = perc_bias_pred, + final_switch = final, table_start = table_start, + table_final = table_final, est_eff_start = est_eff_start, + est_eff_final = est_eff_final, std_err_start = std_err_start, + std_err_final = std_err_final, + t_start = t_start, t_final = t_final, taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, step = step, needtworows = allnotenough, final_extra = final_extra ) @@ -314,6 +337,20 @@ get_pi <- function(odds_ratio, std_err, n_obs, n_trm) { return(x) 
} +#' Perform a Chi-Square Test +#' +#' @description +#' `chisq_p` calculates the p-value for a chi-square test given a contingency table. +#' +#' @param a Frequency count for row 1, column 1. +#' @param b Frequency count for row 1, column 2. +#' @param c Frequency count for row 2, column 1. +#' @param d Frequency count for row 2, column 2. +#' +#' @return P-value from the chi-square test. +#' @importFrom stats chisq.test + + # get p value for chi-square test chisq_p <- function(a, b, c, d){ table <- matrix(c(a,b,c,d), byrow = TRUE, 2, 2) @@ -354,11 +391,13 @@ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- chisq_p(a, b, c, d) chisq_ob <- chisq_value(a, b, c, d) -# to evaluate whether we are moving cases to invalidate or sustain the inference +# to evaluate whether we are moving cases to +# invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} -# to evaluate what kind of switches we need - increase or decrease the odds ratio +# to evaluate what kind of switches we need-increase +#or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -388,7 +427,8 @@ if (!dcroddsratio_start) { b_tryall <- b - (b - 1) * (1 - as.numeric(switch_trm)) tryall_p <- chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -398,7 +438,8 @@ if (dcroddsratio_start ) { b_tryall <- b + (a - 1) * (1 - as.numeric(switch_trm)) tryall_p <- chisq_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) + allnotenough 
<- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) } ### run following if transfering one row is enough @@ -424,20 +465,23 @@ if (!allnotenough) { ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many + ### and causes non-positive odds ratio if (a_taylor <= 0) { b_taylor <- a_taylor + b_taylor - 1 a_taylor <- 1 @@ -465,9 +509,11 @@ if (!allnotenough) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } -### when we need to transfer two rows the previously defined tryall are the starting point for brute force +### when we need to transfer two rows the previously defined +### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point + ### and call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -487,8 +533,10 @@ if (t_loop < thr_t) { } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 
* (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -508,8 +556,10 @@ if (t_loop > thr_t) { t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -522,15 +572,21 @@ if (t_loop > thr_t) { p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) -### start 2nd round brute force - use fisher test p value as evaluation criterion -#### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p +### start 2nd round brute force - use fisher +###test p value as evaluation criterion +#### scenario 1 need to reduce odds ratio to +###invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- + d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, 
d_loopsec) } c_final <- c_loopsec @@ -540,41 +596,63 @@ if (isinvalidate_start & dcroddsratio_start){ } if (p_loopsec > thr_p){ #taylor too much, return some odds ratio while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p +#### scenario 2 need to reduce odds ratio to +#### sustain the inference-need to reduce p if (!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- 
d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- chisq_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec @@ -583,14 +661,20 @@ if (!isinvalidate_start & dcroddsratio_start) { } } -#### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p +#### scenario 3 need 
to increase odds ratio to +#### invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -598,29 +682,42 @@ if (isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - 
as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) } } -#### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p +#### scenario 4 need to increase odds ratio +#### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } c_final <- c_loopsec @@ -628,23 +725,34 @@ if (!isinvalidate_start & !dcroddsratio_start){ a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * 
as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) p_loopsec <- chisq_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: -table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) +table_final <- matrix( + c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2 +) p_final <- chisq_p(a_final, b_final, c_final, d_final) chisq_final <- chisq_value(a_final, b_final, c_final, d_final) @@ -669,11 +777,18 @@ if (allnotenough) { total_switch <- final + allnotenough*final_extra -result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, chisq_final = chisq_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = total_switch, isinvalidate_ob = isinvalidate_ob) +result <- list(final_switch = final, + User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + chisq_final = 
chisq_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) } @@ -693,11 +808,13 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) p_ob <- fisher_p(a, b, c, d) - # to evaluate whther we are moving cases to invalidate or sustain the inference + # to evaluate whther we are moving cases + # to invalidate or sustain the inference if (p_ob < thr_p){isinvalidate_ob <- TRUE} if (p_ob > thr_p){isinvalidate_ob <- FALSE} - # to evaluate what kind of switches we need - increase or decrease the odds ratio + # to evaluate what kind of switches + # we need-increase or decrease the odds ratio if (odds_ratio >= 1) {dcroddsratio_ob <- isinvalidate_ob} if (odds_ratio < 1) {dcroddsratio_ob <- !isinvalidate_ob} @@ -732,7 +849,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est< 0 & tryall_est*est > 0 + ) } if (dcroddsratio_start ) { # transfer cases from A to B or D to C to decrease odds ratio @@ -746,7 +865,9 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ if (d_tryall == 0) {d_tryall <- d_tryall + 0.5} tryall_p <- fisher_p(a_tryall, b_tryall, c_tryall, d_tryall) tryall_est <- log(a_tryall*d_tryall/c_tryall/b_tryall) - allnotenough <- isTRUE((thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0) + allnotenough <- isTRUE( + (thr_p-tryall_p)*tryall_est> 0 & tryall_est*est > 0 + ) } ### run following if transfering one row is enough @@ -772,20 +893,25 @@ getswitch_fisher <- 
function(a, b, c, d, thr_p = 0.05, switch_trm = T){ ### calculate predicted switches based on Taylor expansion if (switch_trm) { - taylor_pred <- abs(taylorexp(a, b, c, d, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(a, b, c, d, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a) b_taylor <- round(b) c_taylor <- round(c + taylor_pred * step) d_taylor <- round(d - taylor_pred * step) } else { - taylor_pred <- abs(taylorexp(d, c, b, a, step * perc_bias_pred, thr_t)) + taylor_pred <- abs( + taylorexp(d, c, b, a, step * perc_bias_pred, thr_t) + ) a_taylor <- round(a - taylor_pred * step) b_taylor <- round(b + taylor_pred * step) c_taylor <- round(c) d_taylor <- round(d) } - ### check whether taylor_pred move too many and causes non-positive odds ratio + ### check whether taylor_pred move too many and + ### causes non-positive odds ratio if (a_taylor < 0) { b_taylor <- a_taylor + b_taylor a_taylor <- 0 @@ -813,9 +939,11 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } - ### when we need to transfer two rows the previously defined tryall are the starting point for brute force + ### when we need to transfer two rows the previously defined + ### tryall are the starting point for brute force if (allnotenough) { - ### Later: set tryall as the starting point and call this getswitch function again + ### Later: set tryall as the starting point and + ### call this getswitch function again a_loop <- a_tryall b_loop <- b_tryall c_loop <- c_tryall @@ -839,8 +967,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } ### make a small adjustment to make it just below/above the thresold if (t_start > thr_t) { - c_loopsec <- c_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop - + 1 * (1 
- as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop - 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop + 1 * as.numeric(switch_trm == allnotenough) } else if (t_start < thr_t) { @@ -864,8 +994,10 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ t_loop <- get_t_kfnl(a_loop, b_loop, c_loop, d_loop) } if (t_start < thr_t) { - c_loopsec <- c_loop - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loop + 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_loopsec <- c_loop - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loop + + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_loopsec <- a_loop + 1 * as.numeric(switch_trm == allnotenough) b_loopsec <- b_loop - 1 * as.numeric(switch_trm == allnotenough) } else if (t_start > thr_t) { @@ -878,59 +1010,94 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) - ### start 2nd round brute force - use fisher test p value as evaluation criterion - #### scenario 1 need to reduce odds ratio to invalidate the inference-need to increase p + ### start 2nd round brute force - use fisher test + ### p value as evaluation criterion + #### scenario 1 need to reduce odds ratio to + ### invalidate the inference-need to increase p if (isinvalidate_start & dcroddsratio_start){ if (p_loopsec < thr_p) { while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * 
as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){ #taylor too much, return some odds ratio + #taylor too much, return some odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 2 need to reduce odds ratio to sustain the inference-need to reduce p + #### scenario 2 need to reduce odds ratio + ### to sustain the inference-need to reduce p if (!isinvalidate_start & dcroddsratio_start) { if (p_loopsec < thr_p) { # taylor too much, return some odds ratio while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == 
allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) } - if (p_loopsec > thr_p){ # taylor not enough, continue to reduce odds ratio + # taylor not enough, continue to reduce odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * 
as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec @@ -939,68 +1106,108 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ } } - #### scenario 3 need to increase odds ratio to invalidate the inference-need to increase p + #### scenario 3 need to increase odds + ### ratio to invalidate the inference-need to increase p if (isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec < thr_p){ #taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec < thr_p){ while (p_loopsec < thr_p) { - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec > thr_p){#taylor too much, returns some odds ratio - decrease + #taylor too much, returns some odds ratio - decrease + if (p_loopsec > thr_p){ while(p_loopsec > thr_p) { - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- 
b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } - #### scenario 4 need to increase odds ratio to sustain the inference-need to decrease p + #### scenario 4 need to increase odds ratio + #### to sustain the inference-need to decrease p if (!isinvalidate_start & !dcroddsratio_start){ - if (p_loopsec > thr_p){#taylor not enough, continue to increase odds ratio + #taylor not enough, continue to increase odds ratio + if (p_loopsec > thr_p){ while (p_loopsec > thr_p){ - c_loopsec <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == 
allnotenough)) + a_loopsec <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } c_final <- c_loopsec d_final <- d_loopsec a_final <- a_loopsec b_final <- b_loopsec } - if (p_loopsec < thr_p){#taylor too much, return some odds ratio - decrease + #taylor too much, return some odds ratio - decrease + if (p_loopsec < thr_p){ while (p_loopsec < thr_p){ - c_loopsec <- c_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_loopsec <- d_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_loopsec <- a_loopsec - 1 * as.numeric(switch_trm == allnotenough) - b_loopsec <- b_loopsec + 1 * as.numeric(switch_trm == allnotenough) - p_loopsec <- fisher_p(a_loopsec, b_loopsec, c_loopsec, d_loopsec) + c_loopsec <- c_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_loopsec <- d_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_loopsec <- a_loopsec - + 1 * as.numeric(switch_trm == allnotenough) + b_loopsec <- b_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + p_loopsec <- fisher_p( + a_loopsec, b_loopsec, c_loopsec, d_loopsec + ) } - c_final <- c_loopsec - 1 * (1 - as.numeric(switch_trm == allnotenough)) - d_final <- d_loopsec + 1 * (1 - as.numeric(switch_trm == allnotenough)) - a_final <- a_loopsec + 1 * as.numeric(switch_trm == allnotenough) - b_final <- b_loopsec - 1 * as.numeric(switch_trm == allnotenough) + c_final <- c_loopsec - + 1 * (1 - as.numeric(switch_trm == allnotenough)) + d_final <- d_loopsec + + 1 * (1 - as.numeric(switch_trm == allnotenough)) + a_final <- a_loopsec + + 1 * as.numeric(switch_trm == allnotenough) + b_final <- b_loopsec - + 1 * as.numeric(switch_trm == allnotenough) } } ### so the final results (after switching) is as follows: - table_final <- matrix(c(a_final, b_final, c_final, d_final), byrow = TRUE, 2, 2) + table_final <- matrix( + c(a_final, 
b_final, c_final, d_final), byrow = TRUE, 2, 2 + ) p_final <- fisher_p(a_final, b_final, c_final, d_final) fisher_final <- fisher_oddsratio(a_final, b_final, c_final, d_final) @@ -1025,11 +1232,17 @@ getswitch_fisher <- function(a, b, c, d, thr_p = 0.05, switch_trm = T){ total_switch <- final + allnotenough*final_extra - result <- list(final_switch = final, User_enter_value = table_start, Transfer_Table = table_final, - p_final = p_final, fisher_final = fisher_final, - needtworows=allnotenough, taylor_pred = taylor_pred, - perc_bias_pred = perc_bias_pred, final_extra = final_extra, - dcroddsratio_ob = dcroddsratio_ob, total_switch = total_switch, isinvalidate_ob = isinvalidate_ob) + result <- list(final_switch = final, User_enter_value = table_start, + Transfer_Table = table_final, + p_final = p_final, + fisher_final = fisher_final, + needtworows=allnotenough, + taylor_pred = taylor_pred, + perc_bias_pred = perc_bias_pred, + final_extra = final_extra, + dcroddsratio_ob = dcroddsratio_ob, + total_switch = total_switch, + isinvalidate_ob = isinvalidate_ob) return(result) -} \ No newline at end of file +} diff --git a/R/pkonfound.R b/R/pkonfound.R index b2146218..2f297847 100644 --- a/R/pkonfound.R +++ b/R/pkonfound.R @@ -1,34 +1,59 @@ #' Perform sensitivity analysis for published studies -#' @description For published studies, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. 
-#' @param est_eff the estimated effect (such as an unstandardized beta coefficient or a group mean difference) -#' @param std_err the standard error of the estimate of the unstandardized regression coefficient +#' @description For published studies, this command calculates +#' (1) how much bias there must be in an estimate to invalidate/sustain +#' an inference; (2) the impact of an omitted variable necessary to +#' invalidate/sustain an inference for a regression coefficient. +#' @param est_eff the estimated effect (such as an unstandardized beta +#' coefficient or a group mean difference) +#' @param std_err the standard error of the estimate of the unstandardized +#' regression coefficient #' @param n_obs the number of observations in the sample #' @param n_covariates the number of covariates in the regression model #' @param alpha probability of rejecting the null hypothesis (defaults to 0.05) -#' @param tails integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2) -#' @param index whether output is RIR or IT (impact threshold); defaults to "RIR" -#' @param nu what hypothesis to be tested; defaults to testing whether est_eff is significantly different from 0 -#' @param model_type the type of model being estimated; defaults to "ols" for a linear regression model; the other option is "logistic" -#' @param n_treat the number of cases associated with the treatment condition; applicable only when model_type = "logistic" -#' @param switch_trm whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic" -#' @param a cell is the number of cases in the control group showing unsuccessful results -#' @param b cell is the number of cases in the control group showing successful results -#' @param c cell is the number of cases in the treatment group showing unsuccessful results -#' @param d cell is the number of cases in the treatment group showing successful results -#' @param 
two_by_two_table table that is a matrix or can be coerced to one (data.frame, tibble, tribble) from which the a, b, c, and d arguments can be extracted -#' @param test whether using Fisher's Exact Test or A chi-square test; defaults to Fisher's Exact Test -#' @param replace whether using entire sample or the control group to calculate the base rate; default is the control group +#' @param tails integer whether hypothesis testing is one-tailed (1) or +#' two-tailed (2; defaults to 2) +#' @param index whether output is RIR or IT (impact threshold); +#' defaults to "RIR" +#' @param nu what hypothesis to be tested; defaults to testing whether +#' est_eff is significantly different from 0 +#' @param model_type the type of model being estimated; defaults to "ols" for +#' a linear regression model; the other option is "logistic" +#' @param n_treat the number of cases associated with the treatment condition; +#' applicable only when model_type = "logistic" +#' @param switch_trm whether to switch the treatment and control cases; defaults +#' to FALSE; applicable only when model_type = "logistic" +#' @param a cell is the number of cases in the control group showing +#' unsuccessful results +#' @param b cell is the number of cases in the control group showing successful +#' results +#' @param c cell is the number of cases in the treatment group showing +#' unsuccessful results +#' @param d cell is the number of cases in the treatment group showing +#' successful results +#' @param two_by_two_table table that is a matrix or can be coerced to one +#' (data.frame, tibble, tribble) from which the a, b, c, and d arguments can +#' be extracted +#' @param test whether using Fisher's Exact Test or A chi-square test; defaults +#' to Fisher's Exact Test +#' @param replace whether using entire sample or the control group to calculate +#' the base rate; default is the control group #' @param sdx the standard deviation of X #' @param sdy the standard deviation of Y #' @param R2 the 
unadjusted,original R2 in the observed function #' @param eff_thr unstandardized coefficient threshold to change an inference -#' @param FR2max the largest R2, or R2max, in the final model with unobserved confounder -#' @param FR2max_multiplier the multiplier of R2 to get R2max, default is set to 1.3 -#' @param to_return whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return +#' @param FR2max the largest R2, or R2max, in the final model with unobserved +#' confounder +#' @param FR2max_multiplier the multiplier of R2 to get R2max, +#' default is set to 1.3 +#' @param to_return whether to return a data.frame +#' (by specifying this argument to equal "raw_output" for use in other analyses) +#' or a plot ("plot"); default is to print ("print") the output to the console; +#' can specify a vector of output to return #' @importFrom stats fisher.test #' @import lavaan #' @import ggplot2 -#' @return prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +#' @return prints the bias and the number of cases that would have to be +#' replaced with cases for which there is no effect to invalidate the inference #' @examples #' # using pkonfound for linear models #' pkonfound(2, .4, 100, 3) @@ -62,9 +87,13 @@ # pkonfound(two_by_two_table = my_table) # #' # use pkonfound to calculate delta* and delta_exact -#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") +#' pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, +#' eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") #' # use pkonfound to calculate rxcv and rycv when preserving standard error -#' pkonfound(est_eff = .5, std_err = .056, n_obs = 
6174, eff_thr = .1, sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") +#' pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, +#' sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") +#' @importFrom dplyr pull +#' @importFrom tibble tribble #' @export pkonfound <- function(est_eff, @@ -92,7 +121,8 @@ FR2max, FR2max_multiplier = 1.3, to_return = "print") { - if ("table" %in% to_return) stop("a table can only be output when using konfound") + if ("table" %in% to_return) stop("a table can only be + output when using konfound") if (index == "COP") { @@ -139,7 +169,10 @@ ) } else if (model_type == "logistic" & is.null(n_treat)) { - warning("For a logistic regression, the log-odds coefficients are used for the pkonfound() calculations. To carry out an analysis using average marginal effects, you can use the konfound() function with the results of a model estimated in R.") + warning("For a logistic regression, the log-odds coefficients + are used for the pkonfound() calculations. To carry out an + analysis using average marginal effects, you can use the + konfound() function with the results of a model estimated in R.") out <- test_sensitivity_ln( est_eff = est_eff, @@ -157,7 +190,8 @@ } else if(!is.null(a)) { # error handling if (is.null(a) | is.null(b) | is.null(c) | is.null(d)) { - stop("Please enter values for a, b, c, and d to use the 2 x 2 table functionality") + stop("Please enter values for a, b, c, + and d to use the 2 x 2 table functionality") } out <- tkonfound(a = a, @@ -208,7 +242,8 @@ if (!is.null(out)) { # dealing with a strange print issue } if (to_return == "print") { - message("For other forms of output, run ?pkonfound and inspect the to_return argument") + message("For other forms of output, run ? 
+ pkonfound and inspect the to_return argument") } message("For models fit in R, consider use of konfound().") diff --git a/R/test_cop.R b/R/test_cop.R index 5f94ff95..e1baf749 100644 --- a/R/test_cop.R +++ b/R/test_cop.R @@ -1,5 +1,30 @@ -# COP standards for Coefficient of Proportionality -# test_cop calculates both versions of COP (Oster's approx & exact) +#' Coefficient of Proportionality (COP) Test +#' +#' Conducts the Coefficient of Proportionality (COP) test, calculating both +#' Oster's approximate and exact versions of COP. +#' +#' @param est_eff The estimated effect (unstandardized). +#' @param std_err The standard error of the effect (unstandardized). +#' @param n_obs Number of observations. +#' @param n_covariates Number of covariates in the model. +#' @param sdx Standard deviation of the predictor variable. +#' @param sdy Standard deviation of the outcome variable. +#' @param R2 R-squared of the model, not adjusted. +#' @param eff_thr Threshold for the effect size, unstandardized. +#' @param FR2max_multiplier Multiplier for R2 to get R2max. +#' @param FR2max Maximum R-squared in the final model with an +#' unobserved confounder. +#' @param alpha Significance level for hypothesis testing (default: 0.05). +#' @param tails Number of tails for hypothesis testing (default: 2). +#' @param to_return Type of output to return ('raw_output', 'print', or other). +#' @return A list containing results of the COP test, including delta star, +#' delta exact,percentage bias, and other statistical measures. +#' Can also print summary results. 
+#' @importFrom stats qt +#' @importFrom ggplot2 ggplot aes geom_point geom_line scale_shape_manual +#' scale_y_continuous +#' @importFrom ggplot2 sec_axis theme element_blank element_line element_text +#' @export test_cop <- function(est_eff, # unstandardized std_err, # unstandardized @@ -10,327 +35,409 @@ test_cop <- function(est_eff, # unstandardized R2, # NOT the adjusted R2, should be the original R2 eff_thr = 0, # this is the unstandardized version FR2max_multiplier = 1.3, - FR2max = 0, # NOT the adjusted R2, should be the original R2 + FR2max = 0, #NOT the adjusted R2, should be the original R2 alpha = 0.05, tails = 2, to_return = to_return){ - - ## test example - # est_eff = .125 - # std_err = .050 - # n_obs = 6174 - # n_covariates = 7 - # sdx = .217 - # sdy = .991 - # R2 = .251 - # eff_thr = 0 - # FR2max = .61 - # test_cop(est_eff = .4, std_err = .1, n_obs = 290, - # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, FR2max = .8, to_return = "short") - - ## prepare input - df = n_obs - n_covariates - 3 ## df of M3 - var_x = sdx^2 - var_y = sdy^2 - - ### if the user specifies R2max directly then we use the specified R2max - if (FR2max == 0){FR2max = FR2max_multiplier * R2} - var_z = sdz = 1 - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} - if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs to be greater than R2.")} - if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! 
Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} - - negest <- 0 # an indicator of whether the use specified est_eff is negative, 1 is yes negative - if (est_eff < 0) { - est_eff <- abs(est_eff) - negest <- 1 - } - - ## now standardize - beta_thr = eff_thr * sdx / sdy - beta = est_eff * sdx / sdy - SE = std_err * sdx / sdy - - ## observed regression, reg y on x Given z - tyxGz = beta / SE ### this should be equal to est_eff / std_err - ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) - ## df + 1 because omitted variable is NOT included in M2 - ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) - ## ryxGz_M2 is only for simulation to recover the exact number - - ## make sure R2 due to x alone is not larger than overall or observed R2 - if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} - - ## calculate ryz, rxz, rxy - ryz = rzy = cal_ryz(ryxGz, R2) - rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) - ## df + 1 because omitted variable is NOT included in M2 - #### we change the n_obs to df to recover the rxz as in the particular sample - - ## note that in the updated approach rxy is not necessary to calculate rxcv_exact, ryxcv_exact and delta_exact - rxy = ryx = cal_rxy(ryxGz, rxz, ryz) - rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) - ## rxy_M2 is only for simulation to recover the exact number - - ## baseline regression model, no z (unconditional) - eff_uncond = sqrt((var_y / var_x)) * rxy - t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) - ## n_obs - 2 - adjust the df in the M1 - std_err_uncond = eff_uncond / t_uncond - R2_uncond = rxy^2 - - ## calculate delta_star - delta_star = cal_delta_star(FR2max, R2, R2_uncond, est_eff, eff_thr, var_x, var_y, eff_uncond, rxz, n_obs) - - ## now introduce cv - sdcv = var_cv = 1 - rcvz = rzcv = 0 - v = 1 - rxz^2 # this is to simplify calculation later - D = sqrt(FR2max - R2) # same above - - ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) - rxcv_oster = 
rcvx_oster = delta_star * rxz * (sdcv / sdz) - if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) - {rcvy_oster = rycv_oster = - D * sqrt(1 - (rcvx_oster^2 / v)) + - (ryx * rcvx_oster) / (v) - - (ryz * rcvx_oster * rxz) / (v)} - - # Checking beta, R2, se generated by delta_star with a regression - verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) - - # prepare some other values in the final Table (long output) - R2_M3_oster = as.numeric(verify_oster[1]) - eff_x_M3_oster = as.numeric(verify_oster[2]) # should be equivalent or very close to eff_thr - se_x_M3_oster = as.numeric(verify_oster[3]) - beta_x_M3_oster = as.numeric(verify_oster[9]) # should be equivalent or very close to beta_thr - t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster - eff_z_M3_oster = as.numeric(verify_oster[4]) - se_z_M3_oster = as.numeric(verify_oster[5]) - eff_cv_M3_oster = as.numeric(verify_oster[6]) - se_cv_M3_oster = as.numeric(verify_oster[7]) - cov_oster = verify_oster[[11]] - cor_oster = verify_oster[[12]] - - ## calculate the exact/true rcvx, rcvy, delta (updated version that does not need rxy) - ### the idea is to calculate everything conditional on z - sdxGz = sdx * sqrt(1 - rxz^2) - sdyGz = sdy * sqrt(1 - ryz^2) - ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) - ### equation 7 in the manuscript - rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / - sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - - 2 * ryxGz * sdxGz / sdyGz * beta_thr + - ryxcvGz_exact_sq) - ### equation 6 in the manuscript - rycvGz_exact = ryxGz * rxcvGz_exact + - sqrt((ryxcvGz_exact_sq - ryxGz^2) * - (1 - rxcvGz_exact^2)) - ### now get unconditional exact rxcv and rycv - rycv_exact = sqrt(1 - ryz^2) * rycvGz_exact - rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact - delta_exact = rxcv_exact / rxz - - ## previous approach - comment out, but could find in cop_pse_auxiliary - ## exact_result = cal_delta_exact(ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) - ## rxcv_exact = 
rcvx_exact = as.numeric(exact_result[1]) - ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) - ## delta_exact = as.numeric(exact_result[3]) - - # Checking beta, R2, se generated by True/Exact Delta with a regression - verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, - rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) - ### verify_exact[1] == verify_exact[4] == FR2max - ### verify_exact[2] == eff_thr - ### verify_exact[5] == beta_thr - - # calculate % bias in delta comparing oster's delta_star with true delta - delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact - - # prepare some other values in the final Table (long output) - R2_M3 = as.numeric(verify_exact[1]) - eff_x_M3 = as.numeric(verify_exact[2]) # should be equivalent or very close to eff_thr - se_x_M3 = as.numeric(verify_exact[3]) - beta_x_M3 = as.numeric(verify_exact[9]) # should be equivalent or very close to beta_thr - t_x_M3 = eff_x_M3 / se_x_M3 - eff_z_M3 = as.numeric(verify_exact[4]) - se_z_M3 = as.numeric(verify_exact[5]) - eff_cv_M3 = as.numeric(verify_exact[6]) - se_cv_M3 = as.numeric(verify_exact[7]) - cov_exact = verify_exact[[11]] - cor_exact = verify_exact[[12]] - - verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) - R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err - eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) - se_z_M2 = as.numeric(verify_pse_reg_M2[5]) - t_x_M2 = eff_x_M2 / se_x_M2 - - verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx - se_x_M1 = as.numeric(verify_pse_reg_M1[3]) - t_x_M1 = eff_x_M1 / se_x_M1 - - delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - 
est_eff))* - ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) - - fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, beta_x_M3_oster, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, # t values for X in three reg models - # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, # reg coef for Z in three reg models - # NA, se_z_M2, se_z_M3, se_z_M3_oster, # se for Z in three reg models - # NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, # t for Z in three reg models, - NA, NA, eff_cv_M3, eff_cv_M3_oster, # reg coef for CV in three reg models - NA, NA, se_cv_M3, se_cv_M3_oster, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3, eff_cv_M3_oster / se_cv_M3_oster), # t for CV in three reg models - nrow = 8, ncol = 4, byrow = T) - - rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", - # "coef_Z", "SE_Z", "t_Z", - "coef_CV", "SE_CV", "t_CV") - - colnames(fTable) <- c("M1:X", "M2:X,Z", - "M3(delta_exact):X,Z,CV", - "M3(delta*):X,Z,CV") - - ## figure - figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", - "Intermediate(M2)", eff_x_M2, R2, "exact", - "Final(M3)", eff_x_M3, FR2max, "exact", - "Intermediate(M2)", eff_x_M2, R2, "star", - "Final(M3)", eff_x_M3_oster, FR2max, "star"), nrow = 5, ncol = 4, byrow = T) - colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") - - figTable <- as.data.frame(figTable) - figTable$ModelLabel <- as.character(figTable$ModelLabel) - figTable$ModelLabel <- factor(figTable$ModelLabel, - levels = c("Baseline(M1)", - "Intermediate(M2)", - "Final(M3)")) - figTable$cat <- as.character(figTable$cat) - figTable$cat <- factor(figTable$cat, - levels = c("exact", - "star")) - figTable$coef_X <- as.numeric(figTable$coef_X) - figTable$R2 <- 
as.numeric(figTable$R2) - -scale = 1/(round(max(figTable$coef_X)*10)/10) - -fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + - ggplot2::geom_point(ggplot2::aes(y = coef_X, group = cat, shape = cat), color = "blue", size = 3) + - ggplot2::scale_shape_manual(values = c(16, 1)) + - ggplot2::geom_point(ggplot2::aes(y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + - # scale_linetype_manual(values=c("solid", "dotted")) + - ggplot2::geom_line(ggplot2::aes(y = R2/scale, group = cat), linetype = "solid", color = "#7CAE00") + - ggplot2::geom_line(ggplot2::aes(y = coef_X, group = cat, linetype = cat), color = "blue") + - ggplot2::scale_y_continuous( - # Features of the first axis - name = "Coeffcient (X)", - # Add a second axis and specify its features - sec.axis = ggplot2::sec_axis(~.* scale, - name="R2")) + - ggplot2::theme(axis.title.x = ggplot2::element_blank(), - legend.position = "none", - axis.line.y.right = ggplot2::element_line(color = "#7CAE00"), - axis.title.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.text.y.right = ggplot2::element_text(color = "#7CAE00"), - axis.line.y.left = ggplot2::element_line(color = "blue"), - axis.title.y.left = ggplot2::element_text(color = "blue"), - axis.text.y.left = ggplot2::element_text(color = "blue"), - axis.line.x.bottom = ggplot2::element_line(color = "black"), - axis.text.x.bottom = ggplot2::element_text(color = "black")) - - ############################################## - ######### conditional RIR #################### - - # calculating critical r - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) * -1 - } else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) - } - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) - - # final solutions - cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / - (R2 - ryz^2 + ryz^2 * critical_r^2) - cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs 
- - cond_RIRpi_null <- 1 - sqrt(critical_r^2 / - (R2 - ryz^2 + ryz^2 * critical_r^2)) - cond_RIR_null <- cond_RIRpi_null * n_obs - - cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / - (R2 - ryz^2)) - cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs - - ############################################## - ######### output ############################# - - if (to_return == "raw_output") { - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## test example + # est_eff = .125 + # std_err = .050 + # n_obs = 6174 + # n_covariates = 7 + # sdx = .217 + # sdy = .991 + # R2 = .251 + # eff_thr = 0 + # FR2max = .61 + # test_cop(est_eff = .4, std_err = .1, n_obs = 290, + # sdx = 2, sdy = 6, R2 = .7, eff_thr = 0, + # FR2max = .8, to_return = "short") + + ## prepare input + df = n_obs - n_covariates - 3 ## df of M3 + var_x = sdx^2 + var_y = sdy^2 + + ### if the user specifies R2max directly then we use the specified R2max + if (FR2max == 0){FR2max = FR2max_multiplier * R2} + var_z = sdz = 1 + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater + than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to + be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.") + } + if (!(R2 < FR2max)) {stop("Did not run! R2 Max needs + to be greater than R2.")} + if (!(FR2max < 1)) {stop("Did not run! R2 Max needs to be less than 1.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! 
Entered values produced Rxz^2 <=0, consider + adding more significant digits to your entered values.") + } + # an indicator of whether the user specified est_eff is + # negative, 1 is yes negative + negest <- 0 + if (est_eff < 0) { + est_eff <- abs(est_eff) + negest <- 1 } - output <- list("delta*" = delta_star, - "delta*restricted" = delta_star_restricted, - "delta_exact" = delta_exact, - "delta_pctbias" = delta_pctbias, - #"cov_oster" = cov_oster, - #"cov_exact" = cov_exact, - "cor_oster" = cor_oster, - "cor_exact" = cor_exact, - "var(Y)" = sdy^2, - "var(X)" = sdx^2, - #"var(Z)" = sdz^2, - "var(CV)" = sdcv^2, - "Table" = fTable, - "Figure" = fig, - "conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, - "conditional RIR (fixed y)" = cond_RIR_fixedY, - "conditional RIR pi (null)" = cond_RIRpi_null, - "conditional RIR (null)" = cond_RIR_null, - "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, - "conditional RIR (rxyGz)" = cond_RIR_rxyz) - return(output) - } - - if (to_return == "print") { - cat("This function calculates delta* and the exact value of delta.") - cat("\n") - if (negest == 1) { - cat("Using the absolute value of the estimated effect, results can be interpreted by symmetry.") - cat("\n") + + ## now standardize + beta_thr = eff_thr * sdx / sdy + beta = est_eff * sdx / sdy + SE = std_err * sdx / sdy + + ## observed regression, reg y on x Given z + tyxGz = beta / SE ### this should be equal to est_eff / std_err + ryxGz = tyxGz / sqrt(df + 1 + tyxGz^2) + ## df + 1 because omitted variable is NOT included in M2 + ryxGz_M2 = tyxGz / sqrt(n_obs + tyxGz^2) + ## ryxGz_M2 is only for simulation to recover the exact number + + ## make sure R2 due to x alone is not larger than overall or observed R2 + if (ryxGz^2 > R2) {illcnd_ryxGz = T} else {illcond_ryxGz = F} + + ## calculate ryz, rxz, rxy + ryz = rzy = cal_ryz(ryxGz, R2) + rxz = rzx = cal_rxz(var_x, var_y, R2, df + 1, std_err) + ## df + 1 because omitted variable is NOT included in M2 + ### we change the n_obs to
df to recover the rxz as in the particular sample + + ## note that in the updated approach rxy is not necessary to calculate + ## rxcv_exact, ryxcv_exact and delta_exact + rxy = ryx = cal_rxy(ryxGz, rxz, ryz) + rxy_M2 = cal_rxy(ryxGz_M2, rxz, ryz) + ## rxy_M2 is only for simulation to recover the exact number + + ## baseline regression model, no z (unconditional) + eff_uncond = sqrt((var_y / var_x)) * rxy + t_uncond = rxy * sqrt(n_obs - 2)/sqrt(1 - rxy^2) + ## n_obs - 2 - adjust the df in the M1 + std_err_uncond = eff_uncond / t_uncond + R2_uncond = rxy^2 + + ## calculate delta_star + delta_star = cal_delta_star(FR2max, + R2, + R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + eff_uncond, + rxz, + n_obs) + + ## now introduce cv + sdcv = var_cv = 1 + rcvz = rzcv = 0 + v = 1 - rxz^2 # this is to simplify calculation later + D = sqrt(FR2max - R2) # same above + + ## calculate rxcv & rycv implied by Oster from delta_star (assumes rcvz=0) + rxcv_oster = rcvx_oster = delta_star * rxz * (sdcv / sdz) + if (abs(rcvx_oster) <1 && (rcvx_oster^2/v)<1) + {rcvy_oster = rycv_oster = + D * sqrt(1 - (rcvx_oster^2 / v)) + + (ryx * rcvx_oster) / (v) - + (ryz * rcvx_oster * rxz) / (v)} + + # Checking beta, R2, se generated by delta_star with a regression + verify_oster = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_oster, rxcv_oster, rcvz) + + # prepare some other values in the final Table (long output) + R2_M3_oster = as.numeric(verify_oster[1]) + + eff_x_M3_oster = as.numeric(verify_oster[2]) + # should be equivalent or very close to eff_thr + se_x_M3_oster = as.numeric(verify_oster[3]) + beta_x_M3_oster = as.numeric(verify_oster[9]) + # should be equivalent or very close to beta_thr + t_x_M3_oster = eff_x_M3_oster / se_x_M3_oster + eff_z_M3_oster = as.numeric(verify_oster[4]) + se_z_M3_oster = as.numeric(verify_oster[5]) + eff_cv_M3_oster = as.numeric(verify_oster[6]) + se_cv_M3_oster = as.numeric(verify_oster[7]) + cov_oster = verify_oster[[11]] + cor_oster 
= verify_oster[[12]] + + ## calculate the exact/true rcvx, rcvy, + ## delta (updated version that does not need rxy) + ### the idea is to calculate everything conditional on z + sdxGz = sdx * sqrt(1 - rxz^2) + sdyGz = sdy * sqrt(1 - ryz^2) + ryxcvGz_exact_sq = (FR2max - ryz^2) / (1 - ryz^2) + ### equation 7 in the manuscript + rxcvGz_exact = (ryxGz - sdxGz / sdyGz * beta_thr) / + sqrt((sdxGz^2) / (sdyGz^2) * (beta_thr^2) - + 2 * ryxGz * sdxGz / sdyGz * beta_thr + + ryxcvGz_exact_sq) + ### equation 6 in the manuscript + rycvGz_exact = ryxGz * rxcvGz_exact + + sqrt((ryxcvGz_exact_sq - ryxGz^2) * + (1 - rxcvGz_exact^2)) + ### now get unconditional exact rxcv and rycv + rycv_exact = sqrt(1 - ryz^2) * rycvGz_exact + rxcv_exact = sqrt(1 - rxz^2) * rxcvGz_exact + delta_exact = rxcv_exact / rxz + + ## previous approach - comment out, but could find in cop_pse_auxiliary + ## exact_result = cal_delta_exact + ## (ryx, ryz, rxz, beta_thr, FR2max, R2, sdx, sdz) + ## rxcv_exact = rcvx_exact = as.numeric(exact_result[1]) + ## rycv_exact = rcvy_exact = as.numeric(exact_result[2]) + ## delta_exact = as.numeric(exact_result[3]) + + # Checking beta, R2, se generated by True/Exact Delta with a regression + verify_exact = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, + rxy, rxz, rzy, rycv_exact, rxcv_exact, rcvz) + ### verify_exact[1] == verify_exact[4] == FR2max + ### verify_exact[2] == eff_thr + ### verify_exact[5] == beta_thr + + # calculate % bias in delta comparing oster's delta_star with true delta + delta_pctbias = 100 * (delta_star - delta_exact) / delta_exact + + # prepare some other values in the final Table (long output) + R2_M3 = as.numeric(verify_exact[1]) + eff_x_M3 = as.numeric(verify_exact[2]) + # should be equivalent or very close to eff_thr + se_x_M3 = as.numeric(verify_exact[3]) + beta_x_M3 = as.numeric(verify_exact[9]) + # should be equivalent or very close to beta_thr + t_x_M3 = eff_x_M3 / se_x_M3 + eff_z_M3 = as.numeric(verify_exact[4]) + se_z_M3 = 
as.numeric(verify_exact[5]) + eff_cv_M3 = as.numeric(verify_exact[6]) + se_cv_M3 = as.numeric(verify_exact[7]) + cov_exact = verify_exact[[11]] + cor_exact = verify_exact[[12]] + + verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy_M2, rxz, rzy) + R2_M2 = as.numeric(verify_pse_reg_M2[1]) + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err + eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) + se_z_M2 = as.numeric(verify_pse_reg_M2[5]) + t_x_M2 = eff_x_M2 / se_x_M2 + + verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx + se_x_M1 = as.numeric(verify_pse_reg_M1[3]) + t_x_M1 = eff_x_M1 / se_x_M1 + + delta_star_restricted = ((est_eff - eff_thr)/(eff_x_M1 - est_eff))* + ((R2_M2 - R2_M1)/(R2_M3 - R2_M2)) + + fTable <- matrix(c(R2_M1, R2_M2, R2_M3, R2_M3_oster, + # R2 for three reg models + eff_x_M1, eff_x_M2, eff_x_M3, eff_x_M3_oster, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, se_x_M3_oster, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, beta_x_M3_oster, + # std reg coef for X in three reg models + t_x_M1, t_x_M2, t_x_M3, t_x_M3_oster, + # t values for X in three reg models + + # NA, eff_z_M2, eff_z_M3, eff_z_M3_oster, + # reg coef for Z in three reg models + # NA, se_z_M2, se_z_M3, se_z_M3_oster, + # se for Z in three reg models +# NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, eff_z_M3_oster / se_z_M3_oster, + # t for Z in three reg models, + NA, NA, eff_cv_M3, eff_cv_M3_oster, + # reg coef for CV in three reg models + NA, NA, se_cv_M3, se_cv_M3_oster, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3, + eff_cv_M3_oster / se_cv_M3_oster), + # t for CV in three reg models + 
nrow = 8, ncol = 4, byrow = T) + + rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", + # "coef_Z", "SE_Z", "t_Z", + "coef_CV", "SE_CV", "t_CV") + + colnames(fTable) <- c("M1:X", "M2:X,Z", + "M3(delta_exact):X,Z,CV", + "M3(delta*):X,Z,CV") + + ## figure + figTable <- matrix(c("Baseline(M1)", eff_x_M1, R2_M1, "exact", + "Intermediate(M2)", eff_x_M2, R2, "exact", + "Final(M3)", eff_x_M3, FR2max, "exact", + "Intermediate(M2)", eff_x_M2, R2, "star", + "Final(M3)", eff_x_M3_oster, FR2max, "star"), + nrow = 5, ncol = 4, byrow = T) + colnames(figTable) <- c("ModelLabel", "coef_X", "R2", "cat") + + figTable <- as.data.frame(figTable) + figTable$ModelLabel <- as.character(figTable$ModelLabel) + figTable$ModelLabel <- factor(figTable$ModelLabel, + levels = c("Baseline(M1)", + "Intermediate(M2)", + "Final(M3)")) + figTable$cat <- as.character(figTable$cat) + figTable$cat <- factor(figTable$cat, + levels = c("exact", + "star")) + figTable$coef_X <- as.numeric(figTable$coef_X) + figTable$R2 <- as.numeric(figTable$R2) + + scale = 1/(round(max(figTable$coef_X)*10)/10) + + fig <- ggplot2::ggplot(figTable, ggplot2::aes(x = ModelLabel)) + + ggplot2::geom_point( + ggplot2::aes( + y = coef_X, + group = cat, + shape = cat + ), + color = "blue", + size = 3 + ) + + ggplot2::scale_shape_manual(values = c(16, 1)) + + ggplot2::geom_point( + ggplot2::aes( + y = R2/scale), color = "#7CAE00", shape = 18, size = 4) + + # scale_linetype_manual(values=c("solid", "dotted")) + + ggplot2::geom_line( + ggplot2::aes( + y = R2/scale, group = cat), + linetype = "solid", color = "#7CAE00") + + ggplot2::geom_line( + ggplot2::aes( + y = coef_X, group = cat, linetype = cat), color = "blue") + + ggplot2::scale_y_continuous( + # Features of the first axis + name = "Coeffcient (X)", + # Add a second axis and specify its features + sec.axis = ggplot2::sec_axis(~.* scale, + name="R2")) + + ggplot2::theme(axis.title.x = ggplot2::element_blank(), + legend.position = "none", + axis.line.y.right = 
ggplot2::element_line( + color = "#7CAE00"), + axis.title.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.text.y.right = ggplot2::element_text( + color = "#7CAE00"), + axis.line.y.left = ggplot2::element_line( + color = "blue"), + axis.title.y.left = ggplot2::element_text( + color = "blue"), + axis.text.y.left = ggplot2::element_text( + color = "blue"), + axis.line.x.bottom = ggplot2::element_line( + color = "black"), + axis.text.x.bottom = ggplot2::element_text( + color = "black")) + + ############################################## + ######### conditional RIR #################### + + # calculating critical r + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 2) * -1 + } else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 2) + } + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 2)) + + # final solutions + cond_RIRpi_fixedY <- (R2 - ryz^2 + ryz^2 * critical_r^2 - critical_r^2) / + (R2 - ryz^2 + ryz^2 * critical_r^2) + cond_RIR_fixedY <- cond_RIRpi_fixedY * n_obs + + cond_RIRpi_null <- 1 - sqrt(critical_r^2 / + (R2 - ryz^2 + ryz^2 * critical_r^2)) + cond_RIR_null <- cond_RIRpi_null * n_obs + + cond_RIRpi_rxyz <- 1 - sqrt((critical_r^2 * (1 - ryz^2)) / + (R2 - ryz^2)) + cond_RIR_rxyz <- cond_RIRpi_rxyz * n_obs + + ############################################## + ######### output ############################# + + if (to_return == "raw_output") { + if (negest == 1) { + cat("Using the absolute value of the estimated + effect, results can be interpreted by symmetry.") + cat("\n") + } + output <- list("delta*" = delta_star, + "delta*restricted" = delta_star_restricted, + "delta_exact" = delta_exact, + "delta_pctbias" = delta_pctbias, + #"cov_oster" = cov_oster, + #"cov_exact" = cov_exact, + "cor_oster" = cor_oster, + "cor_exact" = cor_exact, + "var(Y)" = sdy^2, + "var(X)" = sdx^2, + #"var(Z)" = sdz^2, + "var(CV)" = sdcv^2, + "Table" = fTable, + "Figure" = fig, + 
"conditional RIR pi (fixed y)" = cond_RIRpi_fixedY, + "conditional RIR (fixed y)" = cond_RIR_fixedY, + "conditional RIR pi (null)" = cond_RIRpi_null, + "conditional RIR (null)" = cond_RIR_null, + "conditional RIR pi (rxyGz)" = cond_RIRpi_rxyz, + "conditional RIR (rxyGz)" = cond_RIR_rxyz) + return(output) } - cat(sprintf("delta* is %.3f (assuming no covariates in the baseline model M1), the exact delta is %.3f, with a bias of %.3f%%", delta_star, delta_exact, delta_pctbias)) - cat("\n") - cat(sprintf("With delta*, the coefficient in the final model will be %.3f. With the exact delta, the coefficient will be %.3f.", - eff_x_M3_oster,eff_x_M3)) - cat("\n") - cat("Use to_return = raw_ouput to see more specific results and graphic presentation of the result.") - cat("\n") - cat("\n") - cat("This function also calculates conditional RIR that invalidates the statistical inference.") - cat("\n") - cat("If the replacement cases have a fixed value, then RIR =", cond_RIR_fixedY) - cat("\n") - cat("If the replacement cases follow a null distribution, then RIR =", cond_RIR_null) - cat("\n") - cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", cond_RIR_rxyz) - cat("\n") - cat("\n") - } - -} \ No newline at end of file + + if (to_return == "print") { + cat("This function calculates delta* and the exact value of delta.") + cat("\n") + if (negest == 1) { + cat("Using the absolute value of the estimated effect, + results can be interpreted by symmetry.") + cat("\n") + } + cat(sprintf("delta* is %.3f (assuming no covariates in the baseline + model M1), the exact delta is %.3f, with a bias of %.3f%%", + delta_star, delta_exact, delta_pctbias)) + cat("\n") + cat(sprintf("With delta*, the coefficient in the final model will be + %.3f. 
With the exact delta, the coefficient will be %.3f.", + eff_x_M3_oster,eff_x_M3)) + cat("\n") + cat("Use to_return = raw_ouput to see more specific results and graphic + presentation of the result.") + cat("\n") + cat("\n") + cat("This function also calculates conditional RIR that invalidates the + statistical inference.") + cat("\n") + cat("If the replacement cases have a fixed value, then RIR =", + cond_RIR_fixedY) + cat("\n") + cat("If the replacement cases follow a null distribution, then RIR =", + cond_RIR_null) + cat("\n") + cat("If the replacement cases satisfy rxy|Z = 0, then RIR =", + cond_RIR_rxyz) + cat("\n") + cat("\n") + } + +} diff --git a/R/test_pse.R b/R/test_pse.R index 0bdb147f..ba8ea77b 100644 --- a/R/test_pse.R +++ b/R/test_pse.R @@ -17,13 +17,21 @@ test_pse <- function(est_eff, df = n_obs - n_covariates - 3 ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(sdx > 0)) {stop("Did not run! Standard deviation of x needs to be greater than zero.")} - if (!(sdy > 0)) {stop("Did not run! Standard deviation of y needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. Please specify a less complex model to use KonFound-It.")} + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(sdx > 0)) {stop("Did not run! Standard deviation of x + needs to be greater than zero.")} + if (!(sdy > 0)) {stop("Did not run! Standard deviation of y + needs to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. Please specify a less + complex model to use KonFound-It.")} if (!(0 < R2)) {stop("Did not run! R2 needs to be greater than zero.")} if (!(R2 < 1)) {stop("Did not run! 
R2 needs to be less than one.")} - if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) {stop("Did not run! Entered values produced Rxz^2 <=0, consider adding more significant digits to your entered values.")} + if (!(1-((sdy^2/sdx^2)*(1-R2)/((df+1) * std_err^2))>0)) + {stop("Did not run! Entered values produced Rxz^2 <=0, + consider adding more significant digits to your entered values.")} ## now standardize @@ -51,19 +59,24 @@ test_pse <- function(est_eff, rxcvGz = as.numeric(Gz_pse[1]) rycvGz = as.numeric(Gz_pse[2]) - # convert conditional correlations to unconditional correlations to be used in new regression + # convert conditional correlations to unconditional + # correlations to be used in new regression rxcv = rxcvGz * sqrt((1 - rcvz^2) * (1 - rxz^2)) + rxz * rcvz rycv = rycvGz * sqrt((1 - rcvz^2) * (1 - rzy^2)) + rzy * rcvz - verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, rzy, rycv, rxcv, rcvz) - verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, rzcv, sdy, sdx) + verify_pse_reg_M3 = verify_reg_Gzcv(n_obs, sdx, sdy, sdz, sdcv, rxy, rxz, + rzy, rycv, rxcv, rcvz) + verfiy_pse_manual_thr = verify_manual(rxy, rxz, rxcv, ryz, rycv, + rzcv, sdy, sdx) cov_pse = verify_pse_reg_M3[[11]] # prepare some other values in the final Table (long output) R2_M3 = as.numeric(verify_pse_reg_M3[1]) - eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) # should be equivalent or very close to eff_thr + eff_x_M3 = as.numeric(verify_pse_reg_M3[2]) + # should be equivalent or very close to eff_thr se_x_M3 = as.numeric(verify_pse_reg_M3[3]) - beta_x_M3 = as.numeric(verify_pse_reg_M3[9]) # should be equivalent or very close to thr + beta_x_M3 = as.numeric(verify_pse_reg_M3[9]) + # should be equivalent or very close to thr t_x_M3 = eff_x_M3 / se_x_M3 eff_z_M3 = as.numeric(verify_pse_reg_M3[4]) se_z_M3 = as.numeric(verify_pse_reg_M3[5]) @@ -72,30 +85,44 @@ test_pse <- function(est_eff, verify_pse_reg_M2 = verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy, rxz, 
rzy) R2_M2 = as.numeric(verify_pse_reg_M2[1]) - eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) # should be equivalent or very close to est_eff - se_x_M2 = as.numeric(verify_pse_reg_M2[3]) # should be equivalent or very close to std_err + eff_x_M2 = as.numeric(verify_pse_reg_M2[2]) + # should be equivalent or very close to est_eff + se_x_M2 = as.numeric(verify_pse_reg_M2[3]) + # should be equivalent or very close to std_err eff_z_M2 = as.numeric(verify_pse_reg_M2[4]) se_z_M2 = as.numeric(verify_pse_reg_M2[5]) t_x_M2 = eff_x_M2 / se_x_M2 verify_pse_reg_M1 = verify_reg_uncond(n_obs, sdx, sdy, rxy) - R2_M1 = as.numeric(verify_pse_reg_M1[1]) # should be equivalent or very close to rxy^2 - eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) # should be equivalent or very close to rxy*sdy/sdx + R2_M1 = as.numeric(verify_pse_reg_M1[1]) + # should be equivalent or very close to rxy^2 + eff_x_M1 = as.numeric(verify_pse_reg_M1[2]) + # should be equivalent or very close to rxy*sdy/sdx se_x_M1 = as.numeric(verify_pse_reg_M1[3]) t_x_M1 = eff_x_M1 / se_x_M1 fTable <- matrix(c(R2_M1, R2_M2, R2_M3, # R2 for three reg models - eff_x_M1, eff_x_M2, eff_x_M3, # unstd reg coef for X in three reg models - se_x_M1, se_x_M2, se_x_M3, # unstd reg se for X in three reg models - rxy, ryxGz, beta_x_M3, # std reg coef for X in three reg models - t_x_M1, t_x_M2, t_x_M3, # t values for X in three reg models - NA, eff_z_M2, eff_z_M3, # reg coef for Z in three reg models - NA, se_z_M2, se_z_M3, # se for Z in three reg models - NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, # t for Z in three reg models, - NA, NA, eff_cv_M3, # reg coef for CV in three reg models - NA, NA, se_cv_M3, # se for CV in three reg models - NA, NA, eff_cv_M3 / se_cv_M3), # t for CV in three reg models - nrow = 11, ncol = 3, byrow = T) + eff_x_M1, eff_x_M2, eff_x_M3, + # unstd reg coef for X in three reg models + se_x_M1, se_x_M2, se_x_M3, + # unstd reg se for X in three reg models + rxy, ryxGz, beta_x_M3, + # std reg coef for X in three 
reg models + t_x_M1, t_x_M2, t_x_M3, + # t values for X in three reg models + NA, eff_z_M2, eff_z_M3, + # reg coef for Z in three reg models + NA, se_z_M2, se_z_M3, + # se for Z in three reg models + NA, eff_z_M2 / se_z_M2, eff_z_M3 / se_z_M3, + # t for Z in three reg models, + NA, NA, eff_cv_M3, + # reg coef for CV in three reg models + NA, NA, se_cv_M3, + # se for CV in three reg models + NA, NA, eff_cv_M3 / se_cv_M3), + # t for CV in three reg models + nrow = 11, ncol = 3, byrow = T) rownames(fTable) <- c("R2", "coef_X", "SE_X", "std_coef_X", "t_X", "coef_Z", "SE_Z", "t_Z", @@ -114,17 +141,22 @@ test_pse <- function(est_eff, } if (to_return == "print") { - cat("This function calculates the conditions that set the estimated effect approximately equal to the threshold while preserving the standard error.") + cat("This function calculates the conditions that set the + estimated effect approximately equal to the threshold + while preserving the standard error.") cat("\n") - cat(sprintf("The correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcv, rycv)) + cat(sprintf("The correlation between X and CV is %.3f, + and the correlation between Y and CV is %.3f.", rxcv, rycv)) cat("\n") - cat(sprintf("Conditional on the covariates, the correlation between X and CV is %.3f, and the correlation between Y and CV is %.3f.", rxcvGz, rycvGz)) + cat(sprintf("Conditional on the covariates, the correlation between X + and CV is %.3f, and the correlation between Y and CV + is %.3f.", rxcvGz, rycvGz)) cat("\n") - cat(sprintf("Including such CV, the coefficient changes to %.3f, and standard error is %.3f.", eff_x_M3, se_x_M3)) + cat(sprintf("Including such CV, the coefficient changes to %.3f, + and standard error is %.3f.", eff_x_M3, se_x_M3)) cat("\n") cat("Use to_return = raw_ouput to see more specific results.") } - + } - \ No newline at end of file diff --git a/R/test_sensitivity.R b/R/test_sensitivity.R index 8a160d30..7fa87a18 100644 --- 
a/R/test_sensitivity.R +++ b/R/test_sensitivity.R @@ -1,25 +1,30 @@ # helpers for the core sensitivity analysis function create_konfound_class <- function(x) { - structure(x, class = "konfound") + structure(x, class = "konfound") } #' Concise summary of konfound output -#' @details Prints a concise summary of konfound output with multiple types of data specified in the to_return argument +#' @details Prints a concise summary of konfound output with multiple types +#' of data specified in the to_return argument #' @param object A `konfound` object #' @param ... Additional arguments +#' @method summary konfound #' @export -summary.konfound <- function(object, ...) { - cat("Created", length(object), "forms of output. To access type: \n") - cat("\n") - for (name in names(object)) { - cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") - } + +summary.konfound <- function(object, ...) { + cat("Created", length(object), "forms of output. To access type: \n") + cat("\n") + + for (name in names(object)) { + cat(rlang::expr_text(substitute(object)), "$", name, "\n", sep = "") + } } -# Main function to test sensitivity to be wrapped with pkonfound(), konfound(), and mkonfound() +# Main function to test sensitivity to be wrapped with pkonfound(), +# konfound(), and mkonfound() test_sensitivity <- function(est_eff, std_err, @@ -32,114 +37,135 @@ test_sensitivity <- function(est_eff, to_return, model_object, tested_variable) { - if (nu != 0) warning("You entered a non-zero null hypothesis about an effect; this is being interpreted in terms of a partial correlation. Sampling variability is not accounted for.") - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. 
Please specify a less complex model to use KonFound-It.")} - - # calculating statistics used in every case - if (est_eff < 0) { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } - else { - critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - beta_threshold <- critical_t * std_err - # dealing with cases where hypotheses other than whether est_eff differs from 0 - if (nu != 0) { - est_eff <- abs(est_eff - nu) - } else { - est_eff <- est_eff - 0 - } # this is just to make what this is doing evident - - # for replacement of cases approach - - # calculating percentage of effect and number of observations to sustain or invalidate inference - if (abs(est_eff) > abs(beta_threshold)) { - bias <- 100 * (1 - (beta_threshold / est_eff)) - recase <- round(n_obs * (bias / 100)) - } - else if (abs(est_eff) < abs(beta_threshold)) { - sustain <- 100 * (1 - (est_eff / beta_threshold)) - recase <- round(n_obs * (sustain / 100)) - } - else if (est_eff == beta_threshold) { - stop("The coefficient is exactly equal to the threshold.") - } - - # for correlation-based approach - - # transforming t into r - obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + ((est_eff / std_err)^2))) - # finding critical r - critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) - # calculating threshold - if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { - mp <- -1 - } else { - mp <- 1 - } - # calculating impact of the confounding variable - itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) - # finding correlation of confound to invalidate / sustain inference - r_con <- round(sqrt(abs(itcv)), 3) - - # if (component_correlations == FALSE){ - # rsq <- # has to come from some kind of model object - # varY <- # has to come from some kind of model object - # varX <- # has to come from some kind of model object - # sdX <- # has to come from some kind of model object - # - # rsqYZ = (((obs_r ^ 2) 
- Rsq) / ((obs_r ^ 2) - 1)) - # - # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) - # - # r_ycv = r_con * sqrt(1 - rsqYZ) - # r_xcv = r_con * sqrt(1 - rsqXZ) - # # before conditioning on observed covariates - # } - - # output dispatch - - if (length(to_return) > 1) { - to_return <- to_return[!(to_return == "print")] - - konfound_output <- purrr::map( - to_return, - ~ test_sensitivity( - est_eff = est_eff, - std_err = std_err, - n_obs = n_obs, - n_covariates = n_covariates, - alpha = alpha, - tails = tails, - nu = nu, - to_return = . - ) - ) - konfound_output <- create_konfound_class(konfound_output) - names(konfound_output) <- to_return - output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index) - - cat("\n") - message(paste("Print output created by default. Created", length(konfound_output), "other forms of output. Use list indexing or run summary() on the output to see how to access.")) - - return(konfound_output) - } - - else if (to_return == "raw_output") { - return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, recase, obs_r, critical_r, r_con, itcv)) - } else if (to_return == "thresh_plot") { # this still makes sense for NLMs (just not quite as accurate) - return(plot_threshold(beta_threshold = beta_threshold, est_eff = est_eff)) - } else if (to_return == "corr_plot") { - return(plot_correlation(r_con = r_con, obs_r = obs_r, critical_r = critical_r)) - } else if (to_return == "print") { - return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, obs_r, critical_r, r_con, itcv, alpha, index)) - } else if (to_return == "table") { - return(output_table(model_object, tested_variable)) - } else { - stop("to_return must be set to 'raw_output', 'print', 'table', 'thresh_plot', or 'corr_plot' or some combination thereof") - } + if (nu != 0) warning("You entered a non-zero null hypothesis about an + effect;this is being interpreted in terms of a 
partial + correlation.Sampling variability is not accounted for.") + + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs to + be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to + the number of observations and covariates. Please specify a + less complex model to use KonFound-It.")} + + # calculating statistics used in every case + if (est_eff < 0) { + critical_t <- stats::qt(1 - (alpha / tails), + n_obs - n_covariates - 3) * -1 + } + else { + critical_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + beta_threshold <- critical_t * std_err + # dealing with cases where hypotheses other than whether + # est_eff differs from 0 + if (nu != 0) { + est_eff <- abs(est_eff - nu) + } else { + est_eff <- est_eff - 0 + } # this is just to make what this is doing evident + + # for replacement of cases approach + + # calculating percentage of effect and number of observations to + # sustain or invalidate inference + if (abs(est_eff) > abs(beta_threshold)) { + bias <- 100 * (1 - (beta_threshold / est_eff)) + recase <- round(n_obs * (bias / 100)) + } + else if (abs(est_eff) < abs(beta_threshold)) { + sustain <- 100 * (1 - (est_eff / beta_threshold)) + recase <- round(n_obs * (sustain / 100)) + } + else if (est_eff == beta_threshold) { + stop("The coefficient is exactly equal to the threshold.") + } + + # for correlation-based approach + + # transforming t into r + obs_r <- (est_eff / std_err) / sqrt(((n_obs - n_covariates - 3) + + ((est_eff / std_err)^2))) + # finding critical r + critical_r <- critical_t / sqrt((critical_t^2) + (n_obs - n_covariates - 3)) + # calculating threshold + if ((abs(obs_r) > abs(critical_r)) & ((obs_r * critical_r) > 0)) { + mp <- -1 + } else { + mp <- 1 + } + # calculating impact of the confounding variable + itcv <- (obs_r - critical_r) / (1 + mp * abs(critical_r)) + # finding correlation of confound to 
invalidate / sustain inference + r_con <- round(sqrt(abs(itcv)), 3) + + # if (component_correlations == FALSE){ + # rsq <- # has to come from some kind of model object + # varY <- # has to come from some kind of model object + # varX <- # has to come from some kind of model object + # sdX <- # has to come from some kind of model object + # + # rsqYZ = (((obs_r ^ 2) - Rsq) / ((obs_r ^ 2) - 1)) + # + # rsqXZ = max(0, 1 - ((VarY * (1 - RSQ))) / + # (VarX * (n_obs - n_covariates - 2) * (sdx * 2))) + # + # r_ycv = r_con * sqrt(1 - rsqYZ) + # r_xcv = r_con * sqrt(1 - rsqXZ) + # # before conditioning on observed covariates + # } + + # output dispatch + + if (length(to_return) > 1) { + to_return <- to_return[!(to_return == "print")] + + konfound_output <- purrr::map( + to_return, + ~ test_sensitivity( + est_eff = est_eff, + std_err = std_err, + n_obs = n_obs, + n_covariates = n_covariates, + alpha = alpha, + tails = tails, + nu = nu, + to_return = . + ) + ) + konfound_output <- create_konfound_class(konfound_output) + names(konfound_output) <- to_return + output_print(est_eff, beta_threshold, bias, sustain, nu, + recase, obs_r, critical_r, r_con, itcv, alpha, index) + + cat("\n") + message(paste( + "Print output created by default. Created", length(konfound_output), + "other forms of output. 
Use list indexing or run summary() on the + output to see how to access.")) + + return(konfound_output) + } + + else if (to_return == "raw_output") { + return(output_df(est_eff, beta_threshold, est_eff, bias, sustain, + recase, obs_r, critical_r, r_con, itcv)) + } else if (to_return == "thresh_plot") { + # this still makes sense for NLMs (just not quite as accurate) + return(plot_threshold(beta_threshold = beta_threshold, + est_eff = est_eff)) + } else if (to_return == "corr_plot") { + return(plot_correlation(r_con = r_con, obs_r = obs_r, + critical_r = critical_r)) + } else if (to_return == "print") { + return(output_print(est_eff, beta_threshold, bias, sustain, nu, recase, + obs_r, critical_r, r_con, itcv, alpha, index)) + } else if (to_return == "table") { + return(output_table(model_object, tested_variable)) + } else { + stop("to_return must be set to 'raw_output', 'print', 'table', + 'thresh_plot', or 'corr_plot' or some combination thereof") + } } diff --git a/R/test_sensitivity_ln.R b/R/test_sensitivity_ln.R index cca907ae..e04e3297 100644 --- a/R/test_sensitivity_ln.R +++ b/R/test_sensitivity_ln.R @@ -1,5 +1,31 @@ -# Main function to test sensitivity for non-linear models to be wrapped with pkonfound(), konfound(), and mkonfound() +#' Test Sensitivity for Non-linear Models +#' +#' This function performs sensitivity analysis for non-linear models. +#' It is used in conjunction with `pkonfound()`, `konfound()`, and `mkonfound()`. +#' +#' @param est_eff Estimated effect size. +#' @param std_err Standard error of the estimated effect. +#' @param n_obs Number of observations in the study. +#' @param n_covariates Number of covariates in the model. +#' @param n_treat Number of cases in the treatment group. +#' @param switch_trm Logical value indicating whether to switch +#' treatment and control groups (default: TRUE). +#' @param replace Specifies the group for base rate calculation +#' ('control' or 'sample'). 
+#' @param alpha Significance level for hypothesis testing. +#' @param tails Number of tails for hypothesis testing. +#' @param nu Hypothesized value to test the effect against. +#' @param to_return Type of output to return ('raw_output', 'print', or other). +#' @param model_object A model object used in the sensitivity analysis. +#' @param tested_variable Name of the variable being tested in the model. +#' @return Depending on `to_return`,a list of analysis results or printed output +#' @importFrom stats qt +#' @importFrom crayon bold underline +#' @importFrom purrr map +#' @export +# Main function to test sensitivity for non-linear models to +# be wrapped with pkonfound(), konfound(), and mkonfound() test_sensitivity_ln <- function(est_eff, std_err, n_obs, @@ -13,347 +39,399 @@ test_sensitivity_ln <- function(est_eff, to_return, model_object, tested_variable) { - - ## error message if input is inappropriate - if (!(std_err > 0)) {stop("Did not run! Standard error needs to be greater than zero.")} - if (!(n_obs > n_covariates + 3)) {stop("Did not run! There are too few observations relative to the number of observations and covariates. 
Please specify a less complex model to use KonFound-It.")} - - if (est_eff < 0) { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 - } else { - thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) - } - - # stop message - if (n_obs <= 0 || n_treat <= 0) { - stop("Please enter positive integers for sample size and number of treatment group cases.") - } - if (n_obs <= n_treat) { - stop("The total sample size should be larger than the number of treatment group cases.") - } - - odds_ratio <- exp(est_eff) - - # updated approach to deal with imaginary - minse <- sqrt((4 * n_obs + - sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * - ((4 + 4 * odds_ratio^2) / odds_ratio - 7.999999)))/ - (2 * n_treat * (n_obs - n_treat))) - # check if the implied table solution may contain imaginary numbers - changeSE <- F - if (std_err < minse) { - haveimaginary <- T - changeSE <- T - user_std_err <- std_err - std_err <- minse - } + ## error message if input is inappropriate + if (!(std_err > 0)) {stop("Did not run! Standard error needs + to be greater than zero.")} + if (!(n_obs > n_covariates + 3)) + {stop("Did not run! There are too few observations relative to the + number of observations and covariates. 
Please specify a less + complex model to use KonFound-It.")} + + if (est_eff < 0) { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) * -1 + } else { + thr_t <- stats::qt(1 - (alpha / tails), n_obs - n_covariates - 3) + } + + # stop message + if (n_obs <= 0 || n_treat <= 0) { + stop("Please enter positive integers for sample size and number of + treatment group cases.") + } + if (n_obs <= n_treat) { + stop("The total sample size should be larger than the number of + treatment group cases.") + } + + odds_ratio <- exp(est_eff) + + # updated approach to deal with imaginary + minse <- sqrt((4 * n_obs + + sqrt(16 * n_obs^2 + 4 * n_treat * (n_obs - n_treat) * + ((4 + 4 * odds_ratio^2) / + odds_ratio - 7.999999)))/ + (2 * n_treat * (n_obs - n_treat))) + # check if the implied table solution may contain imaginary numbers + changeSE <- F + if (std_err < minse) { + haveimaginary <- T + changeSE <- T + user_std_err <- std_err + std_err <- minse + } + + # n_treat is the number of observations in the treatment group (c+d) + # n_cnt is the number of observations in the control group (a+b) + n_cnt <- n_obs - n_treat + #t_ob is the t value calculated based on observed estimate and standard error + t_ob <- est_eff / std_err + # invalidate_ob is true - the observed result is significant + # - we are invalidating the observed result + invalidate_ob <- isinvalidate(thr_t, t_ob) + # dcroddsratio_ob is true - our goal is to decrease the odds ratio + dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) - # n_treat is the number of observations in the treatment group (c+d) - # n_cnt is the number of observations in the control group (a+b) - n_cnt <- n_obs - n_treat - # t_ob is the t value calculated based on observed estimate and standard error - t_ob <- est_eff / std_err - # invalidate_ob is true - the observed result is significant - we are invalidating the observed result - invalidate_ob <- isinvalidate(thr_t, t_ob) - # dcroddsratio_ob is true - our goal is to decrease the 
odds ratio - dcroddsratio_ob <- isdcroddsratio(thr_t, t_ob) - - # previous approach to deal with imaginary - # to record the original treatment cases in case we need to adjust it - # user_ntrm <- n_treat - # check if the implied table solution may contain imaginary numbers - # haveimaginary <- F - # changepi <- F - # set the default value for whether we need and can adjust pi (ratio of treatment cases) - # to remove the imaginary part - # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * - # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + n_treat^2 * std_err^4)) - # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * (4 - 0.25 * n_obs * std_err^2)) - # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) - # if (keyimagin > 0) { + # previous approach to deal with imaginary + # to record the original treatment cases in case we need to adjust it + # user_ntrm <- n_treat + # check if the implied table solution may contain imaginary numbers + # haveimaginary <- F + # changepi <- F + # set the default value for whether we need and can adjust pi + # (ratio of treatment cases) + # to remove the imaginary part + # keyimagin <- (4 + 4 * odds_ratio^2 + odds_ratio * + # (-8 + 4 * n_obs * std_err^2 - n_obs * n_treat * std_err^4 + # + n_treat^2 * std_err^4)) + # minimgain <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + n_obs * std_err^2 * + # (4 - 0.25 * n_obs * std_err^2)) + # keyx1 <- 4 + 4 * odds_ratio^2 + odds_ratio * (-8 + 4 * n_obs * std_err^2) + # if (keyimagin > 0) { # haveimaginary <- T # if (minimgain <= 0 && keyx1 > 0) { - # changepi <- T - # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) - # n_cnt <- n_obs - n_treat - # } else { - # stop("Cannot generate a usable contingency table; Please consider using the Pearson's chi-squared approach (under development).") - # } - #} - - # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table - a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, 
n_treat) - b1 <- n_cnt - a1 - c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) - d1 <- n_treat - c1 - - # a2, b2, c2, d2 are the second solution for the 4 cells in the contingency table - a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) - b2 <- n_cnt - a2 - c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) - d2 <- n_treat - c2 - - # Differences between these two sets of solutions: - ### a1 c1 are small while a2 c2 are large - ### b1 d1 are large while b2 d2 are small - ### the goal is to get fewest swithes to invalidate the inference - ### remove the solution if one cell has fewerer than 5 cases or negative cells or nan cells - check1 <- check2 <- TRUE - if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) - || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { - check1 <- FALSE - } - - if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) - || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { - check2 <- FALSE - } - - if (check1) { - table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) - solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - if (check2) { - table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) - solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - if (!check1 && !check2) { - stop("Cannot generate a usable contingency table!") - } - - # get the number of switches for solutions that satisfy the requirements - if (check1 && check2) { - final_switch1 <- solution1$final_switch - final_switch2 <- solution2$final_switch - if (final_switch1 < final_switch2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } else { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + # changepi <- T + # n_treat <- n_obs * get_pi(odds_ratio, std_err, n_obs, n_treat) + # n_cnt <- n_obs - n_treat + # } else { + # stop("Cannot generate a usable contingency table; Please 
consider + # using the Pearson's chi-squared approach (under development).") + # } + #} + + # a1, b1, c1, d1 are one solution for the 4 cells in the contingency table + a1 <- get_a1_kfnl(odds_ratio, std_err, n_obs, n_treat) + b1 <- n_cnt - a1 + c1 <- get_c1_kfnl(odds_ratio, std_err, n_obs, n_treat) + d1 <- n_treat - c1 + + # a2, b2, c2, d2 are the second solution for the 4 cells in the + # contingency table + a2 <- get_a2_kfnl(odds_ratio, std_err, n_obs, n_treat) + b2 <- n_cnt - a2 + c2 <- get_c2_kfnl(odds_ratio, std_err, n_obs, n_treat) + d2 <- n_treat - c2 + + # Differences between these two sets of solutions: + ### a1 c1 are small while a2 c2 are large + ### b1 d1 are large while b2 d2 are small + ### the goal is to get fewest switches to invalidate the inference + ### remove the solution if one cell has fewer than 5 cases + ### or negative cells or nan cells + check1 <- check2 <- TRUE + if (!(n_cnt >= a1 && a1 >= 5 && n_cnt >= b1 && b1 >= 5 && + n_treat >= c1 && c1 >= 5 && n_treat >= d1 && d1 >= 5) + || is.nan(a1) || is.nan(b1) || is.nan(c1) || is.nan(d1)) { + check1 <- FALSE + } + + if (!(n_cnt >= a2 && a2 >= 5 && n_cnt >= b2 && b2 >= 5 && + n_treat >= c2 && c2 >= 5 && n_treat >= d2 && d2 >= 5) + || is.nan(a2) || is.nan(b2) || is.nan(c2) || is.nan(d2)) { + check2 <- FALSE + } + + if (check1) { + table_bstart1 <- get_abcd_kfnl(a1, b1, c1, d1) + solution1 <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } + if (check2) { + table_bstart2 <- get_abcd_kfnl(a2, b2, c2, d2) + solution2 <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + if (!check1 && !check2) { + stop("Cannot generate a usable contingency table!") + } + + # get the number of switches for solutions that satisfy the requirements + if (check1 && check2) { + final_switch1 <- solution1$final_switch + final_switch2 <- solution2$final_switch + if (final_switch1 < final_switch2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) + } else { + final_solution <-
getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + } + + if (check1 && !check2) { + final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) } - } - - if (check1 && !check2) { - final_solution <- getswitch(table_bstart1, thr_t, switch_trm, n_obs) - } - - if (!check1 && check2) { - final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) - } - - final <- final_solution$final_switch - a <- final_solution$table_start[1,1] - b <- final_solution$table_start[1,2] - c <- final_solution$table_start[2,1] - d <- final_solution$table_start[2,2] - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success," - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure," - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (final_solution$needtworows) { - final_extra <- final_solution$final_extra + + if (!check1 && check2) { + final_solution <- getswitch(table_bstart2, thr_t, switch_trm, n_obs) + } + + final <- final_solution$final_switch + a <- final_solution$table_start[1,1] + b <- final_solution$table_start[1,2] + c <- final_solution$table_start[2,1] + d <- final_solution$table_start[2,2] + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure 
to control success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && !dcroddsratio_ob) { - transferway_extra <- "control success to control failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure," - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success," + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success," - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure," + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (invalidate_ob) { - change <- "To invalidate the inference," - } else { - if (est_eff >= 0) { - change <- "To sustain an inference for a positive treatment effect," - } else { - change <- "To 
sustain an inference for a negative treatment effect," + + if (final_solution$needtworows) { + final_extra <- final_solution$final_extra + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure," + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success," + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - } - - if (!final_solution$needtworows & final_solution$final_switch > 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + + if (invalidate_ob) { + change <- "To invalidate the inference," } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + if (est_eff >= 0) { + change <- "To sustain an inference for a positive treatment effect," + } else { + change <- "To sustain an inference for a negative treatment effect," + } } - - conclusion1b <- paste( - sprintf("This is 
equivalent to transferring %d", final_solution$final_switch), - c("cases from"), transferway) - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - - } else if (!final_solution$needtworows & final_solution$final_switch == 1) { - conclusion1 <- paste( - change, sprintf("you would need to replace %d", RIR), RIRway, "cases") - - if (replace == "control") { - conclusion1a <- sprintf("with cases for which the probability of failure in the control group applies (RIR = %d).", RIR) + if (!final_solution$needtworows & final_solution$final_switch > 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the + control group applies (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf( + "with cases for which the probability of failure in the entire + sample applies (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("cases from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." + + } else if (!final_solution$needtworows & final_solution$final_switch == 1) { + conclusion1 <- paste( + change, sprintf("you would need to replace %d", + RIR), RIRway, "cases") + + if (replace == "control") { + conclusion1a <- sprintf("with cases for which the probability of + failure in the control group applies + (RIR = %d).", RIR) + } else { + conclusion1a <- sprintf("with cases for which the probability of + failure in the entire sample applies + (RIR = %d).", RIR) + } + + conclusion1b <- paste( + sprintf("This is equivalent to transferring %d", + final_solution$final_switch), + c("case from"), transferway) + + conclusion1c <- + "as shown, from the Implied Table to the Transfer Table." 
+ } else { - conclusion1a <- sprintf("with cases for which the probability of failure in the entire sample applies (RIR = %d).", RIR) + conclusion1 <- paste( + change, c( + "only transferring cases from"), transferway, "is not enough.") + + conclusion1b <- paste(sprintf("We also need to transfer %d cases from", + final_solution$final_extra), + transferway_extra, c("as shown, from the + User-entered Table to the + Transfer Table.")) + + conclusion1c <- paste(sprintf("This means we need to replace %d of", + RIR), RIRway, + sprintf("with null hypothesis cases; and + replace %d", RIR_extra), RIRway_extra, + c("with null hypothesis cases to change + the inference.") + ) } - conclusion1b <- paste( - sprintf("This is equivalent to transferring %d", final_solution$final_switch), - c("case from"), transferway) - - conclusion1c <- "as shown, from the Implied Table to the Transfer Table." - - } else { - conclusion1 <- paste( - change, c("only transferring cases from"), transferway, "is not enough.") - - conclusion1b <- paste(sprintf("We also need to transfer %d cases from", final_solution$final_extra), - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + conclusion2 <- sprintf( + "For the Implied Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_start, final_solution$std_err_start, + final_solution$t_start + ) - conclusion1c <- paste(sprintf("This means we need to replace %d of", RIR), RIRway, - sprintf("with null hypothesis cases; and replace %d", RIR_extra), RIRway_extra, - c("with null hypothesis cases to change the inference.") + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimate of %.3f, + with a SE of %.3f and a t-ratio of %.3f.", + final_solution$est_eff_final, final_solution$std_err_final, + final_solution$t_final ) - } - - conclusion2 <- sprintf( - "For the Implied Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - 
final_solution$est_eff_start, final_solution$std_err_start, final_solution$t_start - ) - - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimate of %.3f, with a SE of %.3f and a t-ratio of %.3f.", - final_solution$est_eff_final, final_solution$std_err_final, final_solution$t_final - ) - - notice <- "Note: Values have been rounded to the nearest integer." - noticeb <- "This may cause a little change to the estimated effect for the Implied Table." - - if (changeSE) { - notice_SE <- sprintf( - "In order to generate a usable implied contingency table, we increased the standard error to %.3f (the original one is %.3f).", - std_err, user_std_err) - } - - if (final_solution$needtworows) { - total_switch <- final_solution$final_switch+final_solution$final_extra - total_RIR <- RIR + RIR_extra - } else { - total_switch <- final_solution$final_switch - total_RIR <- RIR - } - - # result <- list(conclusion1, - # Implied_Table = final_solution$table_start, notice, Transfer_Table = final_solution$table_final, - # conclusion2, conclusion3, - # total_RIR = total_RIR, total_switch = total_switch - # ) - - # output dispatch - if (to_return == "raw_output") { + + notice <- "Note: Values have been rounded to the nearest integer." + noticeb <- + "This may cause a little change to the estimated + effect for the Implied Table." 
if (changeSE) { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR, - notice_SE) + notice_SE <- sprintf( + "In order to generate a usable implied contingency table, we + increased the standard error to %.3f (the original one is %.3f).", + std_err, user_std_err) + } + + if (final_solution$needtworows) { + total_switch <- final_solution$final_switch+final_solution$final_extra + total_RIR <- RIR + RIR_extra } else { - result <- list(conclusion1, - conclusion1b, - conclusion1c, - Implied_Table = final_solution$table_start, - notice, - Transfer_Table = final_solution$table_final, - conclusion2, - conclusion3, - RIR = RIR) + total_switch <- final_solution$final_switch + total_RIR <- RIR } - - return(result) - } else if (to_return == "print") { + # result <- list(conclusion1, + # Implied_Table = final_solution$table_start, notice, + # Transfer_Table = final_solution$table_final, + # conclusion2, conclusion3, + # total_RIR = total_RIR, total_switch = total_switch + # ) - # cat(crayon::bold("Background Information:")) - # cat("\n") - # cat(info1) - # cat("\n") - # cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(final_solution$table_start) - cat("\n") - if (changeSE) { - cat(notice_SE) - cat("\n") - } - cat(notice) - cat(noticeb) - cat("\n") - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1a) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(final_solution$table_final) - cat("\n") - cat(conclusion2) - cat("\n") - cat(conclusion3) - cat("\n") - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") - - } + # output dispatch + if (to_return == "raw_output") { + + if (changeSE) { + result <- list(conclusion1, + 
conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR, + notice_SE) + } else { + result <- list(conclusion1, + conclusion1b, + conclusion1c, + Implied_Table = final_solution$table_start, + notice, + Transfer_Table = final_solution$table_final, + conclusion2, + conclusion3, + RIR = RIR) + } + + return(result) + + } else if (to_return == "print") { + + # cat(crayon::bold("Background Information:")) + # cat("\n") + # cat(info1) + # cat("\n") + # cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(final_solution$table_start) + cat("\n") + if (changeSE) { + cat(notice_SE) + cat("\n") + } + cat(notice) + cat(noticeb) + cat("\n") + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1a) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(final_solution$table_final) + cat("\n") + cat(conclusion2) + cat("\n") + cat(conclusion3) + cat("\n") + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } } diff --git a/R/tkonfound.R b/R/tkonfound.R index 1433418f..2efef398 100644 --- a/R/tkonfound.R +++ b/R/tkonfound.R @@ -1,257 +1,308 @@ + +#' Perform Sensitivity Analysis on 2x2 Tables +#' +#' This function performs a sensitivity analysis on a 2x2 contingency table. +#' It calculates the number of cases that need to be replaced to invalidate +#' or sustain the statistical inference. The function also allows switching +#' between treatment success and failure or control success and failure +#' based on the provided parameters. +#' +#' @param a Number of unsuccessful cases in the control group. +#' @param b Number of successful cases in the control group. +#' @param c Number of unsuccessful cases in the treatment group. 
+#' @param d Number of successful cases in the treatment group. +#' @param alpha Significance level for the statistical test, default is 0.05. +#' @param switch_trm Boolean indicating whether to switch treatment row cells, +#' default is TRUE. +#' @param test Type of statistical test to use, either "fisher" +#' (default) or "chisq". +#' @param replace Indicates whether to use the entire sample or the control +#' group for base rate calculation, default is "control". +#' @param to_return Type of output to return, either "raw_output" or "print". +#' +#' @importFrom crayon bold underline +#' +#' @return Returns detailed information about the sensitivity analysis, +#' including the number of cases to be replaced (RIR), user-entered +#' table, transfer table, and conclusions. +#' +#' @export tkonfound <- function(a, b, c, d, alpha = 0.05, switch_trm = T, test = "fisher", replace = "control", to_return = to_return){ - # a <- 35 - # b <- 17 - # c <- 17 - # d <- 38 - # alpha <- 0.05 - # switch_trm <- T - # test <- "fisher" - - # stop message - if (a < 0 || b < 0 || c < 0 || d < 0) { - stop("Please enter non-negative integers for each cell.") - } - - if (a != as.integer(a) || b != as.integer(b) || c != as.integer(c) || d != as.integer(d)) { - stop("Please enter non-negative integers for each cell.") - } - - # use fisher if any of the cell is smaller than 5 - if (a < 5 || b < 5 || c < 5 || d < 5){ - test <- "fisher" - } - - # odds_ratio <- a*d/(b*c) - n_cnt <- a+b - n_trm <- c+d - n_obs <- n_cnt + n_trm - # est <- log(odds_ratio) - - # this is the 2 by 2 table we start with - table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) - - if (test == "fisher") { - p_ob <- fisher_p(a, b, c, d) - fisher_ob <- fisher_oddsratio(a, b, c, d) - } - if (test == "chisq") { - p_ob <- chisq_p(a, b, c, d) - chisq_ob <- chisq_value(a, b, c, d) - } - - # get solution - if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) - chisq_final <- solution$chisq_final - } - 
- if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) - fisher_final <- solution$fisher_final - } - - table_final <- solution$Transfer_Table - table_start <- table_ob - dcroddsratio_ob <- solution$dcroddsratio_ob - allnotenough <- solution$needtworows - final <- solution$final_switch - p_final <- solution$p_final - taylor_pred <- solution$taylor_pred - perc_bias_pred <- solution$perc_bias_pred - final_extra <- solution$final_extra - total_switch <- solution$total_switch - - ### add column and row names to contingency tables - rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") - colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") - - if (switch_trm && dcroddsratio_ob) { - transferway <- "treatment success to treatment failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment success" - } - if (switch_trm && !dcroddsratio_ob) { - transferway <- "treatment failure to treatment success" - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "treatment failure" - } - if (!switch_trm && dcroddsratio_ob) { - transferway <- "control failure to control success" - RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) - RIRway <- "control failure" - } - if (!switch_trm && !dcroddsratio_ob) { - transferway <- "control success to control failure" - RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) - RIRway <- "control success" - } - - if (allnotenough) { + # a <- 35 + # b <- 17 + # c <- 17 + # d <- 38 + # alpha <- 0.05 + # switch_trm <- T + # test <- "fisher" + + # stop message + if (a < 0 || b < 0 || c < 0 || d < 0) { + stop("Please enter non-negative integers for each cell.") + } + + if (a != as.integer(a) || b != as.integer(b) || + c != 
as.integer(c) || d != as.integer(d)) { + stop("Please enter non-negative integers for each cell.") + } + + # use fisher if any of the cell is smaller than 5 + if (a < 5 || b < 5 || c < 5 || d < 5){ + test <- "fisher" + } + + # odds_ratio <- a*d/(b*c) + n_cnt <- a+b + n_trm <- c+d + n_obs <- n_cnt + n_trm + # est <- log(odds_ratio) + + # this is the 2 by 2 table we start with + table_ob <- matrix(c(a, b, c, d), byrow = TRUE, 2, 2) + + if (test == "fisher") { + p_ob <- fisher_p(a, b, c, d) + fisher_ob <- fisher_oddsratio(a, b, c, d) + } + if (test == "chisq") { + p_ob <- chisq_p(a, b, c, d) + chisq_ob <- chisq_value(a, b, c, d) + } + + # get solution + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, alpha, switch_trm) + chisq_final <- solution$chisq_final + } + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, alpha, switch_trm) + fisher_final <- solution$fisher_final + } + + table_final <- solution$Transfer_Table + table_start <- table_ob + dcroddsratio_ob <- solution$dcroddsratio_ob + allnotenough <- solution$needtworows + final <- solution$final_switch + p_final <- solution$p_final + taylor_pred <- solution$taylor_pred + perc_bias_pred <- solution$perc_bias_pred + final_extra <- solution$final_extra + total_switch <- solution$total_switch + + ### add column and row names to contingency tables + rownames(table_start) <- rownames(table_final) <- c("Control", "Treatment") + colnames(table_start) <- colnames(table_final) <- c("Fail", "Success") + if (switch_trm && dcroddsratio_ob) { - transferway_extra <- "control failure to control success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "control failure" + transferway <- "treatment success to treatment failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment success" } if (switch_trm && 
!dcroddsratio_ob) { - transferway_extra <- "control success to control failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "control success" + transferway <- "treatment failure to treatment success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "treatment failure" } if (!switch_trm && dcroddsratio_ob) { - transferway_extra <- "treatment success to treatment failure" - RIR_extra <- ceiling(final_extra/((a+c)/n_obs))*(replace=="entire") + - ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) - RIRway_extra <- "treatment success" + transferway <- "control failure to control success" + RIR <- ceiling(final/((b+d)/n_obs))*(replace=="entire") + + ceiling(final/(b/(a+b)))*(1-(replace=="entire")) + RIRway <- "control failure" } if (!switch_trm && !dcroddsratio_ob) { - transferway_extra <- "treatment failure to treatment success" - RIR_extra <- ceiling(final_extra/((b+d)/n_obs))*(replace=="entire") + - ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) - RIRway_extra <- "treatment failure" + transferway <- "control success to control failure" + RIR <- ceiling(final/((a+c)/n_obs))*(replace=="entire") + + ceiling(final/(a/(a+b)))*(1-(replace=="entire")) + RIRway <- "control success" } - } - - if (p_ob < alpha) { - change <- "To invalidate the inference, " - } else { - change <- "To sustain an inference, " - } - - if (!allnotenough & final > 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) - } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). 
", RIR)) + if (allnotenough) { + if (switch_trm && dcroddsratio_ob) { + transferway_extra <- "control failure to control success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "control failure" + } + if (switch_trm && !dcroddsratio_ob) { + transferway_extra <- "control success to control failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "control success" + } + if (!switch_trm && dcroddsratio_ob) { + transferway_extra <- "treatment success to treatment failure" + RIR_extra <- ceiling(final_extra/((a+c)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(a/(a+b)))*(1-(replace=="entire")) + RIRway_extra <- "treatment success" + } + if (!switch_trm && !dcroddsratio_ob) { + transferway_extra <- "treatment failure to treatment success" + RIR_extra <- ceiling(final_extra/((b+d)/n_obs))* + (replace=="entire") + + ceiling(final_extra/(b/(b+d)))*(1-(replace=="entire")) + RIRway_extra <- "treatment failure" + } } - conclusion1c <- paste0( - sprintf("This is equivalent to transferring %d", final), - " cases from ", transferway, "." - ) - } - - if (!allnotenough & final == 1) { - conclusion1 <- paste0( - change, sprintf("you would need to replace %d ", RIR), RIRway) - - if (replace == "control") { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the control group applies (RIR = %d). ", RIR)) + if (p_ob < alpha) { + change <- "To invalidate the inference, " } else { - conclusion1b <- paste0( - sprintf("cases for which the probability of failure in the entire group applies (RIR = %d). 
", RIR)) - } + change <- "To sustain an inference, " + } - conclusion1c <- paste0( - sprintf("This is equivalent to transferring %d", final), - " case from ", transferway, ".") - } - - if (allnotenough){ - conclusion1 <- paste( - change, c("only transferring cases from" ), transferway, - sprintf(" is not enough. We also need to transfer %d cases from ", final_extra)) + if (!allnotenough & final > 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the control group applies (RIR = %d). ", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in + the entire group applies (RIR = %d). ", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " cases from ", transferway, "." + ) + } - conclusion1b <- paste0( - transferway_extra, c("as shown, from the User-entered Table to the Transfer Table.")) + if (!allnotenough & final == 1) { + conclusion1 <- paste0( + change, sprintf("you would need to replace %d ", RIR), RIRway) + + if (replace == "control") { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + control group applies (RIR = %d). ", RIR)) + } else { + conclusion1b <- paste0( + sprintf("cases for which the probability of failure in the + entire group applies (RIR = %d). 
", RIR)) + } + + conclusion1c <- paste0( + sprintf("This is equivalent to transferring %d", final), + " case from ", transferway, ".") + } - conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", RIR), RIRway, - sprintf( "with null hypothesis cases; and replace %d ", RIR_extra), RIRway_extra, - c(" with null hypothesis cases to change the inference.")) - } - - if (test == "chisq"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have a Pearson's chi square of %.3f, with p-value of %.3f:", chisq_final, p_final) - } - - if (test == "fisher"){ - conclusion2 <- sprintf( - "For the User-entered Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_ob, p_ob) - conclusion3 <- sprintf( - "For the Transfer Table, we have an estimated odds ratio of %.3f, with p-value of %.3f:", fisher_final, p_final) - } - - info1 <- "This function calculates the number of cases that would have to be replaced" - info2 <- "with no effect cases (RIR) to invalidate an inference made about the association" - info3 <- "between the rows and columns in a 2x2 table." - info4 <- "One can also interpret this as switches from one cell to another, such as from" - info5 <- "the treatment success cell to the treatment failure cell." - - if (to_return == "raw_output") { + if (allnotenough){ + conclusion1 <- paste( + change, c("only transferring cases from" ), transferway, + sprintf(" is not enough. 
We also need to transfer %d cases from ", + final_extra)) + + conclusion1b <- paste0( + transferway_extra, c("as shown, from the User-entered Table + to the Transfer Table.")) + + conclusion1c <- paste0(sprintf(" This means we need to replace %d of ", + RIR), RIRway, + sprintf( "with null hypothesis cases; and + replace %d ", RIR_extra), RIRway_extra, + c(" with null hypothesis cases to change + the inference.")) + } - result <- list(info1, - info2, - conclusion1, - conclusion1b, - conclusion1c, - User_enter_value = table_start, - Transfer_Table = table_final, - conclusion2, - conclusion3, - RIR = RIR) + if (test == "chisq"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have a Pearson's chi square of %.3f, + with p-value of %.3f:", chisq_final, p_final) + } - return(result) + if (test == "fisher"){ + conclusion2 <- sprintf( + "For the User-entered Table, we have an estimated odds ratio + of %.3f, with p-value of %.3f:", fisher_ob, p_ob) + conclusion3 <- sprintf( + "For the Transfer Table, we have an estimated odds ratio of + %.3f, with p-value of %.3f:", fisher_final, p_final) + } - } else if (to_return == "print") { + info1 <- "This function calculates the number of cases that + would have to be replaced" + info2 <- "with no effect cases (RIR) to invalidate an inference + made about the association" + info3 <- "between the rows and columns in a 2x2 table." + info4 <- "One can also interpret this as switches from one cell + to another, such as from" + info5 <- "the treatment success cell to the treatment failure cell." 
- cat(crayon::bold("Background Information:")) - cat("\n") - cat(info1) - cat("\n") - cat(info2) - cat("\n") - cat(info3) - cat("\n") - cat(info4) - cat("\n") - cat(info5) - cat("\n") - cat("\n") - cat(crayon::bold("Conclusion:")) - cat("\n") - cat(conclusion1) - cat("\n") - cat(conclusion1b) - cat("\n") - cat(conclusion1c) - cat("\n") - cat(conclusion2) - cat("\n") - cat("\n") - cat(crayon::underline("User-entered Table:")) - cat("\n") - print(table_start) - cat("\n") - cat("\n") - cat(conclusion3) - cat("\n") - cat(crayon::underline("Transfer Table:")) - cat("\n") - print(table_final) - cat("\n") - cat(crayon::bold("RIR:")) - cat("\n") - cat("RIR =", RIR) - cat("\n") + if (to_return == "raw_output") { + + result <- list(info1, + info2, + conclusion1, + conclusion1b, + conclusion1c, + User_enter_value = table_start, + Transfer_Table = table_final, + conclusion2, + conclusion3, + RIR = RIR) + + return(result) + + } else if (to_return == "print") { + + cat(crayon::bold("Background Information:")) + cat("\n") + cat(info1) + cat("\n") + cat(info2) + cat("\n") + cat(info3) + cat("\n") + cat(info4) + cat("\n") + cat(info5) + cat("\n") + cat("\n") + cat(crayon::bold("Conclusion:")) + cat("\n") + cat(conclusion1) + cat("\n") + cat(conclusion1b) + cat("\n") + cat(conclusion1c) + cat("\n") + cat(conclusion2) + cat("\n") + cat("\n") + cat(crayon::underline("User-entered Table:")) + cat("\n") + print(table_start) + cat("\n") + cat("\n") + cat(conclusion3) + cat("\n") + cat(crayon::underline("Transfer Table:")) + cat("\n") + print(table_final) + cat("\n") + cat(crayon::bold("RIR:")) + cat("\n") + cat("RIR =", RIR) + cat("\n") + + } - } - } - - - diff --git a/R/tkonfound_fig.R b/R/tkonfound_fig.R index 61c380e5..49f76b2c 100644 --- a/R/tkonfound_fig.R +++ b/R/tkonfound_fig.R @@ -1,339 +1,409 @@ -#' Draw figures for change in effect size as a function of switching or replacing outcomes -#' @description This function returns two plots for change in effect size as a function of 
switching or replacing outcomes, one for all possibilities (switching), another zoomed in the area for positive RIR
-#' @param a cell is the number of cases in the control group showing unsuccessful results
-#' @param b cell is the number of cases in the control group showing successful results
-#' @param c cell is the number of cases in the treatment group showing unsuccessful results
-#' @param d cell is the number of cases in the treatment group showing successful results
-#' @param thr_p the p-value threshold used to evaluate statistical significance, with the default of 0.05
-#' @param switch_trm whether switching the two cells in the treatment row or the two cells in the control row, with the default of the treatment row
-#' @param test whether using Fisher's Exact Test (default) or chi-square test
-#' @param replace whether using entire sample or the control group to calculate the base rate, with the default of the control group
-#' @importFrom stats chisq.test
-#' @return prints 2 figures for how number of hypothetical cases switched changes the effect size
+#' Draw Figures for Change in Effect Size in 2x2 Tables
+#'
+#' This function generates plots illustrating how the change in effect size
+#' is influenced by switching or replacing outcomes in a 2x2 table. It produces
+#' two plots: one showing all possibilities (switching) and another zoomed in
+#' the area for positive RIR (Robustness of Inference to Replacement).
+#'
+#' @param a Number of cases in the control group with unsuccessful outcomes.
+#' @param b Number of cases in the control group with successful outcomes.
+#' @param c Number of cases in the treatment group with unsuccessful outcomes.
+#' @param d Number of cases in the treatment group with successful outcomes.
+#' @param thr_p P-value threshold for statistical significance, default is 0.05.
+#' @param switch_trm Whether to switch the two cells in the treatment or
+#' control row, default is TRUE (treatment row).
+#' @param test Type of statistical test used, either "Fisher's Exact Test" +#' (default) or "Chi-square test". +#' @param replace Indicates whether to use the entire sample or just the control +#' group for calculating the base rate, default is "control". +#' +#' @importFrom ggplot2 ggplot aes_string geom_line geom_point scale_fill_manual +#' scale_shape_manual geom_hline scale_y_continuous +#' scale_x_continuous theme element_blank element_line +#' @importFrom ggrepel geom_label_repel +#' +#' @return Returns two plots showing the effect of hypothetical case switches +#' on the effect size in a 2x2 table. +#' #' @examples #' # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) #' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") -#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq", replace = "entire") +#' tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +#' test = "chisq", replace = "entire") #' #' @export #' -tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, test = "fisher", replace = "control"){ - -n_obs <- a + b + c + d -###***generate the log odds for each step of switch -meta <- data.frame(matrix(ncol = 10, nrow = n_obs-3)) -colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", "logodds","cntrl_p","tr_p","pdif") -if (switch_trm == T) { - for (i in 1:(n_obs-3)){ - if (i <= a){ - #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) - meta$a[i] <- i - meta$b[i] <- a+b-i - meta$c[i] <- c+d-1 - meta$d[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(d-1+a-i) +tkonfound_fig <- function(a, b, c, d, thr_p = 0.05, switch_trm = T, + test = "fisher", replace = "control"){ + + n_obs <- a + b + c + d + ###***generate the log odds for each step of switch + meta <- 
data.frame(matrix(ncol = 10, nrow = n_obs-3)) + colnames(meta) <- c("a", "b", "c", "d", "nobs", "switch", + "logodds","cntrl_p","tr_p","pdif") + if (switch_trm == T) { + for (i in 1:(n_obs-3)){ + if (i <= a){ + #from table(1, a+b-1, c+d-1, 1) to table(a, b, c+d-1, 1) + meta$a[i] <- i + meta$b[i] <- a+b-i + meta$c[i] <- c+d-1 + meta$d[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(d-1+a-i) + } + if (i>a & i<=a+d-1){ + # from table(a, b, c+d-1, 1) to table(a, b, c, d) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c+d-1-i+a + meta$d[i] <- 1+i-a + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+c-2){ + # from table(a, b, c, d) to table(a, b, 1, d+c-1) + meta$a[i] <- a + meta$b[i] <- b + meta$c[i] <- c-(i-a-d+1) + meta$d[i] <- d+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+c-2 & i<=a+b+c+d-3){ + # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) + meta$a[i] <- a+(i-a-d-c+2) + meta$b[i] <- b-(i-a-d-c+2) + meta$c[i] <- 1 + meta$d[i] <- c+d-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a & i<=a+d-1){ - # from table(a, b, c+d-1, 1) to table(a, b, c, d) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c+d-1-i+a - meta$d[i] <- 1+i-a - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + if (switch_trm == F) { + for (i in 1:(n_obs-3)){ + if (i <= d){ + #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) + meta$d[i] <- i + meta$c[i] <- d+c-i + meta$b[i] <- b+a-1 + meta$a[i] <- 1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a-1+d-i) + } + if (i>d & i<=a+d-1){ + # from table(1, a+b-1, c, d) to table(a, b, c, d) + meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b+a-1-i+d + meta$a[i] <- 1+i-d + meta$nobs[i] <- meta$a[i] + 
meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- -(a+d-1-i) + } + if (i>a+d-1 & i<=a+d+b-2){ + # from table(a, b, c, d) to table(a+b-1, 1, c, d) + meta$d[i] <- d + meta$c[i] <- c + meta$b[i] <- b-(i-a-d+1) + meta$a[i] <- a+(i-a-d+1) + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + if (i>a+d+b-2 & i<=a+b+c+d-3){ + # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) + meta$d[i] <- d+(i-d-a-b+2) + meta$c[i] <- c-(i-a-d-b+2) + meta$b[i] <- 1 + meta$a[i] <- b+a-1 + meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] + meta$switch[i] <- i-(a+d-1) + } + } } - if (i>a+d-1 & i<=a+d+c-2){ - # from table(a, b, c, d) to table(a, b, 1, d+c-1) - meta$a[i] <- a - meta$b[i] <- b - meta$c[i] <- c-(i-a-d+1) - meta$d[i] <- d+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + meta$cntrl_p <- meta$b/(meta$a + meta$b) + meta$tr_p <- meta$d/(meta$c + meta$d) + meta$pdif <- meta$tr_p - meta$cntrl_p + + ###***find out significant thresholds + if (test == "chisq"){ + solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) + meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) } - if (i>a+d+c-2 & i<=a+b+c+d-3){ - # from table(a, b, 1, d+c-1) to table(a+b-1, 1, 1, d+c-1) - meta$a[i] <- a+(i-a-d-c+2) - meta$b[i] <- b-(i-a-d-c+2) - meta$c[i] <- 1 - meta$d[i] <- c+d-1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -if (switch_trm == F) { - for (i in 1:(n_obs-3)){ - if (i <= d){ - #from table(1, a+b-1, c+d-1, 1) to table(1, a+b-1, c, d) - meta$d[i] <- i - meta$c[i] <- d+c-i - meta$b[i] <- b+a-1 - meta$a[i] <- 1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a-1+d-i) + + if (test == "fisher"){ + solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) + for (i in 1:(n_obs-3)){ + meta$logodds[i] <- log( + fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) + } } - if 
(i>d & i<=a+d-1){ - # from table(1, a+b-1, c, d) to table(a, b, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b+a-1-i+d - meta$a[i] <- 1+i-d - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- -(a+d-1-i) + + table_final <- solution$Transfer_Table + dcroddsratio_ob <- solution$dcroddsratio_ob + + meta$sig <- ifelse(meta$a==table_final[1,1] & + meta$b==table_final[1,2] & meta$c==table_final[2,1] & + meta$d==table_final[2,2],1,0) + + if (meta[meta$sig==1,]$logodds > 0){ + if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio + posinsig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==posinsig,]$logodds+ + meta[meta$switch==(posinsig+1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+ + meta[meta$switch==(posinsig+1),]$pdif)/2 + zoom_lower <- -(posinsig + 2) + meta$sigpoint <- ifelse(meta$switch==(posinsig+1), + "positive","other") + } else {#from not sig to sig by increasing positive effect + possig <- meta[meta$sig==1,]$switch + pos_thr <- (meta[meta$switch==possig,]$logodds+ + meta[meta$switch==(possig-1),]$logodds)/2 + pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+ + meta[meta$switch==(possig-1),]$pdif)/2 + zoom_lower <- -(possig + 2) + meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + } + # find out the row that is cloest to logodds of 0 but negative + temp1 <- meta[meta$logodds<0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_upper <- -(temp1$switch[j]-2) + meta$sigpoint <- ifelse(meta$switch==temp1$switch[j], + "negative",meta$sigpoint) } - if (i>a+d-1 & i<=a+d+b-2){ - # from table(a, b, c, d) to table(a+b-1, 
1, c, d) - meta$d[i] <- d - meta$c[i] <- c - meta$b[i] <- b-(i-a-d+1) - meta$a[i] <- a+(i-a-d+1) - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) + + if (meta[meta$sig==1,]$logodds < 0){ + if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio + negsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==negsig,]$logodds+ + meta[meta$switch==(negsig+1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+ + meta[meta$switch==(negsig+1),]$pdif)/2 + zoom_upper <- -(negsig - 4) + meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") + } else { + # from sig to not sig by increasing odds ratio + # that is smaller than 1 + neginsig <- meta[meta$sig==1,]$switch + neg_thr <- (meta[meta$switch==neginsig,]$logodds+ + meta[meta$switch==(neginsig-1),]$logodds)/2 + neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+ + meta[meta$switch==(neginsig-1),]$pdif)/2 + zoom_upper <- -(neginsig - 2) + meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") + } + # find out the row that is cloest to logodds of 0 but positive + temp1 <- meta[meta$logodds>0,] + temp1 <- temp1[order(abs(temp1$logodds)),] + j <- 1 + if (test == "chisq"){ + while (chisq_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + if (test == "fisher"){ + while (fisher_p(temp1$a[j], temp1$b[j], + temp1$c[j], temp1$d[j])>thr_p){ + j <- j+1 + } + } + pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 + pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 + zoom_lower <- -(temp1$switch[j]+2) + meta$sigpoint <- ifelse(meta$switch==temp1$switch[j], + "positive",meta$sigpoint) } - if (i>a+d+b-2 & i<=a+b+c+d-3){ - # from table(a+b-1, 1, c, d) to table(a+b-1, 1, 1, d+c-1) - meta$d[i] <- d+(i-d-a-b+2) - meta$c[i] <- c-(i-a-d-b+2) - meta$b[i] <- 1 - meta$a[i] <- b+a-1 - meta$nobs[i] <- meta$a[i] + meta$b[i] + meta$c[i] + meta$d[i] - meta$switch[i] <- i-(a+d-1) - } - } -} -meta$cntrl_p <- meta$b/(meta$a + 
meta$b) -meta$tr_p <- meta$d/(meta$c + meta$d) -meta$pdif <- meta$tr_p - meta$cntrl_p - -###***find out significant thresholds -if (test == "chisq"){ - solution <- getswitch_chisq(a, b, c, d, thr_p, switch_trm) - meta$logodds <- log(meta$a*meta$d/meta$c/meta$b) -} - -if (test == "fisher"){ - solution <- getswitch_fisher(a, b, c, d, thr_p, switch_trm) - for (i in 1:(n_obs-3)){ - meta$logodds[i] <- log(fisher_oddsratio(meta$a[i], meta$b[i], meta$c[i], meta$d[i])) - } -} - -table_final <- solution$Transfer_Table -dcroddsratio_ob <- solution$dcroddsratio_ob - -meta$sig <- ifelse(meta$a==table_final[1,1] & meta$b==table_final[1,2] & meta$c==table_final[2,1] & meta$d==table_final[2,2], - 1,0) - -if (meta[meta$sig==1,]$logodds > 0){ - if (dcroddsratio_ob){#from sig to not sig by decreasing odds ratio - posinsig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==posinsig,]$logodds+meta[meta$switch==(posinsig+1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==posinsig,]$pdif+meta[meta$switch==(posinsig+1),]$pdif)/2 - zoom_lower <- -(posinsig + 2) - meta$sigpoint <- ifelse(meta$switch==(posinsig+1),"positive","other") - } else {#from not sig to sig by increasing positive effect - possig <- meta[meta$sig==1,]$switch - pos_thr <- (meta[meta$switch==possig,]$logodds+meta[meta$switch==(possig-1),]$logodds)/2 - pos_thr_pdif <- (meta[meta$switch==possig,]$pdif+meta[meta$switch==(possig-1),]$pdif)/2 - zoom_lower <- -(possig + 2) - meta$sigpoint <- ifelse(meta$switch==possig,"positive","other") + + meta$current <- ifelse(meta$switch==0, "current", "other") + meta$currentlabel <- ifelse(meta$switch==0, "current", NA) + meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) + + if (switch_trm && dcroddsratio_ob) { + # transferway <- "treatment success to treatment failure," + meta$switch <- meta$switch*(-1) } - # find out the row that is cloest to logodds of 0 but negative - temp1 <- meta[meta$logodds<0,] - temp1 <- temp1[order(abs(temp1$logodds)),] - j <- 1 - if 
(test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + if (!switch_trm && dcroddsratio_ob) { + #transferway <- "control failure to control success," + meta$switch <- meta$switch*(-1) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + + fillcol <-c("current"="white", + "positive"="green4","negative"="red","other"="white") + pointshape <- c("current"=15,"other"=21) + + if (switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) } - } - neg_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - neg_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_upper <- -(temp1$switch[j]-2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"negative",meta$sigpoint) -} - -if (meta[meta$sig==1,]$logodds < 0){ - if (dcroddsratio_ob){ # from not sig to sig by decreasing odds ratio - negsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==negsig,]$logodds+meta[meta$switch==(negsig+1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==negsig,]$pdif+meta[meta$switch==(negsig+1),]$pdif)/2 - zoom_upper <- -(negsig - 4) - meta$sigpoint <- ifelse(meta$switch==negsig,"negative","other") - } else { # from sig to not sig by increasing odds ratio that is smaller than 1 - neginsig <- meta[meta$sig==1,]$switch - neg_thr <- (meta[meta$switch==neginsig,]$logodds+meta[meta$switch==(neginsig-1),]$logodds)/2 - neg_thr_pdif <- (meta[meta$switch==neginsig,]$pdif+meta[meta$switch==(neginsig-1),]$pdif)/2 - zoom_upper <- -(neginsig - 2) - meta$sigpoint <- ifelse(meta$switch==neginsig-1,"negative","other") - } - # find out the row that is cloest to logodds of 0 but positive - temp1 <- meta[meta$logodds>0,] - temp1 <- temp1[order(abs(temp1$logodds)),] - j <- 1 - if (test == "chisq"){ - while (chisq_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if 
(switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - if (test == "fisher"){ - while (fisher_p(temp1$a[j], temp1$b[j], temp1$c[j], temp1$d[j])>thr_p){ - j <- j+1 + if (!switch_trm && dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) } - } - pos_thr <- (temp1$logodds[j-1]+temp1$logodds[j])/2 - pos_thr_pdif <- (temp1$pdif[j-1]+temp1$pdif[j])/2 - zoom_lower <- -(temp1$switch[j]+2) - meta$sigpoint <- ifelse(meta$switch==temp1$switch[j],"positive",meta$sigpoint) -} - -meta$current <- ifelse(meta$switch==0, "current", "other") -meta$currentlabel <- ifelse(meta$switch==0, "current", NA) -meta$sigpoint <- ifelse(meta$switch==0, "current",meta$sigpoint) - -if (switch_trm && dcroddsratio_ob) { - # transferway <- "treatment success to treatment failure," - meta$switch <- meta$switch*(-1) -} - -if (!switch_trm && dcroddsratio_ob) { - #transferway <- "control failure to control success," - meta$switch <- meta$switch*(-1) -} - -fillcol <-c("current"="white","positive"="green4","negative"="red","other"="white") -pointshape <- c("current"=15,"other"=21) - -if (switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} -if (switch_trm && !dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((b+d)/n_obs))*(replace=="entire") + ceiling(meta$switch/(b/(a+b)))*(1-(replace=="entire")) -} -if (!switch_trm && !dcroddsratio_ob) { - meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) -} - -meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, 
")", sep = "") - -fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ - ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1)+ - ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(meta[meta$switch==0,]$RIR, meta[meta$sigpoint=="negative",]$RIR, - meta[meta$sigpoint=="positive",]$RIR), - labels= c(meta[meta$switch==0,]$xaxis, meta[meta$sigpoint=="negative",]$xaxis, - meta[meta$sigpoint=="positive",]$xaxis)) + - ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), - #axis.text= ggplot2::element_text(size = 14), - panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] -zoom <- zoom[zoom$switch>=0,] - -if (switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=d,] -} -if (switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=c,] -} -if (!switch_trm && dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=a,] -} -if (!switch_trm && !dcroddsratio_ob) { - zoom <- zoom[zoom$RIR<=b,] -} - -zoom$label <- ifelse(zoom$sigpoint=="positive", - paste("sig pos:RIR=", zoom[zoom$sigpoint=="positive",]$RIR),NA) -zoom$label <- ifelse(zoom$sigpoint=="negative", - paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative",]$RIR),zoom$label) -zoom$label <- 
ifelse(zoom$sigpoint=="current", - paste("current"),zoom$label) - -fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ - ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + - ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill = "sigpoint"), - size = 1)+ - ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ - ggplot2::scale_fill_manual(values=fillcol)+ - ggplot2::scale_shape_manual(values=pointshape)+ - ggplot2::scale_y_continuous(name="Difference in probability of successful outcome (treatment - control)")+ - ggplot2::scale_x_continuous(name="RIR (Fragility)", - breaks= c(zoom$RIR[1], zoom$RIR[as.integer(length(zoom$RIR)/2)], - zoom$RIR[as.integer(length(zoom$RIR))]), - labels= c(zoom$xaxis[1], zoom$xaxis[as.integer(length(zoom$RIR)/2)], - zoom$xaxis[as.integer(length(zoom$RIR))])) + - ggplot2::theme(panel.grid.major = ggplot2::element_blank(), - panel.grid.minor = ggplot2::element_blank(), - panel.background = ggplot2::element_blank(), - axis.line = ggplot2::element_line(colour = "black"), - legend.position = "none") - -if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype = "dashed", color="green4", size = 1) -} - -if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { - fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype = "dashed", color="red", size = 1) -} - -###plot figure 3 RIS% as sample size gets larger, using t statistic as the criterion -# if (plt3 == TRUE) { -# meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) -# colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", "RISperc_D") -# for (i in 1:11){ -# meta3$nobs[i] <- size <- 2^(i+5) -# meta3$a[i] <- a_i <- round(size/n_obs*a) -# meta3$b[i] <- b_i <- round(size/n_obs*b) -# meta3$c[i] <- c_i <- round(size/n_obs*c) -# meta3$d[i] <- d_i <- round(size/n_obs*d) -# table_i <- get_abcd_kfnl(a_i, b_i, c_i, 
d_i) -# thr_i_t <- stats::qt(1 - thr_p/2, size - 1) -# meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, switch_trm, size)$final_extra -# meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 -# } -# fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ -# geom_line(size = 1)+ -# geom_point(size = 2.5)+ -# labs(title = "RIS as % of Sample Size")+ -# scale_x_continuous(name="Sample Size", labels=scales::comma)+ -# scale_y_continuous(name="RIS%") + -# theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), -# axis.text = element_text(size = 12)) -#} - -if (switch_trm == T) { - note <- "A bend in line indicates switches from the control row because the treatment row was exhausted." - } else { - note <- "A bend in line indicates switches from the treatment row because the control row was exhausted." - } - -result <- list(fig1, note, fig2) - -return(result) + if (!switch_trm && !dcroddsratio_ob) { + meta$RIR <- ceiling(meta$switch/((a+c)/n_obs))*(replace=="entire") + + ceiling(meta$switch/(a/(a+b)))*(1-(replace=="entire")) + } + + meta$xaxis <- paste(meta$RIR,"\n","(", meta$switch, ")", sep = "") + + fig1 <- ggplot2::ggplot(meta, ggplot2::aes_string(x="RIR", y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string( + y="pdif", shape = "current",fill = "sigpoint"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="currentlabel"))+ + ggplot2::geom_hline( + yintercept = pos_thr_pdif, + linetype = "dashed", color="green4", size = 1)+ + ggplot2::geom_hline( + yintercept = neg_thr_pdif, + linetype = "dashed", color="red", size = 1)+ + ggplot2::scale_y_continuous( + name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(meta[meta$switch==0,]$RIR, meta[ 
+ meta$sigpoint=="negative",]$RIR, + meta[meta$sigpoint=="positive", + ]$RIR), + labels= c(meta[meta$switch==0,]$xaxis, + meta[meta$sigpoint=="negative", + ]$xaxis, + meta[meta$sigpoint=="positive", + ]$xaxis)) + + ggplot2::theme(#axis.title = ggplot2::element_text(size = 15), + #axis.text= ggplot2::element_text(size = 14), + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + zoom <- meta[meta$switch<=zoom_upper & meta$switch>=zoom_lower,] + zoom <- zoom[zoom$switch>=0,] + + if (switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=d,] + } + if (switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=c,] + } + if (!switch_trm && dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=a,] + } + if (!switch_trm && !dcroddsratio_ob) { + zoom <- zoom[zoom$RIR<=b,] + } + + zoom$label <- ifelse(zoom$sigpoint=="positive", + paste("sig pos:RIR=", zoom[zoom$sigpoint=="positive", + ]$RIR),NA) + zoom$label <- ifelse(zoom$sigpoint=="negative", + paste("sig neg:RIR=", zoom[zoom$sigpoint=="negative", + ]$RIR),zoom$label) + zoom$label <- ifelse(zoom$sigpoint=="current", + paste("current"),zoom$label) + + fig2 <- ggplot2::ggplot(zoom, ggplot2::aes_string(x="RIR",y="pdif"))+ + ggplot2::geom_line(ggplot2::aes_string(y="pdif"), size = 1) + + ggplot2::geom_point(ggplot2::aes_string(y="pdif", shape = "current",fill + = "sigpoint"), + size = 1)+ + ggrepel::geom_label_repel(ggplot2::aes_string(label="label"))+ + ggplot2::scale_fill_manual(values=fillcol)+ + ggplot2::scale_shape_manual(values=pointshape)+ + ggplot2::scale_y_continuous(name="Difference in probability of + successful outcome (treatment - control)")+ + ggplot2::scale_x_continuous(name="RIR (Fragility)", + breaks= c(zoom$RIR[1], zoom$RIR[ + as.integer(length(zoom$RIR)/2)], + zoom$RIR[as.integer( + length(zoom$RIR))]), + labels= c(zoom$xaxis[1], zoom$xaxis[ + 
as.integer(length(zoom$RIR)/2)], + zoom$xaxis[as.integer( + length(zoom$RIR))])) + + ggplot2::theme(panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + panel.background = ggplot2::element_blank(), + axis.line = ggplot2::element_line(colour = "black"), + legend.position = "none") + + if (pos_thr_pdif <= max(zoom$pdif) && pos_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = pos_thr_pdif, linetype + = "dashed", color="green4", size = 1) + } + + if (neg_thr_pdif <= max(zoom$pdif) && neg_thr_pdif >= min(zoom$pdif)) { + fig2 <- fig2 + ggplot2::geom_hline(yintercept = neg_thr_pdif, linetype + = "dashed", color="red", size = 1) + } + + ###plot figure 3 RIS% as sample size gets larger, + ### using t statistic as the criterion + # if (plt3 == TRUE) { + # meta3 <- data.frame(matrix(ncol = 7, nrow = 11)) + # colnames(meta3) <- c("a", "b", "c", "d", "nobs", "RIS", "RISperc_total", + # "RISperc_D") + # for (i in 1:11){ + # meta3$nobs[i] <- size <- 2^(i+5) + # meta3$a[i] <- a_i <- round(size/n_obs*a) + # meta3$b[i] <- b_i <- round(size/n_obs*b) + # meta3$c[i] <- c_i <- round(size/n_obs*c) + # meta3$d[i] <- d_i <- round(size/n_obs*d) + # table_i <- get_abcd_kfnl(a_i, b_i, c_i, d_i) + # thr_i_t <- stats::qt(1 - thr_p/2, size - 1) + # meta3$RIS[i] <- RIS_i <- getswitch(table_i, thr_t_i, + # switch_trm, size)$final_switch + getswitch(table_i, thr_t_i, + # switch_trm, size)$final_extra + # meta3$RISperc[i] <- RISperc_i <- RIS_i/size*100 + # } + # fig3 <- ggplot2::ggplot(meta3, aes(x=nobs, y=RISperc))+ + # geom_line(size = 1)+ + # geom_point(size = 2.5)+ + # labs(title = "RIS as % of Sample Size")+ + # scale_x_continuous(name="Sample Size", labels=scales::comma)+ + # scale_y_continuous(name="RIS%") + + # theme(plot.title = element_text(face = "bold", hjust = 0.5, size = 12), + # axis.text = element_text(size = 12)) + #} + + if (switch_trm == T) { + note <- "A bend in line indicates switches from the control + row 
because the treatment row was exhausted." + } else { + note <- "A bend in line indicates switches from the treatment row + because the control row was exhausted." + } + + result <- list(fig1, note, fig2) + + return(result) } diff --git a/R/zzz.R b/R/zzz.R index 32b5dce8..2de70e6f 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,8 +1,24 @@ -## quiets concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "replace_null_cases", "percent_bias", "val")) +#' Package Initialization Functions and Utilities +#' +#' These functions are used for initializing the package environment and +#' providing utility functions for the package. +#' +#' @name zzz +#' @aliases zzz +#' @rdname zzz +#' @importFrom utils globalVariables browseURL +#' +## quiets concerns (notes) of R CMD check re: the vars that are evaluated +## using non-standard evaluation +if (getRversion() >= "2.15.1") + utils::globalVariables(c("inference", "key", "replace_null_cases", + "percent_bias", "val","ModelLabel", "coef_X")) .onAttach <- function(libname, pkgname) { - packageStartupMessage("Sensitivity analysis as described in Frank, Maroulis, Duong, and Kelcey (2013) and in Frank (2000).\nFor more information visit http://konfound-it.com.") + packageStartupMessage("Sensitivity analysis as described in Frank, + Maroulis, Duong, and Kelcey (2013) and + in Frank (2000).\nFor more information visit + http://konfound-it.com.") } #' Open interactive web application for konfound @@ -11,8 +27,10 @@ if (getRversion() >= "2.15.1") utils::globalVariables(c("inference", "key", "rep #' @export launch_shiny <- function() { - utils::browseURL("http://konfound-it.com") + utils::browseURL("http://konfound-it.com") } -# addresses concerns (notes) of R CMD check re: the vars that are evaluated using non-standard evaluation -# if (getRversion() >= "2.15.1") utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", 
"x", "y")) +# addresses concerns (notes) of R CMD check re: the vars that are +# evaluated using non-standard evaluation +# if (getRversion() >= "2.15.1") +# utils::globalVariables(c("itcv", "term", "unstd_beta1", "var_name", "x", "y")) diff --git a/man/cal_delta_star.Rd b/man/cal_delta_star.Rd new file mode 100644 index 00000000..337ed0a1 --- /dev/null +++ b/man/cal_delta_star.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_delta_star} +\alias{cal_delta_star} +\title{Calculate delta star for sensitivity analysis} +\usage{ +cal_delta_star( + FR2max, + R2, + R2_uncond, + est_eff, + eff_thr, + var_x, + var_y, + est_uncond, + rxz, + n_obs +) +} +\arguments{ +\item{FR2max}{maximum R2} + +\item{R2}{current R2} + +\item{R2_uncond}{unconditional R2} + +\item{est_eff}{estimated effect} + +\item{eff_thr}{effect threshold} + +\item{var_x}{variance of X} + +\item{var_y}{variance of Y} + +\item{est_uncond}{unconditional estimate} + +\item{rxz}{correlation coefficient between X and Z} + +\item{n_obs}{number of observations} +} +\value{ +delta star value +} +\description{ +Calculate delta star for sensitivity analysis +} diff --git a/man/cal_rxy.Rd b/man/cal_rxy.Rd new file mode 100644 index 00000000..ddc5b756 --- /dev/null +++ b/man/cal_rxy.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_rxy} +\alias{cal_rxy} +\title{Calculate rxy based on ryxGz, rxz, and ryz} +\usage{ +cal_rxy(ryxGz, rxz, ryz) +} +\arguments{ +\item{ryxGz}{correlation coefficient between Y and X given Z} + +\item{rxz}{correlation coefficient between X and Z} + +\item{ryz}{correlation coefficient between Y and Z} +} +\value{ +rxy value +} +\description{ +Calculate rxy based on ryxGz, rxz, and ryz +} diff --git a/man/cal_rxz.Rd b/man/cal_rxz.Rd new file mode 100644 index 00000000..9cfd0bbe --- /dev/null +++ b/man/cal_rxz.Rd @@ -0,0 +1,25 @@ +% 
Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_rxz} +\alias{cal_rxz} +\title{Calculate R2xz based on variances and standard error} +\usage{ +cal_rxz(var_x, var_y, R2, df, std_err) +} +\arguments{ +\item{var_x}{variance of X} + +\item{var_y}{variance of Y} + +\item{R2}{coefficient of determination} + +\item{df}{degrees of freedom} + +\item{std_err}{standard error} +} +\value{ +R2xz value +} +\description{ +Calculate R2xz based on variances and standard error +} diff --git a/man/cal_ryz.Rd b/man/cal_ryz.Rd new file mode 100644 index 00000000..83988264 --- /dev/null +++ b/man/cal_ryz.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{cal_ryz} +\alias{cal_ryz} +\title{Calculate R2yz based on ryxGz and R2} +\usage{ +cal_ryz(ryxGz, R2) +} +\arguments{ +\item{ryxGz}{correlation coefficient between Y and X given Z} + +\item{R2}{coefficient of determination} +} +\value{ +R2yz value +} +\description{ +Calculate R2yz based on ryxGz and R2 +} diff --git a/man/chisq_p.Rd b/man/chisq_p.Rd new file mode 100644 index 00000000..54a4b8c5 --- /dev/null +++ b/man/chisq_p.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/nonlinear_auxiliary.R +\name{chisq_p} +\alias{chisq_p} +\title{Perform a Chi-Square Test} +\usage{ +chisq_p(a, b, c, d) +} +\arguments{ +\item{a}{Frequency count for row 1, column 1.} + +\item{b}{Frequency count for row 1, column 2.} + +\item{c}{Frequency count for row 2, column 1.} + +\item{d}{Frequency count for row 2, column 2.} +} +\value{ +P-value from the chi-square test. +} +\description{ +`chisq_p` calculates the p-value for a chi-square test given a contingency table. +} diff --git a/man/concord1.Rd b/man/concord1.Rd index 678ecb58..cf3179b4 100644 --- a/man/concord1.Rd +++ b/man/concord1.Rd @@ -11,5 +11,6 @@ A data.frame with 496 rows and 10 variables. 
This data is from Hamilton (1983) } \references{ -Hamilton, Lawrence C. 1983. Saving water: A causal model of household conservation. Sociological Perspectives 26(4):355-374. +Hamilton, Lawrence C. 1983. Saving water: +A causal model of household conservation. Sociological Perspectives 26(4):355-374. } diff --git a/man/get_kr_df.Rd b/man/get_kr_df.Rd new file mode 100644 index 00000000..6241cc14 --- /dev/null +++ b/man/get_kr_df.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lmer.R +\name{get_kr_df} +\alias{get_kr_df} +\title{Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model} +\usage{ +get_kr_df(model_object) +} +\arguments{ +\item{model_object}{The mixed-effects model object produced by lme4::lmer.} +} +\value{ +A vector containing degrees of freedom for the fixed effects in the model. +} +\description{ +Extract Degrees of Freedom for Fixed Effects in a Linear Mixed-Effects Model +} diff --git a/man/konfound.Rd b/man/konfound.Rd index bd55f341..6e1edb7e 100644 --- a/man/konfound.Rd +++ b/man/konfound.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/konfound.R \name{konfound} \alias{konfound} -\title{Perform sensitivity analysis on fitted models} +\title{Konfound Analysis for Various Model Types} \usage{ konfound( model_object, @@ -19,33 +19,41 @@ konfound( ) } \arguments{ -\item{model_object}{output from a model (currently works for: lm)} +\item{model_object}{A model object produced by `lm`, `glm`, or `lme4::lmer`.} -\item{tested_variable}{Variable associated with the unstandardized beta coefficient to be tested} +\item{tested_variable}{Variable associated with the coefficient to be tested.} -\item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} +\item{alpha}{Significance level for hypothesis testing.} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{Number of tails for the test (1 or 
2).} -\item{index}{whether output is RIR or IT (impact threshold); defaults to "RIR"} +\item{index}{Type of sensitivity analysis ('RIR' by default).} -\item{to_return}{whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return} +\item{to_return}{Type of output to return ('print', 'raw_output', 'table').} -\item{test_all}{whether to carry out the sensitivity test for all of the coefficients (defaults to FALSE)} +\item{test_all}{Boolean; if `TRUE`, tests all coefficients.} -\item{two_by_two}{whether or not the tested variable is a dichotomous variable in a GLM; if so, the 2X2 table approach is used; only works for single variables at present (so test_all = TRUE will return an error)} +\item{two_by_two}{Boolean; if `TRUE`, uses a 2x2 table approach +for `glm` dichotomous variables.} -\item{n_treat}{the number of cases associated with the treatment condition; applicable only when model_type = "logistic"} +\item{n_treat}{Number of treatment cases +(used only if `two_by_two` is `TRUE`).} -\item{switch_trm}{whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic"} +\item{switch_trm}{Boolean; switch treatment and control in the analysis.} -\item{replace}{whether using entire sample or the control group to calculate the base rate; default is the entire sample} +\item{replace}{Replacement method for treatment cases ('control' by default).} } \value{ -prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +Depending on `to_return`, prints the result, returns a raw output, +or a summary table. 
} \description{ -For fitted models, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. Currently works for: models created with lm() (linear models). +Performs sensitivity analysis on fitted models including +linear models (`lm`), generalized linear models (`glm`), +and linear mixed-effects models (`lmerMod`). +It calculates the amount of bias required to invalidate or +sustain an inference, and the impact of an omitted variable +necessary to affect the inference. } \examples{ # using lm() for linear models diff --git a/man/konfound_glm.Rd b/man/konfound_glm.Rd new file mode 100644 index 00000000..cd11b0af --- /dev/null +++ b/man/konfound_glm.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-glm.R +\name{konfound_glm} +\alias{konfound_glm} +\title{Konfound Analysis for Generalized Linear Models} +\usage{ +konfound_glm( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index = "RIR", + to_return +) +} +\arguments{ +\item{model_object}{The model object produced by glm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified variable(s). +} +\description{ +This function performs konfound analysis on a generalized linear model +object. It uses 'broom' to tidy model outputs and calculates the sensitivity +of inferences. It supports analysis for a single variable + or multiple variables. 
+} diff --git a/man/konfound_glm_dichotomous.Rd b/man/konfound_glm_dichotomous.Rd new file mode 100644 index 00000000..a3f78169 --- /dev/null +++ b/man/konfound_glm_dichotomous.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-glm-dichotomous.R +\name{konfound_glm_dichotomous} +\alias{konfound_glm_dichotomous} +\title{Konfound Analysis for Generalized Linear Models with Dichotomous Outcomes} +\usage{ +konfound_glm_dichotomous( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + to_return, + n_treat, + switch_trm, + replace +) +} +\arguments{ +\item{model_object}{The model object produced by glm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{to_return}{The type of output to return.} + +\item{n_treat}{Number of treatment cases.} + +\item{switch_trm}{Term to switch for sensitivity analysis.} + +\item{replace}{Boolean indicating whether to replace cases or not.} +} +\value{ +The results of the konfound analysis. +} +\description{ +This function performs konfound analysis on a generalized linear model +object with a dichotomous outcome. It uses 'broom' to tidy model outputs +and calculates the sensitivity of inferences. 
+} diff --git a/man/konfound_lm.Rd b/man/konfound_lm.Rd new file mode 100644 index 00000000..2959dd83 --- /dev/null +++ b/man/konfound_lm.Rd @@ -0,0 +1,40 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lm.R +\name{konfound_lm} +\alias{konfound_lm} +\title{Konfound Analysis for Linear Models} +\usage{ +konfound_lm( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index, + to_return +) +} +\arguments{ +\item{model_object}{The linear model object produced by lm.} + +\item{tested_variable_string}{The name of the variable being tested.} + +\item{test_all}{Boolean indicating whether to test all variables or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified variable(s). +} +\description{ +This function performs konfound analysis on a linear model object +produced by lm. +It calculates the sensitivity of inferences for coefficients in the model. +It supports analysis for a single variable or multiple variables. 
+} diff --git a/man/konfound_lmer.Rd b/man/konfound_lmer.Rd new file mode 100644 index 00000000..7a9bacc8 --- /dev/null +++ b/man/konfound_lmer.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/konfound-lmer.R +\name{konfound_lmer} +\alias{konfound_lmer} +\title{Konfound Analysis for Linear Mixed-Effects Models} +\usage{ +konfound_lmer( + model_object, + tested_variable_string, + test_all, + alpha, + tails, + index, + to_return +) +} +\arguments{ +\item{model_object}{The mixed-effects model object produced by lme4::lmer.} + +\item{tested_variable_string}{The name of the fixed effect being tested.} + +\item{test_all}{Boolean indicating whether to test all fixed effects or not.} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for the test (1 or 2).} + +\item{index}{Type of sensitivity analysis ('RIR' by default).} + +\item{to_return}{The type of output to return.} +} +\value{ +The results of the konfound analysis for the specified fixed effect(s). +} +\description{ +This function performs konfound analysis on a linear mixed-effects model +object produced by lme4::lmer. It calculates the sensitivity of inferences +for fixed effects in the model. It supports analysis for a single variable or multiple variables. 
+} diff --git a/man/mkonfound.Rd b/man/mkonfound.Rd index 57aa8e72..3c02b240 100644 --- a/man/mkonfound.Rd +++ b/man/mkonfound.Rd @@ -2,33 +2,38 @@ % Please edit documentation in R/mkonfound.R \name{mkonfound} \alias{mkonfound} -\title{Perform meta-analyses including sensitivity analysis} +\title{Meta-Analysis and Sensitivity Analysis for Multiple Studies} \usage{ mkonfound(d, t, df, alpha = 0.05, tails = 2, return_plot = FALSE) } \arguments{ -\item{d}{data.frame or tibble with the t-statistics and associated degrees of freedom} +\item{d}{A data frame or tibble containing t-statistics and associated +degrees of freedom.} -\item{t}{t-statistic or vector of t-statistics} +\item{t}{Column name or vector of t-statistics.} -\item{df}{degrees of freedom or vector of degrees of freedom associated with the t-statistics in the t argument} +\item{df}{Column name or vector of degrees of freedom associated +with t-statistics.} -\item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} +\item{alpha}{Significance level for hypothesis testing.} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{Number of tails for the test (1 or 2).} -\item{return_plot}{whether to return a plot of the percent bias; defaults to FALSE} +\item{return_plot}{Whether to return a plot of the percent bias +(default is `FALSE`).} } \value{ -prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference for each of the cases in the data.frame +Depending on `return_plot`, either returns a data frame with +analysis results or a plot. } \description{ -For fitted models, this command carries out sensitivity analysis for a number of models, when their parameters stored in a data.frame. +Performs sensitivity analysis for multiple models, where parameters +are stored in a data frame. 
It calculates the amount of bias required to +invalidate or sustain an inference for each case in the data frame. } \examples{ \dontrun{ -mkonfound_ex -str(d) -mkonfound(mkonfound_ex, t, df) +data <- data.frame(t = c(2.1, 2.3, 1.9), df = c(30, 40, 35)) +mkonfound(data, t, df) } } diff --git a/man/mkonfound_ex.Rd b/man/mkonfound_ex.Rd index 54387103..c2d72455 100644 --- a/man/mkonfound_ex.Rd +++ b/man/mkonfound_ex.Rd @@ -19,7 +19,9 @@ A data frame with 30 rows and 2 variables: mkonfound_ex } \description{ -A dataset containing t and df values from example studies from Educational Evaluation -and Policy Analysis (as detailed in Frank et al., 2013): https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view +A dataset containing t and df values from example studies +from Educational Evaluation +and Policy Analysis (as detailed in Frank et al., 2013): + https://drive.google.com/file/d/1aGhxGjvMvEPVAgOA8rrxvA97uUO5TTMe/view } \keyword{datasets} diff --git a/man/output_df.Rd b/man/output_df.Rd new file mode 100644 index 00000000..54ea2527 --- /dev/null +++ b/man/output_df.Rd @@ -0,0 +1,49 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_output_dataframe.R +\name{output_df} +\alias{output_df} +\title{Output data frame based on model estimates and thresholds} +\usage{ +output_df( + est_eff, + beta_threshhold, + unstd_beta, + bias = NULL, + sustain = NULL, + recase, + obs_r, + critical_r, + r_con, + itcv, + non_linear +) +} +\arguments{ +\item{est_eff}{estimated effect} + +\item{beta_threshhold}{threshold for beta} + +\item{unstd_beta}{unstandardized beta value} + +\item{bias}{bias to change inference} + +\item{sustain}{sustain to change inference} + +\item{recase}{number of cases to replace null} + +\item{obs_r}{observed correlation} + +\item{critical_r}{critical correlation} + +\item{r_con}{correlation for omitted variable} + +\item{itcv}{inferential threshold for confounding variable} + +\item{non_linear}{flag for 
non-linear models} +} +\value{ +data frame with model information +} +\description{ +Output data frame based on model estimates and thresholds +} diff --git a/man/output_print.Rd b/man/output_print.Rd new file mode 100644 index 00000000..c69cd741 --- /dev/null +++ b/man/output_print.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_output_print.R +\name{output_print} +\alias{output_print} +\title{Output printed text with formatting} +\usage{ +output_print( + eff_diff, + beta_threshhold, + bias = NULL, + sustain = NULL, + nu, + recase, + obs_r, + critical_r, + r_con, + itcv, + alpha, + index +) +} +\arguments{ +\item{eff_diff}{The difference in the effect size being evaluated.} + +\item{beta_threshhold}{The threshold value of beta, used for +statistical significance determination.} + +\item{bias}{The percentage of the estimate that could be due to bias (optional).} + +\item{sustain}{The percentage of the estimate necessary to sustain an inference (optional).} + +\item{nu}{The hypothesized effect size used in replacement analysis.} + +\item{recase}{The number of cases that need to be replaced to change the inference.} + +\item{obs_r}{The observed correlation coefficient in the data.} + +\item{critical_r}{The critical correlation coefficient for statistical significance.} + +\item{r_con}{The correlation coefficient of an omitted variable with both the outcome and the predictor.} + +\item{itcv}{The impact threshold for a confounding variable.} + +\item{alpha}{The level of statistical significance.} + +\item{index}{A character string indicating the index for which the output is generated ('RIR' or 'IT').} +} +\description{ +This function outputs printed text for various indices such as RIR +(Robustness of Inference to Replacement) +and IT (Impact Threshold for a Confounding Variable) with specific formatting + like bold, underline, and italic +using functions from the crayon package. 
It handles different scenarios based + on the effect difference, +beta threshold, and other parameters, providing formatted +output for each case. +} diff --git a/man/output_table.Rd b/man/output_table.Rd new file mode 100644 index 00000000..886adfb0 --- /dev/null +++ b/man/output_table.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_output_table.R +\name{output_table} +\alias{output_table} +\title{Output a Tidy Table from a Model Object} +\usage{ +output_table(model_object, tested_variable) +} +\arguments{ +\item{model_object}{A model object from which to generate the output.} + +\item{tested_variable}{The variable being tested in the model.} +} +\value{ +A tidy data frame containing model outputs, ITCV, +and impacts for covariates. +} +\description{ +This function takes a model object and the tested variable, +tidies the model output using `broom::tidy`, +calculates the impact threshold for confounding variables (ITCV) and impact +for each covariate, and returns a rounded, tidy table of model outputs. 
+} diff --git a/man/pkonfound.Rd b/man/pkonfound.Rd index 37a41f8d..9936a6fd 100644 --- a/man/pkonfound.Rd +++ b/man/pkonfound.Rd @@ -33,9 +33,11 @@ pkonfound( ) } \arguments{ -\item{est_eff}{the estimated effect (such as an unstandardized beta coefficient or a group mean difference)} +\item{est_eff}{the estimated effect (such as an unstandardized beta +coefficient or a group mean difference)} -\item{std_err}{the standard error of the estimate of the unstandardized regression coefficient} +\item{std_err}{the standard error of the estimate of the unstandardized +regression coefficient} \item{n_obs}{the number of observations in the sample} @@ -43,31 +45,45 @@ pkonfound( \item{alpha}{probability of rejecting the null hypothesis (defaults to 0.05)} -\item{tails}{integer whether hypothesis testing is one-tailed (1) or two-tailed (2; defaults to 2)} +\item{tails}{integer whether hypothesis testing is one-tailed (1) or +two-tailed (2; defaults to 2)} -\item{index}{whether output is RIR or IT (impact threshold); defaults to "RIR"} +\item{index}{whether output is RIR or IT (impact threshold); +defaults to "RIR"} -\item{nu}{what hypothesis to be tested; defaults to testing whether est_eff is significantly different from 0} +\item{nu}{what hypothesis to be tested; defaults to testing whether +est_eff is significantly different from 0} -\item{n_treat}{the number of cases associated with the treatment condition; applicable only when model_type = "logistic"} +\item{n_treat}{the number of cases associated with the treatment condition; +applicable only when model_type = "logistic"} -\item{switch_trm}{whether to switch the treatment and control cases; defaults to FALSE; applicable only when model_type = "logistic"} +\item{switch_trm}{whether to switch the treatment and control cases; defaults +to FALSE; applicable only when model_type = "logistic"} -\item{model_type}{the type of model being estimated; defaults to "ols" for a linear regression model; the other option is "logistic"} 
+\item{model_type}{the type of model being estimated; defaults to "ols" for +a linear regression model; the other option is "logistic"} -\item{a}{cell is the number of cases in the control group showing unsuccessful results} +\item{a}{cell is the number of cases in the control group showing +unsuccessful results} -\item{b}{cell is the number of cases in the control group showing successful results} +\item{b}{cell is the number of cases in the control group showing successful +results} -\item{c}{cell is the number of cases in the treatment group showing unsuccessful results} +\item{c}{cell is the number of cases in the treatment group showing +unsuccessful results} -\item{d}{cell is the number of cases in the treatment group showing successful results} +\item{d}{cell is the number of cases in the treatment group showing +successful results} -\item{two_by_two_table}{table that is a matrix or can be coerced to one (data.frame, tibble, tribble) from which the a, b, c, and d arguments can be extracted} +\item{two_by_two_table}{table that is a matrix or can be coerced to one +(data.frame, tibble, tribble) from which the a, b, c, and d arguments can +be extracted} -\item{test}{whether using Fisher's Exact Test or A chi-square test; defaults to Fisher's Exact Test} +\item{test}{whether using Fisher's Exact Test or A chi-square test; defaults +to Fisher's Exact Test} -\item{replace}{whether using entire sample or the control group to calculate the base rate; default is the entire sample} +\item{replace}{whether using entire sample or the control group to calculate +the base rate; default is the control group} \item{sdx}{the standard deviation of X} @@ -77,17 +93,26 @@ pkonfound( \item{eff_thr}{unstandardized coefficient threshold to change an inference} -\item{FR2max}{the largest R2, or R2max, in the final model with unobserved confounder} +\item{FR2max}{the largest R2, or R2max, in the final model with unobserved +confounder} -\item{FR2max_multiplier}{the multiplier of R2 
to get R2max, default is set to 1.3} +\item{FR2max_multiplier}{the multiplier of R2 to get R2max, +default is set to 1.3} -\item{to_return}{whether to return a data.frame (by specifying this argument to equal "raw_output" for use in other analyses) or a plot ("plot"); default is to print ("print") the output to the console; can specify a vector of output to return} +\item{to_return}{whether to return a data.frame +(by specifying this argument to equal "raw_output" for use in other analyses) + or a plot ("plot"); default is to print ("print") the output to the console; + can specify a vector of output to return} } \value{ -prints the bias and the number of cases that would have to be replaced with cases for which there is no effect to invalidate the inference +prints the bias and the number of cases that would have to be +replaced with cases for which there is no effect to invalidate the inference } \description{ -For published studies, this command calculates (1) how much bias there must be in an estimate to invalidate/sustain an inference; (2) the impact of an omitted variable necessary to invalidate/sustain an inference for a regression coefficient. +For published studies, this command calculates +(1) how much bias there must be in an estimate to invalidate/sustain +an inference; (2) the impact of an omitted variable necessary to +invalidate/sustain an inference for a regression coefficient. 
} \examples{ # using pkonfound for linear models @@ -114,5 +139,9 @@ pkonfound(a = 35, b = 17, c = 17, d = 38, alpha = 0.01, switch_trm = FALSE) pkonfound(a = 35, b = 17, c = 17, d = 38, test = "chisq") # use pkonfound to calculate delta* and delta_exact - +pkonfound(est_eff = .4, std_err = .1, n_obs = 290, sdx = 2, sdy = 6, R2 = .7, + eff_thr = 0, FR2max = .8, index = "COP", to_return = "raw_output") +# use pkonfound to calculate rxcv and rycv when preserving standard error +pkonfound(est_eff = .5, std_err = .056, n_obs = 6174, eff_thr = .1, +sdx = 0.22, sdy = 1, R2 = .3, index = "PSE", to_return = "raw_output") } diff --git a/man/plot_correlation.Rd b/man/plot_correlation.Rd new file mode 100644 index 00000000..960cd99d --- /dev/null +++ b/man/plot_correlation.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_plot_correlation.R +\name{plot_correlation} +\alias{plot_correlation} +\title{Plot Correlation Diagram} +\usage{ +plot_correlation(r_con, obs_r, critical_r) +} +\arguments{ +\item{r_con}{Correlation coefficient related to the confounding variable.} + +\item{obs_r}{Observed correlation coefficient.} + +\item{critical_r}{Critical correlation coefficient for decision-making.} +} +\value{ +A ggplot object representing the correlation diagram. +} +\description{ +This function creates a plot to illustrate the correlation between different +variables, specifically focusing on the confounding variable, predictor of +interest, and outcome. It uses ggplot2 for graphical representation. 
+} diff --git a/man/plot_threshold.Rd b/man/plot_threshold.Rd new file mode 100644 index 00000000..6407c71e --- /dev/null +++ b/man/plot_threshold.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helper_plot_threshold.R +\name{plot_threshold} +\alias{plot_threshold} +\title{Plot Effect Threshold Diagram} +\usage{ +plot_threshold(beta_threshold, est_eff) +} +\arguments{ +\item{beta_threshold}{The threshold value for the effect.} + +\item{est_eff}{The estimated effect size.} +} +\value{ +A ggplot object representing the effect threshold diagram. +} +\description{ +This function creates a plot to illustrate the threshold of an effect +estimate in relation to a specified beta threshold. It uses ggplot2 +for graphical representation. +} diff --git a/man/summary.konfound.Rd b/man/summary.konfound.Rd index b1a4b7d2..0e66b35b 100644 --- a/man/summary.konfound.Rd +++ b/man/summary.konfound.Rd @@ -15,5 +15,6 @@ Concise summary of konfound output } \details{ -Prints a concise summary of konfound output with multiple types of data specified in the to_return argument +Prints a concise summary of konfound output with multiple types + of data specified in the to_return argument } diff --git a/man/test_cop.Rd b/man/test_cop.Rd new file mode 100644 index 00000000..314fb1f7 --- /dev/null +++ b/man/test_cop.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test_cop.R +\name{test_cop} +\alias{test_cop} +\title{Coefficient of Proportionality (COP) Test} +\usage{ +test_cop( + est_eff, + std_err, + n_obs, + n_covariates, + sdx, + sdy, + R2, + eff_thr = 0, + FR2max_multiplier = 1.3, + FR2max = 0, + alpha = 0.05, + tails = 2, + to_return = to_return +) +} +\arguments{ +\item{est_eff}{The estimated effect (unstandardized).} + +\item{std_err}{The standard error of the effect (unstandardized).} + +\item{n_obs}{Number of observations.} + +\item{n_covariates}{Number of covariates in the model.} 
+ +\item{sdx}{Standard deviation of the predictor variable.} + +\item{sdy}{Standard deviation of the outcome variable.} + +\item{R2}{R-squared of the model, not adjusted.} + +\item{eff_thr}{Threshold for the effect size, unstandardized.} + +\item{FR2max_multiplier}{Multiplier for R2 to get R2max.} + +\item{FR2max}{Maximum R-squared in the final model with an +unobserved confounder.} + +\item{alpha}{Significance level for hypothesis testing (default: 0.05).} + +\item{tails}{Number of tails for hypothesis testing (default: 2).} + +\item{to_return}{Type of output to return ('raw_output', 'print', or other).} +} +\value{ +A list containing results of the COP test, including delta star, + delta exact,percentage bias, and other statistical measures. + Can also print summary results. +} +\description{ +Conducts the Coefficient of Proportionality (COP) test, calculating both +Oster's approximate and exact versions of COP. +} diff --git a/man/test_sensitivity_ln.Rd b/man/test_sensitivity_ln.Rd new file mode 100644 index 00000000..c34865c2 --- /dev/null +++ b/man/test_sensitivity_ln.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/test_sensitivity_ln.R +\name{test_sensitivity_ln} +\alias{test_sensitivity_ln} +\title{Test Sensitivity for Non-linear Models} +\usage{ +test_sensitivity_ln( + est_eff, + std_err, + n_obs, + n_covariates, + n_treat, + switch_trm = T, + replace = "control", + alpha, + tails, + nu, + to_return, + model_object, + tested_variable +) +} +\arguments{ +\item{est_eff}{Estimated effect size.} + +\item{std_err}{Standard error of the estimated effect.} + +\item{n_obs}{Number of observations in the study.} + +\item{n_covariates}{Number of covariates in the model.} + +\item{n_treat}{Number of cases in the treatment group.} + +\item{switch_trm}{Logical value indicating whether to switch +treatment and control groups (default: TRUE).} + +\item{replace}{Specifies the group for base rate calculation +('control' or 
'sample').} + +\item{alpha}{Significance level for hypothesis testing.} + +\item{tails}{Number of tails for hypothesis testing.} + +\item{nu}{Hypothesized value to test the effect against.} + +\item{to_return}{Type of output to return ('raw_output', 'print', or other).} + +\item{model_object}{A model object used in the sensitivity analysis.} + +\item{tested_variable}{Name of the variable being tested in the model.} +} +\value{ +Depending on `to_return`, a list of analysis results or printed output +} +\description{ +This function performs sensitivity analysis for non-linear models. +It is used in conjunction with `pkonfound()`, `konfound()`, and `mkonfound()`. +} diff --git a/man/tkonfound.Rd b/man/tkonfound.Rd new file mode 100644 index 00000000..b3297ce2 --- /dev/null +++ b/man/tkonfound.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tkonfound.R +\name{tkonfound} +\alias{tkonfound} +\title{Perform Sensitivity Analysis on 2x2 Tables} +\usage{ +tkonfound( + a, + b, + c, + d, + alpha = 0.05, + switch_trm = T, + test = "fisher", + replace = "control", + to_return = to_return +) +} +\arguments{ +\item{a}{Number of unsuccessful cases in the control group.} + +\item{b}{Number of successful cases in the control group.} + +\item{c}{Number of unsuccessful cases in the treatment group.} + +\item{d}{Number of successful cases in the treatment group.} + +\item{alpha}{Significance level for the statistical test, default is 0.05.} + +\item{switch_trm}{Boolean indicating whether to switch treatment row cells, +default is TRUE.} + +\item{test}{Type of statistical test to use, either "fisher" +(default) or "chisq".} + +\item{replace}{Indicates whether to use the entire sample or the control +group for base rate calculation, default is "control".} + +\item{to_return}{Type of output to return, either "raw_output" or "print".} +} +\value{ +Returns detailed information about the sensitivity analysis, + including the number of cases 
to be replaced (RIR), user-entered + table, transfer table, and conclusions. +} +\description{ +This function performs a sensitivity analysis on a 2x2 contingency table. +It calculates the number of cases that need to be replaced to invalidate +or sustain the statistical inference. The function also allows switching +between treatment success and failure or control success and failure +based on the provided parameters. +} diff --git a/man/tkonfound_fig.Rd b/man/tkonfound_fig.Rd index 4a742aac..6c2c8783 100644 --- a/man/tkonfound_fig.Rd +++ b/man/tkonfound_fig.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/tkonfound_fig.R \name{tkonfound_fig} \alias{tkonfound_fig} -\title{Draw figures for change in effect size as a function of switching or replacing outcomes} +\title{Draw Figures for Change in Effect Size in 2x2 Tables} \usage{ tkonfound_fig( a, @@ -16,27 +16,34 @@ tkonfound_fig( ) } \arguments{ -\item{a}{cell is the number of cases in the control group showing unsuccessful results} +\item{a}{Number of cases in the control group with unsuccessful outcomes.} -\item{b}{cell is the number of cases in the control group showing successful results} +\item{b}{Number of cases in the control group with successful outcomes.} -\item{c}{cell is the number of cases in the treatment group showing unsuccessful results} +\item{c}{Number of cases in the treatment group with unsuccessful outcomes.} -\item{d}{cell is the number of cases in the treatment group showing successful results} +\item{d}{Number of cases in the treatment group with successful outcomes.} -\item{thr_p}{the p-value threshold used to evaluate statistical significance, with the default of 0.05} +\item{thr_p}{P-value threshold for statistical significance, default is 0.05.} -\item{switch_trm}{whether switching the two cells in the treatment row or the two cells in the control row, with the default of the treatment row} +\item{switch_trm}{Whether to switch the two cells in the treatment or +control row, default 
is TRUE (treatment row).} -\item{test}{whether using Fisher's Exact Test (default) or chi-square test} +\item{test}{Type of statistical test used, either "Fisher's Exact Test" +(default) or "Chi-square test".} -\item{replace}{whether using entire sample or the control group to calculate the base rate, with the default of the entire sample} +\item{replace}{Indicates whether to use the entire sample or just the control +group for calculating the base rate, default is "control".} } \value{ -prints 2 figures for how number of hypothetical cases switched changes the effect size +Returns two plots showing the effect of hypothetical case switches + on the effect size in a 2x2 table. } \description{ -This function returns two plots for change in effect size as a function of switching or replacing outcomes, one for all possibilities (switching), another zoomed in the area for positive RIR +This function generates plots illustrating how the change in effect size +is influenced by switching or replacing outcomes in a 2x2 table. It produces +two plots: one showing all possibilities (switching) and another zoomed in on +the area for positive RIR (Robustness of Inference to Replacement). 
} \examples{ # using tkonfound_fig for a study where 2 by 2 table is (35, 17, 17, 38) @@ -44,6 +51,7 @@ tkonfound_fig(35, 17, 17, 38) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = FALSE) tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq") -tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, test = "chisq", replace = "control") +tkonfound_fig(35, 17, 17, 38, thr_p = 0.01, switch_trm = TRUE, +test = "chisq", replace = "entire") } diff --git a/man/verify_reg_Gz.Rd b/man/verify_reg_Gz.Rd new file mode 100644 index 00000000..5b95f9f3 --- /dev/null +++ b/man/verify_reg_Gz.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{verify_reg_Gz} +\alias{verify_reg_Gz} +\title{Verify regression model with control variable Z} +\usage{ +verify_reg_Gz(n_obs, sdx, sdy, sdz, rxy, rxz, rzy) +} +\arguments{ +\item{n_obs}{number of observations} + +\item{sdx}{standard deviation of X} + +\item{sdy}{standard deviation of Y} + +\item{sdz}{standard deviation of Z} + +\item{rxy}{correlation coefficient between X and Y} + +\item{rxz}{correlation coefficient between X and Z} + +\item{rzy}{correlation coefficient between Z and Y} +} +\value{ +list of model parameters +} +\description{ +Verify regression model with control variable Z +} diff --git a/man/verify_reg_uncond.Rd b/man/verify_reg_uncond.Rd new file mode 100644 index 00000000..b860d355 --- /dev/null +++ b/man/verify_reg_uncond.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cop_pse_auxiliary.R +\name{verify_reg_uncond} +\alias{verify_reg_uncond} +\title{Verify unconditional regression model} +\usage{ +verify_reg_uncond(n_obs, sdx, sdy, rxy) +} +\arguments{ +\item{n_obs}{number of observations} + +\item{sdx}{standard deviation of X} + +\item{sdy}{standard deviation of Y} + +\item{rxy}{correlation coefficient between X and 
Y} +} +\value{ +list of model parameters +} +\description{ +Verify unconditional regression model +} diff --git a/man/zzz.Rd b/man/zzz.Rd new file mode 100644 index 00000000..7de2a125 --- /dev/null +++ b/man/zzz.Rd @@ -0,0 +1,9 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/zzz.R +\name{zzz} +\alias{zzz} +\title{Package Initialization Functions and Utilities} +\description{ +These functions are used for initializing the package environment and +providing utility functions for the package. +}